# Final project

In [2]:
import pandas as pd

In [3]:
# gt = pd.read_csv('climate-change-earth-surface-temperature-data/GlobalTemperatures.zip')
# gtc = pd.read_csv('climate-change-earth-surface-temperature-data/GlobalLandTemperaturesByCity.zip')
# gtmc = pd.read_csv('climate-change-earth-surface-temperature-data/GlobalLandTemperaturesByMajorCity.zip')
# gts = pd.read_csv('climate-change-earth-surface-temperature-data/GlobalLandTemperaturesByState.zip')
gtco = pd.read_csv('climate-change-earth-surface-temperature-data/GlobalLandTemperaturesByCountry.zip')

In [4]:
import matplotlib.pyplot as plt 
%matplotlib inline

import altair as alt
alt.data_transformers.disable_max_rows()
from jupyterthemes import jtplot
jtplot.style('chesterish')

# Temperature dynamics by country

In [5]:
import geopandas as gpd
from tqdm.notebook import tqdm

In [6]:
# world = gpd.GeoDataFrame.from_file('https://raw.githubusercontent.com/AndrewGolovko/DataVisualization/master/hw4/world.geojson')
world = gpd.GeoDataFrame.from_file('world.geojson')
world.region_un = world.region_un.apply(lambda x: 'Asia' if x == 'Oceania' else x)
world = json.loads(world.to_json())

In [7]:
country_name_map = \
{
    'Bosnia and Herz.': 'Bosnia And Herzegovina',
    'Central African Rep.': 'Central African Republic',
    "Côte d'Ivoire": "Côte D'Ivoire",
    'Czech Rep.': 'Czech Republic',
    'Dem. Rep. Congo': 'Congo (Democratic Republic Of The)',
    'Dominican Rep.': 'Dominican Republic',
    'Denmark': 'Denmark (Europe)',
    'Eq. Guinea': 'Equatorial Guinea',
    'Falkland Is.' : 'Falkland Islands (Islas Malvinas)',
    'France': 'France (Europe)',
    'Guinea-Bissau': 'Guinea-Bissau',
    'Lao PDR': 'Laos',
    'N. Cyprus': 'Cyprus',
    'Netherlands': 'Netherlands (Europe)',
    'Solomon Is.': 'Solomon Islands',
    'Timor-Leste': 'Timor Leste',
    'Trinidad and Tobago': 'Trinidad And Tobago',
    'United Kingdom': 'United Kingdom (Europe)',
    'W. Sahara': 'Western Sahara',
}

def country_name_mapper(name):
    if name in country_name_map.keys():
        return country_name_map[name]
    return name

In [8]:
gtco_cleaned = gtco.dropna(axis=0)

In [9]:
map_data = []
# gtco_cleaned = gtco.groupby(['year', 'Country']).mean().reset_index() #gtco.fillna(0)#.dropna(axis=0)
for d in tqdm(world['features']):
#     print(d['properties']['name'])
    country_name = country_name_mapper(d['properties']['name'])
    temp_df = gtco_cleaned[gtco_cleaned.Country == country_name]
    if temp_df.shape[0] > 0:
        map_data.append(
            {
                'id': d['id'],
                'type': d['type'],
                'properties': {
                    'name': country_name,
                    'avg_temp': temp_df['AverageTemperature'].mean(),
                },
                'geometry': d['geometry']
            }
        )
        

gtco_cleaned = gtco_cleaned[gtco_cleaned.Country.isin(pd.DataFrame(list(pd.DataFrame(map_data)['properties'])).name.values)]

HBox(children=(FloatProgress(value=0.0, max=175.0), HTML(value='')))




In [60]:
bg_color = '#F3F7F7'
titlecolor = '#3A3F4A'
titlesize = 36
textcolor = '#5D646F'
textsize = 12
font = 'Ubuntu Mono'

In [107]:
continent_selection = alt.selection_single(
    fields = ['Country']
)

map_ = alt.Chart(
    alt.Data(
        values = map_data
    )
).mark_geoshape(
    stroke=None
).encode(
    color = alt.Color(
        'avg_temp:Q',
        scale=alt.Scale(
            range = ["#00D5E9", "#FF5872"]
        ), 
        legend = None
    ),
    opacity = alt.condition(
        continent_selection, 
        alt.value(0.9), 
        alt.value(0.15)
    ),
    tooltip = alt.Tooltip('Country:N')
).add_selection(
    continent_selection
).transform_calculate(
    Country = 'datum.properties.name',
    avg_temp = 'datum.properties.avg_temp'
).properties(
    width = 800, 
    height = 480
)

map_;

In [108]:
xaxis = alt.Axis(
        title='year',
#                  titleFontWeight='lighter',
                titleFontSize=textsize+2,
                 titleFont=font,
                 titleAnchor='middle',
#                  titleY=200,
                 titleColor=textcolor,
     domain=False, 
     ticks=False,
                 )

yaxis = alt.Axis(
        title='temperature moving average (40 years window), °C',
#                  titleFontWeight='lighter',
                titleFontSize=textsize+2,
                 titleFont=font,
                 titleAnchor='middle',
#                  titleY=200,
                 titleColor=textcolor,
     domain=False, 
     ticks=False,
                 )

chart = alt.Chart(gtco_cleaned).mark_line().encode(
    x=alt.X(
        'year(dt):T',
        axis=xaxis
    ),
    y=alt.Y(
        'mean(moving_average):Q',
        scale=alt.Scale(
            domain=[-20,30]
        ),
        axis=yaxis
    ),
    color=alt.Color(
        'Country',
        legend=None
    ),
    opacity = alt.condition(
        continent_selection, 
        alt.value(1), 
        alt.value(0.05)
    ),
    tooltip = alt.Tooltip('Country')
).transform_window(
    frame=[-40, 0],
    moving_average='mean(AverageTemperature)'
).add_selection(
    continent_selection
).transform_filter(
    'year(datum.dt) > 1880'
).properties(
    width = 800, 
    height = 480,
)

chart;

In [None]:
alt.hconcat(chart, map_).configure_title(
    color=titlecolor,
    font=font,
    fontSize=titlesize,
    anchor='start'
).properties(
    padding={'left': 20, 'top': 20, 'right': 25, 'bottom': 20},
    title=alt.TitleParams('Temperature dynamics by country',anchor='middle', dx=-0),
    background=bg_color
).configure_view(
    stroke=None
).configure_axis( 
     domain=False, 
     ticks=False,
     labelColor = textcolor,
     labelFontSize = textsize,
     labelFont = font
)

# Temperature growth by country

In [36]:
year = gtco.dt.str[:4].astype(int)
temp1900 = gtco[(1900 < year) & (year < 1910)].groupby('Country').mean()[['AverageTemperature']].rename(columns={'AverageTemperature' : '1900s'})

In [37]:
year = gtco.dt.str[:4].astype(int)
temp2000 = gtco[(2000 < year) & (year < 2010)].groupby('Country').mean()[['AverageTemperature']].rename(columns={'AverageTemperature' : '2000s'})

In [38]:
tempdiff = pd.concat([temp1900, temp2000], axis=1)
tempdiff['diff'] = temp2000['2000s'] - temp1900['1900s']
tempdiff = tempdiff.sort_values('1900s').reset_index().rename(columns={'index': 'country'}).dropna(axis=0)
tempdiff

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


Unnamed: 0,country,1900s,2000s,diff
0,Greenland,-19.045880,-17.002259,2.043620
1,Denmark,-18.508231,-16.472602,2.035630
2,Svalbard And Jan Mayen,-7.757380,-5.856889,1.900491
3,Russia,-5.783537,-4.010009,1.773528
4,Canada,-5.477667,-3.833963,1.643704
...,...,...,...,...
235,Burkina Faso,27.508102,28.921935,1.413833
236,Senegal,27.553713,28.793694,1.239981
237,Aruba,27.665769,28.724204,1.058435
238,Mali,27.910769,29.305500,1.394731


In [39]:
domain = (0.6, 2.2)
color_scale = ["#00D5E9", "#FF5872"]


bg_color = '#F3F7F7'
titlecolor = '#3A3F4A'
titlesize = 36
textcolor = '#5D646F'
textsize = 12
font = 'Ubuntu Mono'
opacity=0.9

In [40]:
countries_to_show = ('Greenland', 'Svalbard And Jan Mayen', 'Russia',
       'Canada', 'Mongolia', 'Norway', 'Finland', 'Iceland',
       'North America', 'Sweden', 'Kyrgyzstan', 'Tajikistan',
       'Liechtenstein', 'Estonia',
       'Kazakhstan', 'Latvia', 'Belarus', 'Lithuania',
       'Austria', 'China',
       'Switzerland', 'Poland',
       'Slovakia', 'Czech Republic', 'Ukraine',
       'Denmark (Europe)', 'Germany', 'Georgia', 'Armenia',
       'United States', 'United Kingdom (Europe)',
       'Romania', 'Moldova', 'Monaco', 'Netherlands (Europe)',
       'Luxembourg', 'Belgium', 'Chile', 'Ireland',
       'Slovenia', 'Hungary', 'Serbia', 'New Zealand',
       'Montenegro', 'France (Europe)', 'Bosnia And Herzegovina',
       'Macedonia', 'Bulgaria', 'Andorra',
       'Azerbaijan', 'Croatia', 'Turkey', 'Japan', 'South Korea',
       'Bhutan', 'Uzbekistan', 'Albania', 'Italy', 'France', 'Lesotho',
       'Spain', 'San Marino', 'Afghanistan', 'Argentina', 'Portugal',
       'Turkmenistan', 'Greece', 'Nepal', 'South Africa', 'Uruguay',
       'Morocco', 'Iran', 'Lebanon', 'Syria', 'Malta', 'Swaziland',
       'Cyprus', 'Rwanda', 'Jordan', 'Israel', 'Tunisia',
       'Peru', 'Burundi', 'Namibia', 'Mexico', 'Pakistan', 'Bolivia',
       'Zimbabwe', 'Zambia', 'Malawi', 'Australia', 'Botswana',
       'Ecuador', 'Angola', 'Iraq', 'Taiwan', 'Libya',
       'Tanzania', 'Egypt',
       'Madagascar', 'Ethiopia', 'Algeria', 'Guatemala',
       'Tonga', 'Uganda', 'Paraguay',
       'Mozambique', 'Congo (Democratic Republic Of The)', 'Laos',
       'Vietnam', 'Burma', 'Kenya', 'Gabon',
       'India', 'Cameroon', 'Papua New Guinea', 'Congo', 'Honduras',
       'Colombia', 'Equatorial Guinea', 'El Salvador', 'Fiji', 'Brazil',
       'Belize', 'Venezuela', 'Bangladesh', 'Kuwait', 'Bahamas',
       'Central African Republic', 'Liberia', 'Guinea', 'Cuba',
        'Costa Rica', 'Puerto Rico', 'Saudi Arabia',
       'Dominican Republic', 'Indonesia',
        'Comoros', "Côte D'Ivoire",
       'Nicaragua', 'Malaysia',
       'Sierra Leone', 'Bahrain',
       'Trinidad And Tobago', 'Dominica', 'Jamaica',
       'Thailand', 'Yemen', 'Samoa', 'Panama', 'Philippines',
       'Haiti',
       'Virgin Islands', 'Nigeria', 
       'Barbados', 'Ghana', 'Solomon Islands', 'Antigua And Barbuda',
       'Singapore', 'Seychelles', 'Kiribati', 'Togo', 
       'Guinea Bissau', 
       'Federated States Of Micronesia', 'Eritrea',
        'Grenada', 'Somalia', 'Cambodia',
       'Benin', 'Qatar', 'Oman',
       'Palau', 'Chad', 'Sudan', 'Niger',
       'Sri Lanka',
       'Gambia', 'Mauritania', 'United Arab Emirates', 'Burkina Faso',
       'Senegal', 'Mali', 'Djibouti')

In [41]:
legend = pd.DataFrame({'diff':np.linspace(0.6,2.2, 9).round(3)})

xaxis = alt.Axis(
                 title='temperature growth between 1900s and 2000s, °C',
                 titleFontWeight='lighter',
                 titleFont=font,
                titleFontSize=textsize,
                 titleAnchor='start',
                 titleY=-25,
                 titleColor=textcolor,
                 grid=False, 
                 domain=False, 
                 ticks=False,
                 labelAngle = 0,
                 labelColor = textcolor,
                 labelFontSize = textsize,
                 labelFont = font,
                 labelAlign='center'
                 )

legend = alt.Chart(legend).mark_rect(opacity=opacity).encode(
    x = alt.X('diff:N', axis=xaxis),
    color = alt.Color('diff:Q', legend=None,
        scale=alt.Scale(domain=domain, range=color_scale)
    )
).properties(
    height=8, width=1540, title=''
)

In [48]:
open_close_color = alt.condition("datum.1900s < datum.2000s",
                                 alt.value("#06982d"),
                                 alt.value("#ae1325"))

xaxis = alt.Axis(
        title='The lower bound of bar = mean temperature between 1900th and 1910th. The upper bound of bar = mean temperature between 2000th and 2010th.',
                 titleFontWeight='lighter',
                titleFontSize=textsize+2,
                 titleFont=font,
                 titleAnchor='start',
                 titleY=200,
                 titleColor=textcolor
                 )

yaxis = alt.Axis(
        title='mean temperature, °C',
#                  titleFontWeight='lighter',
                titleFontSize=textsize+2,
                 titleFont=font,
                 titleAnchor='middle',
#                  titleY=200,
                 titleColor=textcolor,
                 )

bar = alt.Chart(tempdiff[tempdiff.country.isin(countries_to_show)]).mark_bar().encode(
    x=alt.X(
        'country:N',
        sort=alt.Sort(field='1900s', order='descending'),\
        axis=xaxis
    ),
    y=alt.Y('1900s', axis=yaxis),
    y2='2000s',
    color = alt.Color(
        'diff:Q', 
        legend=None,
        scale=alt.Scale(domain=domain, range=["#00D5E9", "#FF5872"])
    )
).properties(
    height=480, width=1540, title=''
)

In [49]:
chart = alt.vconcat(legend, bar).configure_title(
    color=titlecolor,
    font=font,
    fontSize=titlesize,
    anchor='start'
).properties(
    padding={'left': 20, 'top': 20, 'right': 25, 'bottom': 20},
    title=alt.TitleParams('Temperature growth by country',anchor='middle', dx=-50),
    background=bg_color
).configure_view(
    stroke=None
).configure_axis(
     title=None,
     grid=True, 
     domain=False, 
     ticks=False,
     labelColor = textcolor,
     labelFontSize = textsize-2,
     labelFont = font
).configure_axisY(
    labelFontSize = textsize
)

chart