In [1]:
import altair as alt
from altair import datum
import pandas as pd
import os 
from vega_datasets import data as vegadata

# Combine the datasets into one

In [2]:
def modifyDataset(temps, GlobalTemps, hdi, countries):
    """Normalise the four raw tables so they share ``year``/``country`` keys.

    Parameters
    ----------
    temps : pandas.DataFrame
        Per-country readings with a string ``dt`` column and a ``Country``
        column. Assumes ``dt`` is zero-padded ``YYYY-MM-DD`` text (the year is
        read from characters 0-3 and the month from characters 5-6).
    GlobalTemps : pandas.DataFrame
        Monthly global readings with a date-like ``year`` column plus
        ``LandMaxTemperature`` and ``LandMinTemperature``.
    hdi : pandas.DataFrame
        Table with ``Year``, ``Entity`` and the long
        ``Historical Index of Human Development ...`` column.
    countries : pandas.DataFrame
        Table with ``name`` and ``country-code`` columns.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(temps, GlobalTemps, hdi, countries)`` as new frames; the inputs are
        left untouched.

        * ``temps``: June rows only, ``dt`` -> integer ``year``,
          ``Country`` -> ``country``.
        * ``GlobalTemps``: one row per year (the December row), integer
          ``year``, ``LandMaxTemperature`` replaced by that year's maximum and
          ``LandMinTemperature`` by that year's minimum.
        * ``hdi``: columns renamed to ``year``, ``country``, ``HDI``.
        * ``countries``: columns renamed to ``country``, ``id``.
    """
    # Copy first: the caller's frame must keep its original 'year' column.
    GlobalTemps = GlobalTemps.copy()
    dates = pd.to_datetime(GlobalTemps['year'])
    GlobalTemps['year'] = dates.dt.year.astype('int64')

    # Per-year extremes, computed only from the months actually present in
    # that year (NaN readings are skipped), broadcast back onto every row.
    by_year = GlobalTemps.groupby('year')
    GlobalTemps['LandMaxTemperature'] = by_year['LandMaxTemperature'].transform('max')
    GlobalTemps['LandMinTemperature'] = by_year['LandMinTemperature'].transform('min')

    # Keep a single row per year: the December one.
    GlobalTemps = GlobalTemps[dates.dt.month == 12]

    # Keep only the June reading of every year for the per-country table.
    temps = temps[temps['dt'].str[5:7] == '06']
    temps = temps.rename(columns={'dt': 'year', 'Country': 'country'})
    temps['year'] = temps['year'].str[:4].astype('int64')

    hdi = hdi.rename(columns={
        'Year': 'year',
        'Entity': 'country',
        'Historical Index of Human Development (Prados de la Escosura)': 'HDI',
    })
    countries = countries.rename(columns={'name': 'country', 'country-code': 'id'})
    return temps, GlobalTemps, hdi, countries

In [3]:
# Read every raw table from the Data folder under the current working directory
cwd = os.getcwd()
co2 = pd.read_csv(cwd + '/Data/CO2.csv', sep=',')
countries = pd.read_csv(cwd + '/Data/Continents.csv', sep=',')
temps = pd.read_csv(cwd + '/Data/GlobalLandTemperaturesByCountry.csv', sep=',')
hdi = pd.read_csv(cwd + '/Data/HDI.csv', sep=',')
GlobalTemps = pd.read_csv(cwd + '/Data/GlobalTemperatures.csv', sep=',')

# Columns kept in the combined table (this list is extended further down)
relevant_columns = [
    'country', 'year', 'co2', 'AverageTemperature', 'population',
    'share_global_co2', 'id', 'AverageTemperatureUncertainty', 'gdp', 'HDI',
    'LandMaxTemperature', 'LandMinTemperature',
]

# Align column names / year keys across the tables and keep June readings only
temps, GlobalTemps, hdi, countries = modifyDataset(temps, GlobalTemps, hdi, countries)

# Join everything onto the CO2 table, then trim to the columns of interest
data = (
    co2
    .merge(temps, on=['year', 'country'])
    .merge(hdi, on=['year', 'country'])
    .merge(GlobalTemps, on=['year'])
    .merge(countries, on=['country'])
)[relevant_columns]
data.to_csv("Final_data.csv")

In [4]:
def temp_change(dataset, start_year, end_year):
    """Attach each country's temperature spread over a year range.

    For every country, ``TempChange`` is the difference between the largest
    and smallest ``AverageTemperature`` among its rows whose ``year`` lies in
    ``[start_year, end_year]`` (both ends inclusive).

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain ``country``, ``year`` and ``AverageTemperature`` columns.
        An existing ``TempChange`` column is replaced, so the function can be
        applied repeatedly.
    start_year, end_year
        Inclusive bounds compared against ``dataset['year']``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the rows of ``dataset`` (fresh integer index) plus a
        float ``TempChange`` column. Countries with no row inside the range
        get NaN.
    """
    in_range = dataset[dataset['year'].between(start_year, end_year)]
    per_country = in_range.groupby('country')['AverageTemperature']
    result = (
        (per_country.max() - per_country.min())
        .astype(float)
        .rename('TempChange')
        .reset_index()
    )

    # Merge into the frame that was passed in (not a notebook global), after
    # discarding any stale TempChange so no _x/_y suffixed duplicates appear.
    dataset = dataset.drop(columns=['TempChange'], errors='ignore')
    # A left join keeps countries that have no readings inside the range.
    return pd.merge(dataset, result, on=['country'], how='left')

In [5]:
# Country outlines (TopoJSON 'countries' feature) used as the map geometry.
source = alt.topo_feature(vegadata.world_110m.url, 'countries')

# Temperature spread per country across the full span of years in the data.
max_year = data['year'].max()
data = temp_change(data, data['year'].min(), max_year)

# Guarded so that re-running this cell does not list 'TempChange' twice.
if 'TempChange' not in relevant_columns:
    relevant_columns.append('TempChange')

# Only the most recent year is shown on the map / parallel coordinates.
single_year_data = data[data['year'] >= max_year]

# Create the map

In [18]:
# Multi-selection shared by the map and the parallel-coordinates chart.
selector = alt.selection_multi()

# Dropdown bound to the 'country' field, offering each distinct country in
# sorted order.
country_options = sorted(dict.fromkeys(data['country']))
dropdown = alt.binding_select(options=country_options)
selector_dropdown = alt.selection_single(
    name='Selected',
    bind=dropdown,
    fields=['country'],
)

In [7]:
# Hover details for each country shape.
map_tooltip = [
    alt.Tooltip('country:N', title='Country'),
    alt.Tooltip('co2:Q', title='CO2'),
    alt.Tooltip('AverageTemperature:Q', title='Ave Temperature'),
    alt.Tooltip('population:Q', title='Population'),
]

# Shapes matching `selector` are drawn 'Blue'; the rest are shaded by
# TempChange on the "orangered" scheme.
map_colour = alt.condition(
    selector,
    alt.value('Blue'),
    alt.Color("TempChange:Q", scale=alt.Scale(scheme="orangered")),
)

# World map: each shape is joined to its latest-year data row through "id".
chart = (
    alt.Chart(source)
    .mark_geoshape(stroke='blue')
    .transform_lookup(
        lookup="id",
        from_=alt.LookupData(single_year_data, "id", relevant_columns),
    )
    .encode(color=map_colour, tooltip=map_tooltip)
    .properties(width=700, height=400)
    .add_selection(selector)
)

In [8]:
# Parallel-coordinates view of the latest-year data: one line per map shape,
# one x position per indicator, each indicator rescaled to 0-100.
parallel_coords = alt.Chart(source).transform_window(
    # Running row count, used below as a per-shape line identifier.
    index='count()'
).transform_lookup(
    lookup="id",
    from_=alt.LookupData(single_year_data, "id", relevant_columns),
).transform_fold(
    # Wide -> long: produces 'key' (indicator name) and 'value' columns.
    ['AverageTemperature', 'share_global_co2', 'population','gdp', 'HDI']
).transform_joinaggregate(
    # Range of each indicator, needed for the min-max normalisation.
     min='min(value)',
     max='max(value)',
     groupby=['key']
).transform_calculate(
    normalised=((datum.value-datum.min)/(datum.max-datum.min))*100
).mark_line().encode(
    alt.X('key:N', title=''),
    alt.Y('normalised:Q', title='Normalised Value'),
    detail='index:N',
    # Lines matching `selector` are red and fully opaque, the rest blue and
    # half transparent. Opacity must stay within [0, 1].
    color=alt.condition(selector, alt.value('Red'), alt.value('Blue')),
    opacity=alt.condition(selector, alt.value(1.0), alt.value(0.5))
).properties(width=500, height=350)

In [9]:
# Map on top, parallel-coordinates chart underneath.
alt.vconcat(chart, parallel_coords)

In [42]:
# Fixed colour for each folded temperature series.
scale = alt.Scale(
    domain=['LandMinTemperature', 'AverageTemperature', 'LandMaxTemperature'],
    range=['blue', 'green', 'red'],
)

# Line chart of the three temperature series over time, restricted to the
# rows matching the dropdown selection.
temp_chart = (
    alt.Chart(data)
    .transform_fold(
        fold=['LandMinTemperature', 'LandMaxTemperature', 'AverageTemperature'],
        as_=['variable', 'Temperature'],
    )
    .transform_filter(selector_dropdown)
    .mark_line()
    .encode(
        x='year',
        y='Temperature:Q',
        color=alt.Color(
            'variable:N',
            legend=alt.Legend(orient='bottom-left'),
            scale=scale,
        ),
    )
    .properties(width=300, height=300)
    .add_selection(selector_dropdown)
)

In [43]:
def make_chart(data, dataline, selector):
    """Build a small line chart of one quantitative column against year.

    Parameters
    ----------
    data
        Data source handed to ``alt.Chart``.
    dataline : str
        Name of the column plotted on the y axis (encoded as quantitative).
    selector
        Selection used as the chart's filter predicate.

    Returns
    -------
    A 300x100 line chart with ``year`` on x, ``dataline`` on y and one line
    per ``country``, showing only the rows that pass ``selector``.
    """
    y_encoding = dataline + ":Q"
    lines = alt.Chart(data).mark_line()
    lines = lines.encode(x='year:Q', y=y_encoding, detail='country:N')
    sized = lines.properties(width=300, height=100)
    return sized.transform_filter(selector)

In [44]:
# One small chart per indicator, each filtered by the country dropdown.
a, b, c = (
    make_chart(data, indicator, selector_dropdown)
    for indicator in ("HDI", "gdp", "population")
)

# Temperature chart on the left, the three indicator charts stacked beside it.
temp_chart | (a & b & c)