Create Master Dataset of countries with temperature data.

In [1]:
# Dependencies.
import pandas as pd

In [2]:
# Read the countries CSV (one row per country: yearly score columns plus an
# 'Initial Temperature' column) and preview the first rows.
countries_df = pd.read_csv(filepath_or_buffer='countries_happiness_init_temps.csv')
countries_df.head(n=5)

Unnamed: 0,Country name,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2021,Initial Temperature
0,Afghanistan,4.758,3.832,3.783,3.572,3.131,3.983,4.22,2.662,2.694,2.375,2.523,14.02
1,Albania,5.269,5.867,5.51,4.551,4.814,4.607,4.511,4.64,5.004,4.995,5.117,12.93
2,Argentina,6.441,6.776,6.468,6.582,6.671,6.697,6.427,6.039,5.793,6.086,5.929,14.75
3,Armenia,4.368,4.26,4.32,4.277,4.453,4.348,4.325,4.288,5.062,5.488,5.283,7.78
4,Australia,7.45,7.406,7.196,7.364,7.289,7.309,7.25,7.257,7.177,7.234,7.183,21.72


In [3]:
# Prefix the yearly score columns with 'LL_' so they cannot be confused with
# the '<year>_TEMP' temperature columns added later in this notebook.
# The labels are generated instead of typed out one by one. The table has no
# 2020 column, so the range stops at 2019 and 2021 is appended explicitly.
score_years = [*range(2010, 2020), 2021]

# rename() returns a new frame; rebinding the name avoids inplace mutation.
countries_df = countries_df.rename(
    columns={str(year): f'LL_{year}' for year in score_years}
)
countries_df.head()

Unnamed: 0,Country name,LL_2010,LL_2011,LL_2012,LL_2013,LL_2014,LL_2015,LL_2016,LL_2017,LL_2018,LL_2019,LL_2021,Initial Temperature
0,Afghanistan,4.758,3.832,3.783,3.572,3.131,3.983,4.22,2.662,2.694,2.375,2.523,14.02
1,Albania,5.269,5.867,5.51,4.551,4.814,4.607,4.511,4.64,5.004,4.995,5.117,12.93
2,Argentina,6.441,6.776,6.468,6.582,6.671,6.697,6.427,6.039,5.793,6.086,5.929,14.75
3,Armenia,4.368,4.26,4.32,4.277,4.453,4.348,4.325,4.288,5.062,5.488,5.283,7.78
4,Australia,7.45,7.406,7.196,7.364,7.289,7.309,7.25,7.257,7.177,7.234,7.183,21.72


In [4]:
# Pull the country names and their initial temperatures out as plain Python
# lists (both in the table's row order), then print the first five of each.
countries = countries_df['Country name'].to_list()
init_temps = countries_df['Initial Temperature'].to_list()
print(countries[:5], init_temps[:5], sep='\n')

['Afghanistan', 'Albania', 'Argentina', 'Armenia', 'Australia']
[14.02, 12.93, 14.75, 7.78, 21.72]


In [5]:
# Years whose June temperatures are extracted: 2010 through 2019 inclusive.
years = list(range(2010, 2020))

In [6]:
# Collect, for every country, its June temperature for each year in `years`.
# Each entry of all_temps is one country's list, ordered exactly like `years`.
all_temps = []

# Walk the country names and their initial temperatures in lockstep.
for country, init_temp in zip(countries, init_temps):
    # Each country's temperature file is named from its name and initial temperature.
    temp_df = pd.read_csv(f'be_csv/{country}_{init_temp}_temps.csv')

    # Keep only the June rows (Month == 6) of the chosen years, keyed by year.
    is_june = temp_df['Year'].isin(years) & (temp_df['Month'] == 6)
    june_anomalies = temp_df.loc[is_june].set_index('Year')['Monthly Anomaly']

    # Align to `years` by label rather than by row position: a year with no
    # June row becomes NaN in its own slot instead of shifting later years into
    # the wrong column, and the file's row order no longer matters.
    # (reindex raises ValueError if a year has more than one June row.)
    # The anomaly is added to the country's initial temperature and rounded.
    june_temps = (june_anomalies.reindex(years) + init_temp).round(3).to_list()

    # One list of len(years) values per country.
    all_temps.append(june_temps)

In [7]:
# Column labels for the June temperatures, one '<year>_TEMP' per entry of
# `years`. Deriving them from `years` keeps the labels and the extracted
# values in sync if the list of years is ever changed.
temp_cols = [f'{year}_TEMP' for year in years]

In [8]:
# Build the June temperature table: one row per country (indexed by country
# name) and one column per label in temp_cols. Preview the first rows.
june_temps_df = pd.DataFrame(data=all_temps, columns=temp_cols, index=countries)
june_temps_df.head(n=5)

Unnamed: 0,2010_TEMP,2011_TEMP,2012_TEMP,2013_TEMP,2014_TEMP,2015_TEMP,2016_TEMP,2017_TEMP,2018_TEMP,2019_TEMP
Afghanistan,14.629,16.487,14.373,16.156,15.647,15.427,15.79,16.166,15.942,13.956
Albania,13.779,14.201,15.986,13.827,13.731,13.802,14.754,15.55,14.259,15.781
Argentina,15.394,14.905,15.211,16.265,15.631,16.866,14.273,15.934,14.258,16.619
Armenia,11.088,9.183,10.238,9.049,9.722,11.219,9.15,9.795,9.855,11.905
Australia,22.088,21.167,21.212,22.482,22.13,23.099,22.988,22.175,22.015,21.936


In [9]:
# Index the countries table by country name so it can be joined with the
# June temperature table, which is indexed by country name as well.
# The guard keeps this cell re-runnable: once 'Country name' has become the
# index it is no longer a column, and calling set_index again raises KeyError.
if countries_df.index.name != 'Country name':
    # set_index() returns a new frame; rebinding avoids inplace mutation.
    countries_df = countries_df.set_index('Country name')
countries_df.head()

Unnamed: 0_level_0,LL_2010,LL_2011,LL_2012,LL_2013,LL_2014,LL_2015,LL_2016,LL_2017,LL_2018,LL_2019,LL_2021,Initial Temperature
Country name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Afghanistan,4.758,3.832,3.783,3.572,3.131,3.983,4.22,2.662,2.694,2.375,2.523,14.02
Albania,5.269,5.867,5.51,4.551,4.814,4.607,4.511,4.64,5.004,4.995,5.117,12.93
Argentina,6.441,6.776,6.468,6.582,6.671,6.697,6.427,6.039,5.793,6.086,5.929,14.75
Armenia,4.368,4.26,4.32,4.277,4.453,4.348,4.325,4.288,5.062,5.488,5.283,7.78
Australia,7.45,7.406,7.196,7.364,7.289,7.309,7.25,7.257,7.177,7.234,7.183,21.72


In [10]:
# Combine both tables on their shared country-name index. Every row of
# countries_df is kept (left join) and the June temperature columns are
# appended to the right of the existing columns.
full_df = countries_df.join(other=june_temps_df, how='left')
full_df.head(n=5)

Unnamed: 0_level_0,LL_2010,LL_2011,LL_2012,LL_2013,LL_2014,LL_2015,LL_2016,LL_2017,LL_2018,LL_2019,...,2010_TEMP,2011_TEMP,2012_TEMP,2013_TEMP,2014_TEMP,2015_TEMP,2016_TEMP,2017_TEMP,2018_TEMP,2019_TEMP
Country name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,4.758,3.832,3.783,3.572,3.131,3.983,4.22,2.662,2.694,2.375,...,14.629,16.487,14.373,16.156,15.647,15.427,15.79,16.166,15.942,13.956
Albania,5.269,5.867,5.51,4.551,4.814,4.607,4.511,4.64,5.004,4.995,...,13.779,14.201,15.986,13.827,13.731,13.802,14.754,15.55,14.259,15.781
Argentina,6.441,6.776,6.468,6.582,6.671,6.697,6.427,6.039,5.793,6.086,...,15.394,14.905,15.211,16.265,15.631,16.866,14.273,15.934,14.258,16.619
Armenia,4.368,4.26,4.32,4.277,4.453,4.348,4.325,4.288,5.062,5.488,...,11.088,9.183,10.238,9.049,9.722,11.219,9.15,9.795,9.855,11.905
Australia,7.45,7.406,7.196,7.364,7.289,7.309,7.25,7.257,7.177,7.234,...,22.088,21.167,21.212,22.482,22.13,23.099,22.988,22.175,22.015,21.936


In [11]:
# Write the combined table to a CSV file; the index is written as the first column.
full_df.to_csv(path_or_buf='MAIN_countries_happiness_temps_full.csv', index=True)