### Citation: University of Dayton - Environmental Protection Agency Average Daily Temperature Archive,
http://academic.udayton.edu/kissock/http/Weather/default.htm

### Terms of Use: Source data for this site are from the National Climatic Data Center. The data is available for research and non-commercial purposes only. 

In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('../city_temperature.csv', low_memory=False)

In [3]:
df.drop(['State', 'Region'], axis=1, inplace=True)

In [4]:
df.sort_values(by='Country', ascending=True)
df.head()

Unnamed: 0,Country,City,Month,Day,Year,AvgTemperature
0,Algeria,Algiers,1,1,1995,64.2
1,Algeria,Algiers,1,2,1995,49.4
2,Algeria,Algiers,1,3,1995,48.8
3,Algeria,Algiers,1,4,1995,46.4
4,Algeria,Algiers,1,5,1995,47.9


In [5]:
df[df['Country'] == 'Albania']

Unnamed: 0,Country,City,Month,Day,Year,AvgTemperature
623377,Albania,Tirana,1,1,1995,-99.0
623378,Albania,Tirana,1,2,1995,-99.0
623379,Albania,Tirana,1,3,1995,-99.0
623380,Albania,Tirana,1,4,1995,-99.0
623381,Albania,Tirana,1,5,1995,-99.0
...,...,...,...,...,...,...
632638,Albania,Tirana,5,9,2020,62.4
632639,Albania,Tirana,5,10,2020,60.5
632640,Albania,Tirana,5,11,2020,65.2
632641,Albania,Tirana,5,12,2020,68.5


In [6]:
table = pd.pivot_table(df, values='AvgTemperature', index=['Country', 'Year'], aggfunc=np.mean)

In [7]:
table.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,AvgTemperature
Country,Year,Unnamed: 2_level_1
Albania,1995,-99.0
Albania,1996,-99.0
Albania,1997,-88.228767
Albania,1998,-46.19589
Albania,1999,26.696164


In [8]:
table.reset_index(inplace=True)

In [9]:
new_df = table

In [10]:
new_df.head()

Unnamed: 0,Country,Year,AvgTemperature
0,Albania,1995,-99.0
1,Albania,1996,-99.0
2,Albania,1997,-88.228767
3,Albania,1998,-46.19589
4,Albania,1999,26.696164


In [11]:
new_df = new_df[new_df['Year'] >= 2008]

In [12]:
new_df.head()

Unnamed: 0,Country,Year,AvgTemperature
13,Albania,2008,59.79071
14,Albania,2009,59.401096
15,Albania,2010,60.568767
16,Albania,2011,59.91863
17,Albania,2012,59.884426


In [13]:
len(new_df['Country'].unique())

121

In [14]:
len(df['Country'].unique())

125

In [15]:
# Check missing countries
for country in df['Country'].unique():
    if country not in new_df['Country'].unique():
        print(country)
        
# These countries do not have data beyond 2007

Burundi
Georgia
Serbia-Montenegro
Israel


In [16]:
new_df.reset_index(drop=True, inplace=True)
new_df.head(10)

Unnamed: 0,Country,Year,AvgTemperature
0,Albania,2008,59.79071
1,Albania,2009,59.401096
2,Albania,2010,60.568767
3,Albania,2011,59.91863
4,Albania,2012,59.884426
5,Albania,2013,60.413151
6,Albania,2014,60.215616
7,Albania,2015,60.090164
8,Albania,2016,58.986885
9,Albania,2017,60.384658


In [17]:
base_df = pd.read_csv('resources/environment_happiness.csv', index_col=0)
base_df.head(10)

Unnamed: 0,Country,Code,Year,Annual_CO2_emission_tonnes,Life_ladder,Basic_drinking_water,Air_pollutant_pm2.5
0,Afghanistan,AFG,2008,4198329.64,3.724,43.31,
1,Afghanistan,AFG,2009,6760313.49,4.402,45.78,
2,Afghanistan,AFG,2010,8452224.14,4.758,48.29,49.8
3,Afghanistan,AFG,2011,12226017.38,3.832,50.83,50.3
4,Afghanistan,AFG,2012,10742056.72,3.783,53.4,54.67
5,Afghanistan,AFG,2013,10000414.29,3.572,56.01,54.89
6,Afghanistan,AFG,2014,9789185.57,3.131,58.66,59.05
7,Afghanistan,AFG,2015,10107428.68,3.983,61.34,54.89
8,Afghanistan,AFG,2016,12257907.69,4.22,64.05,55.14
9,Afghanistan,AFG,2017,13014739.66,2.662,66.81,


In [18]:
all_df = base_df.merge(new_df, how='left', on=['Country', 'Year'])
all_df.head(10)

Unnamed: 0,Country,Code,Year,Annual_CO2_emission_tonnes,Life_ladder,Basic_drinking_water,Air_pollutant_pm2.5,AvgTemperature
0,Afghanistan,AFG,2008,4198329.64,3.724,43.31,,
1,Afghanistan,AFG,2009,6760313.49,4.402,45.78,,
2,Afghanistan,AFG,2010,8452224.14,4.758,48.29,49.8,
3,Afghanistan,AFG,2011,12226017.38,3.832,50.83,50.3,
4,Afghanistan,AFG,2012,10742056.72,3.783,53.4,54.67,
5,Afghanistan,AFG,2013,10000414.29,3.572,56.01,54.89,
6,Afghanistan,AFG,2014,9789185.57,3.131,58.66,59.05,
7,Afghanistan,AFG,2015,10107428.68,3.983,61.34,54.89,
8,Afghanistan,AFG,2016,12257907.69,4.22,64.05,55.14,
9,Afghanistan,AFG,2017,13014739.66,2.662,66.81,,


In [19]:
all_df['AvgTempCelcius'] = (all_df['AvgTemperature']-32)*(5/9)

In [26]:
missing_temp = all_df[all_df['AvgTempCelcius'].isnull() == True]
print(missing_temp['Country'].unique())

['Afghanistan' 'Angola' 'Armenia' 'Azerbaijan' 'Bangladesh' 'Bhutan'
 'Bosnia and Herzegovina' 'Botswana' 'Burkina Faso' 'Burundi' 'Cambodia'
 'Cameroon' 'Chad' 'Chile' 'Comoros' 'Cyprus' 'Djibouti' 'Ecuador'
 'El Salvador' 'Estonia' 'Georgia' 'Ghana' 'Greece' 'Iran' 'Iraq' 'Israel'
 'Jamaica' 'Kenya' 'Lesotho' 'Liberia' 'Libya' 'Lithuania' 'Luxembourg'
 'Malawi' 'Mali' 'Malta' 'Mauritius' 'Moldova' 'Montenegro' 'Myanmar'
 'Netherlands' 'Niger' 'Paraguay' 'Peru' 'Rwanda' 'Serbia' 'Sierra Leone'
 'Slovenia' 'Sudan' 'Swaziland' 'Thailand' 'Trinidad and Tobago'
 'United States' 'Yemen' 'Zambia' 'Zimbabwe']


In [28]:
all_df[all_df['Country'] == 'Tanzania']

Unnamed: 0,Country,Code,Year,Annual_CO2_emission_tonnes,Life_ladder,Basic_drinking_water,Air_pollutant_pm2.5,AvgTemperature,AvgTempCelcius
1020,Tanzania,TZA,2008,5937408.29,4.385,,,70.028142,21.126746
1021,Tanzania,TZA,2009,5693926.63,3.408,,,76.101644,24.500913
1022,Tanzania,TZA,2010,6840606.6,3.229,,,43.10137,6.167428
1023,Tanzania,TZA,2011,7807018.47,4.074,,,76.930959,24.961644
1024,Tanzania,TZA,2012,9238081.98,4.007,,,78.35,25.75
1025,Tanzania,TZA,2013,10912008.98,3.852,,,78.637808,25.909893
1026,Tanzania,TZA,2014,11241286.62,3.483,,,76.685792,24.82544
1027,Tanzania,TZA,2015,12453503.48,3.661,,,77.398087,25.22116
1028,Tanzania,TZA,2016,13604691.94,2.903,,,77.290437,25.161354
1029,Tanzania,TZA,2017,14230527.46,3.347,,,79.815342,26.564079


In [21]:
all_df[all_df['AvgTempCelcius'].isnull() == False].head(10)

Unnamed: 0,Country,Code,Year,Annual_CO2_emission_tonnes,Life_ladder,Basic_drinking_water,Air_pollutant_pm2.5,AvgTemperature,AvgTempCelcius
10,Albania,ALB,2009,4374816.0,5.485,90.93,,59.401096,15.222831
11,Albania,ALB,2010,4594656.0,5.269,91.38,20.09,60.568767,15.871537
12,Albania,ALB,2011,5235856.0,5.867,91.81,22.77,59.91863,15.51035
13,Albania,ALB,2012,4906096.0,5.51,92.23,20.82,59.884426,15.491348
14,Albania,ALB,2013,5059984.0,4.551,92.63,19.91,60.413151,15.785084
15,Albania,ALB,2014,5712176.0,4.814,93.02,19.8,60.215616,15.675342
16,Albania,ALB,2015,5947724.0,4.607,93.39,19.06,60.090164,15.605647
17,Albania,ALB,2016,6272858.0,4.511,93.75,18.07,58.986885,14.992714
18,Albania,ALB,2017,6379207.0,4.64,94.1,,60.384658,15.769254
19,Algeria,DZA,2011,119806900.0,5.317,92.51,29.67,64.960822,18.311568


In [30]:
all_df.to_csv('resources/env_hap_temp.csv', index=False)