In [18]:

#%matplotlib tk
%matplotlib notebook
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import os 
import matplotlib.animation as animation
import seaborn as sns
import numpy as np
#%matplotlib inline



In [2]:
data_root = 'data/'
print(os.listdir(data_root))

['ghg-emissions.csv', 'history_data_20001231-20210227.csv', 'location_lookup.csv']


In [3]:
#temp data import 
temp_data_path = 'data/'+ 'history_data_20001231-20210227.csv'
temp_df= pd.read_csv(temp_data_path)
temp_df.head()


Unnamed: 0,location_id,date,maxTemp,minTemp,prcp
0,ST801,20010106,32.7,15.2,0.42
1,ST801,20010113,26.7,8.7,0.08
2,ST801,20010120,23.8,6.6,0.03
3,ST801,20010127,31.4,14.4,0.05
4,ST801,20010203,35.4,15.3,0.03


In [4]:

#location data 
location_path = data_root + 'location_lookup.csv'
loc_df = pd.read_csv(location_path)
loc_df.head()



Unnamed: 0,location_id,city,state_abbreviation,country_name,latitude,longitude
0,ST422,Edmonton,AB,CANADA,53.317,-113.583
1,ST500,Toronto,ON,CANADA,43.667,-79.633
2,ST515,Quebec,QC,CANADA,46.783,-71.367
3,ST593,Vancouver,BC,CANADA,49.2,-123.167
4,ST801,Beijing,,CHINA,39.933,116.283


In [5]:

#merge the datasets together 
tempLocDf  = pd.merge(loc_df,temp_df,on="location_id")
print(tempLocDf.dtypes)
tempLocDf['date'] = pd.to_datetime(tempLocDf['date'],format='%Y%m%d')
tempLocDf.head()

location_id            object
city                   object
state_abbreviation     object
country_name           object
latitude              float64
longitude             float64
date                    int64
maxTemp               float64
minTemp               float64
prcp                  float64
dtype: object


Unnamed: 0,location_id,city,state_abbreviation,country_name,latitude,longitude,date,maxTemp,minTemp,prcp
0,ST422,Edmonton,AB,CANADA,53.317,-113.583,2001-01-06,38.2,19.4,0.02
1,ST422,Edmonton,AB,CANADA,53.317,-113.583,2001-01-13,35.7,11.3,0.0
2,ST422,Edmonton,AB,CANADA,53.317,-113.583,2001-01-20,32.7,10.4,0.03
3,ST422,Edmonton,AB,CANADA,53.317,-113.583,2001-01-27,35.9,10.1,0.0
4,ST422,Edmonton,AB,CANADA,53.317,-113.583,2001-02-03,37.7,17.6,0.02


In [32]:

#gas data 
gasDataPath = data_root + 'ghg-emissions.csv'
gasData = pd.read_csv(gasDataPath)

# cleaning gas data
#print(gasData.dtypes)

# drop rows that are just the informational stuff, if we want to use a different source (not CAIT) this will need to be adjusted
gasData = gasData[gasData["unit"] == "MtCO2e"] 

gasData.head()

Unnamed: 0,Country/Region,unit,1990,1991,1992,1993,1994,1995,1996,1997,...,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017
0,China,MtCO2e,2901.74,3051.31,3182.64,3405.76,3556.72,3946.17,3980.46,3989.5,...,8398.83,8992.51,9828.01,10784.95,11110.34,11568.63,11583.36,11578.25,11577.98,11780.99
1,United States,MtCO2e,5669.57,5627.93,5709.22,5819.3,5906.34,5970.68,6069.33,6302.62,...,6267.37,5865.85,6111.83,6235.82,5936.1,6056.33,6097.71,5973.16,5833.01,5766.92
2,India,MtCO2e,1184.08,1239.12,1271.34,1303.07,1349.41,1415.03,1460.58,1519.01,...,2185.08,2339.28,2449.3,2798.04,2947.94,3003.54,3175.79,3188.03,3230.06,3356.7
3,Russia,MtCO2e,3559.43,3463.05,3223.12,2965.8,2682.65,2602.13,2508.31,2317.88,...,2616.58,2281.05,2401.69,2478.24,2598.33,2468.51,2479.35,2356.08,2434.46,2460.27
4,Indonesia,MtCO2e,1509.17,1526.51,1547.09,1563.34,1579.97,1610.76,1399.99,2386.18,...,2043.38,2478.22,2079.39,2335.3,2357.19,2294.34,2729.18,2776.53,2228.89,2275.4


In [184]:
# join the gas data and temperature data, and create other relevant data frames
# temperature has many less countries, this combined set loses that data, important to keep in mind

gasData["Country/Region"] = gasData["Country/Region"].str.upper()

gas_and_temp_base = tempLocDf #.merge(gasData, left_on="country_name", right_on="Country/Region")

# convert into annual average instead of daily value (This seems like the most logical way to connect with emmissions)
gas_and_temp_base["year"] = gas_and_temp_base["date"].apply(lambda x: x.year)


gas_and_temp_national = gas_and_temp_base.drop(columns=["location_id", "city", "state_abbreviation", "latitude", "longitude", 'date'])
gas_and_temp_national = gas_and_temp_national.groupby(["country_name", "year"]).aggregate({
    'maxTemp': 'mean', 'minTemp': 'mean', "prcp": 'mean'
})

# naive solution
gas_and_temp_national["MtCo2e"] = np.zeros(len(gas_and_temp_national))
index = gas_and_temp_national.index
for i in range(len(gas_and_temp_national)):
    (country, year) = index[i]
    series          = gasData[gasData["Country/Region"] == country]
    value           = (series.get(str(year)))
    if value is not None:
        value = value.values[0] # value is returned as an array or none
    gas_and_temp_national["MtCo2e"].iloc[i] = value

gas_and_temp_national["MtCo2e"] = gas_and_temp_national["MtCo2e"].astype("float64")
    
# remove years with no gas data
gas_and_temp_national = gas_and_temp_national.dropna()

gas_and_temp_national

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


Unnamed: 0_level_0,Unnamed: 1_level_0,maxTemp,minTemp,prcp,MtCo2e
country_name,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
CANADA,2001,54.077885,35.959615,0.592644,890.59
CANADA,2002,52.735577,34.926923,0.508942,981.61
CANADA,2003,52.129808,34.145192,0.688798,1001.53
CANADA,2004,52.695192,35.077404,0.676779,961.03
CANADA,2005,53.001887,35.452830,0.704623,970.63
...,...,...,...,...,...
UNITED STATES,2013,70.038615,50.973154,0.665123,6056.33
UNITED STATES,2014,70.442769,50.996538,0.688792,6097.71
UNITED STATES,2015,71.720385,52.665231,0.735977,5973.16
UNITED STATES,2016,71.965962,52.834868,0.668445,5833.01


In [7]:
#For intial Analysis we will only do Canada we can add more countries later. 
CanadaTemp = tempLocDf[tempLocDf["country_name"]=="CANADA"]
CanadaTemp.head()

Unnamed: 0,location_id,city,state_abbreviation,country_name,latitude,longitude,date,maxTemp,minTemp,prcp
0,ST422,Edmonton,AB,CANADA,53.317,-113.583,2001-01-06,38.2,19.4,0.02
1,ST422,Edmonton,AB,CANADA,53.317,-113.583,2001-01-13,35.7,11.3,0.0
2,ST422,Edmonton,AB,CANADA,53.317,-113.583,2001-01-20,32.7,10.4,0.03
3,ST422,Edmonton,AB,CANADA,53.317,-113.583,2001-01-27,35.9,10.1,0.0
4,ST422,Edmonton,AB,CANADA,53.317,-113.583,2001-02-03,37.7,17.6,0.02


In [11]:
dates  = CanadaTemp.date.unique()
fig,ax = plt.subplots(1,2)

def animate(i):
    date = dates[i]
    currDateTemp = CanadaTemp[CanadaTemp["date"]==date]
    sns.barplot(ax=ax[0],y=currDateTemp["maxTemp"], x=currDateTemp["city"],data=currDateTemp,color="red")
    #title = ("Temperature Variation on: ", date)
    #print(title)
    #fig.suptitle(title)
    ax[0].set_title("Maximum Temperature")
    sns.barplot(ax=ax[1],y=currDateTemp["minTemp"], x=currDateTemp["city"],data=currDateTemp,color="orange")
    ax[1].set_title("Minimum Temperature")
    
ani = animation.FuncAnimation(fig,animate,frames=len(dates), repeat=True)
plt.show()

<IPython.core.display.Javascript object>

In [186]:
canada_temp_and_gas = gas_and_temp_national.loc["CANADA"]

years = canada_temp_and_gas.index

fig, ax1 = plt.subplots()

ax1.set_xlabel("Year")
ax1.set_ylabel("MtCo2e")

ax1.plot(years, canada_temp_and_gas["MtCo2e"], color="green", label="CO2 Emmissions", linestyle="-")
ax1.tick_params(axis='y', labelcolor="green")

ax2 = ax1.twinx()
ax2.set_ylabel("Temperature") # I do not know what measure it's in

ax2.plot(years, canada_temp_and_gas["maxTemp"], color="red", label="Max Temp", linestyle="--")
ax2.plot(years, canada_temp_and_gas["minTemp"], color="blue", label="Min Temp", linestyle=":")

plt.title("Changing Temperature and Co2 In Canada")
plt.legend()
fig.tight_layout()
plt.show()

<IPython.core.display.Javascript object>

In [185]:
# basic correlation matrix
print(gas_and_temp_national.dtypes)
corr = gas_and_temp_national.loc[:,["minTemp", "maxTemp", "prcp", "MtCo2e"]].corr()
fig, ax1 = plt.subplots()
sns.heatmap(corr, annot=True)
plt.show()

maxTemp    float64
minTemp    float64
prcp       float64
MtCo2e     float64
dtype: object


<IPython.core.display.Javascript object>