In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import pearsonr

In [17]:
# Load the CO2 emissions table, report each column's dtype, then display the frame.
emissions = pd.read_csv("../Data/CO2_Emissions.csv")

print("Data Types:", emissions.dtypes, sep="\n")
emissions

Data Types:
Country          object
Country_code     object
Year              int64
%_change        float64
dtype: object


Unnamed: 0,Country,Country_code,Year,%_change
0,United States,USA,1801,1.465602e+04
1,United States,USA,1802,2.198400e+04
2,United States,USA,1803,7.328000e+03
3,United States,USA,1804,3.664000e+04
4,United States,USA,1805,7.328000e+03
...,...,...,...,...
216,United States,USA,2017,-4.080026e+07
217,United States,USA,2018,1.657001e+08
218,United States,USA,2019,-1.175132e+08
219,United States,USA,2020,-5.434532e+08


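Note: the `%_change` column is described as the percent change in CO2 emissions for that year. However, the magnitudes shown above (e.g. 1.47e+04 in 1801 and -5.43e+08 in 2020) look more like absolute year-over-year changes than percentages, so the units should be verified against the data source.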

In [18]:
# Store 'Year' as text in the CO2 emissions table: the fuel table's 'Year' key is
# built as a string, and the two frames are later joined on that column.
emissions = emissions.astype({'Year': str})
emissions.head()

Unnamed: 0,Country,Country_code,Year,%_change
0,United States,USA,1801,14656.016
1,United States,USA,1802,21984.0
2,United States,USA,1803,7328.0
3,United States,USA,1804,36640.0
4,United States,USA,1805,7328.0


In [21]:
# Load the transportation fuels table, report each column's dtype, then preview the first rows.
transportation = pd.read_csv("../Data/Transportation_Fuels.csv")

print("Data Types:", transportation.dtypes, sep="\n")
transportation.head()

Data Types:
Date                                                             object
East Coast Production of Gasoline (Thousand Barrels per Day)      int64
East Coast Production of Jet Fuel (Thousand Barrels per Day)    float64
U.S. Production of Gasoline (Thousand Barrels per Day)            int64
U.S. Gasoline Demand (Thousand Barrels per Day)                   int64
U.S. Production of Jet Fuel (Thousand Barrels per Day)            int64
dtype: object


Unnamed: 0,Date,East Coast Production of Gasoline (Thousand Barrels per Day),East Coast Production of Jet Fuel (Thousand Barrels per Day),U.S. Production of Gasoline (Thousand Barrels per Day),U.S. Gasoline Demand (Thousand Barrels per Day),U.S. Production of Jet Fuel (Thousand Barrels per Day)
0,04/14/2023,3161,127.0,9475,8519,1625
1,04/07/2023,3244,114.0,9818,8936,1677
2,03/31/2023,3180,89.0,9851,9295,1686
3,03/24/2023,3199,76.0,10038,9145,1662
4,03/17/2023,3182,84.0,9503,8960,1639


In [22]:
# Date formatting for transportation fuel dataset
#
# 'Date' holds MM/DD/YYYY strings, so everything from character 6 onward is the
# four-digit year. The vectorised .str slice already yields strings, which is the
# form needed to match the string 'Year' key in `emissions`; no extra cast is
# required.
transportation = transportation.assign(Year=transportation['Date'].str[6:])

transportation


Unnamed: 0,Date,East Coast Production of Gasoline (Thousand Barrels per Day),East Coast Production of Jet Fuel (Thousand Barrels per Day),U.S. Production of Gasoline (Thousand Barrels per Day),U.S. Gasoline Demand (Thousand Barrels per Day),U.S. Production of Jet Fuel (Thousand Barrels per Day),Year
0,04/14/2023,3161,127.0,9475,8519,1625,2023
1,04/07/2023,3244,114.0,9818,8936,1677,2023
2,03/31/2023,3180,89.0,9851,9295,1686,2023
3,03/24/2023,3199,76.0,10038,9145,1662,2023
4,03/17/2023,3182,84.0,9503,8960,1639,2023
...,...,...,...,...,...,...,...
1548,08/13/1993,700,70.0,7205,7581,1376,1993
1549,08/06/1993,718,67.0,7305,8033,1370,1993
1550,07/30/1993,786,85.0,7313,7336,1388,1993
1551,07/23/1993,738,107.0,7469,7916,1401,1993


In [10]:
# Format transportation fuel dataset for merging
#
# Collapse the per-date rows into one row of column means per year.
# numeric_only=True keeps the non-numeric 'Date' column out of the aggregation;
# without it, pandas 2.0+ raises TypeError when it tries to average strings.
# 'Year' comes back from reset_index() as the same string keys it was grouped on.
transportation_merge = (
    transportation
    .groupby('Year')
    .mean(numeric_only=True)
    .reset_index()
    .sort_values('Year')
)

transportation_merge.head()

Unnamed: 0,Year,East Coast Production of Gasoline (Thousand Barrels per Day),East Coast Production of Jet Fuel (Thousand Barrels per Day),U.S. Production of Gasoline (Thousand Barrels per Day),U.S. Gasoline Demand (Thousand Barrels per Day),U.S. Production of Jet Fuel (Thousand Barrels per Day)
0,1993,761.36,78.44,7495.72,7639.84,1338.16
1,1994,744.153846,75.076923,7285.0,7574.5,1439.615385
2,1995,867.115385,87.192308,7623.480769,7822.884615,1407.384615
3,1996,807.442308,75.557692,7583.134615,7831.211538,1519.365385
4,1997,984.961538,94.0,7814.653846,7988.634615,1564.673077


In [12]:
# Inner-join the emissions table with the fuel table on the shared 'Year' key and
# discard any joined row that contains a missing value.
# NOTE: the right-hand side is the per-date `transportation` frame (not the yearly
# `transportation_merge`), so each emissions year is repeated once per fuel record
# of that year.
merged_data = pd.merge(
    emissions,
    transportation,
    on='Year',
    how='inner',
).dropna()
merged_data.head()

Unnamed: 0,Country,Country_code,Year,%_change,Date,East Coast Production of Gasoline (Thousand Barrels per Day),East Coast Production of Jet Fuel (Thousand Barrels per Day),U.S. Production of Gasoline (Thousand Barrels per Day),U.S. Gasoline Demand (Thousand Barrels per Day),U.S. Production of Jet Fuel (Thousand Barrels per Day)
0,United States,USA,1993,99142660.0,12/31/1993,841,95.0,7503,7567,1426
1,United States,USA,1993,99142660.0,12/24/1993,754,80.0,7379,7411,1452
2,United States,USA,1993,99142660.0,12/17/1993,789,90.0,7650,7684,1392
3,United States,USA,1993,99142660.0,12/10/1993,797,76.0,7719,8074,1409
4,United States,USA,1993,99142660.0,12/03/1993,836,80.0,7888,7430,1371


In [16]:
# Average every numeric column of the merged frame within each year, giving one
# row per 'Year' (the index of the result).
# numeric_only=True excludes the text columns ('Country', 'Country_code', 'Date');
# without it, pandas 2.0+ raises TypeError instead of silently dropping them.
groupby_year = merged_data.groupby('Year').mean(numeric_only=True)
groupby_year.head()

Unnamed: 0_level_0,%_change,East Coast Production of Gasoline (Thousand Barrels per Day),East Coast Production of Jet Fuel (Thousand Barrels per Day),U.S. Production of Gasoline (Thousand Barrels per Day),U.S. Gasoline Demand (Thousand Barrels per Day),U.S. Production of Jet Fuel (Thousand Barrels per Day)
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1993,99142660.0,761.36,78.44,7495.72,7639.84,1338.16
1994,91215870.0,744.153846,75.076923,7285.0,7574.5,1439.615385
1995,62219776.0,867.115385,87.192308,7623.480769,7822.884615,1407.384615
1996,188632060.0,807.442308,75.557692,7583.134615,7831.211538,1519.365385
1997,75433980.0,984.961538,94.0,7814.653846,7988.634615,1564.673077


In [23]:
# Pearson correlation between the yearly CO2 '%_change' values and the yearly mean of
# U.S. Production of Gasoline (Thousand Barrels per Day).
# Outcome: not statistically significant (p > 0.05).

co2_change = groupby_year['%_change']
gasoline_production = groupby_year['U.S. Production of Gasoline (Thousand Barrels per Day)']

r, p = pearsonr(co2_change, gasoline_production)
print('r = ', r)
print('p = ', p)

r =  -0.22227276905223897
p =  0.2465001613915193


In [24]:
# Pearson correlation between the yearly CO2 '%_change' values and the yearly mean of
# U.S. Gasoline Demand (Thousand Barrels per Day).
# Outcome: not statistically significant (p > 0.05).

co2_change = groupby_year['%_change']
gasoline_demand = groupby_year['U.S. Gasoline Demand (Thousand Barrels per Day)']

r, p = pearsonr(co2_change, gasoline_demand)
print('r = ', r)
print('p = ', p)

r =  -0.15914415287288103
p =  0.40960200110110384


In [25]:
# Pearson correlation between the yearly CO2 '%_change' values and the yearly mean of
# U.S. Production of Jet Fuel (Thousand Barrels per Day).
# Outcome: not statistically significant (p > 0.05).

co2_change = groupby_year['%_change']
jet_fuel_production = groupby_year['U.S. Production of Jet Fuel (Thousand Barrels per Day)']

r, p = pearsonr(co2_change, jet_fuel_production)

print('r = ', r)
print('p = ', p)

r =  0.28603692024634003
p =  0.13252189144779802
