## Data Importing Notebook

In [1]:
# import the necessary libraries
import pandas as pd
import numpy as np
import requests
import json
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression




from pathlib import Path

from api_keys import Openweather_api_key
from api_keys import Geoapify_key


#Here are the relative paths for assorted resource folders. Feel free to save to a file within these paths. 
APIDataPath = '../Resources/API_Data/'
AnalyzedDataPath = '../Resources/Data_Analysis/'
FiguresPath = '../Resources/Figures/'
#e.g. TVdata = f'{APIDataPath}TV_dat84_23.csv'


## Kevin Starts Here

In [2]:
#Kevin's code starts here
pd.options.mode.chained_assignment = None # eliminates SettingwithCopyWarning for this instance
year = 2013 # Gives cut off year for data distillation
Norm_Factor = 100 # Multiple for normalization value for better visualization

# reads in csv from Resources/API_Data folder
TVdataPath = APIDataPath + "TV_dat84_23.csv"
TVdataOriginal_df = pd.read_csv(TVdataPath)
TVdataOriginal_df.head(3)

Unnamed: 0,Series ID,Year,Period,Label,Value,3-Month Net Change,3-Month % Change
0,EIUIR41200,1984,M12,1984 Dec,132.2,,
1,EIUIR41200,1985,M03,1985 Mar,133.5,,1.0
2,EIUIR41200,1985,M06,1985 Jun,129.5,,-3.0


In [3]:
# removes unneeded columns
TV_data_Clean = TVdataOriginal_df.copy()
TV_data_Clean = TV_data_Clean.drop(columns=['Series ID', '3-Month Net Change'])
TV_data_Clean.head(2)

Unnamed: 0,Year,Period,Label,Value,3-Month % Change
0,1984,M12,1984 Dec,132.2,
1,1985,M03,1985 Mar,133.5,1.0


In [4]:
# can eliminate first row if needed to resolve NA data prob
# takes Label column and coverts to Pandas date/time and reinserts back into DataFrame
#TV_data_First = TV_data_First.drop([0])
TV_data_date = pd.to_datetime(TV_data_Clean['Label'])
TV_data_Clean['Label'] = TV_data_date
TV_data_Clean = TV_data_Clean.rename(columns={"Label": "Date", "Period": "Month"})
TV_data_Clean.head(2)

Unnamed: 0,Year,Month,Date,Value,3-Month % Change
0,1984,M12,1984-12-01,132.2,
1,1985,M03,1985-03-01,133.5,1.0


In [5]:
# elimates rows prior to set year and rests index to start bat 0

TV_data_2013 = TV_data_Clean[(TV_data_Clean['Date'].dt.year >= year)]
TV_data_2013.reset_index(drop = True, inplace = True)
TV_data_2013.head(3)

Unnamed: 0,Year,Month,Date,Value,3-Month % Change
0,2013,M01,2013-01-01,52.0,-3.0
1,2013,M02,2013-02-01,51.9,-0.6
2,2013,M03,2013-03-01,51.1,-2.1


In [6]:
Normalized_Data = (TV_data_2013['Value']-TV_data_2013['Value'].min())/(TV_data_2013['Value'].max() - 
                                                              TV_data_2013['Value'].min())
Normalized_Data = Normalized_Data * Norm_Factor                                                             
TV_data_2013['Norm TV Values'] = Normalized_Data
TV_data_2013.head(3)

Unnamed: 0,Year,Month,Date,Value,3-Month % Change,Norm TV Values
0,2013,M01,2013-01-01,52.0,-3.0,100.0
1,2013,M02,2013-02-01,51.9,-0.6,99.264706
2,2013,M03,2013-03-01,51.1,-2.1,93.382353


In [7]:
TV_data_path = APIDataPath + 'Cleaned_TV_2013.csv'
TV_data_2013.to_csv(TV_data_path, index=False, header=True)

In [8]:
# 'Eggs' price table

In [9]:
# reads in csv from Resources/API_Data folder

EggsDataPath = APIDataPath + "eggs_84_23.csv"
EggsDataOriginal_df = pd.read_csv(EggsDataPath)
EggsDataOriginal_df.head(3)

Unnamed: 0,Series ID,Year,Period,Label,Value,3-Month % Change
0,APU0000708111,1984,M01,1984 Jan,1.301,35.2
1,APU0000708111,1984,M02,1984 Feb,1.324,32.9
2,APU0000708111,1984,M03,1984 Mar,1.153,1.9


In [10]:
# removes unneeded columns
EggsDataClean = EggsDataOriginal_df.copy() #original copy kept if needed
EggsDataClean = EggsDataClean.drop(columns=['Series ID'])
EggsDataClean.head(2)

Unnamed: 0,Year,Period,Label,Value,3-Month % Change
0,1984,M01,1984 Jan,1.301,35.2
1,1984,M02,1984 Feb,1.324,32.9


In [11]:
# takes Label column and coverts to Pandas date/time and reinserts back into DataFrame

Eggs_data_date = pd.to_datetime(EggsDataClean['Label'])
EggsDataClean['Label'] = Eggs_data_date
EggsDataClean = EggsDataClean.rename(columns={"Label": "Date", "Period": "Month"})
EggsDataClean.head(2)

Unnamed: 0,Year,Month,Date,Value,3-Month % Change
0,1984,M01,1984-01-01,1.301,35.2
1,1984,M02,1984-02-01,1.324,32.9


In [12]:
# elimates rows prior to set year and resets index to 0

EggsData_2013 = EggsDataClean[(EggsDataClean['Date'].dt.year >= year)]
EggsData_2013.reset_index(drop = True, inplace = True)
EggsData_2013.head(3)

Unnamed: 0,Year,Month,Date,Value,3-Month % Change
0,2013,M01,2013-01-01,1.933,-1.4
1,2013,M02,2013-02-01,1.965,0.1
2,2013,M03,2013-03-01,1.925,-4.1


In [13]:
Normalized_Data_Eggs = (EggsData_2013['Value']-EggsData_2013['Value'].min())/(EggsData_2013['Value'].max() - 
                                                              EggsData_2013['Value'].min())
Normalized_Data_Eggs = Normalized_Data_Eggs * Norm_Factor                                                             
EggsData_2013['Norm Egg Values'] = Normalized_Data_Eggs
EggsData_2013.head(3)

Unnamed: 0,Year,Month,Date,Value,3-Month % Change,Norm Egg Values
0,2013,M01,2013-01-01,1.933,-1.4,20.165746
1,2013,M02,2013-02-01,1.965,0.1,21.049724
2,2013,M03,2013-03-01,1.925,-4.1,19.944751


In [14]:
#write cleaned data to csv file
Eggs_data_path_out = APIDataPath + 'Cleaned_Eggs_2013.csv'
EggsData_2013.to_csv(Eggs_data_path_out, index=False, header=True)

In [15]:
# New Automobiles and Automobile Parts Price Table

In [16]:
# reads in csv from Resources/API_Data folder

AutoDataPath = APIDataPath + "automobile_2016_2023.csv"
AutoDataOriginal_df = pd.read_csv(AutoDataPath)
AutoDataOriginal_df.head(3)

Unnamed: 0,Series ID,Year,Period,Label,Value,3-Month % Change
0,WPS5861,2016,M01,2016 Jan,99.0,
1,WPS5861,2016,M02,2016 Feb,98.7,
2,WPS5861,2016,M03,2016 Mar,96.8,


In [17]:
# removes unneeded columns
AutoDataClean = AutoDataOriginal_df.copy() #original copy kept if needed
AutoDataClean = AutoDataClean.drop(columns=['Series ID'])
AutoDataClean.head(2)

Unnamed: 0,Year,Period,Label,Value,3-Month % Change
0,2016,M01,2016 Jan,99.0,
1,2016,M02,2016 Feb,98.7,


In [18]:
# takes Label column and coverts to Pandas date/time and reinserts back into DataFrame
# renames columns as needed
Auto_data_date = pd.to_datetime(AutoDataClean['Label'])
AutoDataClean['Label'] = Auto_data_date
AutoDataClean = AutoDataClean.rename(columns={"Label": "Date", "Period": "Month"})
AutoDataClean.head(2)

Unnamed: 0,Year,Month,Date,Value,3-Month % Change
0,2016,M01,2016-01-01,99.0,
1,2016,M02,2016-02-01,98.7,


In [19]:
Normalized_Data_Auto = (AutoDataClean['Value']-AutoDataClean['Value'].min())/(AutoDataClean['Value'].max() - 
                                                              AutoDataClean['Value'].min())
Normalized_Data_Auto = Normalized_Data_Auto * Norm_Factor                                                             
AutoDataClean['Norm Auto Values'] = Normalized_Data_Auto
AutoDataClean.head(3)

Unnamed: 0,Year,Month,Date,Value,3-Month % Change,Norm Auto Values
0,2016,M01,2016-01-01,99.0,,7.21805
1,2016,M02,2016-02-01,98.7,,6.974745
2,2016,M03,2016-03-01,96.8,,5.433813


In [20]:
#write cleaned data to csv file
Auto_data_path_out = APIDataPath + 'Cleaned_Automobile_2016.csv'
AutoDataClean.to_csv(Auto_data_path_out, index=False, header=True)

In [21]:
# Bananas Price Table

In [22]:
# reads in csv from Resources/API_Data folder
BananasDataPath = APIDataPath + "bananas_84_23.csv"
BananasDataOriginal_df = pd.read_csv(BananasDataPath)
BananasDataOriginal_df.head(3)

Unnamed: 0,Series ID,Year,Period,Label,Value,3-Month % Change
0,APU0000711211,1984,M01,1984 Jan,0.344,-10.6
1,APU0000711211,1984,M02,1984 Feb,0.373,14.1
2,APU0000711211,1984,M03,1984 Mar,0.373,18.4


In [23]:
#makes copy of original to work with and drops unneeded column(s)
BananasDataClean = BananasDataOriginal_df.copy()
BananasDataClean = BananasDataClean.drop(columns=['Series ID'])
BananasDataClean.head(2)

Unnamed: 0,Year,Period,Label,Value,3-Month % Change
0,1984,M01,1984 Jan,0.344,-10.6
1,1984,M02,1984 Feb,0.373,14.1


In [24]:
# takes Label column and coverts to Pandas date/time and reinserts back into DataFrame
# Renames columns for more clarity

Bananas_data_date = pd.to_datetime(BananasDataClean['Label'])
BananasDataClean['Label'] = Bananas_data_date
BananasDataClean = BananasDataClean.rename(columns={"Label": "Date", "Period": "Month"})
BananasDataClean.head(2)

Unnamed: 0,Year,Month,Date,Value,3-Month % Change
0,1984,M01,1984-01-01,0.344,-10.6
1,1984,M02,1984-02-01,0.373,14.1


In [25]:
# Removes rows older than given year and rests index

BananasData_2013 = BananasDataClean[(BananasDataClean['Date'].dt.year >= year)]
BananasData_2013.reset_index(drop = True, inplace = True)
BananasData_2013.head(3)

Unnamed: 0,Year,Month,Date,Value,3-Month % Change
0,2013,M01,2013-01-01,0.609,1.2
1,2013,M02,2013-02-01,0.611,1.8
2,2013,M03,2013-03-01,0.609,0.5


In [26]:
# code if needed to divide into quarter periods
#BananasData_2013a = BananasData_2013[(BananasData_2013['Month'] == "M09") | (BananasData_2013['Month'] == "M03") |
#                                   (BananasData_2013['Month'] == "M06") | (BananasData_2013['Month'] == "M12")]
#BananasData_2013a.head()

In [27]:
Normalized_Data_Bananas = (BananasData_2013['Value']-BananasData_2013['Value'].min())/(BananasData_2013['Value'].max() - 
                                                              BananasData_2013['Value'].min())
Normalized_Data_Bananas = Normalized_Data_Bananas * Norm_Factor                                                             
BananasData_2013['Norm Bananas Values'] = Normalized_Data_Bananas
BananasData_2013.head(3)

Unnamed: 0,Year,Month,Date,Value,3-Month % Change,Norm Bananas Values
0,2013,M01,2013-01-01,0.609,1.2,65.656566
1,2013,M02,2013-02-01,0.611,1.8,67.676768
2,2013,M03,2013-03-01,0.609,0.5,65.656566


In [28]:
#write cleaned data to csv file
Bananas_data_path_out = APIDataPath + 'Cleaned_Bananas_2013.csv'
BananasData_2013.to_csv(Bananas_data_path_out, index=False, header=True)

In [29]:
# Toilet Paper Price Table

In [30]:
# reads in csv from Resources/API_Data folder

TPDataPath = APIDataPath + "toilet_paper_03_23.csv"
TPDataOriginal_df = pd.read_csv(TPDataPath)
TPDataOriginal_df.head(3)

Unnamed: 0,Series ID,Year,Period,Label,Value,3-Month % Change
0,PCU3222913222915,2003,M12,2003 Dec,100.0,
1,PCU3222913222915,2004,M01,2004 Jan,100.0,
2,PCU3222913222915,2004,M02,2004 Feb,100.2,


In [31]:
#makes copy of original to work with and drops unneeded column(s)
TPDataClean = TPDataOriginal_df.copy()
TPDataClean = TPDataClean.drop(columns=['Series ID'])
TPDataClean.head(2)

Unnamed: 0,Year,Period,Label,Value,3-Month % Change
0,2003,M12,2003 Dec,100.0,
1,2004,M01,2004 Jan,100.0,


In [32]:
# takes Label column and coverts to Pandas date/time and reinserts back into DataFrame
# Renames columns for more clarity

TP_data_date = pd.to_datetime(TPDataClean['Label'])
TPDataClean['Label'] = TP_data_date
TPDataClean = TPDataClean.rename(columns={"Label": "Date", "Period": "Month"})
TPDataClean.head(2)

Unnamed: 0,Year,Month,Date,Value,3-Month % Change
0,2003,M12,2003-12-01,100.0,
1,2004,M01,2004-01-01,100.0,


In [33]:
# Removes rows older than given year and resets index

TP_2013 = TPDataClean[(TPDataClean['Date'].dt.year >= year)]
TP_2013.reset_index(drop = True, inplace = True)
TP_2013.head(3)

Unnamed: 0,Year,Month,Date,Value,3-Month % Change
0,2013,M01,2013-01-01,132.3,0.0
1,2013,M02,2013-02-01,132.3,0.0
2,2013,M03,2013-03-01,131.9,-0.3


In [34]:
Normalized_Data_TP = (TP_2013['Value']-TP_2013['Value'].min())/(TP_2013['Value'].max() - 
                                                              TP_2013['Value'].min())
Normalized_Data_TP = Normalized_Data_TP * Norm_Factor                                                             
TP_2013['Norm TP Values'] = Normalized_Data_TP
TP_2013.head(3)

Unnamed: 0,Year,Month,Date,Value,3-Month % Change,Norm TP Values
0,2013,M01,2013-01-01,132.3,0.0,13.846643
1,2013,M02,2013-02-01,132.3,0.0,13.846643
2,2013,M03,2013-03-01,131.9,-0.3,12.938666


In [35]:
# write cleaned data to csv file
TP_data_path_out = APIDataPath + 'Cleaned_TP_2013.csv'
TP_2013.to_csv(TP_data_path_out, index=False, header=True)

In [36]:
# Cost of Shelter Rent Table

In [37]:
# reads in csv from Resources/API_Data folder

ShelterDataPath = APIDataPath + "shelter_90_23.csv"
ShelterDataOriginal_df = pd.read_csv(ShelterDataPath)
ShelterDataOriginal_df.head(3)

Unnamed: 0,Series ID,Year,Period,Label,Value,3-Month % Change
0,CUSR0000SAS2RS,1990,M01,1990 Jan,141.7,
1,CUSR0000SAS2RS,1990,M02,1990 Feb,142.0,
2,CUSR0000SAS2RS,1990,M03,1990 Mar,143.1,


In [38]:
#makes copy of original to work with and drops unneeded column(s)
ShelterDataClean = ShelterDataOriginal_df.copy()
ShelterDataClean = ShelterDataClean.drop(columns=['Series ID'])
ShelterDataClean.head(2)

Unnamed: 0,Year,Period,Label,Value,3-Month % Change
0,1990,M01,1990 Jan,141.7,
1,1990,M02,1990 Feb,142.0,


In [39]:
# takes Label column and coverts to Pandas date/time and reinserts back into DataFrame
# Renames columns for more clarity

Shelter_data_date = pd.to_datetime(ShelterDataClean['Label'])
ShelterDataClean['Label'] = Shelter_data_date
ShelterDataClean = ShelterDataClean.rename(columns={"Label": "Date", "Period": "Month"})
ShelterDataClean.head(2)

Unnamed: 0,Year,Month,Date,Value,3-Month % Change
0,1990,M01,1990-01-01,141.7,
1,1990,M02,1990-02-01,142.0,


In [40]:
# Removes rows older than given year and resets index

Shelter_2013 = ShelterDataClean[(ShelterDataClean['Date'].dt.year >= year)]
Shelter_2013.reset_index(drop = True, inplace = True)
Shelter_2013.head(3)

Unnamed: 0,Year,Month,Date,Value,3-Month % Change
0,2013,M01,2013-01-01,271.027,0.5
1,2013,M02,2013-02-01,271.592,0.5
2,2013,M03,2013-03-01,272.07,0.6


In [41]:
Normalized_Data_Shelter = (Shelter_2013['Value']-Shelter_2013['Value'].min())/(Shelter_2013['Value'].max() - 
                                                              Shelter_2013['Value'].min())
Normalized_Data_Shelter = Normalized_Data_Shelter * Norm_Factor                                                             
Shelter_2013['Norm Shelter Values'] = Normalized_Data_Shelter
Shelter_2013.head(3)

Unnamed: 0,Year,Month,Date,Value,3-Month % Change,Norm Shelter Values
0,2013,M01,2013-01-01,271.027,0.5,0.0
1,2013,M02,2013-02-01,271.592,0.5,0.461473
2,2013,M03,2013-03-01,272.07,0.6,0.851888


In [42]:
# write cleaned data to csv file
Shelter_data_path_out = APIDataPath + 'Cleaned_Shelter_2013.csv'
Shelter_2013.to_csv(Shelter_data_path_out, index=False, header=True)

In [43]:
# Cost of Gasoline Table

In [44]:
# reads in csv from Resources/API_Data folder

GasolineDataPath = APIDataPath + "gasoline_84_23.csv"
GasolineDataOriginal_df = pd.read_csv(GasolineDataPath)
GasolineDataOriginal_df.head(3)

Unnamed: 0,Series ID,Year,Period,Label,Value,3-Month % Change
0,APU000074714,1984,M01,1984 Jan,1.216,-3.1
1,APU000074714,1984,M02,1984 Feb,1.209,-2.6
2,APU000074714,1984,M03,1984 Mar,1.21,-1.7


In [45]:
#makes copy of original to work with and drops unneeded column(s)
GasolineDataClean = GasolineDataOriginal_df.copy()
GasolineDataClean = GasolineDataClean.drop(columns=['Series ID'])
GasolineDataClean.head(2)

Unnamed: 0,Year,Period,Label,Value,3-Month % Change
0,1984,M01,1984 Jan,1.216,-3.1
1,1984,M02,1984 Feb,1.209,-2.6


In [46]:
# takes Label column and coverts to Pandas date/time and reinserts back into DataFrame
# Renames columns for more clarity

Gasoline_data_date = pd.to_datetime(GasolineDataClean['Label'])
GasolineDataClean['Label'] = Gasoline_data_date
GasolineDataClean = GasolineDataClean.rename(columns={"Label": "Date", "Period": "Month"})
GasolineDataClean.head(2)

Unnamed: 0,Year,Month,Date,Value,3-Month % Change
0,1984,M01,1984-01-01,1.216,-3.1
1,1984,M02,1984-02-01,1.209,-2.6


In [47]:
# Removes rows older than given year and resets index

Gasoline_2013 = GasolineDataClean[(GasolineDataClean['Date'].dt.year >= year)]
Gasoline_2013.reset_index(drop = True, inplace = True)
Gasoline_2013.head(3)

Unnamed: 0,Year,Month,Date,Value,3-Month % Change
0,2013,M01,2013-01-01,3.351,-11.5
1,2013,M02,2013-02-01,3.693,5.9
2,2013,M03,2013-03-01,3.735,12.1


In [48]:
Normalized_Data_Gasoline = (Gasoline_2013['Value']-Gasoline_2013['Value'].min())/(Gasoline_2013['Value'].max() - 
                                                              Gasoline_2013['Value'].min())
Normalized_Data_Gasoline = Normalized_Data_Gasoline * Norm_Factor                                                             
Gasoline_2013['Norm Gasoline Values'] = Normalized_Data_Gasoline
Gasoline_2013.head(3)

Unnamed: 0,Year,Month,Date,Value,3-Month % Change,Norm Gasoline Values
0,2013,M01,2013-01-01,3.351,-11.5,48.131267
1,2013,M02,2013-02-01,3.693,5.9,58.523245
2,2013,M03,2013-03-01,3.735,12.1,59.799453


In [49]:
# write cleaned data to csv file
Gasoline_data_path_out = APIDataPath + 'Cleaned_Gasoline_2013.csv'
Gasoline_2013.to_csv(Gasoline_data_path_out, index=False, header=True)

In [50]:
# Cost of Electricity Table

In [51]:
# reads in csv from Resources/API_Data folder

ElectricDataPath = APIDataPath + "electricity_84_23.csv"
ElectricDataOriginal_df = pd.read_csv(ElectricDataPath)
ElectricDataOriginal_df.head(3)

Unnamed: 0,Series ID,Year,Period,Label,Value,3-Month % Change
0,APU000072610,1984,M01,1984 Jan,0.078,-2.5
1,APU000072610,1984,M02,1984 Feb,0.079,2.6
2,APU000072610,1984,M03,1984 Mar,0.079,2.6


In [52]:
#makes copy of original to work with and drops unneeded column(s)
ElectricDataClean = ElectricDataOriginal_df.copy()
ElectricDataClean = ElectricDataClean.drop(columns=['Series ID'])
ElectricDataClean.head(2)

Unnamed: 0,Year,Period,Label,Value,3-Month % Change
0,1984,M01,1984 Jan,0.078,-2.5
1,1984,M02,1984 Feb,0.079,2.6


In [53]:
# takes Label column and coverts to Pandas date/time and reinserts back into DataFrame
# Renames columns for more clarity

Electric_data_date = pd.to_datetime(ElectricDataClean['Label'])
ElectricDataClean['Label'] = Electric_data_date
ElectricDataClean = ElectricDataClean.rename(columns={"Label": "Date", "Period": "Month"})
ElectricDataClean.head(2)

Unnamed: 0,Year,Month,Date,Value,3-Month % Change
0,1984,M01,1984-01-01,0.078,-2.5
1,1984,M02,1984-02-01,0.079,2.6


In [54]:
# Removes rows older than given year and resets index

Electric_2013 = ElectricDataClean[(ElectricDataClean['Date'].dt.year >= year)]
Electric_2013.reset_index(drop = True, inplace = True)
Electric_2013.head(3)

Unnamed: 0,Year,Month,Date,Value,3-Month % Change
0,2013,M01,2013-01-01,0.129,0.8
1,2013,M02,2013-02-01,0.129,1.6
2,2013,M03,2013-03-01,0.128,0.8


In [55]:
Normalized_Data_Electric = (Electric_2013['Value']-Electric_2013['Value'].min())/(Electric_2013['Value'].max() - 
                                                              Electric_2013['Value'].min())
Normalized_Data_Electric = Normalized_Data_Electric * Norm_Factor                                                             
Electric_2013['Norm Electric Values'] = Normalized_Data_Electric
Electric_2013.head(3)

Unnamed: 0,Year,Month,Date,Value,3-Month % Change,Norm Electric Values
0,2013,M01,2013-01-01,0.129,0.8,2.5
1,2013,M02,2013-02-01,0.129,1.6,2.5
2,2013,M03,2013-03-01,0.128,0.8,0.0


In [56]:
# write cleaned data to csv file
Electric_data_path_out = APIDataPath + 'Cleaned_Electric_2013.csv'
Electric_2013.to_csv(Electric_data_path_out, index=False, header=True)

In [57]:
# Corporate Profit Table

In [58]:
# reads in csv from Resources/API_Data folder

CorporateDataPath = APIDataPath + "Corporate Profits before tax.csv"
CorporateDataOriginal_df = pd.read_csv(CorporateDataPath)
CorporateDataOriginal_df.head(3)

Unnamed: 0,%SeriesCode,Period,Value
0,A053RC,1947Q1,33466
1,A053RC,1947Q2,31668
2,A053RC,1947Q3,31288


In [59]:
#makes copy of original to work with and drops unneeded column(s)
CorporateDataClean = CorporateDataOriginal_df.copy()
CorporateDataClean = CorporateDataClean.drop(columns=['%SeriesCode'])
CorporateDataClean.head(2)

Unnamed: 0,Period,Value
0,1947Q1,33466
1,1947Q2,31668


In [60]:
Corporate_data_date = pd.to_datetime(CorporateDataClean['Period'])
CorporateDataClean['Period'] = Corporate_data_date
CorporateDataClean = CorporateDataClean.rename(columns={"Period": "Date"})
CorporateDataClean.head(2)

Unnamed: 0,Date,Value
0,1947-01-01,33466
1,1947-04-01,31668


In [61]:
# Removes rows older than given year and resets index

Corporate_2013 = CorporateDataClean[(CorporateDataClean['Date'].dt.year >= year)]
Corporate_2013.reset_index(drop = True, inplace = True)
Corporate_2013.head(3)

Unnamed: 0,Date,Value
0,2013-01-01,2127093
1,2013-04-01,2114721
2,2013-07-01,2157510


In [62]:
# Removes commas from Values, changes values to float, normalizes values and creates new column for
# normalized values

Corporate_2013['Value'] = Corporate_2013['Value'].replace(",", "", regex=True)
Corporate_2013['Value'] = Corporate_2013['Value'].astype(float)
Corporate_2013['Value'] = pd.to_numeric(Corporate_2013["Value"])
Normalized_Data_Corporate = (Corporate_2013['Value']-Corporate_2013['Value'].min())/(Corporate_2013['Value'].max() - 
                                                              Corporate_2013['Value'].min())
Normalized_Data_Corporate = Normalized_Data_Corporate * Norm_Factor                                                             
Corporate_2013['Norm Corporate Values'] = Normalized_Data_Corporate
Corporate_2013.head(3)


Unnamed: 0,Date,Value,Norm Corporate Values
0,2013-01-01,2127093.0,8.61424
1,2013-04-01,2114721.0,7.803577
2,2013-07-01,2157510.0,10.607283


In [63]:
# write cleaned data to csv file
Corporate_data_path_out = APIDataPath + 'Cleaned_Corporate_2013.csv'
Corporate_2013.to_csv(Corporate_data_path_out, index=False, header=True)

## Nelson's Starts here

In [64]:
# define a function to call the API and collect the data
def get_gas_prices():
    api_url = 'https://api.eia.gov/v2/series/?api_key=CaYN6JIcvNOO0ASoHuG1JYT1Hugfu98N3RFotIgL&series_id=PET.EMD_EPD2D_PTE_NUS_DPG.W'
    response = requests.get(api_url)
    data = json.loads(response.text)['series'][0]['data']
    gas_prices = pd.DataFrame(data, columns=['Year', 'Price'])
    gas_prices['Year'] = pd.to_datetime(gas_prices['Year'], format='%Y')
    gas_prices.set_index('Year', inplace=True)
    gas_prices.sort_index(inplace=True)
    return gas_prices

In [65]:
# create a scatter plot of the gas prices over time
plt.figure(figsize=(12, 6))
plt.scatter(gas_prices.index, gas_prices['Price'], alpha=0.5)
plt.xlabel('Year')
plt.ylabel('Gas Price ($)')
plt.title('Gas Prices in the USA over Time')
plt.show()

NameError: name 'gas_prices' is not defined

<Figure size 1200x600 with 0 Axes>

In [None]:
# call the function to collect the data and create a data frame
gas_prices = get_gas_prices()
gas_prices.head()

In [None]:
# create a linear regression to see how the gas prices have changed over time
X = gas_prices.index.map(lambda year: year.year).values.reshape(-1, 1)
y = gas_prices['Price'].values.reshape(-1, 1)
lr = LinearRegression().fit(X, y)
y_pred = lr.predict(X)

plt.figure(figsize=(12, 6))
plt.scatter(X, y, alpha=0.5)
plt.plot(X, y_pred, color='red')
plt.xlabel('Year')
plt.ylabel('Gas Price ($)')
plt.title('Gas Prices in the USA over Time')
plt.show()


In [None]:
# create a bar plot to see the average gas prices by year
yearly_gas_prices = gas_prices.resample('Y').mean()

plt.figure(figsize=(12, 6))
plt.bar(yearly_gas_prices.index.year, yearly_gas_prices['Price'])
plt.xlabel('Year')
plt.ylabel('Average Gas Price ($)')
plt.title('Average Gas Prices in the USA by Year')
plt.show()


In [None]:
# create a distribution map to see how the gas prices have varied across the USA
us_states = pd.read_csv('https://raw.githubusercontent.com/PublicaMundi/MappingAPI/master/data/geojson/us-states.json')
us_states['id'] = us_states['id'].apply(lambda x: x.lower())

state_gas_prices = gas_prices.groupby(gas_prices.index.year).mean()
state_gas_prices['State'] = ['AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA', 'HI', 'ID', 'IL', 'IN']