## Data Importing Notebook

In [8]:
# import the necessary libraries
import pandas as pd
import numpy as np
import requests
import json
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression



from pathlib import Path

from api_keys import Openweather_api_key
from api_keys import Geoapify_key


#Here are the relative paths for assorted resource folders. Feel free to save to a file within these paths. 
APIDataPath = '../Resources/API_Data/'
AnalyzedDataPath = '../Resources/Data_Analysis/'
FiguresPath = '../Resources/Figures/'
#e.g. TVdata = f'{APIDataPath}TV_dat84_23.csv'


## Kevin Starts Here

In [9]:
#Kevin's code starts here
TVdataPath = Path("../Resources/API_Data/TV_dat84_23.csv")
TVdataOriginal_df = pd.read_csv(TVdataPath)
TVdataOriginal_df.head()

Unnamed: 0,Series ID,Year,Period,Label,Value,3-Month Net Change,3-Month % Change
0,EIUIR41200,1984,M12,1984 Dec,132.2,,
1,EIUIR41200,1985,M03,1985 Mar,133.5,,1.0
2,EIUIR41200,1985,M06,1985 Jun,129.5,,-3.0
3,EIUIR41200,1985,M09,1985 Sep,122.2,,-5.6
4,EIUIR41200,1985,M12,1985 Dec,123.8,,1.3


In [10]:
TV_data_First = TVdataOriginal_df.drop(columns=['Series ID', '3-Month Net Change'])
TV_data_First.head()

Unnamed: 0,Year,Period,Label,Value,3-Month % Change
0,1984,M12,1984 Dec,132.2,
1,1985,M03,1985 Mar,133.5,1.0
2,1985,M06,1985 Jun,129.5,-3.0
3,1985,M09,1985 Sep,122.2,-5.6
4,1985,M12,1985 Dec,123.8,1.3


In [11]:
TV_data_Clean = TV_data_First.drop([0])
TV_data_date = pd.to_datetime(TV_data_Clean['Label'])
TV_data_Clean['Label'] = TV_data_date
TV_data_Clean = TV_data_Clean.rename(columns={"Label": "Date", "Period": "Quarter"})
TV_data_Clean.head()

Unnamed: 0,Year,Quarter,Date,Value,3-Month % Change
1,1985,M03,1985-03-01,133.5,1.0
2,1985,M06,1985-06-01,129.5,-3.0
3,1985,M09,1985-09-01,122.2,-5.6
4,1985,M12,1985-12-01,123.8,1.3
5,1986,M03,1986-03-01,134.6,8.7


In [12]:
TV_data_2019 = TV_data_Clean[(TV_data_Clean['Date'].dt.year >= 2019)]
TV_data_2019.head()

Unnamed: 0,Year,Quarter,Date,Value,3-Month % Change
329,2019,M01,2019-01-01,41.3,-1.7
330,2019,M02,2019-02-01,41.0,-2.1
331,2019,M03,2019-03-01,40.8,-2.2
332,2019,M04,2019-04-01,40.8,-1.2
333,2019,M05,2019-05-01,40.7,-0.7


## Nelson's Starts here

In [13]:
# define a function to call the API and collect the data
def get_gas_prices():
    api_url = 'https://api.eia.gov/v2/series/?api_key=CaYN6JIcvNOO0ASoHuG1JYT1Hugfu98N3RFotIgL&series_id=PET.EMD_EPD2D_PTE_NUS_DPG.W'
    response = requests.get(api_url)
    data = json.loads(response.text)['series'][0]['data']
    gas_prices = pd.DataFrame(data, columns=['Year', 'Price'])
    gas_prices['Year'] = pd.to_datetime(gas_prices['Year'], format='%Y')
    gas_prices.set_index('Year', inplace=True)
    gas_prices.sort_index(inplace=True)
    return gas_prices

In [14]:
# create a scatter plot of the gas prices over time
plt.figure(figsize=(12, 6))
plt.scatter(gas_prices.index, gas_prices['Price'], alpha=0.5)
plt.xlabel('Year')
plt.ylabel('Gas Price ($)')
plt.title('Gas Prices in the USA over Time')
plt.show()

NameError: name 'gas_prices' is not defined

<Figure size 3600x1800 with 0 Axes>

In [None]:
# call the function to collect the data and create a data frame
gas_prices = get_gas_prices()
gas_prices.head()

In [None]:
# create a linear regression to see how the gas prices have changed over time
X = gas_prices.index.map(lambda year: year.year).values.reshape(-1, 1)
y = gas_prices['Price'].values.reshape(-1, 1)
lr = LinearRegression().fit(X, y)
y_pred = lr.predict(X)

plt.figure(figsize=(12, 6))
plt.scatter(X, y, alpha=0.5)
plt.plot(X, y_pred, color='red')
plt.xlabel('Year')
plt.ylabel('Gas Price ($)')
plt.title('Gas Prices in the USA over Time')
plt.show()


In [None]:
# create a bar plot to see the average gas prices by year
yearly_gas_prices = gas_prices.resample('Y').mean()

plt.figure(figsize=(12, 6))
plt.bar(yearly_gas_prices.index.year, yearly_gas_prices['Price'])
plt.xlabel('Year')
plt.ylabel('Average Gas Price ($)')
plt.title('Average Gas Prices in the USA by Year')
plt.show()


In [None]:
# create a distribution map to see how the gas prices have varied across the USA
us_states = pd.read_csv('https://raw.githubusercontent.com/PublicaMundi/MappingAPI/master/data/geojson/us-states.json')
us_states['id'] = us_states['id'].apply(lambda x: x.lower())

state_gas_prices = gas_prices.groupby(gas_prices.index.year).mean()
state_gas_prices['State'] = ['AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA', 'HI', 'ID', 'IL', 'IN']