## Data Importing Notebook

In [21]:
# import the necessary libraries
import pandas as pd
import numpy as np
import requests
import json
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression



from pathlib import Path

from api_keys import Openweather_api_key
from api_keys import Geoapify_key


#Here are the relative paths for assorted resource folders. Feel free to save to a file within these paths. 
APIDataPath = '../Resources/API_Data/'
AnalyzedDataPath = '../Resources/Data_Analysis/'
FiguresPath = '../Resources/Figures/'
#e.g. TVdata = f'{APIDataPath}TV_dat84_23.csv'


## Kevin Starts Here

In [28]:
#Kevin's code starts here

# reads in csv from Resources/API_Data folder
#TVdataPath = Path("../Resources/API_Data/TV_dat84_23.csv")
TVdataPath = APIDataPath + "TV_dat84_23.csv"
TVdataOriginal_df = pd.read_csv(TVdataPath)
TVdataOriginal_df.head(3)

Unnamed: 0,Series ID,Year,Period,Label,Value,3-Month Net Change,3-Month % Change
0,EIUIR41200,1984,M12,1984 Dec,132.2,,
1,EIUIR41200,1985,M03,1985 Mar,133.5,,1.0
2,EIUIR41200,1985,M06,1985 Jun,129.5,,-3.0


In [29]:
# removes unneeded columns
TV_data_First = TVdataOriginal_df.drop(columns=['Series ID', '3-Month Net Change'])
TV_data_First.head(2)

Unnamed: 0,Year,Period,Label,Value,3-Month % Change
0,1984,M12,1984 Dec,132.2,
1,1985,M03,1985 Mar,133.5,1.0


In [30]:
# can eliminate first row if needed to resolve NA data prob
# takes Label column and coverts to Pandas date/time and reinserts back into DataFrame
#TV_data_First = TV_data_First.drop([0])
TV_data_Clean = TV_data_First.copy()
TV_data_date = pd.to_datetime(TV_data_Clean['Label'])
TV_data_Clean['Label'] = TV_data_date
TV_data_Clean = TV_data_Clean.rename(columns={"Label": "Date", "Period": "Quarter"})
TV_data_Clean.head(2)

Unnamed: 0,Year,Quarter,Date,Value,3-Month % Change
0,1984,M12,1984-12-01,132.2,
1,1985,M03,1985-03-01,133.5,1.0


In [25]:
# elimates rows prior to set year and rests index to start bat 0
year = 2013
TV_data_2013 = TV_data_Clean[(TV_data_Clean['Date'].dt.year >= year)]
TV_data_2013.reset_index(inplace = True)
TV_data_2013.head(2)

Unnamed: 0,index,Year,Quarter,Date,Value,3-Month % Change
0,257,2013,M01,2013-01-01,52.0,-3.0
1,258,2013,M02,2013-02-01,51.9,-0.6


In [26]:
TV_data_path = APIDataPath + 'TV_2013_cleaned.csv'
TV_data_path
TV_data_2013.to_csv(TV_data_path, index=False, header=True)

## Nelson's Starts here

In [13]:
# define a function to call the API and collect the data
def get_gas_prices():
    api_url = 'https://api.eia.gov/v2/series/?api_key=CaYN6JIcvNOO0ASoHuG1JYT1Hugfu98N3RFotIgL&series_id=PET.EMD_EPD2D_PTE_NUS_DPG.W'
    response = requests.get(api_url)
    data = json.loads(response.text)['series'][0]['data']
    gas_prices = pd.DataFrame(data, columns=['Year', 'Price'])
    gas_prices['Year'] = pd.to_datetime(gas_prices['Year'], format='%Y')
    gas_prices.set_index('Year', inplace=True)
    gas_prices.sort_index(inplace=True)
    return gas_prices

In [14]:
# create a scatter plot of the gas prices over time
plt.figure(figsize=(12, 6))
plt.scatter(gas_prices.index, gas_prices['Price'], alpha=0.5)
plt.xlabel('Year')
plt.ylabel('Gas Price ($)')
plt.title('Gas Prices in the USA over Time')
plt.show()

NameError: name 'gas_prices' is not defined

<Figure size 3600x1800 with 0 Axes>

In [None]:
# call the function to collect the data and create a data frame
gas_prices = get_gas_prices()
gas_prices.head()

In [None]:
# create a linear regression to see how the gas prices have changed over time
X = gas_prices.index.map(lambda year: year.year).values.reshape(-1, 1)
y = gas_prices['Price'].values.reshape(-1, 1)
lr = LinearRegression().fit(X, y)
y_pred = lr.predict(X)

plt.figure(figsize=(12, 6))
plt.scatter(X, y, alpha=0.5)
plt.plot(X, y_pred, color='red')
plt.xlabel('Year')
plt.ylabel('Gas Price ($)')
plt.title('Gas Prices in the USA over Time')
plt.show()


In [None]:
# create a bar plot to see the average gas prices by year
yearly_gas_prices = gas_prices.resample('Y').mean()

plt.figure(figsize=(12, 6))
plt.bar(yearly_gas_prices.index.year, yearly_gas_prices['Price'])
plt.xlabel('Year')
plt.ylabel('Average Gas Price ($)')
plt.title('Average Gas Prices in the USA by Year')
plt.show()


In [None]:
# create a distribution map to see how the gas prices have varied across the USA
us_states = pd.read_csv('https://raw.githubusercontent.com/PublicaMundi/MappingAPI/master/data/geojson/us-states.json')
us_states['id'] = us_states['id'].apply(lambda x: x.lower())

state_gas_prices = gas_prices.groupby(gas_prices.index.year).mean()
state_gas_prices['State'] = ['AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA', 'HI', 'ID', 'IL', 'IN']