# Daily Gas Prices in US

### Libraries

In [None]:
import math
import os

import numpy as np
import pandas as pd
import requests

## Data Pull & Cleaning

### API Key

In [None]:
# API keys
# SECURITY NOTE(review): these credentials are committed in plain text.
# Each key can be overridden through the environment variable named below;
# the literals are kept only as fallbacks so the notebook keeps running
# unchanged. They should be rotated and removed from source control.
gas_key = os.environ.get("COLLECTAPI_KEY", "4KLQzviUEChEdBIa8D57vG:6XtcVLxYfEOpANfjsyKUMq")  # CollectAPI (daily gas prices)
g_key = os.environ.get("GOOGLE_API_KEY", "AIzaSyDXrNZhXtDdT62Fsc6JSknFebpEmyKVx_E")  # Google Maps geocoding / places
eia_key = os.environ.get("EIA_API_KEY", "162281490b68ef33f84de64aa9d6945b")  # EIA time series

### Daily Gas Prices - Pull & Clean Up

In [None]:
# Import the daily gas prices for all US states from CollectAPI.
url = "https://api.collectapi.com/gasPrice/allUsaPrice"

headers = {
    'content-type': "application/json",
    'authorization': f"apikey {gas_key}"
    }

# The timeout stops the cell from hanging on a stalled connection, and
# raise_for_status() reports HTTP errors (bad key, quota, outage) directly
# instead of failing later with a KeyError on "result".
http_response = requests.get(url, headers=headers, timeout=30)
http_response.raise_for_status()
response = http_response.json()

# One list per output column; every entry of response["result"] is read
# for the keys "name", "gasoline", "midGrade", "premium" and "diesel".
records = response["result"]
states = [record["name"] for record in records]
gasoline = [record["gasoline"] for record in records]
midGrade = [record["midGrade"] for record in records]
premium = [record["premium"] for record in records]
diesel = [record["diesel"] for record in records]

# Create the dataframe (one row per entry returned by the API)
daily_gasoline_df = pd.DataFrame({"State":states,"Gas":gasoline,"MidGrade":midGrade,"Premium":premium,"Diesel":diesel})


In [None]:
# Cast every price column to float so it can be used numerically
price_columns = ["Gas", "MidGrade", "Premium", "Diesel"]
for price_column in price_columns:
    daily_gasoline_df[price_column] = daily_gasoline_df[price_column].astype(float)
daily_gasoline_df.head()

### Google GeoCoding State

In [None]:
# Import state information (coordinates and state code) via Google Geocoding
geocode_url = "https://maps.googleapis.com/maps/api/geocode/json"


def _state_code_from_components(components):
    """Return the state code found in a geocoding result's address components.

    The component typed "administrative_area_level_1" is preferred, because
    a state name can resolve to a city (the original code had to patch one
    entry by list position for exactly that reason). If no component
    carries that type, the original positional heuristic is applied: take
    the first component's short name, or the third one when the first is
    longer than two characters.
    """
    for component in components:
        if "administrative_area_level_1" in component.get("types", []):
            return component["short_name"]

    code = components[0]["short_name"]
    if len(code) > 2:
        return components[2]["short_name"]
    return code


state_lat = []
state_lng = []
state_code = []

for state in states:
    # Passing params lets requests URL-encode the state name and the key.
    geo_data = requests.get(
        geocode_url,
        params={"address": state, "key": g_key},
        timeout=30,
    ).json()
    # Only the first (top-ranked) result is used, as before.
    top_result = geo_data['results'][0]
    location = top_result["geometry"]["location"]
    state_lat.append(location["lat"])
    state_lng.append(location["lng"])
    state_code.append(_state_code_from_components(top_result["address_components"]))

# Create the dataframe; rows follow the order of `states`
state_df = pd.DataFrame({"State":states,"State Code":state_code,
                         "State Latitude": state_lat, 
                         "State Longitude": state_lng})
state_df.head()

### Gas Price Time Series

In [None]:
# Import the gas price time series from the EIA API.
# NOTE(review): series PET.EMM_EPM0_PTE_NUS_DPG.W is presumably the weekly
# US retail gasoline price - confirm against the EIA catalogue.
# HTTPS is used so the API key in the query string is not sent in clear text.
url = "https://api.eia.gov/series/?api_key="+eia_key+"&series_id=PET.EMM_EPM0_PTE_NUS_DPG.W"

# Fail fast on a stalled connection or an HTTP error instead of hitting a
# KeyError on "series" further down.
eia_response = requests.get(url, timeout=30)
eia_response.raise_for_status()
response = eia_response.json()

# Each entry of "data" is indexed as [period, value] by the next cell.
timeSeries = response["series"][0]["data"]

#### Reduction to the Last 52 Weeks

In [None]:
# Keep the first 52 entries of the series and number them 0, -1, -2, ...
# in the order they appear.
last52wks = timeSeries[0:52]
gasprice = [entry[1] for entry in last52wks]
wk = [-offset for offset in range(len(last52wks))]

weekprices = pd.DataFrame({"WeekNum": wk, "Price": gasprice})
weekprices.tail()

### GDP per State

In [None]:
# Load the GDP table, drop the "rank" column and shorten the headers
gdp_df = (
    pd.read_csv("Resources/GDP_sheet1.csv")
    .drop(columns="rank")
    .rename(columns={"US States": "State",
                     "GDP per capita (current dollars)": "GDP per capita"})
)

# The figures are text with "," separators: strip them, then cast to float
gdp_without_commas = gdp_df['GDP per capita'].str.replace(",", "")
gdp_df['GDP per capita'] = gdp_without_commas.astype(float)
gdp_df.head()

### US Refineries

In [None]:
# Import the list of US refineries and count them per state code
refineries_df = pd.read_csv("Resources/Refineries.csv")
refineries_df = refineries_df.rename(columns={
    "List of oil refineries in the US": "Refinery",
    "Unnamed: 1": "State",
})

# Strip parenthesised text from the refinery names. regex=True is required:
# since pandas 2.0 str.replace treats the pattern as a literal string by
# default, which would leave the names untouched.
refineries_df["Refinery"] = refineries_df["Refinery"].str.replace(r"\(.*\)", "", regex=True)

# Remove exact duplicate rows first; keep=False then discards every row
# whose refinery name still occurs more than once.
refineries_df = refineries_df.drop_duplicates()
refineries_df = refineries_df.drop_duplicates(subset="Refinery",keep=False)

# Attach state code and coordinates (inner merge on the state name), then
# count rows per state code.
refineries_df = refineries_df.merge(state_df,left_on="State",right_on="State")
refineries_grouped = refineries_df.groupby("State Code").count()
refineries_perState = refineries_grouped.reset_index()
refineries_perState = refineries_perState.drop(columns=["State","State Latitude","State Longitude"])
# The original call discarded its result; assign it so it takes effect.
refineries_perState = refineries_perState.replace("",np.nan)
refineries_perState.head()


In [None]:
# Look up coordinates for every refinery with Google Places text search
target_type = "refinery"  # NOTE(review): not used anywhere in this cell

refinery_name = []
refinery_lat = []
refinery_lng = []

# base url
base_url = "https://maps.googleapis.com/maps/api/place/textsearch/json"

# Read the "Refinery" column by name; the original took position 0 of each
# row, which relies on deprecated positional Series indexing.
for target_search in refineries_df["Refinery"]:
    # set up a parameters dictionary
    params = {
        "query": target_search,
        "key": g_key}
    refinery_name.append(target_search)

    # Best-effort lookup: a failed request or a response without a usable
    # first result yields blank coordinates, which are dropped later.
    # Both values are appended together so the lists always stay aligned.
    try:
        response = requests.get(base_url, params=params, timeout=30).json()
        location = response["results"][0]["geometry"]["location"]
        found_lat, found_lng = location["lat"], location["lng"]
    except (requests.RequestException, ValueError, KeyError, IndexError):
        found_lat, found_lng = "", ""

    refinery_lat.append(found_lat)
    refinery_lng.append(found_lng)

In [None]:
# Assemble the lookup results into one table, one row per refinery
column_labels = ("Refinery", "Refinery Lat", "Refinery Lng")
column_values = (refinery_name, refinery_lat, refinery_lng)
refinery_place_df = pd.DataFrame(dict(zip(column_labels, column_values)))

In [None]:
# Blank cells become NaN, and every row containing a NaN is discarded
refinery_place_df = refinery_place_df.replace("", np.nan).dropna()
refinery_place_df.head()

### Calculating distance between Refineries and States

In [None]:
EARTH_RADIUS_M = 6371000  # Earth radius in metres (results are divided by 1000 for km later)


def _haversine_m(lat1, lng1, lat2, lng2):
    """Return the great-circle distance in metres between two points.

    All four arguments are coordinates in degrees. Implements the haversine
    formula: a = sin^2(dphi/2) + cos(phi1) * cos(phi2) * sin^2(dlambda/2).
    The original code used sin(dlambda) without halving the longitude
    difference, which gave wrong distances and could push `a` above 1.
    """
    phi1 = math.radians(lat1)
    phi2 = math.radians(lat2)
    half_dphi = math.radians(lat2 - lat1) / 2
    half_dlambda = math.radians(lng2 - lng1) / 2
    a = math.sin(half_dphi) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(half_dlambda) ** 2
    # Guard against rounding pushing `a` marginally outside [0, 1].
    a = min(1.0, max(0.0, a))
    return EARTH_RADIUS_M * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))


total = dict()
ref_name = refinery_place_df["Refinery"].to_list()
ref_lat = refinery_place_df["Refinery Lat"].to_numpy()
ref_lng = refinery_place_df["Refinery Lng"].to_numpy()
stt_lat = state_df["State Latitude"].to_numpy()
stt_lng = state_df["State Longitude"].to_numpy()
stt_code = state_df["State Code"].to_list()

# One column per refinery, holding its distance to every state in
# state_df row order.
for name, point2lat, point2lng in zip(ref_name, ref_lat, ref_lng):
    total[name] = [
        _haversine_m(point1lat, point1lng, point2lat, point2lng)
        for point1lat, point1lng in zip(stt_lat, stt_lng)
    ]

# Label each row with its state code.
total["State Code"] = stt_code
distance_df=pd.DataFrame.from_dict(total)
distance_df.columns

In [None]:
# Move "State Code" out of the columns and append it to the existing row
# index as a second level, so only the distance columns remain.
distance_df = distance_df.set_index(["State Code"], append=True)
distance_df.head()

In [None]:
# Row-wise mean and minimum over all refinery columns; dividing by 1000
# turns the values into the kilometres named in the column labels.
mean_distance = distance_df.mean(axis=1) / 1000
min_distance = distance_df.min(axis=1) / 1000

summary_columns = {
    "Mean Distance (km)": mean_distance,
    "Min Distance (km)": min_distance,
}
state_refinery_distance = pd.DataFrame(summary_columns)

# Turn the "State Code" index level into a column and discard the unnamed
# positional level, leaving a fresh 0..n-1 index.
state_refinery_distance = state_refinery_distance.reset_index(level="State Code")
state_refinery_distance = state_refinery_distance.reset_index(drop=True)
state_refinery_distance.head()

### Automobiles per State Information

In [None]:
# Load the vehicles-by-state table and rename "STATE" to "State" so it
# matches the merge key used by the other tables.
autos_df = pd.read_csv("Resources/autosbystate.csv").rename(columns={"STATE": "State"})

In [None]:
# Total number of motor vehicles: add the four category columns together,
# left to right.
vehicle_columns = ['TOTAL AUTOMOBILES', 'TOTAL BUSES', 'TOTAL TRUCKS', 'TOTAL MOTORCYCLES']
vehicle_total = autos_df[vehicle_columns[0]]
for vehicle_column in vehicle_columns[1:]:
    vehicle_total = vehicle_total + autos_df[vehicle_column]

autos_df['TOTAL MOTOR VEHICLES'] = vehicle_total

In [None]:
# Preview the first rows of the vehicles table
autos_df.head()

### Merging Information

In [None]:
# Build the final per-state table step by step:
#   1. state code and coordinates (default inner merge on "State")
#   2. GDP per capita             (left merge on "State")
#   3. refinery distance summary  (left merge on "State Code")
#   4. refinery counts            (left merge on "State Code"), after which
#      the "Refinery" column is renamed to "Refinery Count"
#   5. vehicle registrations      (left merge on "State")
daily_gasoline_df = (
    daily_gasoline_df
    .merge(state_df, left_on="State", right_on="State")
    .merge(gdp_df, on="State", how="left")
    .merge(state_refinery_distance, on="State Code", how="left")
    .merge(refineries_perState, on="State Code", how="left")
    .rename(columns={"Refinery": "Refinery Count"})
    .merge(autos_df, on="State", how="left")
)
daily_gasoline_df.columns

In [None]:
# Preview the first rows of the merged per-state table
daily_gasoline_df.head()

### Exporting all information

In [None]:
# Export all results as CSV files without the row index.
# to_csv does not create missing folders, so make sure "Data" exists first.
os.makedirs("Data", exist_ok=True)
daily_gasoline_df.to_csv("Data/working_data.csv",index=False)
refinery_place_df.to_csv("Data/refinery.csv",index=False)
weekprices.to_csv("Data/historicalGasPrice.csv",index=False)