# Gas Price Data
This notebook uses the EIA (US Energy Informaton Association) data to populate a data frame containing monthly gas prices for a sample of states for the year 2019 (query_year is a variable and can easily be changed)

In [379]:
### Dependencies and Keys
import requests
import json
import pandas as pd
import numpy as np
import cufflinks as cf
import plotly.graph_objects as go
import plotly.express as px 
from scipy.stats import linregress

from datetime import date
import datetime

# Import API key
from api_keys import eia_key
from api_keys import g_key


## EIA API Query Browser
#### https://www.eia.gov/developer/
#### https://www.eia.gov/opendata/
API CALL TO USE : http://api.eia.gov/category/?api_key=YOUR_API_KEY_HERE&category_id=240690

Children Categories:
* by Area
* by Price

##### EIA Stores the Date as a 6 character string YYYYMM
* The library datetime is used to convert the string to a date

In [413]:
# use pd.to_datetime to reformat the date
prices_df['Date'] = pd.to_datetime(pd.Series(prices_df['Date']), format="%Y%m")
prices_df.head(2)

Unnamed: 0,Date,Average Price
2,2019-12-01,3.163
3,2019-11-01,3.348


#### Get only data for 2019

In [416]:
start_date = datetime.datetime(2019, 1, 1)
end_date   = datetime.datetime(2019, 12, 31)

prices_df = prices_df[prices_df['Date'] > start_date]
prices_df = prices_df[prices_df['Date'] < end_date]
prices_df.head(2)

Unnamed: 0,Date,Average Price
2,2019-12-01,3.163
3,2019-11-01,3.348


### EIA API Queries 

The EIA Tracks prices for the following Regions, States and Cities in the Unites states
* U.S.
* East Coast (PADD 1)
* New England (PADD 1A)
* Central Atlantic (PADD 1B)
* Lower Atlantic (PADD 1C)
* Midwest (PADD 2)
* Gulf Coast (PADD 3)
* Rocky Mountain (PADD 4)
* West Coast (PADD 5)
* West Coast less California
* _California_
* _Colorado_
* _Florida_
* _Massachusetts_
* _Minnesota_
* _New York_
* _Ohio_
* _Texas_
* _Washington_
* Boston
* Chicago
* Cleveland
* Denver
* Houston
* Los Angeles
* Miami
* New York City
* San Francisco
* Seattle

This Notebook applies queries by state so we will only use the state data for the 9 states in italic

###### Set up the States to be Queried

In [417]:
eia_states = ['CA', 'CO', 'FL', 'MA', 'MN', 'NY', 'OH', 'TX', 'WA']

#### Average gas prices over all of USA

#### Loop through API Queries

In [418]:
# # states
by_city_df = pd.DataFrame(columns=['Date', 'Average Price'])

for state in eia_states:
    #print(state)
    series_id = f'PET.EMM_EPM0_PTE_S{state}_DPG.M' 
#     print(series_id)

    url = f"http://api.eia.gov/series/?api_key={eia_key}&series_id={series_id}"
    #print(url)
    response = requests.get(url).json()
#     #print(response)
#     #print(response.keys())   # response and series

    chart_title = response['series'][0]['name']
    chart_data  = response['series'][0]['data']
    #convert to dataframe
    prices_df = pd.DataFrame(chart_data, columns=['Date', 'Average Price'])
#   Convert to Date - overwriting date string
    prices_df['Date'] = pd.to_datetime(pd.Series(prices_df['Date']), format="%Y%m")
    prices_df = prices_df[prices_df['Date']>=start_date]
    prices_df = prices_df[prices_df['Date']<end_date]

    by_city_df  = by_city_df.merge(prices_df, on='Date',how='outer')
    by_city_df  = by_city_df.rename(columns={"Average Price_y": state})
    by_city_df  = by_city_df.rename(columns={"Average Price_x": state})
    by_city_df  = by_city_df.rename(columns={"Average Price": state})

by_city_df=by_city_df.dropna(how='all', axis=1)

by_city_df.head(2)

Unnamed: 0,Date,CA,CO,FL,MA,MN,NY,OH,TX,WA
0,2019-12-01,3.61,2.843,2.496,2.627,2.418,2.67,2.61,2.285,3.163
1,2019-11-01,3.944,2.89,2.426,2.631,2.486,2.675,2.529,2.299,3.348


#### A line chart to inspect the data

In [420]:
df = by_city_df
labels = df.columns[1:,]
fig = px.line(df, x="Date", y="CA", 
              title=f"Average Montly Gas Price by State ({start_date.date()} to {end_date.date()}) ")
for col in df.columns[1:,]:
    #print(col)
    fig.add_scatter(x=df['Date'],y=df[col], mode="lines", name=col)
y_title = 'Price in USD ($)'
fig.update_yaxes(title_text=y_title)

fig

#### END EIA Queries and Gas PRice Data Gathering

# ANALYSIS: Do Monthly Gas Prices Influence The Number of National Park Visitors? 

#### Approach
* Read in csv file from previous notebook which gathered visitor data from NPS and NPS STAT API
* Pull the visitor data for the states of interest (eia_states)
* Visually explore the data through a series of plots using plotly
* Plot visitor data against gas see if correlation

###### Some Preliminary Work to make things easier later

In [13]:
# Read in the csv containing state centroid coordinates = useful for mapping
centroids = pd.read_csv("./Resources/state_centroids.csv")
centroids.head(3)

Unnamed: 0,State,Latitude,Longitude
0,Alabama,32.7794,-86.8287
1,Alaska,64.0685,-152.2782
2,Arizona,34.2744,-111.6602


In [269]:
# Map State name to State Code
# create a dictionary of states and abbreviations
us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Northern Mariana Islands':'MP',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Palau': 'PW',
    'Pennsylvania': 'PA',
    'Puerto Rico': 'PR',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virgin Islands': 'VI',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY',
}
### Make a dict the other way around if needed
# abbrev_us_state = dict(map(reversed, us_state_abbrev.items()))

In [421]:
# Replace centroids full State with its abbreviation 
centroids = centroids.replace({"State": us_state_abbrev})
#centroids

In [436]:
gas_states_df = centroids[centroids['StateCode'].isin(eia_states)].reset_index()
gas_states_df = gas_states_df.drop('index', axis=1)
#gas_states_df

In [427]:
## Get the parks in the states of interest
parks_data = pd.read_csv("./Resources/state_park_visitor_v_pop_by_month.csv")
parks_data.head(5)

Unnamed: 0,UnitCode,Month,Visitors,StateCode,SpansStates,Park,Lat,Lon,Name,Population,Latitude,Longitude
0,HOBE,0,96,AL,1.0,Horseshoe Bend National Military Park,32.976689,-85.734136,Alabama,4850771.0,32.7794,-86.8287
1,HOBE,1,1961,AL,1.0,Horseshoe Bend National Military Park,32.976689,-85.734136,Alabama,4850771.0,32.7794,-86.8287
2,HOBE,2,6644,AL,1.0,Horseshoe Bend National Military Park,32.976689,-85.734136,Alabama,4850771.0,32.7794,-86.8287
3,HOBE,3,3756,AL,1.0,Horseshoe Bend National Military Park,32.976689,-85.734136,Alabama,4850771.0,32.7794,-86.8287
4,HOBE,4,6528,AL,1.0,Horseshoe Bend National Military Park,32.976689,-85.734136,Alabama,4850771.0,32.7794,-86.8287


#### Just take the EIA States for Analysis

In [246]:
# Remove parks in states we are not using 
park_states_df = parks_data[parks_data['StateCode'].isin(eia_states)].reset_index()
park_states_df[park_states_df['UnitCode'] == 'YOSE']
park_states_df = park_states_df.drop('index', axis=1)
park_states_df = park_states_df.drop_duplicates()
# park_states_df.tail(50)
#park_states_df[park_states_df['UnitCode'] == 'YOSE']


# Visually Inspecting Data

In [161]:
df.columns

Index(['name', 'lat', 'lon', 'pop', 'text'], dtype='object')

In [429]:
# PLOT VISITOR DATA
df = park_states_df[['Park', 'Visitors', 'Lat', 'Lon', 'Population']]
df = df.rename(columns={'Park':"name", 'Population':'pop', 'Lat':'lat', 'Lon':'lon'})
df['MVisitors'] = (df['Visitors']/1000)

df['text'] = df['name'] + '<br>National Park visitors ' + (round(df['MVisitors']).astype(str))+' thousand'
df.head()

colors = ["royalblue","crimson","lightseagreen","orange","lightgrey"]
cities = []

fig = go.Figure()
   
fig = go.Figure(data=go.Scattergeo(
        lon = df['lon'],
        lat = df['lat'],
        text = df['text'],
        mode = 'markers',
        marker_color = df['lat'],
        marker = dict(
            size = df['MVisitors'],
            #color = df['pop'],
            color = colors[3],
            line_color='rgb(40,40,40)',
            line_width=0.5,
            sizemode = 'area'),
           name = '{}'.format(df['name'])))
  
    
fig.update_layout(
        title_text = 'National Park Visitors in Sample States (2019)',
        #showlegend = True,
        geo = dict(
            scope = 'usa',
            landcolor = 'rgb(217, 217, 217)',
        )
    )

fig.show()

In [120]:
# Save the Image
fig.write_image("./images/visitors_select_states_map.png")

In [430]:
# PLOT VISITOR V POP DATA
df = park_states_df[['Park', 'Visitors', 'Lat', 'Lon', 'Population']]
df = df.rename(columns={'Park':"name", 'Population':'pop', 'Lat':'lat', 'Lon':'lon'})
df['Percentage Visitors'] = (df['Visitors']/df['pop'])*100

df['text'] = df['name'] + '<br>National Park visitors ' + (round(df['Percentage Visitors']).astype(str))+' %'
df.head()

colors = ["royalblue","crimson","lightseagreen","orange","lightgrey"]
cities = []

fig = go.Figure()
   
fig = go.Figure(data=go.Scattergeo(
        lon = df['lon'],
        lat = df['lat'],
        text = df['text'],
        mode = 'markers',
        marker_color = df['pop'],
        marker = dict(
            size = df['Percentage Visitors'],
            #color = df['pop'],
            color = colors[3],
            line_color='rgb(40,40,40)',
            line_width=0.5,
            sizemode = 'area'),
           name = '{}'.format(df['name'])))
  
    
fig.update_layout(
        title_text = 'Percentage of State Population that are National Park Visitors for select states (2019)',
        #showlegend = True,
        geo = dict(
            scope = 'usa',
            landcolor = 'rgb(217, 217, 217)',
        )
    )

fig.show()


In [431]:
# Save the Image
fig.write_image("./images/visitors_v_population_select_states_map.png")

In [138]:
#by_city_df

Unnamed: 0,Date,CA,CO,FL,MA,MN,NY,OH,TX,WA
0,2019-12-01,3.61,2.843,2.496,2.627,2.418,2.67,2.61,2.285,3.163
1,2019-11-01,3.944,2.89,2.426,2.631,2.486,2.675,2.529,2.299,3.348
2,2019-10-01,4.116,2.822,2.46,2.638,2.52,2.676,2.563,2.324,3.294
3,2019-09-01,3.687,2.692,2.472,2.661,2.533,2.728,2.595,2.358,3.134
4,2019-08-01,3.555,2.608,2.451,2.746,2.572,2.822,2.631,2.405,3.17
5,2019-07-01,3.668,2.733,2.641,2.779,2.622,2.868,2.754,2.52,3.242
6,2019-06-01,3.787,2.876,2.542,2.791,2.64,2.86,2.61,2.437,3.392
7,2019-05-01,4.019,2.92,2.709,2.894,2.745,2.967,2.715,2.602,3.513
8,2019-04-01,3.894,2.729,2.769,2.725,2.714,2.84,2.675,2.589,3.313
9,2019-03-01,3.342,2.398,2.577,2.516,2.478,2.622,2.461,2.33,2.905


#### Get Mean Gas Prices by State

In [439]:
cdf = [by_city_df.mean()]
gas_states_df['Mean Gas']=''  # Just create empty column
gas_states_df
for s in range(len(gas_states_df['StateCode'])):
    gas_states_df['Mean Gas'][s] = by_city_df.mean()[s]
gas_states_df.head(2)

Unnamed: 0,StateCode,Latitude,Longitude,Mean Gas
0,CA,37.1841,-119.4696,3.67417
1,CO,38.9972,-105.5478,2.65267


In [457]:
# PLOT GAS DATA
df = gas_states_df[['StateCode', 'Latitude', 'Longitude', 'Mean Gas']]
df = df.rename(columns={'StateCode':"name", 'Mean Gas':'pop', 'Latitude':'lat', 'Longitude':'lon'})

df['text'] = df['name'] + '<br>Average Annual Gas Prices ' + (df['pop'].astype(str))+' $'
df.head()

colors = ["royalblue","crimson","lightseagreen","orange","lightgrey"]
cities = []
scale = 10

fig = go.Figure()
   
fig = go.Figure(data=go.Scattergeo(
        lon = df['lon'],
        lat = df['lat'],
        text = df['text'],
        mode = 'markers',
        marker_color = df['lat'],
        marker = dict(
            size  = df['pop'][1]*scale,
            color = df['pop'],
            #color = colors[3],
            line_color='rgb(40,40,40)',
            line_width=0.5,
            sizemode = 'area'),
           name = '{}'.format(df['name'])))
  
    
fig.update_layout(
        title_text = 'Average Gas Prices in Sample States (2019)',
        #showlegend = True,
        geo = dict(
            scope = 'usa',
            landcolor = 'rgb(217, 217, 217)',
        )
    )

fig.show()


In [203]:
# Save the Image
fig.write_image("./images/average_gas_price_by_state_2019.png")

### Plot Yearly Visitors

In [449]:
yearly_visitors = park_states_df.groupby(['StateCode'], as_index=False).sum()
#yearly_visitors

In [450]:
### Yearly Visitors for Sample States
yearly_visitors = park_states_df.groupby(['StateCode'], as_index=False)['Visitors'].sum()

yearly_visitors['lon'] = gas_states_df['Longitude']
yearly_visitors['lat'] = gas_states_df['Latitude']
#gas_states_df
yearly_visitors['Mean Gas'] = gas_states_df['Mean Gas']
yearly_visitors.head(2)

Unnamed: 0,StateCode,Visitors,lon,lat,Mean Gas
0,CA,37742509,-119.4696,37.1841,3.67417
1,CO,7858765,-105.5478,38.9972,2.65267


In [451]:
# PLOT VISITOR DATA
df = yearly_visitors[['StateCode', 'lat', 'lon', 'Visitors']]
df = df.rename(columns={'StateCode':"name", 'Visitors':'pop'})

df['text'] = df['name'] + '<br>Total Annual Visitors ' + (round((df['pop']/1000)).astype(str))+' visitors (in thousands)'
df.head()

colors = ["royalblue","crimson","lightseagreen","orange","lightgrey"]
cities = []
scale = .0001

fig = go.Figure()
   
fig = go.Figure(data=go.Scattergeo(
        lon = df['lon'],
        lat = df['lat'],
        text = df['text'],
        mode = 'markers',
        marker_color = df['lat'],
        marker = dict(
            size  = df['pop'] * scale,
            color = df['pop'],
            #color = colors[3],
            line_color='rgb(40,40,40)',
            line_width=0.5,
            sizemode = 'area'),
           name = '{}'.format(df['name'])))
  
    
fig.update_layout(
        title_text = 'Annual Average Park Visitors in Sample States (2019)',
        #showlegend = True,
        geo = dict(
            scope = 'usa',
            landcolor = 'rgb(217, 217, 217)',
        )
    )

fig.show()


In [458]:
# Save the Image
fig.write_image("./images/average_visitor_number_by_state_2019.png")

In [461]:
#yearly_visitors

# Bar Charts

In [460]:
df = yearly_visitors
fig = go.Figure(data=[
    go.Bar(name='Gas Pices (USD)', x=df['StateCode'], y=df['Mean Gas']),
    go.Bar(name='Visitors (Million)', x=df['StateCode'], y=df['Visitors']/1000000)
])
x_title = "Select States"
y_title = "Gas($) Visitor(Million)"
fig.update_xaxes(title_text=x_title)
fig.update_yaxes(title_text=y_title)

fig.update_layout(
    title={
        'text': "Average Gas Price and Average Park Visitors (2019)",
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
# Change the bar mode           
fig.update_layout(barmode='group')
fig.show()

In [462]:
# Save the Image
fig.write_image("./images/BAR:ave_visitor_number_ave_gas_by_state_2019.png")

In [463]:
#by_city_df

In [464]:
gdf = by_city_df
fig = go.Figure(data=[
    go.Bar(name='CA', x=gdf['Date'], y=gdf['CA']),
    go.Bar(name='CO', x=gdf['Date'], y=gdf['CO']),
    go.Bar(name='FL', x=gdf['Date'], y=gdf['FL']),
    go.Bar(name='MA', x=gdf['Date'], y=gdf['MA']),
    go.Bar(name='MN', x=gdf['Date'], y=gdf['MN']),
    go.Bar(name='OH', x=gdf['Date'], y=gdf['OH']),
    go.Bar(name='TX', x=gdf['Date'], y=gdf['TX']),
    go.Bar(name='WA', x=gdf['Date'], y=gdf['WA'])
])
x_title = "Month"
y_title = "Gas Price ($)"
fig.update_xaxes(title_text=x_title)
fig.update_yaxes(title_text=y_title)

fig.update_layout(
    title={
        'text': "Average Gas Price by Select States (2019)",
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
# Change the bar mode           
fig.update_layout(barmode='group')
fig.show()

In [465]:
fig.write_image("./images/BAR:ave_gas_by_state_2019.png")

# Plot by MONTH

In [466]:
park_states_df
monthly_visitors = park_states_df.groupby(['StateCode', 'Month'], as_index=False)['Visitors'].sum()
monthly_visitors.head(2)

Unnamed: 0,StateCode,Month,Visitors
0,CA,0,1710816
1,CA,1,2276380


In [467]:
## Re read as manipulated earlier
centroids = pd.read_csv("./Resources/state_centroids.csv")
centroids = centroids.rename(columns={'State':'StateCode'})
centroids = centroids.replace({"StateCode": us_state_abbrev})
#centroids

In [468]:
monthly_visitors
monthly_visitors = monthly_visitors.merge(centroids, on='StateCode')
#monthly_visitors

In [469]:
# Function to get visitor data given statecode
def get_visitors(state):
    mask = monthly_visitors['StateCode'] == state
    return monthly_visitors[mask]['Visitors']


In [470]:
months={
    0: 'January',    1: 'February',   2: 'March',    3:'April',    4:'May',      5:'June', 
    6:'July',        7:'August',      8:'September', 9:'October', 10:'November', 11:'December'}

monthly_visitors = monthly_visitors.replace({"Month": months})
#monthly_visitors

In [471]:
# Make a Bar Chart of Monthly Visitors to parks in each state
df = monthly_visitors

fig = go.Figure(data=[
    go.Bar(name='CA', x=df['Month'], y=get_visitors('CA')),
    go.Bar(name='CO', x=df['Month'], y=get_visitors('CO')),
    go.Bar(name='FL', x=df['Month'], y=get_visitors('FL')),
    go.Bar(name='MA', x=df['Month'], y=get_visitors('MA')),
    go.Bar(name='MN', x=df['Month'], y=get_visitors('MN')),
    go.Bar(name='OH', x=df['Month'], y=get_visitors('OH')),
    go.Bar(name='TX', x=df['Month'], y=get_visitors('TX')),
    go.Bar(name='WA', x=df['Month'], y=get_visitors('WA'))
])
x_title = "Month"
y_title = "Average Park Visitor (Millions)"
fig.update_xaxes(title_text=x_title)
fig.update_yaxes(title_text=y_title)

fig.update_layout(
    title={
        'text': "Average Park Visitor by Select States (2019)",
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
# Change the bar mode           
fig.update_layout(barmode='group')
fig.show()

In [472]:
fig.write_image("./images/BAR:ave_visitor_by_state_2019.png")

# Bar Charts By Month

In [483]:
# loop around each state making the figure and saving the image
for state in eia_states:
    fig = go.Figure(data=[
        go.Bar(name="park visitors (M)", x=df['Month'], y=get_visitors(state)/1000000),
        go.Bar(name="gas price ($)", x=df['Month'], y=gdf[state])
    ])

    x_title = "Month"
    y_title = "Gas ($) Average Park Visitor (Millions)"
    fig.update_xaxes(title_text=x_title)
    fig.update_yaxes(title_text=y_title)

    fig.update_layout(
        title={
            'text': f"Average Park Visitor V Average Gas Prices for {state} (2019)",
            'y':0.9,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top'})
    # Change the bar mode           
    fig.update_layout(barmode='group')
    fig.show()
    fig.write_image(f"./images/BAR:{state}:visitor_gas_2019.png")

# Scatter Plots By Month


In [475]:
df = park_states_df.groupby(['StateCode', 'Month'], as_index=False)['Visitors'].sum()

In [484]:
state = 'CA'
for state in eia_states:
    fig = go.Figure(data=[
        go.Scatter(name="park visitors (M)", x=df['Month'], y=get_visitors(state)/1000000,  mode='markers', marker_size=get_visitors(state)/100000),
        go.Scatter(name="gas price ($)", x=df['Month'], y=gdf[state],  mode='markers', marker_size=gdf[state]*10)
    ])

    x_title = "Month"
    y_title = "Gas ($) Average Park Visitor (Millions)"
    fig.update_xaxes(title_text=x_title)
    fig.update_yaxes(title_text=y_title)

    fig.update_layout(
        title={
            'text': f"Average Park Visitor V Average Gas Prices for {state} (2019)",
            'y':0.9,
            'x':0.4,
            'xanchor': 'center',
            'yanchor': 'top'})
    # Change the bar mode           
    fig.update_layout(barmode='group')
    fig.show()
    fig.write_image(f"./images/SCT:{state}:visitor_gas_2019.png")

# Linear Regression

In [481]:
def plot_linear_regression(x_values, y_values, x_label, y_label):    

    # Do a line regression on x and y values to calculate values for the line
    (slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
    regress_values = x_values * slope + intercept
    
    # Longer print message - can uncomment if would prefer longer message
#     print('The correlation between ' + x_label + 
#       '\033[1m and \033[0m' +
#       'the ' + y_label +
#       '\033[1m is \033[0m' + 
#        f'\033[36m {rvalue:2.2f} \033[0m')


    r_squared = rvalue * rvalue;
    print(f'{y_label} V {x_label}')
    print('  The r-value'   + '\033[1m is \033[0m' + f'\033[36m {rvalue:2.6f} \033[0m')
    print('The r-squared' + '\033[1m is \033[0m' + f'\033[36m {r_squared:2.6f} \033[0m')

#     print(x_values)
#     print(regress_values)
    line_eq = ' y= '+ str(round(slope,2)) + 'x + ' + str(round(intercept,2))
    fig.add_scatter(x=x_values,y=regress_values, mode="lines", name=line_eq)
 

In [482]:
for state in eia_states:
    fig = go.Figure(data=[
        go.Scatter(name="park visitors v gas prices", x=gdf[state], y=get_visitors(state)/1000000,  mode='markers', marker_size=get_visitors(state)/100000),
        #go.Scatter(name="gas price ($)", x=df['Month'], y=gdf[state],  mode='markers', marker_size=gdf[state]*10)
    ])

    x_title = "Month"
    y_title = "Gas ($) Average Park Visitor (Millions)"
    fig.update_xaxes(title_text=x_title)
    fig.update_yaxes(title_text=y_title)

    fig.update_layout(
        title={
            'text': f"{state}: Average Park Visitor V Average Gas Prices for {state} (2019)",
            'y':0.9,
            'x':0.4,
            'xanchor': 'center',
            'yanchor': 'top'})
    # Change the bar mode           
    fig.update_layout(barmode='group')
    
    x = gdf[state]
    y=get_visitors(state)/1000000
    plot_linear_regression(x, y, state+':gas', state+":visitors")
    
    fig.show()
    fig.write_image(f"./images/REGRESSION:{state}:visitor_gas_2019.png")

CA:visitors V CA:gas
  The r-value[1m is [0m[36m 0.350875 [0m
The r-squared[1m is [0m[36m 0.123114 [0m


CO:visitors V CO:gas
  The r-value[1m is [0m[36m 0.402356 [0m
The r-squared[1m is [0m[36m 0.161890 [0m


FL:visitors V FL:gas
  The r-value[1m is [0m[36m -0.006072 [0m
The r-squared[1m is [0m[36m 0.000037 [0m


MA:visitors V MA:gas
  The r-value[1m is [0m[36m 0.701042 [0m
The r-squared[1m is [0m[36m 0.491460 [0m


MN:visitors V MN:gas
  The r-value[1m is [0m[36m 0.702308 [0m
The r-squared[1m is [0m[36m 0.493236 [0m


NY:visitors V NY:gas
  The r-value[1m is [0m[36m 0.690984 [0m
The r-squared[1m is [0m[36m 0.477459 [0m


OH:visitors V OH:gas
  The r-value[1m is [0m[36m 0.545505 [0m
The r-squared[1m is [0m[36m 0.297576 [0m


TX:visitors V TX:gas
  The r-value[1m is [0m[36m 0.589044 [0m
The r-squared[1m is [0m[36m 0.346973 [0m


WA:visitors V WA:gas
  The r-value[1m is [0m[36m 0.614378 [0m
The r-squared[1m is [0m[36m 0.377461 [0m


# End of Notebook