In [38]:
%run -i helper.py

<Figure size 432x288 with 0 Axes>

In [39]:
# Initial imports
import os
import requests
import pandas as pd
import numpy as np
import datetime as dt
from pathlib import Path
import yfinance as yf
from helper import *
from dotenv import load_dotenv
import json
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
%matplotlib inline

import panel as pn
from ipywidgets import interact
import ipywidgets as widgets
import plotly.express as px
from sklearn.linear_model import LinearRegression
pn.extension('plotly')
import hvplot.pandas
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')



In [40]:
# Load .env enviroment variables
load_dotenv()
eia_api_key = os.getenv("EIA_API_KEY")

# Set Start date and End Date
start_date = "2010-01-01"
end_date = "2014-12-30"


In [41]:
# fetch stock prices between start and end date using yfinance and convert to data frame
df_historic = yfinance_tickers_data("NG=F", start_date, end_date)
df_historic.head()

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2010-01-04,5.884
2010-01-05,5.637
2010-01-06,6.009
2010-01-07,5.806
2010-01-08,5.749


In [42]:
# Read Natural Gas Storage Data
# set storage data file path
df_storage_file = Path(r"Data\NG_STOR_WKLY_S1_W.csv")
# Reading Storage data and convert it to dataframe
df_storage_data = pd.read_csv(df_storage_file,index_col="Date", infer_datetime_format=True, parse_dates=True)

df_storage_data.head()

Unnamed: 0_level_0,48 States,East Region,Midwest Region,Mountain Region,Pacific Region,South Central,Salt South Central,Non Salt South Central
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-01-01,3117,769,900,195,268,985,159,826
2010-01-08,2850,703,820,185,257,886,123,763
2010-01-15,2607,642,750,176,246,793,91,702
2010-01-22,2521,616,710,171,235,789,102,687
2010-01-29,2406,582,661,164,221,779,108,671


In [43]:
# aggregate the weekly storage data into monthly, by averaging by month
df_storage_monthly_avg = format_strorage_monthly(df_storage_data)
df_storage_monthly_avg.head()

Unnamed: 0_level_0,48 States,East Region,Midwest Region,Mountain Region,Pacific Region,South Central,Salt South Central,Non Salt South Central,DATE
YearMonth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
201001,2700.2,662.4,768.2,178.2,245.4,846.4,116.6,729.8,2010-01-01
201002,1957.75,452.25,530.5,144.75,196.25,633.75,80.25,553.5,2010-02-01
201003,1626.25,340.0,421.0,127.0,180.5,557.75,75.75,482.0,2010-03-01
201004,1831.6,376.0,445.0,130.6,202.0,678.2,118.4,559.8,2010-04-01
201005,2220.25,479.0,534.75,151.0,247.25,808.5,145.5,663.0,2010-05-01


In [44]:
# Dictionary with ng comsumption settings, weather file names, and storage column names by US and states
region_info =  { "US" :
                {
                   "consumption" :  {"residential" : "NG.N3010US2.M", "industrial" : "NG.N3035US2.M"},
                   "weather_file" : r"",
                    "storage" :  "48 States"
                },  
    
                'PA' : 
                {
                   "consumption" :  {"residential" : "NG.N3010PA2.M", "industrial" : "NG.N3035PA2.M"},
                   "weather_file" : r"Data\Pittsburg_Area_Temp_2010-2014.csv",
                    "storage" :  r"East Region"
                },  
                 'CT' : {
                   "consumption" :  {"residential" : "NG.N3010CT2.M", "industrial" : "NG.N3035CT2.M"},
                    "weather_file": r"Data\Hartford_Area_Temp_2010-2014.csv",
                    "storage" : r"East Region"
                },
                 'IL' : {
                   "consumption" :  {"residential" : "NG.N3010IL2.M", "industrial" : "NG.N3035IL2.M"},
                    "weather_file": r"Data\Chicago_Area_Temp_2010-2014.csv",
                    "storage" : r"Midwest Region"
                },
                 'TX' : {
                   "consumption" :  {"residential" : "NG.N3010TX2.M", "industrial" : "NG.N3035TX2.M"},
                    "weather_file": r"Data\Dallas_Area_Temp_2010-2014.csv",
                    "storage" : r"South Central"
                },
                 'CA' : {
                   "consumption" :  {"residential" : "NG.N3010CA2.M", "industrial" : "NG.N3035CA2.M"},
                    "weather_file": r"Data\LosAngelus_Area_Temp_2010-2014.csv",
                    "storage" : r"Pacific Region"
                }                   
                
}

# init dictionary to store dataframes needed in this notebook
region_dfs = {}

#df_price_temperature = pd.DataFrame()

# Loop through the region_info dictionary and fetch data, cleanup and format data into various dataframes
for key, value in region_info.items():
    # init a dictionary for each key
    region_dfs[key] = {}
    
    # Fetch comumption data per region (key) both industrial and residential from EIA
    
    # Set residential series id
    series_id = region_info[key]["consumption"]["residential"]
    # Fetch residential data for region(key) from EIA
    df_industrial = eia_consumption_data_by_series_df(eia_api_key, series_id, key, "Industrial", start_date, end_date)

    # Set Industrial series id
    series_id = region_info[key]["consumption"]["industrial"]
    # Fetch Industrial data for region(key) from EIA
    df_res = eia_consumption_data_by_series_df(eia_api_key, series_id, key, "Residential", start_date, end_date)
    
    # Concat both data into a comsumption data frame
    df_comsumption = pd.concat((df_industrial, df_res), join="inner", axis=1 , sort=True).dropna()
    
    
    # Init the Storage data frame
    df_storage = pd.DataFrame()
    # Slice the regional storage data
    df_storage["Storage"] = df_storage_monthly_avg[region_info[key]["storage"]]
    
    
    # If region is US
    if key == "US":
        # Aggregrate the daily stock closing prices only as US has no weather into into monthly 
        df_avg_price = agg_stock_closing_price_monthly(df_historic)
        # Concat the stock price monly, conspumtion only and  storage monthly
        region_dfs[key]["combined"]  = pd.concat( [df_avg_price, df_comsumption, df_storage] ,
                                            join="inner", axis=1 , sort=True).dropna()

    else:
        # Get weather for region (using a city to represent a region/state)
        df_weather = weather_data(key, region_info[key]["weather_file"])
        
        # Set the weather data  for region
        #region_dfs[key]["weather"] = df_weather
        
        region_dfs[key]["price_temperature"] = pd.concat((df_historic, df_weather), join="inner", axis=1 , sort=True).dropna()
        # Aggegrate the stock prices and tempretaure into monthly values
        df_avg_price_temp =  agg_price_temperature_monthly(region_dfs[key]["price_temperature"])
        
        # Set the combined aggregated dataframes into one
        region_dfs[key]["combined"]  = pd.concat( [df_avg_price_temp, df_comsumption, df_storage ] ,
                                            join="inner", axis=1 , sort=True).dropna()
        


In [45]:
#region_dfs["TX"]["price_temperature"].reset_index()

In [64]:
# Shift data to try to get a relationship
bShift = True

# Plot the closing price vs temperature
def plot_price_temperature(state):
    
    # Get a copy data for the state
    df = region_dfs[state]["price_temperature"].copy()
    # reset index
    df = df.reset_index()
    # If shift, shift by -90 days
    if bShift:
        df["Avg Temp"] = df["Avg Temp"].shift(periods=-90)
    
    # make the plot with secondary axis
    fig = make_subplots(rows=1, cols=1,
                    specs=[[{"secondary_y": True}],
                           ],  subplot_titles = ["Relationship between NG Closing Prices & Temperature"])
    # 1st Plot closing price vs time
    fig.add_trace(
        go.Scatter( x=df["Date"], y=df["Close"], name="Closing Price", line=dict(color="#0000ff")),
        row=1, col=1, secondary_y=False
    )
    # 2nd Plot average temperarure as secondary y axes, ploted on the same plot
    fig.add_trace(
        go.Scatter( x=df["Date"], y=df["Avg Temp"], name="Average Temperature",  line=dict(color="#ff0000")),
        row=1, col=1, secondary_y=True, )

   
    
    # Set x-axis title
    fig.update_xaxes(title_text="Date")

    # Set y-axes titles
    fig.update_yaxes(title_text="Closing Price ($)", secondary_y=False)
    fig.update_yaxes(title_text="Temperature °F", secondary_y=True)
    
    # Set layout properties
    fig.update_layout(
    plot_bgcolor = "rgb(255,255,255)",        
    autosize=False,
    width=1000,
    height=500,
    margin=dict(
        l=50,
        r=50,
        b=100,
        t=100,
        pad=4
    )
    )
    # format x-axes
    fig.update_xaxes(tickangle=90,
                 tickmode = 'array',
                 tickvals = df['Date'][0::28],
                 tickformat = '%m-%Y', showgrid= False, ticks="inside", showline=True, linewidth=2, linecolor='black')
    #format y-axes
    fig.update_yaxes( showgrid= False, ticks="inside", showline=True, linewidth=2, linecolor='black')

    # return the figure
    return  fig

# Create the intercat panel for the plot
interact_panel = pn.interact(plot_price_temperature,state=list(region_info.keys())[-(len(region_info.keys()) - 1):]  )

interact_panel



In [65]:
# Shift data to try to get a relationship
bShift = True

# Plot storage vs temperature
def plot_storage_temperature(state):
    # Fetch a copy of data for the state
    df = region_dfs[state]["combined"].copy()
    # reset index
    df = df.reset_index()
    
    # Shift temperarure by -3 months
    if bShift:
        df["Avg Temp"] = df["Avg Temp"].shift(periods=-3)
    
    # Create a single subplot
    fig = make_subplots(rows=1, cols=1,
                    specs=[[{"secondary_y": True}],
                           ],  subplot_titles = ["Relationship between NG Storage & Temperature"])
    
    # create the ave temp vs date - 1st plot on secondary y axis
    fig.add_trace(
        go.Scatter( x=df["DATE"], y=df["Avg Temp"], name="Average Temperature",  line=dict(color="#ff0000")),
        row=1, col=1, secondary_y=True )
    # create the storage vs date - 2nd plot on primary y axis
    fig.add_trace(
        go.Scatter( x=df["DATE"], y=df["Storage"], name="Storage", line=dict(color="#0000ff")),
        row=1, col=1, secondary_y=False
    )

    
    # Set x-axis title
    fig.update_xaxes(title_text="Date")

    # Set y-axes titles
    fig.update_yaxes(title_text="Temperature °F", secondary_y=True)
    fig.update_yaxes(title_text="Storage", secondary_y=False)
    
    

    fig.update_layout(
    plot_bgcolor = "rgb(255,255,255)",        
    autosize=False,
    width=1000,
    height=500,
    margin=dict(
        l=50,
        r=50,
        b=100,
        t=100,
        pad=4
    ),
    xaxis_tickformat = '%m-%Y'      
    )
    fig.update_xaxes(tickangle=90,
                 tickmode = 'array',
                 tickvals = df['DATE'][0::2],
                 tickformat = '%m-%Y', showgrid= False, ticks="inside", showline=True, linewidth=2, linecolor='black')
    fig.update_yaxes( showgrid= False, ticks="inside", showline=True, linewidth=2, linecolor='black')

    
    return  fig

interact_panel = pn.interact(plot_storage_temperature,state=list(region_info.keys())[-(len(region_info.keys()) - 1):]  )

interact_panel



In [67]:
# Shift data to try to get a relationship
bShift = True
# Plot storage vs comsumption
def plot_storage_comnsumption(region):
    
    # fetch a copy of the region data
    df = region_dfs[region]["combined"].copy()
    
    # Names of residential and industrial column names
    str_res = "Industrial Consumption"
    str_indus = "Residential Consumption" 
    
    # Set title for both plots
    str_res_title = f"{region} - Relationship between Res. Consumption & Storage"
    str_indus_title = f"{region} - Relationship between  Indus. Consumption & Storage"
    
    # Shift cpomsumption data by -3 months
    df[str_res] = df[str_res].shift(periods=-3)
    df[str_indus] = df[str_indus].shift(periods=-3)
    
    # create 2 subplots one for residential, the other for Industrial
    fig = make_subplots(rows=1, cols=2,
                    specs=[[{"secondary_y": True} , {"secondary_y": True}],
                           ],  subplot_titles = [str_res_title, str_indus_title])

    # 1st Plot - Residential
    fig.add_trace(
        go.Scatter( x=df["DATE"], y=df[str_res], name=str_res,  line=dict(color="#ff0000")),
        row=1, col=1, secondary_y=False, )

    # 1st Plot - Storage on secondaru axes
    fig.add_trace(
        go.Scatter( x=df["DATE"], y=df["Storage"], name="Storage", line=dict(color="#0000ff")),
        row=1, col=1, secondary_y=True
    )

    # 2nd Plot - Industrial Consumption
    fig.add_trace(
        go.Scatter( x=df["DATE"], y=df[str_indus], name=str_indus , line=dict(color="#007f00")),
        row=1, col=2, secondary_y=False)
    # 2nd Plot - Storage Consumption
    fig.add_trace(
        go.Scatter(x=df["DATE"], y =df["Storage"], name="Storage", line=dict(color="#0000ff")),
        row=1, col=2, secondary_y=True
    )
    
    # Set x-axis title
    fig.update_xaxes(title_text="Date")    
    # Set y-axes titles
    fig.update_yaxes(title_text="Milliopn Cubic Feet - Comsumption", secondary_y=False)
    fig.update_yaxes(title_text="Storage", secondary_y=True)
    
    
    # Set Layout
    fig.update_layout(
    plot_bgcolor = "rgb(255,255,255)",
    autosize=True,
    width=1500,
    height=400
    
      
    )
    # update x & axes with ticks, tick format, tick lines
    fig.update_xaxes(tickangle=90, matches='x', title_text="Date", 
                 tickmode = 'array',
                 tickvals = df['DATE'][0::2],
                 tickformat = '%m-%Y', showgrid= False, ticks="inside", showline=True, linewidth=2, linecolor='black')
    fig.update_yaxes( showgrid= False, ticks="inside", showline=True, linewidth=2, linecolor='black')

    
    return  fig


interact_panel = pn.interact(plot_storage_comnsumption,region=list(region_info.keys()) )

interact_panel




In [68]:
bShift = True
# Plot storage vs closing prices
def plot_storage_closingprice(region):
    
    # fetch the data for the region
    df = region_dfs[region]["combined"].copy()  
    df = df.reset_index()
    
    # Shift storage ???
    if bShift:
        df["Storage"] = df["Storage"].shift(periods=-3)
    
    #display(df["Avg Temp"])
    fig = make_subplots(rows=1, cols=1,
                    specs=[[{"secondary_y": True}],
                           ],  subplot_titles = ["Relationship b/w Natural Gas Storage & Closing Price"])
    #display(df)
    # Top left
    fig.add_trace(
        go.Scatter( x=df["DATE"], y=df["Storage"], name="Monthly Storage",  line=dict(color="#ff0000")),
        row=1, col=1, secondary_y=False, )

    fig.add_trace(
        go.Scatter( x=df["DATE"], y=df["Close"], name="Closing Price", line=dict(color="#0000ff")),
        row=1, col=1, secondary_y=True
    )

    
    # Set x-axis title
    fig.update_xaxes(title_text="Date")

    # Set y-axes titles
    fig.update_yaxes(title_text="Storage", secondary_y=False)
    fig.update_yaxes(title_text="Closing Price ($)", secondary_y=True)
    
    

    fig.update_layout(
    plot_bgcolor = "rgb(255,255,255)",        
    autosize=False,
    width=1000,
    height=500,
    margin=dict(
        l=50,
        r=50,
        b=100,
        t=100,
        pad=4
    ),
    xaxis_tickformat = '%m-%Y'      
    )
    fig.update_xaxes(tickangle=90,
                 tickmode = 'array',
                 tickvals = df['DATE'][0::2],
                 tickformat = '%m-%Y', showgrid= False, ticks="inside", showline=True, linewidth=2, linecolor='black')
    fig.update_yaxes( showgrid= False, ticks="inside", showline=True, linewidth=2, linecolor='black')

    
    return  fig

interact_panel = pn.interact(plot_storage_closingprice,region=list(region_info.keys())  )

interact_panel



In [50]:
#region_dfs["CT"]["combined"].head()

In [69]:
# Plot consumption vs price
def plot_comsumption_price_plotly(region):
    #print(region)
    df = region_dfs[region]["combined"].copy()  
    str_res = "Industrial Consumption"
    str_indus = "Residential Consumption" 

    str_res_title = f"{region} - Relationship b/w Resid. Consumption & Closing Prices"
    str_indus_title = f"{region} - Relationship b/w Indus. Consumption & Closing Prices"
    
    
    
    df[str_res] = df[str_res].shift(periods=-3)
    df[str_indus] = df[str_indus].shift(periods=-3)
    
    
    fig = make_subplots(rows=1, cols=2,
                    specs=[[{"secondary_y": True} , {"secondary_y": True}],
                           ],  subplot_titles = [str_res_title, str_indus_title])

    # Top left
    fig.add_trace(
        go.Scatter( x=df["DATE"], y=df[str_res], name=str_res,  line=dict(color="#ff0000")),
        row=1, col=1, secondary_y=False, )

    fig.add_trace(
        go.Scatter( x=df["DATE"], y=df["Close"], name="Closing Price", line=dict(color="#0000ff")),
        row=1, col=1, secondary_y=True
    )

    # Top right
    fig.add_trace(
        go.Scatter( x=df["DATE"], y=df[str_indus], name=str_indus , line=dict(color="#007f00")),
        row=1, col=2, secondary_y=False)

    fig.add_trace(
        go.Scatter(x=df["DATE"], y =df["Close"], name="Closing Price", line=dict(color="#0000ff")),
        row=1, col=2, secondary_y=True
    )
    
    # Set x-axis title
    
    # Set y-axes titles
    fig.update_yaxes(title_text="Milliopn Cubic Feet - Comsumption", secondary_y=False)
    fig.update_yaxes(title_text="Closing Price ($)", secondary_y=True)
    
    
    fig.update_layout(
    plot_bgcolor = "rgb(255,255,255)",
    autosize=True,
    width=1500,
    height=400
    
      
    )
    fig.update_xaxes(tickangle=90, matches='x', title_text="Date", 
                 tickmode = 'array',
                 tickvals = df['DATE'][0::2],
                 tickformat = '%m-%Y', showgrid= False, ticks="inside", showline=True, linewidth=2, linecolor='black')
    fig.update_yaxes( showgrid= False, ticks="inside", showline=True, linewidth=2, linecolor='black')

    
    return  fig

interact_panel = pn.interact(plot_comsumption_price_plotly,region=list(region_info.keys()) )

interact_panel



US


In [70]:
# Plot consumption vs price
def plot_comsumption_temperaure_plotly(region):
    
    df = region_dfs[region]["combined"].copy()  
    str_res = "Industrial Consumption"
    str_indus = "Residential Consumption" 

    str_res_title = f"{region} - Relationship b/w Resid. Consumption & Temperature"
    str_indus_title = f"{region} - Relationship b/w Indus. Consumption & Temperature"
    
    
    
    df[str_res] = df[str_res] #.shift(periods=-3)
    df[str_indus] = df[str_indus] #.shift(periods=-3)
    
    
    fig = make_subplots(rows=1, cols=2,
                    specs=[[{"secondary_y": True} , {"secondary_y": True}],
                           ],  subplot_titles = [str_res_title, str_indus_title])

    # Top left
    fig.add_trace(
        go.Scatter( x=df["DATE"], y=df[str_res], name=str_res,  line=dict(color="#ff0000")),
        row=1, col=1, secondary_y=False, )

    fig.add_trace(
        go.Scatter( x=df["DATE"], y=df["Avg Temp"], name="Temperature", line=dict(color="#0000ff")),
        row=1, col=1, secondary_y=True
    )

    # Top right
    fig.add_trace(
        go.Scatter( x=df["DATE"], y=df[str_indus], name=str_indus , line=dict(color="#007f00")),
        row=1, col=2, secondary_y=False)

    fig.add_trace(
        go.Scatter(x=df["DATE"], y =df["Avg Temp"], name="Temperature", line=dict(color="#0000ff")),
        row=1, col=2, secondary_y=True
    )
    
    # Set x-axis title
    
    # Set y-axes titles
    fig.update_yaxes(title_text="Milliopn Cubic Feet - Comsumption", secondary_y=False)
    fig.update_yaxes(title_text="Temperature °F", secondary_y=True)
    
    
    fig.update_layout(
    plot_bgcolor = "rgb(255,255,255)",
    autosize=True,
    width=1500,
    height=400
    
      
    )
    fig.update_xaxes(tickangle=90, matches='x', title_text="Date", 
                 tickmode = 'array',
                 tickvals = df['DATE'][0::2],
                 tickformat = '%m-%Y', showgrid= False, ticks="inside", showline=True, linewidth=2, linecolor='black')
    fig.update_yaxes( showgrid= False, ticks="inside", showline=True, linewidth=2, linecolor='black')

    
    return  fig

interact_panel = pn.interact(plot_comsumption_price_plotly,region=list(region_info.keys()) )

interact_panel



US


In [53]:
#region_dfs["US"]["combined"] 

In [82]:
# Compute the corelation between all data per region
def compute_correlation(state):
    
    str_res = "Industrial Consumption"
    str_indus = "Residential Consumption" 

    df = region_dfs[state]["combined"].copy() 
    
    df.reset_index(inplace = True)
    df = df.set_index("DATE")
    #display(df)
        
    if state == "US":
        df = df.drop(columns = {"YearMonth" })
        df = df.pct_change().dropna()
        
    else:    
        df = df.drop(columns = {"YearMonth", "Maximum", "Minimum" })
        df = df.pct_change().dropna()
        df["Avg Temp"] = df["Avg Temp"].shift(-3)

        
    df[str_res] = df[str_res].shift(-3)
    df[str_indus] = df[str_indus].shift(-3)
    df["Storage"] = df["Storage"].shift(-3)
    correlation = df.corr()

    return correlation
    


interact_panel2 =pn.interact(compute_correlation,state=list(region_info.keys()) )

pn.Column(interact_panel2[0][0], interact_panel2[1][0], scroll=False, width=500)

In [71]:
def show_plot_correlation(state):
    stitle = f" Corelation for {state}"
    correlation = compute_correlation(state)
    hmap = correlation.hvplot.heatmap( height=500, width=700, colorbar=True , cmap="magma", title=stitle,  clim=(-1,1))
    return hmap

interact_panel =pn.interact(show_plot_correlation,state=list(region_info.keys()) )
interact_panel


In [119]:


def linear_regression(x, y, shift_period = -60):
    # Linear regresssion with temp and natural gas prices
    # Pull out average temp and natural gas closing price from the df
    x = x.shift(periods=shift_period).dropna()
    y = y.iloc[0: len(x)]
    #drop the index from both so the arrays are 1-dimensional
    x.reset_index(drop=True, inplace=True)
    x.reset_index(drop=True, inplace=True)
    #assign independent variable (x) and dependent variable (y) to the proper dataframe
    #reshape the data frames to pandas arrays
    x = np.array(x).reshape(-1,1)
    y = np.array(y)
    #print(X)
    #create the model and fit it to the data
    model = LinearRegression()
    model.fit(x,y)
    r_sq = model.score(x,y)
    #print("Coefficient of determination", r_sq)
    #print("Slope", model.coef_)
    #print("Intercept", model.intercept_)
    y_pred = model.predict(x)
    #print("Predicted response:", y_pred, sep='\n')
    return r_sq, model, y_pred
    
    

for key, value in region_dfs.items():
    if key != "US":
        
        df = region_dfs[key]["price_temperature"].copy()
        print(f"\n----------{key} - Avg Temp - Close ------------------")
        r_sq, linear_model, y_pred = linear_regression(df["Avg Temp"], df["Close"], shift_period =-90)
        print("Coefficient of determination", r_sq)
        print("Slope", linear_model.coef_)
        print("Intercept", linear_model.intercept_)
        print("Predicted response:", y_pred, sep='\n')
        
        linear_reg = pd.DataFrame()
        
        #linear_reg["Coefficient"] =  '3' #r_sq
        
        linear_reg["Slope"] = linear_model.coef_
        linear_reg["Intercept"] = linear_model.intercept_
        linear_reg["Coefficient  of determination"] =  r_sq
        linear_reg = linear_reg[['Coefficient  of determination', 'Intercept', 'Slope']]
        
        predic_response = pd.DataFrame(list(y_pred))
        display(linear_reg)
        
        region_dfs[key]["temp_close_lin_reg"] = linear_reg
        region_dfs[key]["temp_close_predic_response"] = predic_response
        
        
   # else:


print(f"\n\n")
for key, value in region_dfs.items():
    
    if key != "US":
        print(f"\n----------{key} - % Change Avg Temp - Close ------------------")
        df = region_dfs[key]["price_temperature"].copy()
        df_pct_change = df.pct_change().dropna()
        r_sq, linear_model, y_pred = linear_regression(df["Avg Temp"], df["Close"], shift_period =-90)
        r_sq, linear_model, y_pred = linear_regression(df["Avg Temp"], df["Close"], shift_period =-90)
        print("Coefficient of determination", r_sq)
        print("Slope", linear_model.coef_)
        print("Intercept", linear_model.intercept_)
        print("Predicted response:", y_pred, sep='\n')
        
        linear_reg["Slope"] = linear_model.coef_
        linear_reg["Intercept"] = linear_model.intercept_
        linear_reg["Coefficient  of determination"] =  r_sq
        linear_reg = linear_reg[['Coefficient  of determination', 'Intercept', 'Slope']]
        
        predic_response = pd.DataFrame(list(y_pred))
        display(linear_reg)
       
        #linear_reg = linear_reg[['x', 'y', 'a', 'b']]
        
        predic_response = pd.DataFrame(list(y_pred))
       
        
        region_dfs[key]["pct_temp_close_lin_reg"] = linear_reg
        region_dfs[key]["pct_temp_close_predic_response"] = predic_response
        
        


----------PA - Avg Temp - Close ------------------
Coefficient of determination 0.015665634558421626
Slope [0.00515673]
Intercept 3.569197159784058
Predicted response:
[3.88375741 3.92758957 3.85797378 ... 3.77288781 3.72647728 3.70069366]


Unnamed: 0,Coefficient of determination,Intercept,Slope
0,0.015666,3.569197,0.005157



----------CT - Avg Temp - Close ------------------
Coefficient of determination 0.013687743265033636
Slope [0.00518512]
Intercept 3.4885510801923596
Predicted response:
[3.88780517 3.85928702 3.89558285 ... 3.78928793 3.69854836 3.70114092]


Unnamed: 0,Coefficient of determination,Intercept,Slope
0,0.013688,3.488551,0.005185



----------IL - Avg Temp - Close ------------------
Coefficient of determination 0.012772565737196206
Slope [0.00416337]
Intercept 3.628852480691198
Predicted response:
[3.86408264 3.85367423 3.85991928 ... 3.80787721 3.73085494 3.68922128]


Unnamed: 0,Coefficient of determination,Intercept,Slope
0,0.012773,3.628852,0.004163



----------TX - Avg Temp - Close ------------------
Coefficient of determination 0.013687743265033636
Slope [0.00518512]
Intercept 3.4885510801923596
Predicted response:
[3.88780517 3.85928702 3.89558285 ... 3.78928793 3.69854836 3.70114092]


Unnamed: 0,Coefficient of determination,Intercept,Slope
0,0.013688,3.488551,0.005185



----------CA - Avg Temp - Close ------------------
Coefficient of determination 0.007769184706664878
Slope [0.00888296]
Intercept 3.2567766589085805
Predicted response:
[3.85193508 3.83861064 3.79863731 ... 3.72313214 3.71869066 3.71424918]


Unnamed: 0,Coefficient of determination,Intercept,Slope
0,0.007769,3.256777,0.008883






----------PA - % Change Avg Temp - Close ------------------
Coefficient of determination 0.015665634558421626
Slope [0.00515673]
Intercept 3.569197159784058
Predicted response:
[3.88375741 3.92758957 3.85797378 ... 3.77288781 3.72647728 3.70069366]


Unnamed: 0,Coefficient of determination,Intercept,Slope
0,0.015666,3.569197,0.005157



----------CT - % Change Avg Temp - Close ------------------
Coefficient of determination 0.013687743265033636
Slope [0.00518512]
Intercept 3.4885510801923596
Predicted response:
[3.88780517 3.85928702 3.89558285 ... 3.78928793 3.69854836 3.70114092]


Unnamed: 0,Coefficient of determination,Intercept,Slope
0,0.013688,3.488551,0.005185



----------IL - % Change Avg Temp - Close ------------------
Coefficient of determination 0.012772565737196206
Slope [0.00416337]
Intercept 3.628852480691198
Predicted response:
[3.86408264 3.85367423 3.85991928 ... 3.80787721 3.73085494 3.68922128]


Unnamed: 0,Coefficient of determination,Intercept,Slope
0,0.012773,3.628852,0.004163



----------TX - % Change Avg Temp - Close ------------------
Coefficient of determination 0.013687743265033636
Slope [0.00518512]
Intercept 3.4885510801923596
Predicted response:
[3.88780517 3.85928702 3.89558285 ... 3.78928793 3.69854836 3.70114092]


Unnamed: 0,Coefficient of determination,Intercept,Slope
0,0.013688,3.488551,0.005185



----------CA - % Change Avg Temp - Close ------------------
Coefficient of determination 0.007769184706664878
Slope [0.00888296]
Intercept 3.2567766589085805
Predicted response:
[3.85193508 3.83861064 3.79863731 ... 3.72313214 3.71869066 3.71424918]


Unnamed: 0,Coefficient of determination,Intercept,Slope
0,0.007769,3.256777,0.008883


In [102]:
region_dfs["PA"]["temp_close_lin_reg"]


Unnamed: 0,Coefficient of determination,Slope,Intercept
0,,0.005157,3.569197


In [121]:
# Compute the corelation between all data per region
def show_linear_reg_temp_close(state):
    lin_reg = region_dfs[state]["temp_close_lin_reg"].style.hide_index()

    return lin_reg
    


states = list(region_info.keys())[-(len(region_info.keys()) - 1):]  

interact_panel = pn.interact(show_linear_reg_temp_close, state = states  )

#interact_panel
pn.Column(interact_panel[0][0], interact_panel[1][0], scroll=False, width=500)



In [120]:
# Compute the corelation between all data per region
def show_linear_pct_reg_temp_close(state):
    lin_reg = region_dfs[state]["pct_temp_close_lin_reg"].style.hide_index()

    return lin_reg
    


states = list(region_info.keys())[-(len(region_info.keys()) - 1):]  

interact_panel = pn.interact(show_linear_pct_reg_temp_close, state = states  )

#interact_panel
pn.Column(interact_panel[0][0], interact_panel[1][0], scroll=False, width=500)

