In [11]:
import numpy as np 
import pandas as pd
import plotly
import plotly.figure_factory as ff
import plotly.graph_objects as go
from datetime import datetime
import plotly.express as px
import matplotlib.pyplot as plt
from pandas.plotting import lag_plot
import datetime as dt
import itertools
from plotly.subplots import make_subplots
from statsmodels.tsa.seasonal import DecomposeResult, seasonal_decompose
from statsmodels.tsa.arima_model import ARIMA
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from fbprophet import Prophet
import math

In [12]:
consumer_discretionary_sector_pd = pd.read_csv('KU_consumer_discretionary_sector_export.csv')
consumer_discretionary_sector_pd['date'] = pd.to_datetime(consumer_discretionary_sector_pd['date'])
consumer_discretionary_sector_pd = consumer_discretionary_sector_pd.rename(columns={'consumer_discretionary':'consumer_discretionary_sector'})

In [13]:
consumer_discretionary_sector_pd = consumer_discretionary_sector_pd.set_index('date')

In [14]:
df_nike = consumer_discretionary_sector_pd[['nike']]

In [15]:
#plot
nike_closing_figure = go.Figure(go.Scatter(x=df_nike.index, y=df_nike['nike']))
nike_closing_figure.update_layout(title='Nike Closing Price')
nike_closing_figure.update_yaxes(type='linear')

In [16]:
plt.figure(figsize=(16,8))
sp_closing_figure = go.Figure(go.Scatter(x=consumer_discretionary_sector_pd.index, y=consumer_discretionary_sector_pd['spstock']))
sp_closing_figure.update_layout(title='S&P Closing Price')
sp_closing_figure.update_yaxes(type='linear')

<Figure size 1152x576 with 0 Axes>

In [17]:
plt.figure(figsize=(16,8))
consumer_discretionary_closing_figure = go.Figure(go.Scatter(x=consumer_discretionary_sector_pd.index, y=consumer_discretionary_sector_pd['consumer_discretionary_sector']))
consumer_discretionary_closing_figure.update_layout(title='Consumer Discretionary Closing')
consumer_discretionary_closing_figure.update_yaxes(type='linear')

<Figure size 1152x576 with 0 Axes>

In [18]:
df_nike_close = df_nike[['nike']]
nike_list = np.array(df_nike['nike'], dtype=float)
hist_data = [nike_list]
group_labels=['Nike']
nike_dist = ff.create_distplot(hist_data, group_labels)
nike_dist.show()

In [19]:
df_sp_close = consumer_discretionary_sector_pd[['spstock']]
sp_list = np.array(consumer_discretionary_sector_pd['spstock'], dtype=float)
hist_data = [sp_list]
group_labels=['S&P']
sp_dist = ff.create_distplot(hist_data, group_labels)
sp_dist.show()

In [20]:
df_consumer_discretionary_close = consumer_discretionary_sector_pd[['consumer_discretionary_sector']]
consumer_discretionary_list = np.array(consumer_discretionary_sector_pd['consumer_discretionary_sector'], dtype=float)
hist_data = [consumer_discretionary_list]
group_labels=['Consumer Discretionary']
consumer_discretionary_dist = ff.create_distplot(hist_data, group_labels)
consumer_discretionary_dist.show()

In [21]:
def plot_seasonal_decompose(result:DecomposeResult, dates:pd.Series=None, title:str="Seasonal Decomposition"):
    x_values = dates if dates is not None else np.arange(len(result.observed))
    return (
        make_subplots(
            rows=4,
            cols=1,
            subplot_titles=["Observed", "Trend", "Seasonal", "Residuals"],
        )
        .add_trace(
            go.Scatter(x=x_values, y=result.observed, mode="lines", name='Observed'),
            row=1,
            col=1,
        )
        .add_trace(
            go.Scatter(x=x_values, y=result.trend, mode="lines", name='Trend'),
            row=2,
            col=1,
        )
        .add_trace(
            go.Scatter(x=x_values, y=result.seasonal, mode="lines", name='Seasonal'),
            row=3,
            col=1,
        )
        .add_trace(
            go.Scatter(x=x_values, y=result.resid, mode="lines", name='Residual'),
            row=4,
            col=1,
        )
        .update_layout(
            height=900, title=f'<b>{title}</b>', margin={'t':100}, title_x=0.5, showlegend=False
        )
    )

In [22]:
result = seasonal_decompose(nike_list, model='multiplicative', freq=30)
nike_seasonal = plot_seasonal_decompose(result)
nike_seasonal.update_layout(title='Nike Seasonal Decomposition')


the 'freq'' keyword is deprecated, use 'period' instead



In [23]:
#Looking at trend and seasonality from time series
result = seasonal_decompose(consumer_discretionary_list, model='multiplicative', freq=30)
consumer_discretionary_seasonal = plot_seasonal_decompose(result)
consumer_discretionary_seasonal.update_layout(title='Consumer Discretionary Seasonal Decomposition')



the 'freq'' keyword is deprecated, use 'period' instead



Since data is not stationary we will preform log transformation to eliminate trend

In [24]:
nike_log = np.log(nike_list)
df_nike_log = pd.DataFrame(nike_log, columns=['nike'])
df_nike_log['date'] = df_nike_close.index

In [25]:
consumer_discretionary_log = np.log(consumer_discretionary_list)
df_consumer_discretionary_log = pd.DataFrame(consumer_discretionary_log, columns=['consumer_discretionary_sector'])
df_consumer_discretionary_log['date'] = df_consumer_discretionary_close.index

Split data into train and test sets

In [26]:
nike_train_data= pd.DataFrame(df_nike_log.iloc[:int(df_nike_log.shape[0]*0.8)])
nike_test_data = pd.DataFrame(df_nike_log.iloc[int(df_nike_log.shape[0]*0.80):])
nike_test_train_fig = go.Figure()
nike_test_train_fig.add_trace(go.Scatter(x=nike_train_data['date'], y=nike_train_data['nike'], name='Train'))
nike_test_train_fig.add_trace(go.Scatter(x=nike_test_data['date'], y=nike_test_data['nike'], name='Test'))
nike_test_train_fig.update_layout(title='Nike Test Train Data')

In [27]:
consumer_discretionary_train_data= df_consumer_discretionary_log.iloc[:int(df_consumer_discretionary_log.shape[0]*0.8)]
consumer_discretionary_test_data = df_consumer_discretionary_log.iloc[int(df_consumer_discretionary_log.shape[0]*0.80):]
consumer_discretionary_test_train_fig = go.Figure()
consumer_discretionary_test_train_fig.add_trace(go.Scatter(x=consumer_discretionary_train_data['date'], y=consumer_discretionary_train_data['consumer_discretionary_sector'], name='Train'))
consumer_discretionary_test_train_fig.add_trace(go.Scatter(x=consumer_discretionary_test_data['date'], y=consumer_discretionary_test_data['consumer_discretionary_sector'], name='Test'))
consumer_discretionary_test_train_fig.update_layout(title='Consumer Discretionary Test Train Data')

This Machine Learning Model will run a linear regression, ARIMA, and a Facebook Prophet Model
-some limitations of these models will be they do not take into accoutn current world sitations (like COVID impacts on the economy)

Linear Regression Model

In [28]:
df_linear_nike = consumer_discretionary_sector_pd[['nike', 'spstock']]

In [29]:
df_linear_consumer_discretionary = consumer_discretionary_sector_pd[['consumer_discretionary_sector', 'spstock']]

In [30]:
# for linear regression model we need an x_train value, and a y_train value
X_train, X_test, y_train, y_test = train_test_split(df_linear_nike[['nike']], df_linear_nike[['spstock']], test_size=.2)

linear_regression_model = LinearRegression()
linear_regression_model.fit(X_train, y_train)

y_pred = pd.DataFrame(linear_regression_model.predict(X_test))

nike_linear_mse = mean_squared_error(y_test, y_pred)
nike_linear_mae = mean_absolute_error(y_test, y_pred)
nike_linear_rmse = math.sqrt(mean_squared_error(y_test, y_pred))

In [31]:
nike_linear_regression = go.Figure()
nike_linear_regression.add_trace(go.Scatter(x=X_train['nike'], y=y_train['spstock'], mode='markers', name='Train Data'))
nike_linear_regression.add_trace(go.Scatter(x=X_test['nike'], y=y_pred[0], name='Prediction'))
nike_linear_regression.update_xaxes(type='linear')
nike_linear_regression.update_yaxes(type='linear')
nike_linear_regression.update_layout(title='Nike vs S&P Linear Regression')

In [32]:
# for linear regression model we need an x_train value, and a y_train value
X_train, X_test, y_train, y_test = train_test_split(df_linear_consumer_discretionary[['consumer_discretionary_sector']], df_linear_nike[['spstock']], test_size=.2)

linear_regression_model = LinearRegression()
linear_regression_model.fit(X_train, y_train)

y_pred = pd.DataFrame(linear_regression_model.predict(X_test))

consumer_discretionary_linear_mse = mean_squared_error(y_test, y_pred)
consumer_discretionary_linear_mae = mean_absolute_error(y_test, y_pred)
consumer_discretionary_linear_rmse = math.sqrt(mean_squared_error(y_test, y_pred))

In [33]:
consumer_discretionary_linear_regression = go.Figure()
consumer_discretionary_linear_regression.add_trace(go.Scatter(x=X_train['consumer_discretionary_sector'], y=y_train['spstock'], mode='markers', name='Train Data'))
consumer_discretionary_linear_regression.add_trace(go.Scatter(x=X_test['consumer_discretionary_sector'], y=y_pred[0], name='Prediction'))
consumer_discretionary_linear_regression.update_xaxes(type='linear')
consumer_discretionary_linear_regression.update_yaxes(type='linear')
consumer_discretionary_linear_regression.update_layout(title='Consumer Discretionary Sector vs S&P Linear Regression')

ARIMA Model

For Nike

In [34]:
#Modeling
arima_model = ARIMA(nike_train_data['nike'], order=(1,1,0))
arima_fitted = arima_model.fit()

RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            2     M =           12

At X0         0 variables are exactly at the bounds

At iterate    0    f= -2.49391D+00    |proj g|=  7.82279D-02

At iterate    5    f= -2.49391D+00    |proj g|=  8.29559D-05

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    2      9     16      1     0     0   7.727D-06  -2.494D+00
  F =  -2.4939122600235515     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             




statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

removed, use:



 This problem is unconstrained.


In [35]:
#Forecast
arima_forecast, se, conf = arima_fitted.forecast(len(nike_test_data), alpha=0.05)

arima_fc_series = pd.DataFrame(arima_forecast, index=nike_test_data['date'])
lower_series = pd.DataFrame(conf[:,0], index=nike_test_data['date'])
upper_series = pd.DataFrame(conf[:,1], index=nike_test_data['date'])

In [36]:
#ARIMA Plot
nike_arima = go.Figure()
nike_arima.add_trace(go.Scatter(x=nike_train_data['date'], y=nike_train_data['nike'], name='Train'))
nike_arima.add_trace(go.Scatter(x=nike_test_data['date'], y=nike_test_data['nike'], name='Test'))
nike_arima.add_trace(go.Scatter(x=arima_fc_series.index, y=arima_fc_series[0], name='Forecast'))
nike_arima.add_trace(go.Scatter(x=lower_series.index, y=lower_series[0], fill='tonexty', fillcolor='rgba(0,100,80,0.1)', name='Lower Bound'))
nike_arima.add_trace(go.Scatter(x=upper_series.index, y=upper_series[0], fill='tonexty', fillcolor='rgba(0,100,80,0.1)', name='Upper Bound'))
nike_arima.update_layout(title='Nike ARIMA')

In [37]:
# ARIMA Model Statistics
nike_arima_mse = mean_squared_error(nike_test_data['nike'], arima_forecast)
nike_arima_mae = mean_absolute_error(nike_test_data['nike'], arima_forecast)
nike_arima_rmse = math.sqrt(mean_squared_error(nike_test_data['nike'], arima_forecast))

ARIMA Model for consumer_discretionary Sector

In [38]:
#Modeling
arima_model = ARIMA(consumer_discretionary_train_data['consumer_discretionary_sector'], order=(2,1,2))
arima_fitted = arima_model.fit()



statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

removed, use:



 This problem is unconstrained.


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            5     M =           12

At X0         0 variables are exactly at the bounds

At iterate    0    f= -2.99792D+00    |proj g|=  1.04500D-02

At iterate    5    f= -2.99793D+00    |proj g|=  1.79184D-02

At iterate   10    f= -2.99793D+00    |proj g|=  2.35052D-02

At iterate   15    f= -2.99794D+00    |proj g|=  2.55617D-01

At iterate   20    f= -2.99798D+00    |proj g|=  6.92930D-03

At iterate   25    f= -2.99798D+00    |proj g|=  9.36362D-03

At iterate   30    f= -2.99801D+00    |proj g|=  2.26637D-02

At iterate   35    f= -2.99801D+00    |proj g|=  1.55187D-03

At iterate   40    f= -2.99802D+00    |proj g|=  9.28457D-04

At iterate   45    f= -2.99802D+00    |proj g|=  1.30163D-04

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = nu

In [39]:
#Forecast
arima_forecast, se, conf = arima_fitted.forecast(len(consumer_discretionary_test_data), alpha=0.05)

arima_fc_series = pd.DataFrame(arima_forecast, index=consumer_discretionary_test_data['date'])
lower_series = pd.DataFrame(conf[:,0], index=consumer_discretionary_test_data['date'])
upper_series = pd.DataFrame(conf[:,1], index=consumer_discretionary_test_data['date'])

In [40]:
#ARIMA Plot
consumer_discretionary_arima = go.Figure()
consumer_discretionary_arima.add_trace(go.Scatter(x=consumer_discretionary_train_data['date'], y=consumer_discretionary_train_data['consumer_discretionary_sector'], name='Train'))
consumer_discretionary_arima.add_trace(go.Scatter(x=consumer_discretionary_test_data['date'], y=consumer_discretionary_test_data['consumer_discretionary_sector'], name='Test'))
consumer_discretionary_arima.add_trace(go.Scatter(x=arima_fc_series.index, y=arima_fc_series[0], name='Forecast'))
consumer_discretionary_arima.add_trace(go.Scatter(x=lower_series.index, y=lower_series[0], fill='tonexty', fillcolor='rgba(0,100,80,0.1)', name='Lower Bound'))
consumer_discretionary_arima.add_trace(go.Scatter(x=upper_series.index, y=upper_series[0], fill='tonexty', fillcolor='rgba(0,100,80,0.1)', name='Upper Bound'))
consumer_discretionary_arima.update_layout(title='Consumer Discretionary ARIMA')

In [41]:
# ARIMA Model Statistics
consumer_discretionary_arima_mse = mean_squared_error(consumer_discretionary_test_data['consumer_discretionary_sector'], arima_forecast)
consumer_discretionary_arima_mae = mean_absolute_error(consumer_discretionary_test_data['consumer_discretionary_sector'], arima_forecast)
consumer_discretionary_arima_rmse = math.sqrt(mean_squared_error(consumer_discretionary_test_data['consumer_discretionary_sector'], arima_forecast))

Facebook Prophet Model for Nike

In [42]:

prophet_data = pd.DataFrame()
prophet_data['y'] = df_nike_log['nike']
prophet_data['ds'] = consumer_discretionary_sector_pd.index


#train and validation
prophet_train = prophet_data[:int(prophet_data.shape[0]*0.80)]
prophet_test = prophet_data[int(prophet_data.shape[0]*0.80):]

#fit the model
prophet_model = Prophet(interval_width=0.95)
prophet_model.fit(prophet_train)

#predictions
close_prices = prophet_model.make_future_dataframe(periods=212)
forecast = prophet_model.predict(close_prices)

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Initial log joint probability = -2.12692
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       2586.76    0.00147217       1232.94           1           1      120   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     199       2636.32   0.000321102       745.363      0.1826      0.1826      236   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299       2670.19    0.00312541       697.045           1           1      347   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     399       2677.27     0.0108441       712.712           1           1      456   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     462       2686.35   8.32839e-06       219.909   2.428e-08       0.001      570  LS failed, Hessian reset 
     499       2688.93    0.00468245    

In [43]:
forecast = forecast.set_index('ds')
forecast = forecast.rename(columns={'yhat':'Prediction'})
forecast = forecast[forecast.index.dayofweek < 5]
forecast = forecast.loc['2021-05-27':'2021-12-31']

In [44]:
#plot
nike_prophet = go.Figure()
nike_prophet.add_trace(go.Scatter(x=nike_train_data['date'], y=nike_train_data['nike'], name='Train'))
nike_prophet.add_trace(go.Scatter(x=nike_test_data['date'], y=nike_test_data['nike'], name='Test'))
nike_prophet.add_trace(go.Scatter(x=forecast.index, y=forecast['Prediction'], name='Forecast'))
nike_prophet.add_trace(go.Scatter(x=forecast.index, y=forecast['yhat_lower'], fill='tonexty', fillcolor='rgba(0, 100, 80, 0.1)', name='Lower Bound'))
nike_prophet.add_trace(go.Scatter(x=forecast.index, y=forecast['yhat_upper'], fill='tonexty', fillcolor='rgba(0, 100, 80, 0.1)', name='Upper Bound'))
nike_prophet.update_layout(title='Nike Prophet')

In [45]:
#Prophet Model Statistics
nike_prophet_mse = mean_squared_error(nike_test_data['nike'], forecast['Prediction'])
nike_prophet_mae = mean_absolute_error(nike_test_data['nike'], forecast['Prediction'])
nike_prophet_rmse = math.sqrt(mean_squared_error(nike_test_data['nike'], forecast['Prediction']))

In [46]:
prophet_data = pd.DataFrame()
prophet_data['y'] = df_consumer_discretionary_log['consumer_discretionary_sector']
prophet_data['ds'] = consumer_discretionary_sector_pd.index

#train and validation
prophet_train = prophet_data[:int(prophet_data.shape[0]*0.80)]
prophet_test = prophet_data[int(prophet_data.shape[0]*0.80):]

#fit the model
prophet_model = Prophet(interval_width=0.95)
prophet_model.fit(prophet_train)

#predictions
close_prices = prophet_model.make_future_dataframe(periods=212)
forecast = prophet_model.predict(close_prices)

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Initial log joint probability = -2.07631
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       2871.47    0.00255496       4282.41           1           1      114   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     199       2915.05    0.00312665       1809.58           1           1      225   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299       2938.69   0.000433267       1026.25      0.3526      0.3526      339   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     399       2964.98   0.000730038       539.958           1           1      455   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     499       2969.77    0.00252715       962.302           1           1      564   
    Iter      log prob        ||dx||      ||grad||       alpha  

In [47]:
forecast = forecast.set_index('ds')
forecast = forecast.rename(columns={'yhat':'Prediction'})
forecast = forecast[forecast.index.dayofweek < 5]
forecast = forecast.loc['2021-05-27':'2021-12-31']

In [48]:
#plot
consumer_discretionary_prophet = go.Figure()
consumer_discretionary_prophet.add_trace(go.Scatter(x=consumer_discretionary_train_data['date'], y=consumer_discretionary_train_data['consumer_discretionary_sector'], name='Train'))
consumer_discretionary_prophet.add_trace(go.Scatter(x=consumer_discretionary_test_data['date'], y=consumer_discretionary_test_data['consumer_discretionary_sector'], name='Test'))
consumer_discretionary_prophet.add_trace(go.Scatter(x=forecast.index, y=forecast['Prediction'], name='Forecast'))
consumer_discretionary_prophet.add_trace(go.Scatter(x=forecast.index, y=forecast['yhat_lower'], fill='tonexty', fillcolor='rgba(0, 100, 80, 0.1)', name='Lower Bound'))
consumer_discretionary_prophet.add_trace(go.Scatter(x=forecast.index, y=forecast['yhat_upper'], fill='tonexty', fillcolor='rgba(0, 100, 80, 0.1)', name='Upper Bound'))
consumer_discretionary_prophet.update_layout(title='Consumer Discretionary Prophet')

In [49]:
#Prophet Model Statistics
consumer_discretionary_prophet_mse = mean_squared_error(consumer_discretionary_test_data['consumer_discretionary_sector'], forecast['Prediction'])
consumer_discretionary_prophet_mae = mean_absolute_error(consumer_discretionary_test_data['consumer_discretionary_sector'], forecast['Prediction'])
consumer_discretionary_prophet_rmse = math.sqrt(mean_squared_error(consumer_discretionary_test_data['consumer_discretionary_sector'], forecast['Prediction']))

In [86]:
cell_hover = {  # for row hover use <tr> instead of <td>
    'selector': 'td:hover',
    'props': [('background-color', '#ffffb3')]
}
index_names = {
    'selector': '.index_name',
    'props': 'font-style: italic; color: darkgrey; font-weight:normal;'
}
headers = {
    'selector': 'th:not(.index_name)',
    'props': 'background-color: #000066; color: white;'
}

In [89]:
consumer_discretionary_statistics = pd.DataFrame(index=['MSE', 'MAE','RMSE'])
consumer_discretionary_statistics['Linear'] = [consumer_discretionary_linear_mse, consumer_discretionary_linear_mae, consumer_discretionary_linear_rmse]
consumer_discretionary_statistics['ARIMA'] = [consumer_discretionary_arima_mse, consumer_discretionary_arima_mae, consumer_discretionary_arima_rmse]
consumer_discretionary_statistics['Prophet'] = [consumer_discretionary_prophet_mse, consumer_discretionary_prophet_mae, consumer_discretionary_prophet_rmse]
consumer_discretionary_statistics = consumer_discretionary_statistics.style.set_table_styles([cell_hover, index_names, headers])

In [91]:
nike_statistics = pd.DataFrame(index=['MSE', 'MAE','RMSE'])
nike_statistics['Linear'] = [nike_linear_mse, nike_linear_mae, nike_linear_rmse]
nike_statistics['ARIMA'] = [nike_arima_mse, nike_arima_mae, nike_arima_rmse]
nike_statistics['Prophet'] = [nike_prophet_mse, nike_prophet_mae, nike_prophet_rmse]
nike_statistics = nike_statistics.style.set_table_styles([cell_hover, index_names, headers])

HTML Builder

In [74]:
style = '<link rel="stylesheet" href="style.css">'
header = '<h1> Final Project Overview </h1>'
consumer_discretionary_sector_header = '<h1> Consumer Discretionary Sector </h1>'
consumer_discretionary_sector_closing_blurb =''
consumer_discretionary_sector_data_blurb = ''
consumer_discretionary_sector_trend_blurb = ''
consumer_discretionary_sector_dist_blurb = ''
consumer_discretionary_sector_linear_blurb = ''
consumer_discretionary_sector_arima_blurb = ''
consumer_discretionary_sector_prophet_blurb = ''
consumer_discretionary_error_header = '<h3> Error Statistics </h3>'
nike_sector_header = '<h1> Nike Stock </h1>'
nike_sector_closing_blurb =''
nike_sector_data_blurb = ''
nike_sector_trend_blurb = ''
nike_sector_dist_blurb = ''
nike_sector_linear_blurb = ''
nike_sector_arima_blurb = ''
nike_sector_prophet_blurb = ''
nike_error_header = '<h3> Error Statistics </h3>'
sub_header = '<h2> An Exploratory Analysis of Stock Prediction </h2>'
topic_header = '<h3> Why this topic? </h3>'
topic_paragraph = '<p> We wanted to predict something relevant to the our economy. After analyzing various data sets, we decided we wanted to better understand the S&P 500. We each picked an individual sector and a corresponding stock in the S&P 500 to compute analysis and predictions over </p>'
data_header = '<h3> Data Exploration </h3>'
data_paragraph = '<p> The data contains daily closing prices of the S&P 500 from 2019 to 2020. The data was then broken down into an individual sector and a corresponding stock. These were: </p><ul><li> consumer_discretionary Sector: Nike </li><li> Consumer Staples Sector: Kellogg </li><li> Consumer Discretionary Sector: Nike </li><li> Energy Sector: Occidential Petroleum </li><li> Industrials Sector: CH Robinson <li></ul><p> Then to get a better understanding of the data three charts were made: a linear graph of the stock/sectors daily closing price history, a trend/seasonlity plot, and a distribution plot. These can be seen with descriptions on the corresponding stock/sector pages. </p>'
data_analysis_header = '<h3> Data Analysis </h3>'
data_analysis_paragraph = '<p> Our Selected topic entails using Machine Learning to predict future stock prices based on historical data. The Machine Learning models will take in stock data from the last three years to output prediction prices of the S&P 500. The models utilied were:</p><ul><li> Linear Regression: The purpose of viewing the linear regression model was to see how closely correlated each company’s stock or industry was related to the S&P (i.e. if the S&P increased would the company or industry also increase). </li><li> ARIMA: Autoregressive Integrated Moving Average. This is a statistical model that attempts to use past observations of the target variable to forecast its future values. Some limitations to the ARIMA model is that it has difficulty predicting turning points, it struggles with seasonality, and it performs well on short term forecasts but has poorer performance long term. </li><li> Facebook Prophet: The Facebook Prophet model is an additive regression model (like the ARIMA) but it includes growth trend and seasonal components. Some limitations of the Prophet model are it has a tendency to overfit the data, and it requires data to be in a specific format. </li></ul> '

In [82]:
nav_bar2 = '<div class="topnav"><a href="../overview.html"> Overview </a><div class="dropdown"><button class="dropbtn">Tech</button><div class="dropdown-content"><a href="tech_report.html">Tech Sector</a><a href="apple_report.html">Apple</a></div></div><div class="dropdown"><button class="dropbtn">Consumer Staples</button><div class="dropdown-content"><a href="consumer_staples_report.html">Consumer Staples Sector</a><a href="kellogg_report.html">Kellogg</a></div></div><div class="dropdown"><button class="dropbtn">Consumer Discretionary</button><div class="dropdown-content"><a href="consumer_discretionary_report.html">Consumer Discretionary Sector</a><a href="nike_report.html">Nike</a></div></div><div class="dropdown"><button class="dropbtn">Industrial</button><div class="dropdown-content"><a href="industrial_report.html">Industrial Sector</a><a href="ch_robinson_report.html">CH Robinson</a></div></div><div class="dropdown"><button class="dropbtn">Energy</button><div class="dropdown-content"><a href="energy_report.html">Energy Sector</a><a href="occidential_report.html">Occidential</a></div></div></div>'
nav_bar = '<div class="topnav"><a href="overview.html"> Overview </a><div class="dropdown"><button class="dropbtn">Tech</button><div class="dropdown-content"><a href="html_links/tech_report.html">Tech Sector</a><a href="html_links/apple_report.html">Apple</a></div></div><div class="dropdown"><button class="dropbtn">Consumer Staples</button><div class="dropdown-content"><a href="html_links/consumer_staples_report.html">Consumer Staples Sector</a><a href="html_links/kellogg_report.html">Kellogg</a></div></div><div class="dropdown"><button class="dropbtn">Consumer Discretionary</button><div class="dropdown-content"><a href="html_links/consumer_discretionary_report.html">Consumer Discretionary Sector</a><a href="html_links/nike_report.html">Nike</a></div></div><div class="dropdown"><button class="dropbtn">Industrial</button><div class="dropdown-content"><a href="html_links/industrial_report.html">Industrial Sector</a><a href="html_links/ch_robinson_report.html">CH Robinson</a></div></div><div class="dropdown"><button class="dropbtn">Energy</button><div class="dropdown-content"><a href="html_links/energy_report.html">Energy Sector</a><a href="html_links/occidential_report.html">Occidential</a></div></div></div>'

In [92]:
content_overview = style + nav_bar + '<br>' + header + sub_header + '<br>' + topic_header + topic_paragraph + '<br>' + data_header + data_paragraph +'<br>' + data_analysis_header + data_analysis_paragraph
html_overview = content_overview
with open('overview_report.html', 'w+') as file: file.write(html_overview)

In [93]:
content_consumer_discretionary = style + nav_bar2 + '<br>' + consumer_discretionary_sector_header + '<br><div align="center">' + consumer_discretionary_closing_figure.to_html() + consumer_discretionary_sector_closing_blurb + '</div><br><div align="center>'+ consumer_discretionary_test_train_fig.to_html() + consumer_discretionary_sector_data_blurb + '</div><br><div align="center">'+ consumer_discretionary_linear_regression.to_html() + consumer_discretionary_sector_linear_blurb + '</div><br><div align="center">' + consumer_discretionary_arima.to_html() + consumer_discretionary_sector_arima_blurb + '</div><br><div align="center">' + consumer_discretionary_prophet.to_html() + consumer_discretionary_sector_prophet_blurb + '</div><br><div align="center>'+ consumer_discretionary_error_header + consumer_discretionary_statistics.to_html() + '</div>'
html_consumer_discretionary = content_consumer_discretionary
with open('consumer_discretionary_report.html', 'w+') as file: file.write(html_consumer_discretionary)

In [94]:
content_nike = style + nav_bar2+ '<br>' + nike_sector_header + '<br><div align="center">' + nike_closing_figure.to_html() + nike_sector_closing_blurb + '</div><br><div align="center>'+ nike_test_train_fig.to_html() + nike_sector_data_blurb + '</div><br><div align="center">'+ nike_linear_regression.to_html() + nike_sector_linear_blurb + '</div><br><div align="center">' + nike_arima.to_html() + nike_sector_arima_blurb + '</div><br><div align="center">' + nike_prophet.to_html() + nike_sector_prophet_blurb + '</div><br><div align="center>'+ nike_error_header + nike_statistics.to_html() + '</div>'
html_nike = content_nike
with open('nike_report.html', 'w+') as file: file.write(html_nike)