In [69]:
import numpy as np 
import pandas as pd
import plotly
import plotly.figure_factory as ff
import plotly.graph_objects as go
from datetime import datetime
import plotly.express as px
import matplotlib.pyplot as plt
from pandas.plotting import lag_plot
import datetime as dt
import itertools
from plotly.subplots import make_subplots
from statsmodels.tsa.seasonal import DecomposeResult, seasonal_decompose
from statsmodels.tsa.arima_model import ARIMA
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from fbprophet import Prophet
import math

In [70]:
industrial_sector_pd = pd.read_csv('KU_industrials_sector_report.csv')
industrial_sector_pd['date'] = pd.to_datetime(industrial_sector_pd['date'])
industrial_sector_pd = industrial_sector_pd.set_index('date')

In [71]:
industrial_sector_pd = industrial_sector_pd.rename(columns={'chrobinson':'ch_robinson'})

In [72]:
df_ch_robinson = industrial_sector_pd[['ch_robinson']]

In [73]:
#plot
ch_robinson_closing_figure = go.Figure(go.Scatter(x=df_ch_robinson.index, y=df_ch_robinson['ch_robinson']))
ch_robinson_closing_figure.update_layout(title='CH Robinson Closing Price')
ch_robinson_closing_figure.update_yaxes(type='linear')

In [74]:
plt.figure(figsize=(16,8))
sp_closing_figure = go.Figure(go.Scatter(x=industrial_sector_pd.index, y=industrial_sector_pd['spstock']))
sp_closing_figure.update_layout(title='S&P Closing Price')
sp_closing_figure.update_yaxes(type='linear')

<Figure size 1152x576 with 0 Axes>

In [75]:
plt.figure(figsize=(16,8))
industrial_closing_figure = go.Figure(go.Scatter(x=industrial_sector_pd.index, y=industrial_sector_pd['industrial_sector']))
industrial_closing_figure.update_layout(title='Industrial Closing')
industrial_closing_figure.update_yaxes(type='linear')

<Figure size 1152x576 with 0 Axes>

In [76]:
df_ch_robinson_close = df_ch_robinson[['ch_robinson']]
ch_robinson_list = np.array(df_ch_robinson['ch_robinson'], dtype=float)
hist_data = [ch_robinson_list]
group_labels=['CH Robinson']
ch_robinson_dist = ff.create_distplot(hist_data, group_labels)
ch_robinson_dist.show()

In [77]:
df_sp_close = industrial_sector_pd[['spstock']]
sp_list = np.array(industrial_sector_pd['spstock'], dtype=float)
hist_data = [sp_list]
group_labels=['S&P']
sp_dist = ff.create_distplot(hist_data, group_labels)
sp_dist.show()

In [78]:
df_industrial_close = industrial_sector_pd[['industrial_sector']]
industrial_list = np.array(industrial_sector_pd['industrial_sector'], dtype=float)
hist_data = [industrial_list]
group_labels=['Industrial']
industrial_dist = ff.create_distplot(hist_data, group_labels)
industrial_dist.show()

In [79]:
def plot_seasonal_decompose(result:DecomposeResult, dates:pd.Series=None, title:str="Seasonal Decomposition"):
    x_values = dates if dates is not None else np.arange(len(result.observed))
    return (
        make_subplots(
            rows=4,
            cols=1,
            subplot_titles=["Observed", "Trend", "Seasonal", "Residuals"],
        )
        .add_trace(
            go.Scatter(x=x_values, y=result.observed, mode="lines", name='Observed'),
            row=1,
            col=1,
        )
        .add_trace(
            go.Scatter(x=x_values, y=result.trend, mode="lines", name='Trend'),
            row=2,
            col=1,
        )
        .add_trace(
            go.Scatter(x=x_values, y=result.seasonal, mode="lines", name='Seasonal'),
            row=3,
            col=1,
        )
        .add_trace(
            go.Scatter(x=x_values, y=result.resid, mode="lines", name='Residual'),
            row=4,
            col=1,
        )
        .update_layout(
            height=900, title=f'<b>{title}</b>', margin={'t':100}, title_x=0.5, showlegend=False
        )
    )

In [80]:
result = seasonal_decompose(ch_robinson_list, model='multiplicative', freq=30)
ch_robinson_seasonal = plot_seasonal_decompose(result)
ch_robinson_seasonal.update_layout(title='CH Robinson Seasonal Decomposition')


the 'freq'' keyword is deprecated, use 'period' instead



In [81]:
#Looking at trend and seasonality from time series
result = seasonal_decompose(industrial_list, model='multiplicative', freq=30)
industrial_seasonal = plot_seasonal_decompose(result)
industrial_seasonal.update_layout(title='Industrial Seasonal Decomposition')



the 'freq'' keyword is deprecated, use 'period' instead



Since data is not stationary we will preform log transformation to eliminate trend

In [82]:
ch_robinson_log = np.log(ch_robinson_list)
df_ch_robinson_log = pd.DataFrame(ch_robinson_log, columns=['ch_robinson'])
df_ch_robinson_log['date'] = df_ch_robinson_close.index

In [83]:
industrial_log = np.log(industrial_list)
df_industrial_log = pd.DataFrame(industrial_log, columns=['industrial_sector'])
df_industrial_log['date'] = df_industrial_close.index

Split data into train and test sets

In [84]:
ch_robinson_train_data= pd.DataFrame(df_ch_robinson_log.iloc[:int(df_ch_robinson_log.shape[0]*0.8)])
ch_robinson_test_data = pd.DataFrame(df_ch_robinson_log.iloc[int(df_ch_robinson_log.shape[0]*0.80):])
ch_robinson_test_train_fig = go.Figure()
ch_robinson_test_train_fig.add_trace(go.Scatter(x=ch_robinson_train_data['date'], y=ch_robinson_train_data['ch_robinson'], name='Train'))
ch_robinson_test_train_fig.add_trace(go.Scatter(x=ch_robinson_test_data['date'], y=ch_robinson_test_data['ch_robinson'], name='Test'))
ch_robinson_test_train_fig.update_layout(title='CH Robinson Test Train Data')

In [85]:
industrial_train_data= df_industrial_log.iloc[:int(df_industrial_log.shape[0]*0.8)]
industrial_test_data = df_industrial_log.iloc[int(df_industrial_log.shape[0]*0.80):]
industrial_test_train_fig = go.Figure()
industrial_test_train_fig.add_trace(go.Scatter(x=industrial_train_data['date'], y=industrial_train_data['industrial_sector'], name='Train'))
industrial_test_train_fig.add_trace(go.Scatter(x=industrial_test_data['date'], y=industrial_test_data['industrial_sector'], name='Test'))
industrial_test_train_fig.update_layout(title='Industrial Test Train Data')

This Machine Learning Model will run a linear regression, ARIMA, and a Facebook Prophet Model
-some limitations of these models will be they do not take into accoutn current world sitations (like COVID impacts on the economy)

Linear Regression Model

In [86]:
df_linear_ch_robinson = industrial_sector_pd[['ch_robinson', 'spstock']]

In [87]:
df_linear_industrial = industrial_sector_pd[['industrial_sector', 'spstock']]

In [88]:
# for linear regression model we need an x_train value, and a y_train value
X_train, X_test, y_train, y_test = train_test_split(df_linear_ch_robinson[['ch_robinson']], df_linear_ch_robinson[['spstock']], test_size=.2)

linear_regression_model = LinearRegression()
linear_regression_model.fit(X_train, y_train)

y_pred = pd.DataFrame(linear_regression_model.predict(X_test))

ch_robinson_linear_mse = mean_squared_error(y_test, y_pred)
ch_robinson_linear_mae = mean_absolute_error(y_test, y_pred)
ch_robinson_linear_rmse = math.sqrt(mean_squared_error(y_test, y_pred))

In [89]:
ch_robinson_linear_regression = go.Figure()
ch_robinson_linear_regression.add_trace(go.Scatter(x=X_train['ch_robinson'], y=y_train['spstock'], mode='markers', name='Train Data'))
ch_robinson_linear_regression.add_trace(go.Scatter(x=X_test['ch_robinson'], y=y_pred[0], name='Prediction'))
ch_robinson_linear_regression.update_xaxes(type='linear')
ch_robinson_linear_regression.update_yaxes(type='linear')
ch_robinson_linear_regression.update_layout(title='CH Robinson vs S&P Linear Regression')

In [90]:
# for linear regression model we need an x_train value, and a y_train value
X_train, X_test, y_train, y_test = train_test_split(df_linear_industrial[['industrial_sector']], df_linear_ch_robinson[['spstock']], test_size=.2)

linear_regression_model = LinearRegression()
linear_regression_model.fit(X_train, y_train)

y_pred = pd.DataFrame(linear_regression_model.predict(X_test))

industrial_linear_mse = mean_squared_error(y_test, y_pred)
industrial_linear_mae = mean_absolute_error(y_test, y_pred)
industrial_linear_rmse = math.sqrt(mean_squared_error(y_test, y_pred))

In [91]:
industrial_linear_regression = go.Figure()
industrial_linear_regression.add_trace(go.Scatter(x=X_train['industrial_sector'], y=y_train['spstock'], mode='markers', name='Train Data'))
industrial_linear_regression.add_trace(go.Scatter(x=X_test['industrial_sector'], y=y_pred[0], name='Prediction'))
industrial_linear_regression.update_xaxes(type='linear')
industrial_linear_regression.update_yaxes(type='linear')
industrial_linear_regression.update_layout(title='Industrial Sector vs S&P Linear Regression')

ARIMA Model

For CH Robinson

In [92]:
#Modeling
arima_model = ARIMA(ch_robinson_train_data['ch_robinson'], order=(1,1,0))
arima_fitted = arima_model.fit()

RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            2     M =           12

At X0         0 variables are exactly at the bounds

At iterate    0    f= -2.57057D+00    |proj g|=  1.82157D-01

At iterate    5    f= -2.57058D+00    |proj g|=  9.75753D-03

At iterate   10    f= -2.57058D+00    |proj g|=  2.39808D-06

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    2     10     18      1     0     0   2.398D-06  -2.571D+00
  F =  -2.5705780131326970     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             




statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

removed, use:



 This problem is unconstrained.


In [93]:
#Forecast
arima_forecast, se, conf = arima_fitted.forecast(len(ch_robinson_test_data), alpha=0.05)

arima_fc_series = pd.DataFrame(arima_forecast, index=ch_robinson_test_data['date'])
lower_series = pd.DataFrame(conf[:,0], index=ch_robinson_test_data['date'])
upper_series = pd.DataFrame(conf[:,1], index=ch_robinson_test_data['date'])

In [94]:
#ARIMA Plot
ch_robinson_arima = go.Figure()
ch_robinson_arima.add_trace(go.Scatter(x=ch_robinson_train_data['date'], y=ch_robinson_train_data['ch_robinson'], name='Train'))
ch_robinson_arima.add_trace(go.Scatter(x=ch_robinson_test_data['date'], y=ch_robinson_test_data['ch_robinson'], name='Test'))
ch_robinson_arima.add_trace(go.Scatter(x=arima_fc_series.index, y=arima_fc_series[0], name='Forecast'))
ch_robinson_arima.add_trace(go.Scatter(x=lower_series.index, y=lower_series[0], fill='tonexty', fillcolor='rgba(0,100,80,0.1)', name='Lower Bound'))
ch_robinson_arima.add_trace(go.Scatter(x=upper_series.index, y=upper_series[0], fill='tonexty', fillcolor='rgba(0,100,80,0.1)', name='Upper Bound'))
ch_robinson_arima.update_layout(title='CH Robinson ARIMA')

In [95]:
# ARIMA Model Statistics
ch_robinson_arima_mse = mean_squared_error(ch_robinson_test_data['ch_robinson'], arima_forecast)
ch_robinson_arima_mae = mean_absolute_error(ch_robinson_test_data['ch_robinson'], arima_forecast)
ch_robinson_arima_rmse = math.sqrt(mean_squared_error(ch_robinson_test_data['ch_robinson'], arima_forecast))

ARIMA Model for Industrial Sector

In [96]:
#Modeling
arima_model = ARIMA(industrial_train_data['industrial_sector'], order=(2,1,2))
arima_fitted = arima_model.fit()



statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

removed, use:



 This problem is unconstrained.


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            5     M =           12

At X0         0 variables are exactly at the bounds

At iterate    0    f= -2.66341D+00    |proj g|=  1.05447D+00

At iterate    5    f= -2.66365D+00    |proj g|=  2.37756D-03

At iterate   10    f= -2.66384D+00    |proj g|=  4.18732D-04

At iterate   15    f= -2.66385D+00    |proj g|=  3.25073D-05

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    5     19     62      2     0     0   6.217D-07  -2.664D+00
  F =  -2.6638465392298474     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             



 Bad direction in the line search;
   refresh the lbfgs memory and restart the iteration.

   evaluations in the last line search.  Termination
   may possibly be caused by a bad search direction.


In [97]:
#Forecast
arima_forecast, se, conf = arima_fitted.forecast(len(industrial_test_data), alpha=0.05)

arima_fc_series = pd.DataFrame(arima_forecast, index=industrial_test_data['date'])
lower_series = pd.DataFrame(conf[:,0], index=industrial_test_data['date'])
upper_series = pd.DataFrame(conf[:,1], index=industrial_test_data['date'])

In [98]:
#ARIMA Plot
industrial_arima = go.Figure()
industrial_arima.add_trace(go.Scatter(x=industrial_train_data['date'], y=industrial_train_data['industrial_sector'], name='Train'))
industrial_arima.add_trace(go.Scatter(x=industrial_test_data['date'], y=industrial_test_data['industrial_sector'], name='Test'))
industrial_arima.add_trace(go.Scatter(x=arima_fc_series.index, y=arima_fc_series[0], name='Forecast'))
industrial_arima.add_trace(go.Scatter(x=lower_series.index, y=lower_series[0], fill='tonexty', fillcolor='rgba(0,100,80,0.1)', name='Lower Bound'))
industrial_arima.add_trace(go.Scatter(x=upper_series.index, y=upper_series[0], fill='tonexty', fillcolor='rgba(0,100,80,0.1)', name='Upper Bound'))
industrial_arima.update_layout(title='Industrial ARIMA')

In [99]:
# ARIMA Model Statistics
industrial_arima_mse = mean_squared_error(industrial_test_data['industrial_sector'], arima_forecast)
industrial_arima_mae = mean_absolute_error(industrial_test_data['industrial_sector'], arima_forecast)
industrial_arima_rmse = math.sqrt(mean_squared_error(industrial_test_data['industrial_sector'], arima_forecast))

Facebook Prophet Model for CH Robinson

In [100]:

prophet_data = pd.DataFrame()
prophet_data['y'] = df_ch_robinson_log['ch_robinson']
prophet_data['ds'] = industrial_sector_pd.index


#train and validation
prophet_train = prophet_data[:int(prophet_data.shape[0]*0.80)]
prophet_test = prophet_data[int(prophet_data.shape[0]*0.80):]

#fit the model
prophet_model = Prophet(interval_width=0.95)
prophet_model.fit(prophet_train)

#predictions
close_prices = prophet_model.make_future_dataframe(periods=212)
forecast = prophet_model.predict(close_prices)

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Initial log joint probability = -2.20406
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       2658.95    0.00071038       991.964           1           1      125   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     199       2690.37    0.00478014       3720.45           1           1      234   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299       2715.76    0.00427074         418.7           1           1      347   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     399       2726.32   0.000923062       763.509           1           1      453   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     499        2732.3   0.000481723       482.217           1           1      567   
    Iter      log prob        ||dx||      ||grad||       alpha  

In [101]:
forecast = forecast.set_index('ds')
forecast = forecast.rename(columns={'yhat':'Prediction'})
forecast = forecast[forecast.index.dayofweek < 5]
forecast = forecast.loc['2021-05-27':'2021-12-31']

In [102]:
#plot
ch_robinson_prophet = go.Figure()
ch_robinson_prophet.add_trace(go.Scatter(x=ch_robinson_train_data['date'], y=ch_robinson_train_data['ch_robinson'], name='Train'))
ch_robinson_prophet.add_trace(go.Scatter(x=ch_robinson_test_data['date'], y=ch_robinson_test_data['ch_robinson'], name='Test'))
ch_robinson_prophet.add_trace(go.Scatter(x=forecast.index, y=forecast['Prediction'], name='Forecast'))
ch_robinson_prophet.add_trace(go.Scatter(x=forecast.index, y=forecast['yhat_lower'], fill='tonexty', fillcolor='rgba(0, 100, 80, 0.1)', name='Lower Bound'))
ch_robinson_prophet.add_trace(go.Scatter(x=forecast.index, y=forecast['yhat_upper'], fill='tonexty', fillcolor='rgba(0, 100, 80, 0.1)', name='Upper Bound'))
ch_robinson_prophet.update_layout(title='CH Robinson Prophet')

In [103]:
#Prophet Model Statistics
ch_robinson_prophet_mse = mean_squared_error(ch_robinson_test_data['ch_robinson'], forecast['Prediction'])
ch_robinson_prophet_mae = mean_absolute_error(ch_robinson_test_data['ch_robinson'], forecast['Prediction'])
ch_robinson_prophet_rmse = math.sqrt(mean_squared_error(ch_robinson_test_data['ch_robinson'], forecast['Prediction']))

In [104]:
prophet_data = pd.DataFrame()
prophet_data['y'] = df_industrial_log['industrial_sector']
prophet_data['ds'] = industrial_sector_pd.index

#train and validation
prophet_train = prophet_data[:int(prophet_data.shape[0]*0.80)]
prophet_test = prophet_data[int(prophet_data.shape[0]*0.80):]

#fit the model
prophet_model = Prophet(interval_width=0.95)
prophet_model.fit(prophet_train)

#predictions
close_prices = prophet_model.make_future_dataframe(periods=212)
forecast = prophet_model.predict(close_prices)

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Initial log joint probability = -2.2818
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       2499.04    0.00701387        2602.3           1           1      118   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     199       2559.45    0.00604114       2377.87           1           1      228   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299       2609.64    0.00252568       1158.22        0.48       0.048      338   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     399       2652.42    0.00139883       442.319       0.391       0.391      455   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     499        2662.8   5.12866e-05       193.986           1           1      572   
    Iter      log prob        ||dx||      ||grad||       alpha   

In [105]:
forecast = forecast.set_index('ds')
forecast = forecast.rename(columns={'yhat':'Prediction'})
forecast = forecast[forecast.index.dayofweek < 5]
forecast = forecast.loc['2021-05-27':'2021-12-31']

In [106]:
#plot
industrial_prophet = go.Figure()
industrial_prophet.add_trace(go.Scatter(x=industrial_train_data['date'], y=industrial_train_data['industrial_sector'], name='Train'))
industrial_prophet.add_trace(go.Scatter(x=industrial_test_data['date'], y=industrial_test_data['industrial_sector'], name='Test'))
industrial_prophet.add_trace(go.Scatter(x=forecast.index, y=forecast['Prediction'], name='Forecast'))
industrial_prophet.add_trace(go.Scatter(x=forecast.index, y=forecast['yhat_lower'], fill='tonexty', fillcolor='rgba(0, 100, 80, 0.1)', name='Lower Bound'))
industrial_prophet.add_trace(go.Scatter(x=forecast.index, y=forecast['yhat_upper'], fill='tonexty', fillcolor='rgba(0, 100, 80, 0.1)', name='Upper Bound'))
industrial_prophet.update_layout(title='Industrial Prophet')

In [107]:
#Prophet Model Statistics
industrial_prophet_mse = mean_squared_error(industrial_test_data['industrial_sector'], forecast['Prediction'])
industrial_prophet_mae = mean_absolute_error(industrial_test_data['industrial_sector'], forecast['Prediction'])
industrial_prophet_rmse = math.sqrt(mean_squared_error(industrial_test_data['industrial_sector'], forecast['Prediction']))

In [145]:
cell_hover = {  # for row hover use <tr> instead of <td>
    'selector': 'td:hover',
    'props': [('background-color', '#ffffb3')]
}
index_names = {
    'selector': '.index_name',
    'props': 'font-style: italic; color: darkgrey; font-weight:normal;'
}
headers = {
    'selector': 'th:not(.index_name)',
    'props': 'background-color: #000066; color: white;'
}

In [151]:
industrial_statistics = pd.DataFrame(index=['MSE', 'MAE','RMSE'])
industrial_statistics['Linear'] = [industrial_linear_mse, industrial_linear_mae, industrial_linear_rmse]
industrial_statistics['ARIMA'] = [industrial_arima_mse, industrial_arima_mae, industrial_arima_rmse]
industrial_statistics['Prophet'] = [industrial_prophet_mse, industrial_prophet_mae, industrial_prophet_rmse]
industrial_statistics = industrial_statistics.style.set_table_styles([cell_hover, index_names, headers])

In [152]:
ch_robinson_statistics = pd.DataFrame(index=['MSE', 'MAE','RMSE'])
ch_robinson_statistics['Linear'] = [ch_robinson_linear_mse, ch_robinson_linear_mae, ch_robinson_linear_rmse]
ch_robinson_statistics['ARIMA'] = [ch_robinson_arima_mse, ch_robinson_arima_mae, ch_robinson_arima_rmse]
ch_robinson_statistics['Prophet'] = [ch_robinson_prophet_mse, ch_robinson_prophet_mae, ch_robinson_prophet_rmse]
ch_robinson_statistics = ch_robinson_statistics.style.set_table_styles([cell_hover, index_names, headers])

HTML Builder

In [131]:
style = '<link rel="stylesheet" href="style.css">'
header = '<h1> Final Project Overview </h1>'
industrial_sector_header = '<h1> Industrial Sector </h1>'
industrial_sector_closing_blurb =''
industrial_sector_data_blurb = ''
industrial_sector_trend_blurb = ''
industrial_sector_dist_blurb = ''
industrial_sector_linear_blurb = ''
industrial_sector_arima_blurb = ''
industrial_sector_prophet_blurb = ''
industrial_error_header = '<h3> Error Statistics </h3>'
ch_robinson_sector_header = '<h1> CH Robinson Stock </h1>'
ch_robinson_sector_closing_blurb =''
ch_robinson_sector_data_blurb = ''
ch_robinson_sector_trend_blurb = ''
ch_robinson_sector_dist_blurb = ''
ch_robinson_sector_linear_blurb = ''
ch_robinson_sector_arima_blurb = ''
ch_robinson_sector_prophet_blurb = ''
ch_robinson_error_header = '<h3> Error Statistics </h3>'
sub_header = '<h2> An Exploratory Analysis of Stock Prediction </h2>'
topic_header = '<h3> Why this topic? </h3>'
topic_paragraph = '<p> We wanted to predict something relevant to the our economy. After analyzing various data sets, we decided we wanted to better understand the S&P 500. We each picked an individual sector and a corresponding stock in the S&P 500 to compute analysis and predictions over </p>'
data_header = '<h3> Data Exploration </h3>'
data_paragraph = '<p> The data contains daily closing prices of the S&P 500 from 2019 to 2020. The data was then broken down into an individual sector and a corresponding stock. These were: </p><ul><li> Industrial Sector: CH Robinson </li><li> Consumer Staples Sector: Kellogg </li><li> Consumer Discretionary Sector: Nike </li><li> Energy Sector: Occidential Petroleum </li><li> Industrials Sector: CH Robinson <li></ul><p> Then to get a better understanding of the data three charts were made: a linear graph of the stock/sectors daily closing price history, a trend/seasonlity plot, and a distribution plot. These can be seen with descriptions on the corresponding stock/sector pages. </p>'
data_analysis_header = '<h3> Data Analysis </h3>'
data_analysis_paragraph = '<p> Our Selected topic entails using Machine Learning to predict future stock prices based on historical data. The Machine Learning models will take in stock data from the last three years to output prediction prices of the S&P 500. The models utilied were:</p><ul><li> Linear Regression: The purpose of viewing the linear regression model was to see how closely correlated each company’s stock or industry was related to the S&P (i.e. if the S&P increased would the company or industry also increase). </li><li> ARIMA: Autoregressive Integrated Moving Average. This is a statistical model that attempts to use past observations of the target variable to forecast its future values. Some limitations to the ARIMA model is that it has difficulty predicting turning points, it struggles with seasonality, and it performs well on short term forecasts but has poorer performance long term. </li><li> Facebook Prophet: The Facebook Prophet model is an additive regression model (like the ARIMA) but it includes growth trend and seasonal components. Some limitations of the Prophet model are it has a tendency to overfit the data, and it requires data to be in a specific format. </li></ul> '

In [141]:
nav_bar2 = '<div class="topnav"><a href="../overview.html"> Overview </a><div class="dropdown"><button class="dropbtn">Tech</button><div class="dropdown-content"><a href="tech_report.html">Tech Sector</a><a href="apple_report.html">Apple</a></div></div><div class="dropdown"><button class="dropbtn">Consumer Staples</button><div class="dropdown-content"><a href="consumer_staples_report.html">Consumer Staples Sector</a><a href="kellogg_report.html">Kellogg</a></div></div><div class="dropdown"><button class="dropbtn">Consumer Discretionary</button><div class="dropdown-content"><a href="consumer_discretionary_report.html">Consumer Discretionary Sector</a><a href="nike_report.html">Nike</a></div></div><div class="dropdown"><button class="dropbtn">Industrial</button><div class="dropdown-content"><a href="industrial_report.html">Industrial Sector</a><a href="ch_robinson_report.html">CH Robinson</a></div></div><div class="dropdown"><button class="dropbtn">Energy</button><div class="dropdown-content"><a href="energy_report.html">Energy Sector</a><a href="occidential_report.html">Occidential</a></div></div></div>'
nav_bar = '<div class="topnav"><a href="overview.html"> Overview </a><div class="dropdown"><button class="dropbtn">Tech</button><div class="dropdown-content"><a href="html_links/tech_report.html">Tech Sector</a><a href="html_links/apple_report.html">Apple</a></div></div><div class="dropdown"><button class="dropbtn">Consumer Staples</button><div class="dropdown-content"><a href="html_links/consumer_staples_report.html">Consumer Staples Sector</a><a href="html_links/kellogg_report.html">Kellogg</a></div></div><div class="dropdown"><button class="dropbtn">Consumer Discretionary</button><div class="dropdown-content"><a href="html_links/consumer_discretionary_report.html">Consumer Discretionary Sector</a><a href="html_links/nike_report.html">Nike</a></div></div><div class="dropdown"><button class="dropbtn">Industrial</button><div class="dropdown-content"><a href="html_links/industrial_report.html">Industrial Sector</a><a href="html_links/ch_robinson_report.html">CH Robinson</a></div></div><div class="dropdown"><button class="dropbtn">Energy</button><div class="dropdown-content"><a href="html_links/energy_report.html">Energy Sector</a><a href="html_links/occidential_report.html">Occidential</a></div></div></div>'

In [142]:
content_overview = style + nav_bar + '<br>' + header + sub_header + '<br>' + topic_header + topic_paragraph + '<br>' + data_header + data_paragraph +'<br>' + data_analysis_header + data_analysis_paragraph
html_overview = content_overview
with open('overview_report.html', 'w+') as file: file.write(html_overview)

In [149]:
content_industrial = style + nav_bar2 + '<br>' + industrial_sector_header + '<br><div align="center">' + industrial_closing_figure.to_html() + industrial_sector_closing_blurb + '</div><br><div align="center>'+ industrial_test_train_fig.to_html() + industrial_sector_data_blurb + '</div><br><div align="center">'+ industrial_linear_regression.to_html() + industrial_sector_linear_blurb + '</div><br><div align="center">' + industrial_arima.to_html() + industrial_sector_arima_blurb + '</div><br><div align="center">' + industrial_prophet.to_html() + industrial_sector_prophet_blurb + '</div><br><div align="center>'+ industrial_error_header + industrial_statistics.to_html() + '</div>'
html_industrial = content_industrial
with open('industrial_report.html', 'w+') as file: file.write(html_industrial)

In [150]:
content_ch_robinson = style + nav_bar2 + '<br>' + ch_robinson_sector_header + '<br><div align="center">' + ch_robinson_closing_figure.to_html() + ch_robinson_sector_closing_blurb + '</div><br><div align="center>'+ ch_robinson_test_train_fig.to_html() + ch_robinson_sector_data_blurb + '</div><br><div align="center">'+ ch_robinson_linear_regression.to_html() + ch_robinson_sector_linear_blurb + '</div><br><div align="center">' + ch_robinson_arima.to_html() + ch_robinson_sector_arima_blurb + '</div><br><div align="center">' + ch_robinson_prophet.to_html() + ch_robinson_sector_prophet_blurb + '</div><br><div align="center>'+ ch_robinson_error_header + ch_robinson_statistics.to_html() + '</div>'
html_ch_robinson = content_ch_robinson
with open('ch_robinson_report.html', 'w+') as file: file.write(html_ch_robinson)