In [2]:
import numpy as np 
import pandas as pd
import plotly
import plotly.figure_factory as ff
import plotly.graph_objects as go
from datetime import datetime
import plotly.express as px
import matplotlib.pyplot as plt
from pandas.plotting import lag_plot
import datetime as dt
import itertools
from IPython.display import HTML
from plotly.subplots import make_subplots
from statsmodels.tsa.seasonal import DecomposeResult, seasonal_decompose
from statsmodels.tsa.arima_model import ARIMA
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from fbprophet import Prophet
import math

In [3]:
tech_sector_pd = pd.read_csv('KU_tech_sector_export.csv')
tech_sector_pd['date'] = pd.to_datetime(tech_sector_pd['date'])
tech_sector_pd = tech_sector_pd.set_index('date')

In [4]:
df_apple = tech_sector_pd[['apple']]

In [5]:
#plot
apple_closing_figure = go.Figure(go.Scatter(x=df_apple.index, y=df_apple['apple']))
apple_closing_figure.update_layout(title='Apple Closing Price')
apple_closing_figure.update_yaxes(type='linear')

In [6]:
plt.figure(figsize=(16,8))
sp_closing_figure = go.Figure(go.Scatter(x=tech_sector_pd.index, y=tech_sector_pd['spstock']))
sp_closing_figure.update_layout(title='S&P Closing Price')
sp_closing_figure.update_yaxes(type='linear')

<Figure size 1152x576 with 0 Axes>

In [7]:
plt.figure(figsize=(16,8))
tech_closing_figure = go.Figure(go.Scatter(x=tech_sector_pd.index, y=tech_sector_pd['tech_sector']))
tech_closing_figure.update_layout(title='Tech Closing')
tech_closing_figure.update_yaxes(type='linear')

<Figure size 1152x576 with 0 Axes>

In [8]:
df_apple_close = df_apple[['apple']]
apple_list = np.array(df_apple['apple'], dtype=float)
hist_data = [apple_list]
group_labels=['Apple']
apple_dist = ff.create_distplot(hist_data, group_labels)
apple_dist.show()

In [9]:
df_sp_close = tech_sector_pd[['spstock']]
sp_list = np.array(tech_sector_pd['spstock'], dtype=float)
hist_data = [sp_list]
group_labels=['S&P']
sp_dist = ff.create_distplot(hist_data, group_labels)
sp_dist.show()

In [10]:
df_tech_close = tech_sector_pd[['tech_sector']]
tech_list = np.array(tech_sector_pd['tech_sector'], dtype=float)
hist_data = [tech_list]
group_labels=['Tech']
tech_dist = ff.create_distplot(hist_data, group_labels)
tech_dist.show()

In [11]:
def plot_seasonal_decompose(result:DecomposeResult, dates:pd.Series=None, title:str="Seasonal Decomposition"):
    x_values = dates if dates is not None else np.arange(len(result.observed))
    return (
        make_subplots(
            rows=4,
            cols=1,
            subplot_titles=["Observed", "Trend", "Seasonal", "Residuals"],
        )
        .add_trace(
            go.Scatter(x=x_values, y=result.observed, mode="lines", name='Observed'),
            row=1,
            col=1,
        )
        .add_trace(
            go.Scatter(x=x_values, y=result.trend, mode="lines", name='Trend'),
            row=2,
            col=1,
        )
        .add_trace(
            go.Scatter(x=x_values, y=result.seasonal, mode="lines", name='Seasonal'),
            row=3,
            col=1,
        )
        .add_trace(
            go.Scatter(x=x_values, y=result.resid, mode="lines", name='Residual'),
            row=4,
            col=1,
        )
        .update_layout(
            height=900, title=f'<b>{title}</b>', margin={'t':100}, title_x=0.5, showlegend=False
        )
    )

In [12]:
result = seasonal_decompose(apple_list, model='multiplicative', freq=30)
apple_seasonal = plot_seasonal_decompose(result)
apple_seasonal.update_layout(title='Apple Seasonal Decomposition')


the 'freq'' keyword is deprecated, use 'period' instead



In [13]:
#Looking at trend and seasonality from time series
result = seasonal_decompose(tech_list, model='multiplicative', freq=30)
tech_seasonal = plot_seasonal_decompose(result)
tech_seasonal.update_layout(title='Tech Seasonal Decomposition')



the 'freq'' keyword is deprecated, use 'period' instead



Since data is not stationary we will preform log transformation to eliminate trend

In [14]:
apple_log = np.log(apple_list)
df_apple_log = pd.DataFrame(apple_log, columns=['apple'])
df_apple_log['date'] = df_apple_close.index

In [15]:
tech_log = np.log(tech_list)
df_tech_log = pd.DataFrame(tech_log, columns=['tech_sector'])
df_tech_log['date'] = df_tech_close.index

Split data into train and test sets

In [16]:
apple_train_data= pd.DataFrame(df_apple_log.iloc[:int(df_apple_log.shape[0]*0.8)])
apple_test_data = pd.DataFrame(df_apple_log.iloc[int(df_apple_log.shape[0]*0.80):])
apple_test_train_fig = go.Figure()
apple_test_train_fig.add_trace(go.Scatter(x=apple_train_data['date'], y=apple_train_data['apple'], name='Train'))
apple_test_train_fig.add_trace(go.Scatter(x=apple_test_data['date'], y=apple_test_data['apple'], name='Test'))
apple_test_train_fig.update_layout(title='Apple Test Train Data')

In [17]:
tech_train_data= df_tech_log.iloc[:int(df_tech_log.shape[0]*0.8)]
tech_test_data = df_tech_log.iloc[int(df_tech_log.shape[0]*0.80):]
tech_test_train_fig = go.Figure()
tech_test_train_fig.add_trace(go.Scatter(x=tech_train_data['date'], y=tech_train_data['tech_sector'], name='Train'))
tech_test_train_fig.add_trace(go.Scatter(x=tech_test_data['date'], y=tech_test_data['tech_sector'], name='Test'))
tech_test_train_fig.update_layout(title='Tech Test Train Data')

This Machine Learning Model will run a linear regression, ARIMA, and a Facebook Prophet Model
-some limitations of these models will be they do not take into accoutn current world sitations (like COVID impacts on the economy)

Linear Regression Model

In [18]:
df_linear_apple = tech_sector_pd[['apple', 'spstock']]

In [19]:
df_linear_tech = tech_sector_pd[['tech_sector', 'spstock']]

In [20]:
# for linear regression model we need an x_train value, and a y_train value
X_train, X_test, y_train, y_test = train_test_split(df_linear_apple[['apple']], df_linear_apple[['spstock']], test_size=.2)

linear_regression_model = LinearRegression()
linear_regression_model.fit(X_train, y_train)

y_pred = pd.DataFrame(linear_regression_model.predict(X_test))

apple_linear_mse = mean_squared_error(y_test, y_pred)
apple_linear_mae = mean_absolute_error(y_test, y_pred)
apple_linear_rmse = math.sqrt(mean_squared_error(y_test, y_pred))

In [21]:
apple_linear_regression = go.Figure()
apple_linear_regression.add_trace(go.Scatter(x=X_train['apple'], y=y_train['spstock'], mode='markers', name='Train Data'))
apple_linear_regression.add_trace(go.Scatter(x=X_test['apple'], y=y_pred[0], name='Prediction'))
apple_linear_regression.update_xaxes(type='linear')
apple_linear_regression.update_yaxes(type='linear')
apple_linear_regression.update_layout(title='Apple vs S&P Linear Regression')

In [22]:
# for linear regression model we need an x_train value, and a y_train value
X_train, X_test, y_train, y_test = train_test_split(df_linear_tech[['tech_sector']], df_linear_apple[['spstock']], test_size=.2)

linear_regression_model = LinearRegression()
linear_regression_model.fit(X_train, y_train)

y_pred = pd.DataFrame(linear_regression_model.predict(X_test))

tech_linear_mse = mean_squared_error(y_test, y_pred)
tech_linear_mae = mean_absolute_error(y_test, y_pred)
tech_linear_rmse = math.sqrt(mean_squared_error(y_test, y_pred))

In [23]:
tech_linear_regression = go.Figure()
tech_linear_regression.add_trace(go.Scatter(x=X_train['tech_sector'], y=y_train['spstock'], mode='markers', name='Train Data'))
tech_linear_regression.add_trace(go.Scatter(x=X_test['tech_sector'], y=y_pred[0], name='Prediction'))
tech_linear_regression.update_xaxes(type='linear')
tech_linear_regression.update_yaxes(type='linear')
tech_linear_regression.update_layout(title='Tech Sector vs S&P Linear Regression')

ARIMA Model

For Apple

In [24]:
#Modeling
arima_model = ARIMA(apple_train_data['apple'], order=(1,1,0))
arima_fitted = arima_model.fit()

RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            2     M =           12

At X0         0 variables are exactly at the bounds

At iterate    0    f= -2.37060D+00    |proj g|=  3.32236D-01

At iterate    5    f= -2.37062D+00    |proj g|=  7.00837D-02

At iterate   10    f= -2.37065D+00    |proj g|=  2.12351D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    2     14     31      1     0     0   1.279D-05  -2.371D+00
  F =  -2.3706456645958842     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             




statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

removed, use:



 This problem is unconstrained.

   evaluations in the last line search.  Termination
   may possibly be caused by a bad search direction.


In [25]:
#Forecast
arima_forecast, se, conf = arima_fitted.forecast(len(apple_test_data), alpha=0.05)

arima_fc_series = pd.DataFrame(arima_forecast, index=apple_test_data['date'])
lower_series = pd.DataFrame(conf[:,0], index=apple_test_data['date'])
upper_series = pd.DataFrame(conf[:,1], index=apple_test_data['date'])

In [26]:
#ARIMA Plot
apple_arima = go.Figure()
apple_arima.add_trace(go.Scatter(x=apple_train_data['date'], y=apple_train_data['apple'], name='Train'))
apple_arima.add_trace(go.Scatter(x=apple_test_data['date'], y=apple_test_data['apple'], name='Test'))
apple_arima.add_trace(go.Scatter(x=arima_fc_series.index, y=arima_fc_series[0], name='Forecast'))
apple_arima.add_trace(go.Scatter(x=lower_series.index, y=lower_series[0], fill='tonexty', fillcolor='rgba(0,100,80,0.1)', name='Lower Bound'))
apple_arima.add_trace(go.Scatter(x=upper_series.index, y=upper_series[0], fill='tonexty', fillcolor='rgba(0,100,80,0.1)', name='Upper Bound'))
apple_arima.update_layout(title='Apple ARIMA')

In [27]:
# ARIMA Model Statistics
apple_arima_mse = mean_squared_error(apple_test_data['apple'], arima_forecast)
apple_arima_mae = mean_absolute_error(apple_test_data['apple'], arima_forecast)
apple_arima_rmse = math.sqrt(mean_squared_error(apple_test_data['apple'], arima_forecast))

ARIMA Model for Tech Sector

In [28]:
#Modeling
arima_model = ARIMA(tech_train_data['tech_sector'], order=(2,1,2))
arima_fitted = arima_model.fit()



statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

removed, use:



 This problem is unconstrained.


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            5     M =           12

At X0         0 variables are exactly at the bounds

At iterate    0    f= -2.64170D+00    |proj g|=  7.98821D-02

At iterate    5    f= -2.64170D+00    |proj g|=  5.73848D-02

At iterate   10    f= -2.64176D+00    |proj g|=  6.28197D-01

At iterate   15    f= -2.64194D+00    |proj g|=  8.46700D-03

At iterate   20    f= -2.64242D+00    |proj g|=  1.46989D-01

At iterate   25    f= -2.64281D+00    |proj g|=  4.83080D-03

At iterate   30    f= -2.64282D+00    |proj g|=  8.77540D-02

At iterate   35    f= -2.64283D+00    |proj g|=  4.75966D-03

At iterate   40    f= -2.64283D+00    |proj g|=  6.38130D-03

At iterate   45    f= -2.64284D+00    |proj g|=  1.82605D-03

At iterate   50    f= -2.64284D+00    |proj g|=  1.52767D-05

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cau

In [29]:
#Forecast
arima_forecast, se, conf = arima_fitted.forecast(len(tech_test_data), alpha=0.05)

arima_fc_series = pd.DataFrame(arima_forecast, index=tech_test_data['date'])
lower_series = pd.DataFrame(conf[:,0], index=tech_test_data['date'])
upper_series = pd.DataFrame(conf[:,1], index=tech_test_data['date'])

In [30]:
#ARIMA Plot
tech_arima = go.Figure()
tech_arima.add_trace(go.Scatter(x=tech_train_data['date'], y=tech_train_data['tech_sector'], name='Train'))
tech_arima.add_trace(go.Scatter(x=tech_test_data['date'], y=tech_test_data['tech_sector'], name='Test'))
tech_arima.add_trace(go.Scatter(x=arima_fc_series.index, y=arima_fc_series[0], name='Forecast'))
tech_arima.add_trace(go.Scatter(x=lower_series.index, y=lower_series[0], fill='tonexty', fillcolor='rgba(0,100,80,0.1)', name='Lower Bound'))
tech_arima.add_trace(go.Scatter(x=upper_series.index, y=upper_series[0], fill='tonexty', fillcolor='rgba(0,100,80,0.1)', name='Upper Bound'))
tech_arima.update_layout(title='Tech ARIMA')

In [31]:
# ARIMA Model Statistics
tech_arima_mse = mean_squared_error(tech_test_data['tech_sector'], arima_forecast)
tech_arima_mae = mean_absolute_error(tech_test_data['tech_sector'], arima_forecast)
tech_arima_rmse = math.sqrt(mean_squared_error(tech_test_data['tech_sector'], arima_forecast))

Facebook Prophet Model for Apple

In [32]:

prophet_data = pd.DataFrame()
prophet_data['y'] = df_apple_log['apple']
prophet_data['ds'] = tech_sector_pd.index


#train and validation
prophet_train = prophet_data[:int(prophet_data.shape[0]*0.80)]
prophet_test = prophet_data[int(prophet_data.shape[0]*0.80):]

#fit the model
prophet_model = Prophet(interval_width=0.95)
prophet_model.fit(prophet_train)

#predictions
close_prices = prophet_model.make_future_dataframe(periods=212)
forecast = prophet_model.predict(close_prices)

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Initial log joint probability = -2.23024
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       2572.78   0.000747485       620.524           1           1      118   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     199        2595.3   0.000294246       1826.88      0.6393      0.6393      226   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299        2642.5    0.00326795       1808.16       4.602      0.4602      335   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     399       2665.82   0.000970188       540.408           1           1      453   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     499       2669.96   0.000345003       1391.64      0.3282      0.3282      564   
    Iter      log prob        ||dx||      ||grad||       alpha  

In [33]:
forecast = forecast.set_index('ds')
forecast = forecast.rename(columns={'yhat':'Prediction'})
forecast = forecast[forecast.index.dayofweek < 5]
forecast = forecast.loc['2021-05-27':'2021-12-31']

In [34]:
#plot
apple_prophet = go.Figure()
apple_prophet.add_trace(go.Scatter(x=apple_train_data['date'], y=apple_train_data['apple'], name='Train'))
apple_prophet.add_trace(go.Scatter(x=apple_test_data['date'], y=apple_test_data['apple'], name='Test'))
apple_prophet.add_trace(go.Scatter(x=forecast.index, y=forecast['Prediction'], name='Forecast'))
apple_prophet.add_trace(go.Scatter(x=forecast.index, y=forecast['yhat_lower'], fill='tonexty', fillcolor='rgba(0, 100, 80, 0.1)', name='Lower Bound'))
apple_prophet.add_trace(go.Scatter(x=forecast.index, y=forecast['yhat_upper'], fill='tonexty', fillcolor='rgba(0, 100, 80, 0.1)', name='Upper Bound'))
apple_prophet.update_layout(title='Apple Prophet')

In [35]:
#Prophet Model Statistics
apple_prophet_mse = mean_squared_error(apple_test_data['apple'], forecast['Prediction'])
apple_prophet_mae = mean_absolute_error(apple_test_data['apple'], forecast['Prediction'])
apple_prophet_rmse = math.sqrt(mean_squared_error(apple_test_data['apple'], forecast['Prediction']))

In [36]:
prophet_data = pd.DataFrame()
prophet_data['y'] = df_tech_log['tech_sector']
prophet_data['ds'] = tech_sector_pd.index

#train and validation
prophet_train = prophet_data[:int(prophet_data.shape[0]*0.80)]
prophet_test = prophet_data[int(prophet_data.shape[0]*0.80):]

#fit the model
prophet_model = Prophet(interval_width=0.95)
prophet_model.fit(prophet_train)

#predictions
close_prices = prophet_model.make_future_dataframe(periods=212)
forecast = prophet_model.predict(close_prices)

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Initial log joint probability = -2.0755
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       2694.34     0.0157375       2557.43           1           1      125   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     199       2770.86    0.00103004       991.445           1           1      236   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299        2807.2    0.00184741       1876.19           1           1      343   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     399       2844.91    0.00220445       990.696          10           1      453   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     499       2854.71     0.0391582       3474.16           1           1      563   
    Iter      log prob        ||dx||      ||grad||       alpha   

In [37]:
forecast = forecast.set_index('ds')
forecast = forecast.rename(columns={'yhat':'Prediction'})
forecast = forecast[forecast.index.dayofweek < 5]
forecast = forecast.loc['2021-05-27':'2021-12-31']

In [38]:
#plot
tech_prophet = go.Figure()
tech_prophet.add_trace(go.Scatter(x=tech_train_data['date'], y=tech_train_data['tech_sector'], name='Train'))
tech_prophet.add_trace(go.Scatter(x=tech_test_data['date'], y=tech_test_data['tech_sector'], name='Test'))
tech_prophet.add_trace(go.Scatter(x=forecast.index, y=forecast['Prediction'], name='Forecast'))
tech_prophet.add_trace(go.Scatter(x=forecast.index, y=forecast['yhat_lower'], fill='tonexty', fillcolor='rgba(0, 100, 80, 0.1)', name='Lower Bound'))
tech_prophet.add_trace(go.Scatter(x=forecast.index, y=forecast['yhat_upper'], fill='tonexty', fillcolor='rgba(0, 100, 80, 0.1)', name='Upper Bound'))
tech_prophet.update_layout(title='Tech Prophet')

In [39]:
#Prophet Model Statistics
tech_prophet_mse = mean_squared_error(tech_test_data['tech_sector'], forecast['Prediction'])
tech_prophet_mae = mean_absolute_error(tech_test_data['tech_sector'], forecast['Prediction'])
tech_prophet_rmse = math.sqrt(mean_squared_error(tech_test_data['tech_sector'], forecast['Prediction']))

In [40]:
cell_hover = {  # for row hover use <tr> instead of <td>
    'selector': 'td:hover',
    'props': [('background-color', '#ffffb3')]
}
index_names = {
    'selector': '.index_name',
    'props': 'font-style: italic; color: darkgrey; font-weight:normal;'
}
headers = {
    'selector': 'th:not(.index_name)',
    'props': 'background-color: #000066; color: white;'
}

In [41]:
tech_statistics = pd.DataFrame(index=['MSE', 'MAE','RMSE'])
tech_statistics['Linear'] = [tech_linear_mse, tech_linear_mae, tech_linear_rmse]
tech_statistics['ARIMA'] = [tech_arima_mse, tech_arima_mae, tech_arima_rmse]
tech_statistics['Prophet'] = [tech_prophet_mse, tech_prophet_mae, tech_prophet_rmse]
tech_statistics = tech_statistics.style.set_table_styles([cell_hover, index_names, headers])

In [42]:
apple_statistics = pd.DataFrame(index=['MSE', 'MAE','RMSE'])
apple_statistics['Linear'] = [apple_linear_mse, apple_linear_mae, apple_linear_rmse]
apple_statistics['ARIMA'] = [apple_arima_mse, apple_arima_mae, apple_arima_rmse]
apple_statistics['Prophet'] = [apple_prophet_mse, apple_prophet_mae, apple_prophet_rmse]
apple_statistics = apple_statistics.style.set_table_styles([cell_hover, index_names, headers])

HTML Builder

In [43]:
style = '<link rel="stylesheet" href="style.css">'
header = '<h1> Final Project Overview </h1>'
tech_sector_header = '<h1> Tech Sector </h1>'
tech_sector_closing_blurb =''
tech_sector_data_blurb = ''
tech_sector_trend_blurb = ''
tech_sector_dist_blurb = ''
tech_sector_linear_blurb = ''
tech_sector_arima_blurb = ''
tech_sector_prophet_blurb = ''
tech_error_header = '<h3> Error Statistics </h3>'
apple_sector_header = '<h1> Apple Stock </h1>'
apple_sector_closing_blurb =''
apple_sector_data_blurb = ''
apple_sector_trend_blurb = ''
apple_sector_dist_blurb = ''
apple_sector_linear_blurb = ''
apple_sector_arima_blurb = ''
apple_sector_prophet_blurb = ''
apple_error_header = '<h3> Error Statistics </h3>'
sub_header = '<h2> An Exploratory Analysis of Stock Prediction </h2>'
topic_header = '<h3> Why this topic? </h3>'
topic_paragraph = '<p> We wanted to predict something relevant to the our economy. After analyzing various data sets, we decided we wanted to better understand the S&P 500. We each picked an individual sector and a corresponding stock in the S&P 500 to compute analysis and predictions over </p>'
data_header = '<h3> Data Exploration </h3>'
data_paragraph = '<p> The data contains daily closing prices of the S&P 500 from 2019 to 2020. The data was then broken down into an individual sector and a corresponding stock. These were: </p><ul><li> Tech Sector: Apple </li><li> Consumer Staples Sector: Kellogg </li><li> Consumer Discretionary Sector: Nike </li><li> Energy Sector: Occidential Petroleum </li><li> Industrials Sector: CH Robinson <li></ul><p> Then to get a better understanding of the data three charts were made: a linear graph of the stock/sectors daily closing price history, a trend/seasonlity plot, and a distribution plot. These can be seen with descriptions on the corresponding stock/sector pages. </p>'
data_analysis_header = '<h3> Data Analysis </h3>'
data_analysis_paragraph = '<p> Our Selected topic entails using Machine Learning to predict future stock prices based on historical data. The Machine Learning models will take in stock data from the last three years to output prediction prices of the S&P 500. The models utilied were:</p><ul><li> Linear Regression: The purpose of viewing the linear regression model was to see how closely correlated each company’s stock or industry was related to the S&P (i.e. if the S&P increased would the company or industry also increase). </li><li> ARIMA: Autoregressive Integrated Moving Average. This is a statistical model that attempts to use past observations of the target variable to forecast its future values. Some limitations to the ARIMA model is that it has difficulty predicting turning points, it struggles with seasonality, and it performs well on short term forecasts but has poorer performance long term. </li><li> Facebook Prophet: The Facebook Prophet model is an additive regression model (like the ARIMA) but it includes growth trend and seasonal components. Some limitations of the Prophet model are it has a tendency to overfit the data, and it requires data to be in a specific format. </li></ul> '

In [48]:
nav_bar2 = '<div class="topnav"><a href="../overview.html"> Overview </a><div class="dropdown"><button class="dropbtn">Tech</button><div class="dropdown-content"><a href="tech_report.html">Tech Sector</a><a href="apple_report.html">Apple</a></div></div><div class="dropdown"><button class="dropbtn">Consumer Staples</button><div class="dropdown-content"><a href="consumer_staples_report.html">Consumer Staples Sector</a><a href="kellogg_report.html">Kellogg</a></div></div><div class="dropdown"><button class="dropbtn">Consumer Discretionary</button><div class="dropdown-content"><a href="consumer_discretionary_report.html">Consumer Discretionary Sector</a><a href="nike_report.html">Nike</a></div></div><div class="dropdown"><button class="dropbtn">Industrial</button><div class="dropdown-content"><a href="industrial_report.html">Industrial Sector</a><a href="ch_robinson_report.html">CH Robinson</a></div></div><div class="dropdown"><button class="dropbtn">Energy</button><div class="dropdown-content"><a href="energy_report.html">Energy Sector</a><a href="occidential_report.html">Occidential</a></div></div></div>'
nav_bar = '<div class="topnav"><a href="overview.html"> Overview </a><div class="dropdown"><button class="dropbtn">Tech</button><div class="dropdown-content"><a href="html_links/tech_report.html">Tech Sector</a><a href="html_links/apple_report.html">Apple</a></div></div><div class="dropdown"><button class="dropbtn">Consumer Staples</button><div class="dropdown-content"><a href="html_links/consumer_staples_report.html">Consumer Staples Sector</a><a href="html_links/kellogg_report.html">Kellogg</a></div></div><div class="dropdown"><button class="dropbtn">Consumer Discretionary</button><div class="dropdown-content"><a href="html_links/consumer_discretionary_report.html">Consumer Discretionary Sector</a><a href="html_links/nike_report.html">Nike</a></div></div><div class="dropdown"><button class="dropbtn">Industrial</button><div class="dropdown-content"><a href="html_links/industrial_report.html">Industrial Sector</a><a href="html_links/ch_robinson_report.html">CH Robinson</a></div></div><div class="dropdown"><button class="dropbtn">Energy</button><div class="dropdown-content"><a href="html_links/energy_report.html">Energy Sector</a><a href="html_links/occidential_report.html">Occidential</a></div></div></div>'


In [45]:
content_overview = style + nav_bar + '<br>' + header + sub_header + '<br>' + topic_header + topic_paragraph + '<br>' + data_header + data_paragraph +'<br>' + data_analysis_header + data_analysis_paragraph
html_overview = content_overview
with open('overview_report.html', 'w+') as file: file.write(html_overview)

In [46]:
content_tech = style + nav_bar2 + '<br>' + tech_sector_header + '<br><div align="center">' + tech_closing_figure.to_html() + tech_sector_closing_blurb + '</div><br><div align="center>'+ tech_test_train_fig.to_html() + tech_sector_data_blurb + '</div><br><div align="center">'+ tech_linear_regression.to_html() + tech_sector_linear_blurb + '</div><br><div align="center">' + tech_arima.to_html() + tech_sector_arima_blurb + '</div><br><div align="center">' + tech_prophet.to_html() + tech_sector_prophet_blurb + '</div><br>'+ tech_error_header + tech_statistics.to_html()
html_tech = content_tech
with open('tech_report.html', 'w+') as file: file.write(html_tech)

In [47]:
content_apple = style + nav_bar2 + '<br>' + apple_sector_header + '<br><div align="center">' + apple_closing_figure.to_html() + apple_sector_closing_blurb + '</div><br><div align="center>'+ apple_test_train_fig.to_html() + apple_sector_data_blurb + '</div><br><div align="center">'+ apple_linear_regression.to_html() + apple_sector_linear_blurb + '</div><br><div align="center">' + apple_arima.to_html() + apple_sector_arima_blurb + '</div><br><div align="center">' + apple_prophet.to_html() + apple_sector_prophet_blurb + '</div><br><div align="center>'+ apple_error_header + apple_statistics.to_html() + '</div>'
html_apple = content_apple
with open('apple_report.html', 'w+') as file: file.write(html_apple)