In [10]:
import numpy as np 
import pandas as pd
import plotly
import plotly.figure_factory as ff
import plotly.graph_objects as go
from datetime import datetime
import plotly.express as px
import matplotlib.pyplot as plt
from pandas.plotting import lag_plot
import datetime as dt
import itertools
from plotly.subplots import make_subplots
from statsmodels.tsa.seasonal import DecomposeResult, seasonal_decompose
from statsmodels.tsa.arima_model import ARIMA
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from fbprophet import Prophet
import math

In [11]:
energy_sector_pd = pd.read_csv('KU_energy_sector_export.csv')
energy_sector_pd['date'] = pd.to_datetime(energy_sector_pd['date'])
energy_sector_pd = energy_sector_pd.rename(columns={'oxy':'occidential'})

In [12]:
energy_sector_pd = energy_sector_pd.set_index('date')

In [13]:
df_occidential = energy_sector_pd[['occidential']]

In [14]:
#plot
occidential_closing_figure = go.Figure(go.Scatter(x=df_occidential.index, y=df_occidential['occidential']))
occidential_closing_figure.update_layout(title='Occidential Closing Price')
occidential_closing_figure.update_yaxes(type='linear')

In [15]:
plt.figure(figsize=(16,8))
sp_closing_figure = go.Figure(go.Scatter(x=energy_sector_pd.index, y=energy_sector_pd['spstock']))
sp_closing_figure.update_layout(title='S&P Closing Price')
sp_closing_figure.update_yaxes(type='linear')

<Figure size 1152x576 with 0 Axes>

In [16]:
plt.figure(figsize=(16,8))
energy_closing_figure = go.Figure(go.Scatter(x=energy_sector_pd.index, y=energy_sector_pd['energy_sector']))
energy_closing_figure.update_layout(title='Energy Closing')
energy_closing_figure.update_yaxes(type='linear')

<Figure size 1152x576 with 0 Axes>

In [17]:
df_occidential_close = df_occidential[['occidential']]
occidential_list = np.array(df_occidential['occidential'], dtype=float)
hist_data = [occidential_list]
group_labels=['Occidential']
occidential_dist = ff.create_distplot(hist_data, group_labels)
occidential_dist.show()

In [18]:
df_sp_close = energy_sector_pd[['spstock']]
sp_list = np.array(energy_sector_pd['spstock'], dtype=float)
hist_data = [sp_list]
group_labels=['S&P']
sp_dist = ff.create_distplot(hist_data, group_labels)
sp_dist.show()

In [19]:
df_energy_close = energy_sector_pd[['energy_sector']]
energy_list = np.array(energy_sector_pd['energy_sector'], dtype=float)
hist_data = [energy_list]
group_labels=['Energy']
energy_dist = ff.create_distplot(hist_data, group_labels)
energy_dist.show()

In [20]:
def plot_seasonal_decompose(result:DecomposeResult, dates:pd.Series=None, title:str="Seasonal Decomposition"):
    x_values = dates if dates is not None else np.arange(len(result.observed))
    return (
        make_subplots(
            rows=4,
            cols=1,
            subplot_titles=["Observed", "Trend", "Seasonal", "Residuals"],
        )
        .add_trace(
            go.Scatter(x=x_values, y=result.observed, mode="lines", name='Observed'),
            row=1,
            col=1,
        )
        .add_trace(
            go.Scatter(x=x_values, y=result.trend, mode="lines", name='Trend'),
            row=2,
            col=1,
        )
        .add_trace(
            go.Scatter(x=x_values, y=result.seasonal, mode="lines", name='Seasonal'),
            row=3,
            col=1,
        )
        .add_trace(
            go.Scatter(x=x_values, y=result.resid, mode="lines", name='Residual'),
            row=4,
            col=1,
        )
        .update_layout(
            height=900, title=f'<b>{title}</b>', margin={'t':100}, title_x=0.5, showlegend=False
        )
    )

In [21]:
result = seasonal_decompose(occidential_list, model='multiplicative', freq=30)
occidential_seasonal = plot_seasonal_decompose(result)
occidential_seasonal.update_layout(title='Occidential Seasonal Decomposition')


the 'freq'' keyword is deprecated, use 'period' instead



In [22]:
#Looking at trend and seasonality from time series
result = seasonal_decompose(energy_list, model='multiplicative', freq=30)
energy_seasonal = plot_seasonal_decompose(result)
energy_seasonal.update_layout(title='Energy Seasonal Decomposition')



the 'freq'' keyword is deprecated, use 'period' instead



Since data is not stationary we will preform log transformation to eliminate trend

In [23]:
occidential_log = np.log(occidential_list)
df_occidential_log = pd.DataFrame(occidential_log, columns=['occidential'])
df_occidential_log['date'] = df_occidential_close.index

In [24]:
energy_log = np.log(energy_list)
df_energy_log = pd.DataFrame(energy_log, columns=['energy_sector'])
df_energy_log['date'] = df_energy_close.index

Split data into train and test sets

In [25]:
occidential_train_data= pd.DataFrame(df_occidential_log.iloc[:int(df_occidential_log.shape[0]*0.8)])
occidential_test_data = pd.DataFrame(df_occidential_log.iloc[int(df_occidential_log.shape[0]*0.80):])
occidential_test_train_fig = go.Figure()
occidential_test_train_fig.add_trace(go.Scatter(x=occidential_train_data['date'], y=occidential_train_data['occidential'], name='Train'))
occidential_test_train_fig.add_trace(go.Scatter(x=occidential_test_data['date'], y=occidential_test_data['occidential'], name='Test'))
occidential_test_train_fig.update_layout(title='Occidential Test Train Data')

In [26]:
energy_train_data= df_energy_log.iloc[:int(df_energy_log.shape[0]*0.8)]
energy_test_data = df_energy_log.iloc[int(df_energy_log.shape[0]*0.80):]
energy_test_train_fig = go.Figure()
energy_test_train_fig.add_trace(go.Scatter(x=energy_train_data['date'], y=energy_train_data['energy_sector'], name='Train'))
energy_test_train_fig.add_trace(go.Scatter(x=energy_test_data['date'], y=energy_test_data['energy_sector'], name='Test'))
energy_test_train_fig.update_layout(title='Energy Test Train Data')

This Machine Learning Model will run a linear regression, ARIMA, and a Facebook Prophet Model
-some limitations of these models will be they do not take into accoutn current world sitations (like COVID impacts on the economy)

Linear Regression Model

In [27]:
df_linear_occidential = energy_sector_pd[['occidential', 'spstock']]

In [28]:
df_linear_energy = energy_sector_pd[['energy_sector', 'spstock']]

In [29]:
# for linear regression model we need an x_train value, and a y_train value
X_train, X_test, y_train, y_test = train_test_split(df_linear_occidential[['occidential']], df_linear_occidential[['spstock']], test_size=.2)

linear_regression_model = LinearRegression()
linear_regression_model.fit(X_train, y_train)

y_pred = pd.DataFrame(linear_regression_model.predict(X_test))

occidential_linear_mse = mean_squared_error(y_test, y_pred)
occidential_linear_mae = mean_absolute_error(y_test, y_pred)
occidential_linear_rmse = math.sqrt(mean_squared_error(y_test, y_pred))

In [30]:
occidential_linear_regression = go.Figure()
occidential_linear_regression.add_trace(go.Scatter(x=X_train['occidential'], y=y_train['spstock'], mode='markers', name='Train Data'))
occidential_linear_regression.add_trace(go.Scatter(x=X_test['occidential'], y=y_pred[0], name='Prediction'))
occidential_linear_regression.update_xaxes(type='linear')
occidential_linear_regression.update_yaxes(type='linear')
occidential_linear_regression.update_layout(title='occidential vs S&P Linear Regression')

In [31]:
# for linear regression model we need an x_train value, and a y_train value
X_train, X_test, y_train, y_test = train_test_split(df_linear_energy[['energy_sector']], df_linear_occidential[['spstock']], test_size=.2)

linear_regression_model = LinearRegression()
linear_regression_model.fit(X_train, y_train)

y_pred = pd.DataFrame(linear_regression_model.predict(X_test))

energy_linear_mse = mean_squared_error(y_test, y_pred)
energy_linear_mae = mean_absolute_error(y_test, y_pred)
energy_linear_rmse = math.sqrt(mean_squared_error(y_test, y_pred))

In [32]:
energy_linear_regression = go.Figure()
energy_linear_regression.add_trace(go.Scatter(x=X_train['energy_sector'], y=y_train['spstock'], mode='markers', name='Train Data'))
energy_linear_regression.add_trace(go.Scatter(x=X_test['energy_sector'], y=y_pred[0], name='Prediction'))
energy_linear_regression.update_xaxes(type='linear')
energy_linear_regression.update_yaxes(type='linear')
energy_linear_regression.update_layout(title='energy Sector vs S&P Linear Regression')

ARIMA Model

For occidential

In [33]:
#Modeling
arima_model = ARIMA(occidential_train_data['occidential'], order=(1,1,0))
arima_fitted = arima_model.fit()

RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            2     M =           12

At X0         0 variables are exactly at the bounds

At iterate    0    f= -1.51045D+00    |proj g|=  6.30134D-03

At iterate    5    f= -1.51045D+00    |proj g|=  3.38396D-05

At iterate   10    f= -1.51045D+00    |proj g|=  4.44089D-08

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    2     10     14      1     0     0   4.441D-08  -1.510D+00
  F =  -1.5104470781339197     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             




statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

removed, use:



 This problem is unconstrained.


In [34]:
#Forecast
arima_forecast, se, conf = arima_fitted.forecast(len(occidential_test_data), alpha=0.05)

arima_fc_series = pd.DataFrame(arima_forecast, index=occidential_test_data['date'])
lower_series = pd.DataFrame(conf[:,0], index=occidential_test_data['date'])
upper_series = pd.DataFrame(conf[:,1], index=occidential_test_data['date'])

In [35]:
#ARIMA Plot
occidential_arima = go.Figure()
occidential_arima.add_trace(go.Scatter(x=occidential_train_data['date'], y=occidential_train_data['occidential'], name='Train'))
occidential_arima.add_trace(go.Scatter(x=occidential_test_data['date'], y=occidential_test_data['occidential'], name='Test'))
occidential_arima.add_trace(go.Scatter(x=arima_fc_series.index, y=arima_fc_series[0], name='Forecast'))
occidential_arima.add_trace(go.Scatter(x=lower_series.index, y=lower_series[0], fill='tonexty', fillcolor='rgba(0,100,80,0.1)', name='Lower Bound'))
occidential_arima.add_trace(go.Scatter(x=upper_series.index, y=upper_series[0], fill='tonexty', fillcolor='rgba(0,100,80,0.1)', name='Upper Bound'))
occidential_arima.update_layout(title='Occidential ARIMA')

In [36]:
# ARIMA Model Statistics
occidential_arima_mse = mean_squared_error(occidential_test_data['occidential'], arima_forecast)
occidential_arima_mae = mean_absolute_error(occidential_test_data['occidential'], arima_forecast)
occidential_arima_rmse = math.sqrt(mean_squared_error(occidential_test_data['occidential'], arima_forecast))

ARIMA Model for energy Sector

In [37]:
#Modeling
arima_model = ARIMA(energy_train_data['energy_sector'], order=(2,1,2))
arima_fitted = arima_model.fit()



statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

removed, use:



 This problem is unconstrained.


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            5     M =           12

At X0         0 variables are exactly at the bounds

At iterate    0    f= -2.20288D+00    |proj g|=  9.07531D-01

At iterate    5    f= -2.20315D+00    |proj g|=  1.41435D-01

At iterate   10    f= -2.20341D+00    |proj g|=  1.45793D-01

At iterate   15    f= -2.20344D+00    |proj g|=  2.34909D-02

At iterate   20    f= -2.20396D+00    |proj g|=  3.90003D-01

At iterate   25    f= -2.20460D+00    |proj g|=  3.43637D-02

At iterate   30    f= -2.20461D+00    |proj g|=  9.43885D-03

At iterate   35    f= -2.20462D+00    |proj g|=  8.98707D-02

At iterate   40    f= -2.20463D+00    |proj g|=  8.42615D-04

At iterate   45    f= -2.20463D+00    |proj g|=  9.18998D-04

At iterate   50    f= -2.20463D+00    |proj g|=  3.90799D-06

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cau

In [38]:
#Forecast
arima_forecast, se, conf = arima_fitted.forecast(len(energy_test_data), alpha=0.05)

arima_fc_series = pd.DataFrame(arima_forecast, index=energy_test_data['date'])
lower_series = pd.DataFrame(conf[:,0], index=energy_test_data['date'])
upper_series = pd.DataFrame(conf[:,1], index=energy_test_data['date'])

In [39]:
#ARIMA Plot
energy_arima = go.Figure()
energy_arima.add_trace(go.Scatter(x=energy_train_data['date'], y=energy_train_data['energy_sector'], name='Train'))
energy_arima.add_trace(go.Scatter(x=energy_test_data['date'], y=energy_test_data['energy_sector'], name='Test'))
energy_arima.add_trace(go.Scatter(x=arima_fc_series.index, y=arima_fc_series[0], name='Forecast'))
energy_arima.add_trace(go.Scatter(x=lower_series.index, y=lower_series[0], fill='tonexty', fillcolor='rgba(0,100,80,0.1)', name='Lower Bound'))
energy_arima.add_trace(go.Scatter(x=upper_series.index, y=upper_series[0], fill='tonexty', fillcolor='rgba(0,100,80,0.1)', name='Upper Bound'))
energy_arima.update_layout(title='Energy ARIMA')

In [40]:
# ARIMA Model Statistics
energy_arima_mse = mean_squared_error(energy_test_data['energy_sector'], arima_forecast)
energy_arima_mae = mean_absolute_error(energy_test_data['energy_sector'], arima_forecast)
energy_arima_rmse = math.sqrt(mean_squared_error(energy_test_data['energy_sector'], arima_forecast))

Facebook Prophet Model for occidential

In [41]:

prophet_data = pd.DataFrame()
prophet_data['y'] = df_occidential_log['occidential']
prophet_data['ds'] = energy_sector_pd.index


#train and validation
prophet_train = prophet_data[:int(prophet_data.shape[0]*0.80)]
prophet_test = prophet_data[int(prophet_data.shape[0]*0.80):]

#fit the model
prophet_model = Prophet(interval_width=0.95)
prophet_model.fit(prophet_train)

#predictions
close_prices = prophet_model.make_future_dataframe(periods=212)
forecast = prophet_model.predict(close_prices)

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Initial log joint probability = -7.21267
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       1654.21     0.0197334       277.185           1           1      119   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     199       1676.33    0.00181512       231.343       1.555      0.1555      244   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     261       1679.87   0.000334334       184.906    3.14e-06       0.001      363  LS failed, Hessian reset 
     299       1680.46    0.00606306       79.2123           1           1      407   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     399       1691.54   0.000376197       68.1245      0.4234           1      517   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     464       1694.35   0.000491092    

In [42]:
forecast = forecast.set_index('ds')
forecast = forecast.rename(columns={'yhat':'Prediction'})
forecast = forecast[forecast.index.dayofweek < 5]
forecast = forecast.loc['2021-05-27':'2021-12-31']

In [43]:
#plot
occidential_prophet = go.Figure()
occidential_prophet.add_trace(go.Scatter(x=occidential_train_data['date'], y=occidential_train_data['occidential'], name='Train'))
occidential_prophet.add_trace(go.Scatter(x=occidential_test_data['date'], y=occidential_test_data['occidential'], name='Test'))
occidential_prophet.add_trace(go.Scatter(x=forecast.index, y=forecast['Prediction'], name='Forecast'))
occidential_prophet.add_trace(go.Scatter(x=forecast.index, y=forecast['yhat_lower'], fill='tonexty', fillcolor='rgba(0, 100, 80, 0.1)', name='Lower Bound'))
occidential_prophet.add_trace(go.Scatter(x=forecast.index, y=forecast['yhat_upper'], fill='tonexty', fillcolor='rgba(0, 100, 80, 0.1)', name='Upper Bound'))
occidential_prophet.update_layout(title='Occidential Prophet')

In [44]:
#Prophet Model Statistics
occidential_prophet_mse = mean_squared_error(occidential_test_data['occidential'], forecast['Prediction'])
occidential_prophet_mae = mean_absolute_error(occidential_test_data['occidential'], forecast['Prediction'])
occidential_prophet_rmse = math.sqrt(mean_squared_error(occidential_test_data['occidential'], forecast['Prediction']))

In [45]:
prophet_data = pd.DataFrame()
prophet_data['y'] = df_energy_log['energy_sector']
prophet_data['ds'] = energy_sector_pd.index

#train and validation
prophet_train = prophet_data[:int(prophet_data.shape[0]*0.80)]
prophet_test = prophet_data[int(prophet_data.shape[0]*0.80):]

#fit the model
prophet_model = Prophet(interval_width=0.95)
prophet_model.fit(prophet_train)

#predictions
close_prices = prophet_model.make_future_dataframe(periods=212)
forecast = prophet_model.predict(close_prices)

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Initial log joint probability = -3.35406
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       2057.09     0.0282711       1477.63           1           1      119   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     199       2125.02    0.00478212       632.718           1           1      235   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299       2167.85    0.00756497       306.395           1           1      351   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     399       2186.85    0.00316618       751.598      0.3417           1      459   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     499       2204.28     0.0012435       181.079      0.8841      0.8841      581   
    Iter      log prob        ||dx||      ||grad||       alpha  

In [46]:
forecast = forecast.set_index('ds')
forecast = forecast.rename(columns={'yhat':'Prediction'})
forecast = forecast[forecast.index.dayofweek < 5]
forecast = forecast.loc['2021-05-27':'2021-12-31']

In [47]:
#plot
energy_prophet = go.Figure()
energy_prophet.add_trace(go.Scatter(x=energy_train_data['date'], y=energy_train_data['energy_sector'], name='Train'))
energy_prophet.add_trace(go.Scatter(x=energy_test_data['date'], y=energy_test_data['energy_sector'], name='Test'))
energy_prophet.add_trace(go.Scatter(x=forecast.index, y=forecast['Prediction'], name='Forecast'))
energy_prophet.add_trace(go.Scatter(x=forecast.index, y=forecast['yhat_lower'], fill='tonexty', fillcolor='rgba(0, 100, 80, 0.1)', name='Lower Bound'))
energy_prophet.add_trace(go.Scatter(x=forecast.index, y=forecast['yhat_upper'], fill='tonexty', fillcolor='rgba(0, 100, 80, 0.1)', name='Upper Bound'))
energy_prophet.update_layout(title='Energy Prophet')

In [48]:
#Prophet Model Statistics
energy_prophet_mse = mean_squared_error(energy_test_data['energy_sector'], forecast['Prediction'])
energy_prophet_mae = mean_absolute_error(energy_test_data['energy_sector'], forecast['Prediction'])
energy_prophet_rmse = math.sqrt(mean_squared_error(energy_test_data['energy_sector'], forecast['Prediction']))

In [84]:
cell_hover = {  # for row hover use <tr> instead of <td>
    'selector': 'td:hover',
    'props': [('background-color', '#ffffb3')]
}
index_names = {
    'selector': '.index_name',
    'props': 'font-style: italic; color: darkgrey; font-weight:normal;'
}
headers = {
    'selector': 'th:not(.index_name)',
    'props': 'background-color: #000066; color: white;'
}

In [85]:
energy_statistics = pd.DataFrame(index=['MSE', 'MAE','RMSE'])
energy_statistics['Linear'] = [energy_linear_mse, energy_linear_mae, energy_linear_rmse]
energy_statistics['ARIMA'] = [energy_arima_mse, energy_arima_mae, energy_arima_rmse]
energy_statistics['Prophet'] = [energy_prophet_mse, energy_prophet_mae, energy_prophet_rmse]
energy_statistics = energy_statistics.style.set_table_styles([cell_hover,index_names,headers])

In [86]:
occidential_statistics = pd.DataFrame(index=['MSE', 'MAE','RMSE'])
occidential_statistics['Linear'] = [occidential_linear_mse, occidential_linear_mae, occidential_linear_rmse]
occidential_statistics['ARIMA'] = [occidential_arima_mse, occidential_arima_mae, occidential_arima_rmse]
occidential_statistics['Prophet'] = [occidential_prophet_mse, occidential_prophet_mae, occidential_prophet_rmse]
occidential_statistics = occidential_statistics.style.set_table_styles([cell_hover,index_names,headers])

HTML Builder

In [72]:
style = '<link rel="stylesheet" href="style.css">'
header = '<h1> Final Project Overview </h1>'
energy_sector_header = '<h1> Energy Sector </h1>'
energy_sector_closing_blurb =''
energy_sector_data_blurb = ''
energy_sector_trend_blurb = ''
energy_sector_dist_blurb = ''
energy_sector_linear_blurb = ''
energy_sector_arima_blurb = ''
energy_sector_prophet_blurb = ''
energy_error_header = '<h3> Error Statistics </h3>'
occidential_sector_header = '<h1> Occidential Stock </h1>'
occidential_sector_closing_blurb =''
occidential_sector_data_blurb = ''
occidential_sector_trend_blurb = ''
occidential_sector_dist_blurb = ''
occidential_sector_linear_blurb = ''
occidential_sector_arima_blurb = ''
occidential_sector_prophet_blurb = ''
occidential_error_header = '<h3> Error Statistics </h3>'
sub_header = '<h2> An Exploratory Analysis of Stock Prediction </h2>'
topic_header = '<h3> Why this topic? </h3>'
topic_paragraph = '<p> We wanted to predict something relevant to the our economy. After analyzing various data sets, we decided we wanted to better understand the S&P 500. We each picked an individual sector and a corresponding stock in the S&P 500 to compute analysis and predictions over </p>'
data_header = '<h3> Data Exploration </h3>'
data_paragraph = '<p> The data contains daily closing prices of the S&P 500 from 2019 to 2020. The data was then broken down into an individual sector and a corresponding stock. These were: </p><ul><li> energy Sector: occidential </li><li> Consumer Staples Sector: Kellogg </li><li> Consumer Discretionary Sector: Nike </li><li> Energy Sector: Occidential Petroleum </li><li> Industrials Sector: CH Robinson <li></ul><p> Then to get a better understanding of the data three charts were made: a linear graph of the stock/sectors daily closing price history, a trend/seasonlity plot, and a distribution plot. These can be seen with descriptions on the corresponding stock/sector pages. </p>'
data_analysis_header = '<h3> Data Analysis </h3>'
data_analysis_paragraph = '<p> Our Selected topic entails using Machine Learning to predict future stock prices based on historical data. The Machine Learning models will take in stock data from the last three years to output prediction prices of the S&P 500. The models utilied were:</p><ul><li> Linear Regression: The purpose of viewing the linear regression model was to see how closely correlated each company’s stock or industry was related to the S&P (i.e. if the S&P increased would the company or industry also increase). </li><li> ARIMA: Autoregressive Integrated Moving Average. This is a statistical model that attempts to use past observations of the target variable to forecast its future values. Some limitations to the ARIMA model is that it has difficulty predicting turning points, it struggles with seasonality, and it performs well on short term forecasts but has poorer performance long term. </li><li> Facebook Prophet: The Facebook Prophet model is an additive regression model (like the ARIMA) but it includes growth trend and seasonal components. Some limitations of the Prophet model are it has a tendency to overfit the data, and it requires data to be in a specific format. </li></ul> '

In [79]:
nav_bar2 = '<div class="topnav"><a href="../overview.html"> Overview </a><div class="dropdown"><button class="dropbtn">Tech</button><div class="dropdown-content"><a href="tech_report.html">Tech Sector</a><a href="apple_report.html">Apple</a></div></div><div class="dropdown"><button class="dropbtn">Consumer Staples</button><div class="dropdown-content"><a href="consumer_staples_report.html">Consumer Staples Sector</a><a href="kellogg_report.html">Kellogg</a></div></div><div class="dropdown"><button class="dropbtn">Consumer Discretionary</button><div class="dropdown-content"><a href="consumer_discretionary_report.html">Consumer Discretionary Sector</a><a href="nike_report.html">Nike</a></div></div><div class="dropdown"><button class="dropbtn">Industrial</button><div class="dropdown-content"><a href="industrial_report.html">Industrial Sector</a><a href="ch_robinson_report.html">CH Robinson</a></div></div><div class="dropdown"><button class="dropbtn">Energy</button><div class="dropdown-content"><a href="energy_report.html">Energy Sector</a><a href="occidential_report.html">Occidential</a></div></div></div>'
nav_bar = '<div class="topnav"><a href="overview.html"> Overview </a><div class="dropdown"><button class="dropbtn">Tech</button><div class="dropdown-content"><a href="html_links/tech_report.html">Tech Sector</a><a href="html_links/apple_report.html">Apple</a></div></div><div class="dropdown"><button class="dropbtn">Consumer Staples</button><div class="dropdown-content"><a href="html_links/consumer_staples_report.html">Consumer Staples Sector</a><a href="html_links/kellogg_report.html">Kellogg</a></div></div><div class="dropdown"><button class="dropbtn">Consumer Discretionary</button><div class="dropdown-content"><a href="html_links/consumer_discretionary_report.html">Consumer Discretionary Sector</a><a href="html_links/nike_report.html">Nike</a></div></div><div class="dropdown"><button class="dropbtn">Industrial</button><div class="dropdown-content"><a href="html_links/industrial_report.html">Industrial Sector</a><a href="html_links/ch_robinson_report.html">CH Robinson</a></div></div><div class="dropdown"><button class="dropbtn">Energy</button><div class="dropdown-content"><a href="html_links/energy_report.html">Energy Sector</a><a href="html_links/occidential_report.html">Occidential</a></div></div></div>'

In [80]:
content_overview = style + nav_bar + '<br>' + header + sub_header + '<br>' + topic_header + topic_paragraph + '<br>' + data_header + data_paragraph +'<br>' + data_analysis_header + data_analysis_paragraph
html_overview = content_overview
with open('overview_report.html', 'w+') as file: file.write(html_overview)

In [87]:
content_energy = style + nav_bar2 + '<br>' + energy_sector_header + '<br><div align="center">' + energy_closing_figure.to_html() + energy_sector_closing_blurb + '</div><br><div align="center>'+ energy_test_train_fig.to_html() + energy_sector_data_blurb + '</div><br><div align="center">'+ energy_linear_regression.to_html() + energy_sector_linear_blurb + '</div><br><div align="center">' + energy_arima.to_html() + energy_sector_arima_blurb + '</div><br><div align="center">' + energy_prophet.to_html() + energy_sector_prophet_blurb + '</div><br><div align="center>'+ energy_error_header + energy_statistics.to_html() + '</div>'
html_energy = content_energy
with open('energy_report.html', 'w+') as file: file.write(html_energy)

In [88]:
content_occidential = style + nav_bar2 + '<br>' + occidential_sector_header + '<br><div align="center">' + occidential_closing_figure.to_html() + occidential_sector_closing_blurb + '</div><br><div align="center>'+ occidential_test_train_fig.to_html() + occidential_sector_data_blurb + '</div><br><div align="center">'+ occidential_linear_regression.to_html() + occidential_sector_linear_blurb + '</div><br><div align="center">' + occidential_arima.to_html() + occidential_sector_arima_blurb + '</div><br><div align="center">' + occidential_prophet.to_html() + occidential_sector_prophet_blurb + '</div><br><div align="center>'+ occidential_error_header + occidential_statistics.to_html() + '</div>'
html_occidential = content_occidential
with open('occidential_report.html', 'w+') as file: file.write(html_occidential)