In [1]:
import pandas as pd
import numpy as np
from fbprophet import Prophet
from fbprophet.diagnostics import cross_validation
from fbprophet.diagnostics import performance_metrics

In [2]:
df = pd.read_csv("ULTAAllDates.csv")

In [3]:
df.fillna(0)

Unnamed: 0,date,open,high,low,close,volume
0,1/1/2016,0.00,0.00,0.0000,0.00,0.0
1,1/2/2016,0.00,0.00,0.0000,0.00,0.0
2,1/3/2016,0.00,0.00,0.0000,0.00,0.0
3,1/4/2016,182.67,184.60,180.4550,181.81,1196625.0
4,1/5/2016,182.57,183.88,180.7300,182.67,567952.0
...,...,...,...,...,...,...
1491,1/31/2020,274.27,274.27,266.7375,267.91,730840.0
1492,2/1/2020,0.00,0.00,0.0000,0.00,0.0
1493,2/2/2020,0.00,0.00,0.0000,0.00,0.0
1494,2/3/2020,273.00,280.00,272.1600,274.96,947958.0


In [4]:
df['date'] = pd.to_datetime(df['date'])

In [5]:
df['sum_h_l'] = df['high'] + df['low']

INFO:numexpr.utils:Note: NumExpr detected 12 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO:numexpr.utils:NumExpr defaulting to 8 threads.


In [6]:
df.head()

Unnamed: 0,date,open,high,low,close,volume,sum_h_l
0,2016-01-01,,,,,,
1,2016-01-02,,,,,,
2,2016-01-03,,,,,,
3,2016-01-04,182.67,184.6,180.455,181.81,1196625.0,365.055
4,2016-01-05,182.57,183.88,180.73,182.67,567952.0,364.61


In [7]:
df['average price'] = df['sum_h_l']/2

In [8]:
df.head()

Unnamed: 0,date,open,high,low,close,volume,sum_h_l,average price
0,2016-01-01,,,,,,,
1,2016-01-02,,,,,,,
2,2016-01-03,,,,,,,
3,2016-01-04,182.67,184.6,180.455,181.81,1196625.0,365.055,182.5275
4,2016-01-05,182.57,183.88,180.73,182.67,567952.0,364.61,182.305


In [9]:
df.rename(columns = {'date':'ds'}, inplace = True)
df.head()

Unnamed: 0,ds,open,high,low,close,volume,sum_h_l,average price
0,2016-01-01,,,,,,,
1,2016-01-02,,,,,,,
2,2016-01-03,,,,,,,
3,2016-01-04,182.67,184.6,180.455,181.81,1196625.0,365.055,182.5275
4,2016-01-05,182.57,183.88,180.73,182.67,567952.0,364.61,182.305


In [10]:
df_avg_price = df[['ds', 'average price']].copy()
df_avg_price

Unnamed: 0,ds,average price
0,2016-01-01,
1,2016-01-02,
2,2016-01-03,
3,2016-01-04,182.52750
4,2016-01-05,182.30500
...,...,...
1491,2020-01-31,270.50375
1492,2020-02-01,
1493,2020-02-02,
1494,2020-02-03,276.08000


In [11]:
df_avg_price.rename(columns = {'average price':'y'}, inplace = True)

In [12]:
black_friday = pd.DataFrame({
  'holiday': 'black-friday',
  'ds': pd.to_datetime(['2016-11-25','2017-11-24', '2018-11-23', '2019-11-29',
                        '2020-11-27','2021-11-26']),
  'lower_window': 0,
  'upper_window': 2,
})

In [13]:
cyber_monday = pd.DataFrame({
  'holiday': 'cyber_monday',
  'ds': pd.to_datetime(['2016-11-28','2017-11-27', '2018-11-26', '2019-12-02',
                        '2020-11-30','2021-11-29']),
  'lower_window': 0,
  'upper_window': 7,
})

In [14]:
holidays = pd.concat((black_friday, cyber_monday))

In [15]:
m = Prophet(holidays = holidays,
            yearly_seasonality=True,
            weekly_seasonality=True,
            daily_seasonality=True)

In [16]:
m.add_country_holidays(country_name='US')

<fbprophet.forecaster.Prophet at 0x15a9f51df88>

In [17]:
m.fit(df_avg_price);
future = m.make_future_dataframe(periods=1000)

In [18]:
forecast = m.predict(future)

In [19]:
forecast.head()

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,Christmas Day,Christmas Day_lower,Christmas Day_upper,Christmas Day (Observed),...,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2016-01-01,177.101298,123.431781,166.431333,177.101298,177.101298,0.0,0.0,0.0,0.0,...,-0.710874,-0.710874,-0.710874,-16.446515,-16.446515,-16.446515,0.0,0.0,0.0,145.331903
1,2016-01-02,177.41134,128.7864,169.889297,177.41134,177.41134,0.0,0.0,0.0,0.0,...,1.826501,1.826501,1.826501,-15.087459,-15.087459,-15.087459,0.0,0.0,0.0,149.538377
2,2016-01-03,177.721382,131.142346,172.757433,177.721382,177.721382,0.0,0.0,0.0,0.0,...,1.826501,1.826501,1.826501,-13.683237,-13.683237,-13.683237,0.0,0.0,0.0,151.25264
3,2016-01-04,178.031424,129.591005,171.597452,178.031424,178.031424,0.0,0.0,0.0,0.0,...,-0.864171,-0.864171,-0.864171,-12.250188,-12.250188,-12.250188,0.0,0.0,0.0,150.30506
4,2016-01-05,178.341466,131.997007,171.98734,178.341466,178.341466,0.0,0.0,0.0,0.0,...,-0.240467,-0.240467,-0.240467,-10.805241,-10.805241,-10.805241,0.0,0.0,0.0,152.683753


In [20]:
df_cv_avg_price = cross_validation(m, initial='366 days', period='92 days', horizon = '365 days')
df_cv_avg_price.head()

INFO:fbprophet:Making 9 forecasts with cutoffs between 2017-01-29 00:00:00 and 2019-02-04 00:00:00


Unnamed: 0,ds,yhat,yhat_lower,yhat_upper,y,cutoff
0,2017-01-30,259.690487,253.607329,265.307724,269.335,2017-01-29
1,2017-01-31,258.231312,252.590444,264.069376,269.785,2017-01-29
2,2017-02-01,257.243384,250.963598,263.140837,271.495,2017-01-29
3,2017-02-02,256.232913,250.927226,262.46428,269.145,2017-01-29
4,2017-02-03,256.057574,250.52483,261.845827,272.675,2017-01-29


In [23]:
df_cv_avg_price.tail()

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper,y,cutoff
2257,2020-01-29,376.776428,317.863703,435.405681,273.4,2019-02-04
2258,2020-01-30,375.964165,318.175537,436.557253,272.575,2019-02-04
2259,2020-01-31,376.335777,317.223752,432.818092,270.50375,2019-02-04
2260,2020-02-03,374.124507,315.008595,433.157772,276.08,2019-02-04
2261,2020-02-04,374.005931,315.563002,433.627642,276.37,2019-02-04


In [21]:
df_p_avg_price = performance_metrics(df_cv_avg_price)
df_p_avg_price.head()

Unnamed: 0,horizon,mse,rmse,mae,mape,coverage
0,37 days,503.792377,22.44532,18.496017,0.071247,0.297724
1,38 days,551.926084,23.493107,19.246827,0.074021,0.28445
2,39 days,603.069209,24.557467,20.006262,0.076844,0.273072
3,40 days,645.456253,25.405831,20.571995,0.079212,0.267383
4,41 days,679.118192,26.059896,21.049903,0.081354,0.265487


In [22]:
df_p_avg_price.to_csv('StockProjectionsPerformanceMetrics.csv')