In [2]:
import numpy as np
import pandas as pd
import glob
import matplotlib.pyplot as plt
import seaborn as sns
import itertools
from prophet.diagnostics import *
from itertools import combinations
from IPython.display import display_html
from statsmodels.tsa.stattools import acf, pacf, ccf, adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

In [3]:
trainPrices = pd.read_csv("./train_files/stock_prices.csv", parse_dates=['Date'])
testPrices = pd.read_csv("./supplemental_files/stock_prices.csv", parse_dates=['Date'])

In [4]:
def prepare_data(df):
    df['AdjustmentFactor'].fillna(1.0, inplace=True)
    df.sort_values(by=['SecuritiesCode','Date'], inplace=True)
    df['Open'].interpolate(inplace=True)
    df['High'].interpolate(inplace=True)
    df['Low'].interpolate(inplace=True)
    df['Close'].interpolate(inplace=True)
    df.loc[df['Volume'] == 0,"Volume"] = np.nan
    df['Volume'].interpolate(inplace=True)
    df['Target'].interpolate(inplace=True)
    for col in ['Open', 'High', 'Low', 'Close']:
        df[str(col) + 'Adjusted'] = df[col].values * df['AdjustmentFactor'].values
    
    return df

In [5]:
trainPrices = prepare_data(trainPrices)
testPrices = prepare_data(testPrices)

In [6]:
Code = 9007
STOCK = trainPrices[trainPrices.SecuritiesCode==Code].set_index("Date")
TEST = testPrices[testPrices.SecuritiesCode==Code].set_index("Date")

In [9]:
STOCK['diffClose'] = STOCK['Close'].diff(periods=1)/STOCK['Close']
STOCK['diffHigh'] = STOCK['High'].diff(periods=1)/STOCK['High']
STOCK['diffHighShift1'] = STOCK['diffHigh'].shift(-1)
STOCK['diffLow'] = STOCK['Low'].diff(periods=1)/STOCK['Low']
STOCK['diffLowShift1'] = STOCK['Low'].diff(periods=1).shift(-1)
STOCK['diffOpen'] = STOCK['Open'].diff(periods=1)/STOCK['Open']
STOCK['diffCloseShift1'] = (STOCK['Close'].diff(periods=1)/STOCK['Close']).shift(-1)
STOCK['pctDailyChange'] = (STOCK['Close'] - STOCK['Open'])/STOCK['Close']
STOCK['diffVolume'] = STOCK['Volume'].diff()/STOCK['Volume']

In [10]:
STOCK

Unnamed: 0_level_0,RowId,SecuritiesCode,Open,High,Low,Close,Volume,AdjustmentFactor,ExpectedDividend,SupervisionFlag,...,diffHigh,diffHighShift1,diffLow,diffLowShift1,diffOpen,diffCloseShift1,pctDailyChange,pctVolumeChange,diffVolume,diffClose
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-01-04,20170104_9007,9007,2324.0,2362.0,2318.0,2361.0,670700.0,1.0,,False,...,,0.010888,,36.0,,0.005057,0.015671,,,
2017-01-05,20170105_9007,9007,2362.0,2388.0,2354.0,2373.0,616400.0,1.0,,False,...,0.010888,0.014851,0.015293,4.0,0.016088,0.015761,0.004635,-0.088092,-0.088092,0.005057
2017-01-06,20170106_9007,9007,2366.0,2424.0,2358.0,2411.0,775700.0,1.0,,False,...,0.014851,-0.006645,0.001696,-17.0,0.001691,-0.024649,0.018664,0.205363,0.205363,0.015761
2017-01-10,20170110_9007,9007,2408.0,2408.0,2341.0,2353.0,814300.0,1.0,,False,...,-0.006645,-0.013895,-0.007262,8.0,0.017442,0.003388,-0.023374,0.047403,0.047403,-0.024649
2017-01-11,20170111_9007,9007,2360.0,2375.0,2349.0,2361.0,445200.0,1.0,,False,...,-0.013895,-0.005930,0.003406,-37.0,-0.020339,-0.018111,0.000424,-0.829066,-0.829066,0.003388
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-11-29,20211129_9007,9007,2170.0,2185.0,2124.0,2130.0,1263500.0,1.0,,False,...,-0.035698,0.024118,-0.039077,48.0,-0.042857,0.026063,-0.018779,0.409735,0.409735,-0.038967
2021-11-30,20211130_9007,9007,2176.0,2239.0,2172.0,2187.0,2712400.0,1.0,,False,...,0.024118,-0.015420,0.022099,-31.0,0.002757,-0.006906,0.005030,0.534176,0.534176,0.026063
2021-12-01,20211201_9007,9007,2170.0,2205.0,2141.0,2172.0,905800.0,1.0,,False,...,-0.015420,-0.018946,-0.014479,-9.0,-0.002765,-0.011644,0.000921,-1.994480,-1.994480,-0.006906
2021-12-02,20211202_9007,9007,2149.0,2164.0,2132.0,2147.0,752500.0,1.0,,False,...,-0.018946,0.024786,-0.004221,31.0,-0.009772,0.029385,-0.000932,-0.203721,-0.203721,-0.011644


In [None]:
STOCK[['diffLowShift1', 'diffLow']].corr()

In [None]:
STOCK['diffClose'].dropna() #.reset_index()

In [None]:
cacf(STOCK['diffClose'].dropna(), STOCK['pctDailyChange'][:-1])

In [None]:
adfuller(STOCK['diffClose'].dropna())

In [None]:
diff1 = diff1.dropna()/STOCK['Close'][1:]
diff1

In [None]:
plt.plot(STOCK['pctDailyChange'])

In [None]:
plot_pacf(STOCK['diffLow'].dropna(), lags=10)

In [None]:
plt.figure(figsize=(15,7))
top = plt.subplot2grid((4,4), (0, 0), rowspan=3, colspan=4)
bottom = plt.subplot2grid((4,4), (3,0), rowspan=1, colspan=4)
top.plot(STOCK.index, STOCK.Close, label="Train set")
top.plot(TEST.index,TEST.Close, color="magenta", label="Test set")
bottom.bar(STOCK.index, STOCK.Volume)
bottom.bar(TEST.index, TEST.Volume, color="magenta")
top.legend(bbox_to_anchor=(1.01, 1., 0.11, 0.), loc='upper right', borderaxespad=0.)
 
# set the labels
top.axes.xaxis.set_ticklabels([])
top.set_title(Code)
top.grid(True)
top.set_ylabel('Closing Price')
bottom.set_ylabel('Volume')
bottom.grid(True);

In [None]:
fut = pd.concat([STOCK,TEST])

In [None]:
from fbprophet import Prophet

def train_ph_model(df):
    m = Prophet(daily_seasonality=True, changepoint_prior_scale=0.5, seasonality_prior_scale=0.01)
    m.add_seasonality(name='monthly', period=21, fourier_order=5)
    ph_df = df[['CloseAdjusted','Date']].copy()
    ph_df.rename(columns={'CloseAdjusted': 'y', 'Date': 'ds'}, inplace=True)
    m.fit(ph_df)
    return m

In [None]:
from fbprophet import Prophet
from fbprophet.make_holidays import make_holidays_df

year_list = [2017, 2018, 2019, 2020, 2021, 2022]
holidays = make_holidays_df(year_list=year_list, country='JP')

def train_ph_model(df):
    m = Prophet(holidays=holidays,
                daily_seasonality=False,
                changepoint_prior_scale=0.5,
                seasonality_prior_scale=0.01)
    m.add_regressor('diffClose')
    m.add_seasonality(name='monthly', period=21, fourier_order=5)
    ph_df = df[['diffClose','Close','Date']].copy()
    ph_df.rename(columns={'Close': 'y', 'Date': 'ds'}, inplace=True)
    m.fit(ph_df)
    return m

In [None]:
ph_train = fut[['diffClose','Close']][w:len(STOCK)].reset_index()
m = train_ph_model(ph_train)

In [None]:
future_prices = m.make_future_dataframe(periods=87, freq='d')
future_prices = future_prices[future_prices.ds.dt.dayofweek < 5].dropna()
future_prices

In [None]:
forecast = m.predict(future_prices)
result = TEST[['Close']].join(forecast[['ds','yhat','yhat_lower','yhat_upper']].set_index("ds"),how='left')
display_html(result)

In [None]:
# Generate diagonal line to plot.
rho = np.corrcoef(result.Close,result.yhat)
fig, ax = plt.subplots(figsize=(8,8))
d_x = np.linspace(start=TEST.Close.min() - 1, stop=TEST.Close.max() + 1, num=100)
sns.regplot(x=result.Close, y=result.yhat, color='magenta', label='test', ax=ax)
sns.lineplot(x=d_x, y=d_x, dashes={'linestyle': ''}, color='blue', ax=ax)
ax.lines[1].set_linestyle('--')
ax.legend()
ax.set(title=f'Test Data vs Predictions - Corr = {np.corrcoef(result.Close,result.yhat)[0,1]:.3f}');

In [None]:
fig = m.plot(forecast)
plt.title(f"{Code} Stock Price Forecast", fontsize=16)
plt.xlabel("Date", fontsize=12)
plt.ylabel("Close Price", fontsize=12)
plt.axvline(TEST.index[0], color='red', linestyle='--')
plt.plot(TEST.index, TEST.Close,  marker="o", markersize=3, color="magenta", linewidth=0, label="Test Close")
plt.show()

In [None]:
def Hyperparameter_tuning(df):
    param_grid = {  
        'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.5],
        'seasonality_prior_scale': [0.01, 0.1, 1.0, 10.0],
        'seasonality_mode': ['additive', 'multiplicative'],
        
    }

    # Generate all combinations of parameters
    all_params = [dict(zip(param_grid.keys(), v)) for v in itertools.product(*param_grid.values())]
    rmses = []  # Store the RMSEs for each params here

    # Use cross validation to evaluate all parameters
    year_list = [2017, 2018, 2019, 2020, 2021, 2022]
    holidays = make_holidays_df(year_list=year_list, country='JP')

    for params in all_params:
        m = Prophet(holidays=holidays,
                    daily_seasonality=False,
                    **params)
        m.add_regressor('Cl_lr')
        m.add_seasonality(name='monthly', period=21, fourier_order=5)
        ph_df = df[['Close','Date','Cl_lr']].copy()
        ph_df.rename(columns={'Close': 'y', 'Date': 'ds'}, inplace=True)
        m.fit(ph_df)
        df_cv = cross_validation(m, horizon='30 days', parallel="processes")
        df_p = performance_metrics(df_cv, rolling_window=1)
        rmses.append(df_p['rmse'].values[0])

    # Find the best parameters
    tuning_results = pd.DataFrame(all_params)
    tuning_results['rmse'] = rmses
    print(tuning_results)

In [None]:
Hyperparameter_tuning(fut[["Close","Cl_lr"]][w:len(STOCK)].reset_index())

In [None]:
# Create Future dates
m = train_ph_model(STOCK.reset_index().copy())
future_prices = m.make_future_dataframe(periods=90, freq='d')

In [None]:
forecast = m.predict(future_prices)
result = TEST[["CloseAdjusted"]].join(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].set_index("ds"), how='left')
display_html(result)

In [None]:
fig = m.plot(forecast)
plt.title(f"{Code} Stock Price Forecast", fontsize=16)
plt.xlabel("Date", fontsize=12)
plt.ylabel("Close Price", fontsize=12)
plt.axvline(TEST.index[0], color='red', linestyle='--')
plt.plot(TEST.index, TEST.Close,  marker="o", markersize=3, color="magenta", linewidth=0, label="Test Close")
plt.show()

In [None]:
fig2 = m.plot_components(forecast)