# Import Dependencies and Read Dataset

In [12]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
from prophet import Prophet
from matplotlib import pyplot as plt
from prophet.plot import plot_plotly, plot_components_plotly
import plotly.express as px
from neuralprophet import NeuralProphet
from neuralprophet import NeuralProphet, set_log_level
set_log_level("ERROR")
import time
from sqlalchemy import create_engine
from sqlalchemy.engine import URL
import psycopg2

from config import db_password
from config import user

In [2]:
# create db connection
# Build the URL from its components so that special characters in the
# credentials (e.g. '@', ':' or '/') are escaped instead of corrupting the URL.
db_url = URL.create(
    drivername='postgresql',
    username=user,
    password=db_password,
    host='tokyose.ctcjkv5b73cy.us-east-1.rds.amazonaws.com',
    port=5432,
    database='tokyo_se',
)
engine = create_engine(db_url)

# create_engine() is lazy; open (and immediately release) one connection so the
# message below is only printed when the database is actually reachable.
with engine.connect():
    pass
print('Database Connected')

Database Connected


In [3]:
# read price df
# Pull the complete prices table into memory through the engine created above.
query = 'SELECT * FROM prices_table'
df = pd.read_sql(sql=query, con=engine)
df.head()

Unnamed: 0,RowId,Date,SecuritiesCode,Open,High,Low,Close,Volume,SupervisionFlag,Target
0,20211021_8699,2021-10-21,8699,1033.0,1065.0,1033.0,1045.0,220500,False,-0.022663
1,20211021_8706,2021-10-21,8706,795.0,799.0,788.0,788.0,36100,False,-0.008883
2,20211021_8707,2021-10-21,8707,1409.0,1411.0,1396.0,1400.0,48500,False,-0.028139
3,20211021_8708,2021-10-21,8708,1093.0,1110.0,1080.0,1080.0,54700,False,0.011101
4,20211021_8713,2021-10-21,8713,1220.0,1240.0,1218.0,1227.0,49200,False,-0.020559


# Preprocess Dataset

In [4]:
# Remove the row identifier, supervision flag and target columns from the raw
# price table; the remaining columns are kept unchanged.
unused_columns = ['RowId', 'SupervisionFlag', 'Target']
df_clean = df.drop(columns=unused_columns)
df_clean.head()

Unnamed: 0,Date,SecuritiesCode,Open,High,Low,Close,Volume
0,2021-10-21,8699,1033.0,1065.0,1033.0,1045.0,220500
1,2021-10-21,8706,795.0,799.0,788.0,788.0,36100
2,2021-10-21,8707,1409.0,1411.0,1396.0,1400.0,48500
3,2021-10-21,8708,1093.0,1110.0,1080.0,1080.0,54700
4,2021-10-21,8713,1220.0,1240.0,1218.0,1227.0,49200


In [5]:
# df_clean = df_clean.dropna()

In [6]:
# Close-only view of the cleaned prices: every other OHLCV column is removed.
non_close_columns = ['Open', 'High', 'Low', 'Volume']
df_close = df_clean.drop(columns=non_close_columns)
df_close.head()

Unnamed: 0,Date,SecuritiesCode,Close
0,2021-10-21,8699,1045.0
1,2021-10-21,8706,788.0
2,2021-10-21,8707,1400.0
3,2021-10-21,8708,1080.0
4,2021-10-21,8713,1227.0


In [7]:
# Close price per (SecuritiesCode, Date) pair.
# min_count=1 keeps a missing close as NaN; a plain sum() would silently turn
# it into a price of 0.
df_grouped = df_clean.groupby(['SecuritiesCode','Date'])['Close'].sum(min_count=1)

In [8]:
# split securities codes from df into iterable list
# Flatten the grouped Series into SecuritiesCode / Date / Close columns.
# The isinstance guard keeps the cell re-runnable: once df_grouped is already a
# DataFrame there is nothing left to convert (DataFrame has no to_frame()).
if isinstance(df_grouped, pd.Series):
    df_grouped = df_grouped.to_frame().reset_index()
itemlist = df_clean.SecuritiesCode.unique()



In [9]:
# create df to store forecasts
# Empty placeholder for the combined forecasts of all securities.
forecast_all = pd.DataFrame()  

# Fit a NeuralProphet Model and Forecast Each Security

In [11]:
# Fit one NeuralProphet model per security and forecast 56 days ahead.
# Per-security forecasts are collected in a list and concatenated once, instead
# of re-copying an ever-growing DataFrame on every iteration.
forecast_frames = []
try:
    for count, i in enumerate(itemlist, start=1):
        # Close series of security `i`, indexed by date.
        temp = df_grouped[df_grouped.SecuritiesCode == i]
        temp = temp.drop(columns=['SecuritiesCode'])
        temp['Date'] = pd.to_datetime(temp['Date'])
        temp = temp.set_index('Date')

        # min_count=1 leaves days without any observation as NaN so that
        # dropna() removes them; a plain sum() would insert a close price of 0
        # for every non-trading day and the model would be trained on it.
        d_df = temp.resample('D').sum(min_count=1)
        d_df = d_df.reset_index().dropna()
        d_df.columns = ['ds', 'y']

        # Default model structure; only the training schedule is overridden.
        # NOTE(review): learning_rate is pinned to 1.0 - confirm this value was
        # tuned rather than left over from experimentation.
        m = NeuralProphet(
            epochs=50,
            learning_rate=1.0,
        )

        metrics = m.fit(d_df, freq="D")

        # True requests predictions for the whole history of *this* security
        # (the length of the all-securities frame is not a meaningful count).
        future = m.make_future_dataframe(d_df, periods=56, n_historic_predictions=True)
        forecast = m.predict(future)
        forecast['SecuritiesCode'] = i
        forecast_frames.append(forecast)

        print(f'Securities Code: {i}, Loop Iteration: {count}')
finally:
    # Also runs when the loop is interrupted, so the forecasts finished so far
    # stay available in forecast_all.
    forecast_all = pd.concat(forecast_frames) if forecast_frames else pd.DataFrame()


                                                                                                                       

Securities Code: 8699, Loop Iteration: 1


                                                                                                                       

Securities Code: 8706, Loop Iteration: 2


                                                                                                                       

Securities Code: 8707, Loop Iteration: 3


                                                                                                                       

Securities Code: 8708, Loop Iteration: 4


                                                                                                                       

Securities Code: 8713, Loop Iteration: 5


                                                                                                                       

Securities Code: 8714, Loop Iteration: 6


                                                                                                                       

Securities Code: 8715, Loop Iteration: 7


                                                                                                                       

Securities Code: 8725, Loop Iteration: 8


                                                                                                                       

Securities Code: 8739, Loop Iteration: 9


                                                                                                                       

Securities Code: 8750, Loop Iteration: 10


                                                                                                                       

Securities Code: 8766, Loop Iteration: 11


                                                                                                                       

Securities Code: 8771, Loop Iteration: 12


                                                                                                                       

Securities Code: 8772, Loop Iteration: 13


                                                                                                                       

Securities Code: 8793, Loop Iteration: 14


                                                                                                                       

Securities Code: 8795, Loop Iteration: 15


                                                                                                                       

Securities Code: 8798, Loop Iteration: 16


                                                                                                                       

Securities Code: 8801, Loop Iteration: 17


                                                                                                                       

Securities Code: 8802, Loop Iteration: 18


                                                                                                                       

Securities Code: 8803, Loop Iteration: 19


                                                                                                                       

Securities Code: 8804, Loop Iteration: 20


                                                                                                                       

Securities Code: 8806, Loop Iteration: 21


                                                                                                                       

Securities Code: 8818, Loop Iteration: 22


                                                                                                                       

Securities Code: 8830, Loop Iteration: 23


                                                                                                                       

Securities Code: 8841, Loop Iteration: 24


                                                                                                                       

Securities Code: 8842, Loop Iteration: 25


                                                                                                                       

Securities Code: 8844, Loop Iteration: 26


                                                                                                                       

Securities Code: 8848, Loop Iteration: 27


                                                                                                                       

Securities Code: 8850, Loop Iteration: 28


                                                                                                                       

Securities Code: 8860, Loop Iteration: 29


                                                                                                                       

Securities Code: 8864, Loop Iteration: 30


                                                                                                                       

Securities Code: 8869, Loop Iteration: 31


                                                                                                                       

Securities Code: 8871, Loop Iteration: 32


                                                                                                                       

Securities Code: 8876, Loop Iteration: 33


                                                                                                                       

Securities Code: 8877, Loop Iteration: 34


                                                                                                                       

Securities Code: 8881, Loop Iteration: 35


                                                                                                                       

KeyboardInterrupt: 

In [None]:
# Show the last rows of the combined forecast frame.
forecast_all.tail()

# Clean Forecast Results

In [None]:
# Use the forecasts as the working frame.
# `df` now refers to the same object as forecast_all (no copy is made) and no
# longer to the raw price table that was loaded under this name earlier.
df = forecast_all
# Alternative source: reload previously exported results from the database.
# query = 'SELECT * FROM neural_prophet_model__full_results'
# df = pd.read_sql(query,engine)
df.head()

In [None]:
# Column names, dtypes and non-null counts of the forecast frame.
df.info()

In [None]:
# Drop every forecast column whose name starts with one of these prefixes and
# keep all remaining columns in their original order.
dropped_prefixes = ('residual', 'ar', 'season', 'trend', 'event')
keep_mask = [not column.startswith(dropped_prefixes) for column in df.columns]
df_clean = df.loc[:, keep_mask]
df_clean.head()

In [None]:
# df_clean = df_clean[df_clean['y'] > 0]
# df_clean.head()

In [None]:
# Column names, dtypes and non-null counts after removing the component columns.
df_clean.info()

In [None]:
# export cleaned forecast results to AWS
# An existing table with this name is replaced; the DataFrame index is not written.
df_clean.to_sql(
    name='neural_prophet_model__full_results',
    con=engine,
    if_exists='replace',
    index=False,
)

## Calculate Sharpe Ratio

In [None]:
# getting close prices for all SecuritiesCodes
# The forecast column (yhat1) is used as the close price and ds as the date.
# rename() returns a new frame, so nothing is assigned into a slice of df_clean
# (which would raise SettingWithCopyWarning). Resulting column order:
# SecuritiesCode, Close, Date.
close_prices = (
    df_clean[['SecuritiesCode', 'yhat1', 'ds']]
    .rename(columns={'yhat1': 'Close', 'ds': 'Date'})
)
# One row per date, one column per security (duplicates are averaged by pivot_table).
prices_target = close_prices.pivot_table(index='Date', columns='SecuritiesCode', values='Close')
prices_target.head()

In [None]:
# 20 largest values of every row (time step) of prices_target, in descending order.
import numpy as np  # kept in this cell: the following cells rely on `np` as well

# Sorting the negated matrix ascending is a descending sort of the original
# values; NaNs are placed last and therefore only appear when a row has fewer
# than 20 valid entries. One vectorised sort replaces the per-row Python loop.
first_20 = -np.sort(-prices_target.to_numpy(), axis=1)[:, :20]
prices_target_first_20 = pd.DataFrame(first_20, index=prices_target.index)
prices_target_first_20.head()

In [None]:
# 20 smallest values of every row (time step) of prices_target, in ascending order.
# One vectorised row-wise sort replaces the per-row Python loop; NaNs sort last.
last_20 = np.sort(prices_target.to_numpy(), axis=1)[:, :20]
prices_target_last_20 = pd.DataFrame(last_20, index=prices_target.index)
prices_target_last_20.head()

In [None]:
# calculate weights
# 20 evenly spaced weights falling linearly from 2 down to 1.
weights = np.linspace(2, 1, 20)
weights

In [None]:
# calculate weighted top stocks
# Per time step: weighted sum over the 20 largest values, divided by the mean weight.
weighted_top = prices_target_first_20.mul(weights, axis='columns')
Sup = weighted_top.sum(axis='columns') / weights.mean()
Sup

In [None]:
# calculate weighted bottom stocks
# Per time step: weighted sum over the 20 smallest values, divided by the mean weight.
weighted_bottom = prices_target_last_20.mul(weights, axis='columns')
Sdown = weighted_bottom.sum(axis='columns') / weights.mean()
Sdown

In [None]:
# calculate daily spread return
# Mean of the per-step spread (weighted top minus weighted bottom) divided by
# its standard deviation.
spread = Sup - Sdown
daily_spread_return = spread.mean() / spread.std()
daily_spread_return

In [None]:
# calculate true top 20 stocks
# Flag every cell of prices_target whose value occurs among the top-20 values.
# NOTE(review): membership is tested against the top-20 values of all time
# steps pooled together, not only those of the cell's own row - confirm intent.
count_SecuritiesCode_first_20 = prices_target.isin(prices_target_first_20.values.flatten())
# Summing the boolean flags counts the True cells per security directly. This
# does not depend on the row order value_counts() would produce and still
# yields 0 for a security that is never flagged.
True_first_20 = (
    count_SecuritiesCode_first_20.sum(axis=0)
    .astype(float)
    .to_frame(name='first_20_True')
)

In [None]:
# calculate true bottom 20 stocks
# Flag every cell of prices_target whose value occurs among the bottom-20 values.
# NOTE(review): membership is tested against the bottom-20 values of all time
# steps pooled together, not only those of the cell's own row - confirm intent.
count_SecuritiesCode_last_20 = prices_target.isin(prices_target_last_20.values.flatten())
# Summing the boolean flags counts the True cells per security directly. This
# does not depend on the row order value_counts() would produce and still
# yields 0 for a security that is never flagged.
True_last_20 = (
    count_SecuritiesCode_last_20.sum(axis=0)
    .astype(float)
    .to_frame(name='last_20_True')
)

In [None]:
# Place the top-20 and bottom-20 count frames side by side (column-wise concat).
Count = pd.concat([True_first_20,True_last_20],axis=1)

In [None]:
# Divide each count by the number of time steps, add the two resulting columns
# per row and halve the sum.
n_time_steps = len(prices_target.index)
SecuritiesCode_weight = Count.div(n_time_steps).sum(axis=1) / 2

In [None]:
# Two-column frame: the former index plus a "Weight" column.
# to_frame(name=...) always carries the values over; building the frame with
# pd.DataFrame(series, columns=[...]) would produce an all-NaN column as soon
# as the Series has a name different from "Weight".
df_weight = SecuritiesCode_weight.to_frame(name="Weight").reset_index()

In [None]:
# plot securities code weights
# Explicit figure/axes interface, with labels and a title so the figure can be
# read on its own. Text is drawn in cyan like the tick labels.
fig, ax = plt.subplots(figsize=(10, 6), dpi=80)
ax.scatter(x=SecuritiesCode_weight.index, y=SecuritiesCode_weight)
ax.set_xlabel('SecuritiesCode', color='cyan')
ax.set_ylabel('Weight', color='cyan')
ax.set_title('Weight per SecuritiesCode', color='cyan')
ax.tick_params(axis='both', labelcolor='cyan')

In [None]:
# sort top ranked
# The 20 rows of Count with the highest 'first_20_True' value. Count itself is
# left untouched, so the index of top_ranked is the index of Count.
top_ranked = Count.sort_values(by='first_20_True', ascending=False).head(20)
top_ranked

# Top 20 Highest Performing Stocks

In [None]:
# Keep only the top-20 count column and export the ranking.
# errors='ignore' lets the cell be re-run after the column is already gone.
top_ranked = top_ranked.drop(columns='last_20_True', errors='ignore')
top_ranked.to_csv('../Outputs/neural_prophet_top_ranked.csv')


In [None]:
# Rows from 2021-12-06 onwards, with Date as a regular column.
# reset_index() is applied to a new frame instead of mutating prices_target in
# place: the cells above that sort the rows of prices_target keep working, and
# this cell can be re-run without "cannot insert Date, already exists".
prices_with_date = prices_target.reset_index()
target_dates = prices_with_date.loc[prices_with_date['Date'] >= '2021-12-06']
target_dates.head()

In [None]:
# Securities codes of the top-ranked rows.
# The codes are normally the index of top_ranked (attribute access on the
# index name raises AttributeError); a 'SecuritiesCode' column is still
# honoured if the frame has one.
if 'SecuritiesCode' in top_ranked.columns:
    ranks = top_ranked['SecuritiesCode'].unique()
else:
    ranks = top_ranked.index.unique().to_numpy()

In [None]:
# Date column followed by one column per entry of `ranks`, taken from target_dates.
selected_columns = ['Date', *ranks]
predictions = target_dates[selected_columns].rename_axis(columns=None).copy()

In [None]:
# Display the predictions frame.
predictions

In [None]:
# Write the predictions (including the index) to a CSV file.
# NOTE(review): this file goes to the current working directory, whereas the
# top-ranked CSV is written to ../Outputs/ - confirm the location is intended.
predictions.to_csv('neuralprophet_top_predictions.csv')

### Kaggle Competition Code

In [None]:
# Sorted unique securities codes and dates found in close_prices.
stock_ids = np.array(sorted(close_prices['SecuritiesCode'].unique()))
trading_dates = np.array(sorted(close_prices['Date'].unique()))

# All-NaN float template: one row per security, one column per date.
temp_mat = pd.DataFrame(index=stock_ids, columns=trading_dates, dtype=float)

def create_factor(item, temp_mat=temp_mat):
    """Return a securities-by-dates matrix of one close_prices column.

    Parameters
    ----------
    item : str
        Name of the close_prices column to pivot.
    temp_mat : pandas.DataFrame
        Template whose index and columns define the shape of the result. The
        default is the module-level template as it exists when this function
        is defined. The template itself is never modified.

    Returns
    -------
    pandas.DataFrame
        Copy of ``temp_mat`` in which every (SecuritiesCode, Date) cell present
        in the pivot of ``close_prices`` is filled with the pivoted value; all
        other cells keep the template's value.
    """
    pivoted = close_prices.pivot_table(values=item, index='SecuritiesCode', columns='Date')

    factor = temp_mat.copy()
    # Positional assignment of the pivot values into the matching labels.
    factor.loc[pivoted.index, pivoted.columns] = pivoted.to_numpy()
    return factor

# Securities-by-dates matrix of close values and its one-step percentage change
# computed along the date (column) axis.
close_factor = create_factor(item='Close')
rtn_mat = close_factor.pct_change(periods=1, axis='columns')

In [None]:
# Negated 5-step rolling sum of log(1 + return) along the date axis; the frame
# is transposed for the rolling window and transposed back afterwards.
log_returns_by_date = np.log(rtn_mat.T + 1)
reversal_5d = -(log_returns_by_date.rolling(window=5).sum().T)

### Kaggle submission testing

In [None]:
# import jpx_tokyo_market_prediction
# env = jpx_tokyo_market_prediction.make_env()
# iter_test = env.iter_test()

# counter = 0


# for (prices, options, financials, trades, secondary_prices, sample_prediction) in iter_test:
#     pred_dt = prices['Date'][0]
#     print(" => {} Now generating ranking at date [{}]".format(counter+1, pred_dt))
#     pred_stocks = sample_prediction['SecuritiesCode'].values
#     pred_stocks = pred_stocks.fillna(method='bfill')
#     #### Generating the Ranking Signal ####
#     # access the predict 
#     signal = reversal_5d.loc[pred_stocks,pred_dt]
#     # convert to rank
#     pred_rank = signal.rank(method='first').astype(int)-1
    
#     # assign the rank score: 0 -> 1999
#     sample_prediction['Rank'] = sample_prediction['SecuritiesCode'].map(pred_rank.to_dict()) 
    
#     #### upload prediction ####
#     submission_df = sample_prediction[["Date","SecuritiesCode","Rank"]]
#     env.predict(sample_prediction)
#     ########################
    
#     counter+=1