# Import Dependencies and Read Dataset

In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
from prophet import Prophet
from matplotlib import pyplot as plt
from prophet.plot import plot_plotly, plot_components_plotly
import plotly.express as px
from neuralprophet import NeuralProphet
from neuralprophet import NeuralProphet, set_log_level
set_log_level("ERROR")
import time
from sqlalchemy import create_engine
from sqlalchemy.engine import URL
import psycopg2

from config import db_password
from config import user

In [2]:
# Create the database engine.
# The URL is assembled from components (rather than an f-string) so that a
# user name or password containing reserved characters such as '@', ':' or '/'
# is escaped correctly instead of corrupting the connection string.
db_url = URL.create(
    drivername='postgresql',
    username=user,
    password=db_password,
    host='tokyose.ctcjkv5b73cy.us-east-1.rds.amazonaws.com',
    port=5432,
    database='tokyo_se',
)
# NOTE: create_engine() is lazy -- no connection is opened until the first query.
engine = create_engine(db_url)
print('Database Connected')

Database Connected


In [3]:
# Load every row of `prices_table` into a DataFrame and preview it.
query = 'SELECT * FROM prices_table'
df = pd.read_sql(sql=query, con=engine)
df.head()

Unnamed: 0,RowId,Date,SecuritiesCode,Open,High,Low,Close,Volume,SupervisionFlag,Target
0,20190806_9434,2019-08-06,9434,1456.5,1487.0,1455.0,1479.5,25735000,False,0.011183
1,20190806_9435,2019-08-06,9435,23470.0,24360.0,23290.0,24240.0,117800,False,-0.018868
2,20190806_9436,2019-08-06,9436,3440.0,3475.0,3405.0,3450.0,20300,False,-0.002857
3,20190806_9438,2019-08-06,9438,740.0,746.0,727.0,744.0,80200,False,-0.002725
4,20190806_9441,2019-08-06,9441,4170.0,4240.0,4030.0,4235.0,3000,False,-0.009512


# Preprocess Dataset

In [4]:
# Remove the identifier/flag/target columns that the forecasting steps do not use.
unused_columns = ['RowId', 'SupervisionFlag', 'Target']
df_clean = df.drop(columns=unused_columns)
df_clean.head()

Unnamed: 0,Date,SecuritiesCode,Open,High,Low,Close,Volume
0,2019-08-06,9434,1456.5,1487.0,1455.0,1479.5,25735000
1,2019-08-06,9435,23470.0,24360.0,23290.0,24240.0,117800
2,2019-08-06,9436,3440.0,3475.0,3405.0,3450.0,20300
3,2019-08-06,9438,740.0,746.0,727.0,744.0,80200
4,2019-08-06,9441,4170.0,4240.0,4030.0,4235.0,3000


In [5]:
# Discard every row that has a missing value in any column.
df_clean = df_clean.dropna(axis=0, how='any')

In [6]:
# Close-only frame: drop the remaining price/volume columns.
non_close_columns = ['Open', 'High', 'Low', 'Volume']
df_close = df_clean.drop(columns=non_close_columns)
df_close.head()

Unnamed: 0,Date,SecuritiesCode,Close
0,2019-08-06,9434,1479.5
1,2019-08-06,9435,24240.0
2,2019-08-06,9436,3450.0
3,2019-08-06,9438,744.0
4,2019-08-06,9441,4235.0


In [7]:
# Sum Close per (SecuritiesCode, Date) pair; the result is a Series with a
# two-level (SecuritiesCode, Date) index.
close_by_code_and_date = df_clean.groupby(['SecuritiesCode', 'Date'])['Close']
df_grouped = close_by_code_and_date.sum()

In [8]:
# Distinct securities codes to iterate over in the forecasting loop.
itemlist = df_clean['SecuritiesCode'].unique()
# Turn the grouped Series back into a flat frame with
# SecuritiesCode / Date / Close columns.
df_grouped = df_grouped.to_frame().reset_index()

In [9]:
# Container for the combined per-security forecasts; it starts empty and is
# populated by the forecasting cell.
forecast_all = pd.DataFrame()  

# Train NeuralProphet Models and Forecast

In [10]:
# Fit one NeuralProphet model per securities code and collect its forecast.
#
# Forecast frames are gathered in a list and concatenated once after the loop:
# this avoids re-copying the growing frame on every iteration and makes the
# cell idempotent (re-running it no longer appends duplicate rows).
forecasts = []
count = 0
for count, i in enumerate(itemlist, start=1):
    # Close series for this security, indexed by date.
    temp = df_grouped[df_grouped.SecuritiesCode == i]
    temp = temp.drop(columns=['SecuritiesCode'])
    temp['Date'] = pd.to_datetime(temp['Date'])
    temp = temp.set_index('Date')

    # min_count=1 makes calendar days with no rows resample to NaN rather than
    # 0, so the dropna() below really removes them. With the default
    # min_count=0 every non-trading day became a Close of 0 and dropna() was a
    # no-op.
    # NOTE(review): the series now has gaps on non-trading days; NeuralProphet
    # is expected to impute them under freq="D" -- confirm with the installed
    # version.
    d_df = temp.resample('D').sum(min_count=1)
    d_df = d_df.reset_index().dropna()
    # NeuralProphet expects the timestamp column as 'ds' and the value as 'y'.
    d_df.columns = ['ds', 'y']

    m = NeuralProphet(
        n_forecasts=56,
        n_lags=56,
        n_changepoints=10,
        changepoints_range=0.8,
        yearly_seasonality='auto',
        weekly_seasonality='auto',
        num_hidden_layers=5,
        epochs=5,
        learning_rate=1.0,
    )
    m = m.add_country_holidays('Japan')
    metrics = m.fit(d_df, freq="D")

    future = m.make_future_dataframe(d_df)
    forecast = m.predict(future)
    # Tag the rows so forecasts of different securities can be told apart.
    forecast['SecuritiesCode'] = i
    forecasts.append(forecast)

    print(f'Securities Code: {i}, Loop Iteration: {count}')

# pd.concat raises on an empty list, so fall back to an empty frame.
forecast_all = pd.concat(forecasts) if forecasts else pd.DataFrame()


SyntaxError: invalid syntax (571085423.py, line 14)

In [None]:
# Recreate the engine in case the earlier connection timed out during training.
# The URL is assembled from components (rather than an f-string) so that a
# user name or password containing reserved characters such as '@', ':' or '/'
# is escaped correctly instead of corrupting the connection string.
db_url = URL.create(
    drivername='postgresql',
    username=user,
    password=db_password,
    host='tokyose.ctcjkv5b73cy.us-east-1.rds.amazonaws.com',
    port=5432,
    database='tokyo_se',
)
# NOTE: create_engine() is lazy -- no connection is opened until the first query.
engine = create_engine(db_url)
print('Database Connected')

In [None]:
# Print a summary (columns, dtypes, non-null counts) of the combined forecasts.
forecast_all.info()

# Clean Forecast Results

In [None]:
# Work on the in-memory forecasts. From here on `df` refers to the forecast
# frame (the same object as `forecast_all`, not a copy), no longer to the raw
# price table loaded at the top of the notebook.
df = forecast_all
# Alternative kept for reference: reload previously stored results from the
# database instead of using the in-memory frame.
# query = 'SELECT * FROM neural_prophet_model__full_results'
# df = pd.read_sql(query,engine)
df.head()

In [None]:
# Print a summary (columns, dtypes, non-null counts) of the forecast frame.
df.info()

In [None]:
# Keep only the columns whose names do not start with one of these prefixes.
dropped_prefixes = ('residual', 'ar', 'season', 'trend', 'event')
keep_mask = [not name.startswith(dropped_prefixes) for name in df.columns]
df_clean = df.loc[:, keep_mask]
df_clean.head()

In [None]:
# df_clean = df_clean[df_clean['y'] > 0]
# df_clean.head()

In [None]:
# Print a summary (columns, dtypes, non-null counts) of the cleaned forecasts.
df_clean.info()

In [None]:
# Write the cleaned forecasts to the database, replacing the table if it
# already exists; the DataFrame index is not stored.
df_clean.to_sql(
    name='neural_prophet_model_autoregression_results',
    con=engine,
    if_exists='replace',
    index=False,
)

## Calculate Sharpe Ratio

In [None]:
# Long-format table of forecast closes: `yhat1` becomes `Close`, `ds` becomes `Date`.
# rename() returns a new frame, which avoids assigning columns on a slice of
# `df_clean` (chained assignment / SettingWithCopyWarning in the original).
close_prices = (
    df_clean[['SecuritiesCode', 'ds', 'yhat1']]
    .rename(columns={'yhat1': 'Close', 'ds': 'Date'})
    .loc[:, ['SecuritiesCode', 'Close', 'Date']]  # same column order as before
)
# Wide table with one row per Date and one column per SecuritiesCode.
# pivot_table averages duplicate (Date, SecuritiesCode) pairs by default.
prices_target = close_prices.pivot_table(index='Date', columns='SecuritiesCode', values='Close')
prices_target.head()

In [None]:
# For every date (row of prices_target): the 20 largest values, in descending order.
import numpy as np
# Sorting the negated row ascending and negating back yields a descending sort.
first_20 = [-np.sort(-row_values)[:20] for row_values in prices_target.to_numpy()]
prices_target_first_20 = pd.DataFrame(first_20, index=prices_target.index)
prices_target_first_20.head()

In [None]:
# For every date (row of prices_target): the 20 smallest values, in ascending order.
last_20 = [np.sort(row_values)[0:20] for row_values in prices_target.to_numpy()]
prices_target_last_20 = pd.DataFrame(last_20, index=prices_target.index)
prices_target_last_20.head()

In [None]:
# 20 evenly spaced weights decreasing from 2 (first position) to 1 (last position).
weights = np.linspace(2, 1, 20)
weights

In [None]:
# Per date: weighted sum of the top-20 values, divided by the mean weight.
weighted_top = prices_target_first_20.mul(weights, axis='columns')
Sup = weighted_top.sum(axis=1) / weights.mean()
Sup

In [None]:
# Per date: weighted sum of the bottom-20 values, divided by the mean weight.
weighted_bottom = prices_target_last_20.mul(weights, axis='columns')
Sdown = weighted_bottom.sum(axis=1) / weights.mean()
Sdown

In [None]:
# Mean of the per-date (Sup - Sdown) spread divided by its standard deviation,
# i.e. a single Sharpe-style ratio over all dates (despite the variable name).
spread = Sup - Sdown
daily_spread_return = spread.mean() / spread.std()
daily_spread_return

In [None]:
# Count, per security, how many of its values match a top-20 value.
# NOTE(review): isin() tests against the top-20 values pooled over ALL dates,
# so a value that equals another date's top-20 value is also counted.
count_SecuritiesCode_first_20 = prices_target.isin(prices_target_first_20.values.flatten())
# Summing the boolean frame counts the True entries of each column directly.
# The previous value_counts(...).iloc[1:2] relied on True being the second row
# and produced an empty frame when only one boolean value occurred.
True_first_20 = (
    count_SecuritiesCode_first_20.sum(axis=0)
    .astype(float)  # keep the float dtype the original produced via fillna(0)
    .to_frame(name='first_20_True')
)

In [None]:
# Count, per security, how many of its values match a bottom-20 value.
# NOTE(review): isin() tests against the bottom-20 values pooled over ALL
# dates, so a value that equals another date's bottom-20 value is also counted.
count_SecuritiesCode_last_20 = prices_target.isin(prices_target_last_20.values.flatten())
# Summing the boolean frame counts the True entries of each column directly.
# The previous value_counts(...).iloc[1:2] relied on True being the second row
# and produced an empty frame when only one boolean value occurred.
True_last_20 = (
    count_SecuritiesCode_last_20.sum(axis=0)
    .astype(float)  # keep the float dtype the original produced via fillna(0)
    .to_frame(name='last_20_True')
)

In [None]:
# Side-by-side top-20 and bottom-20 counts, aligned on the securities code index.
Count = pd.concat([True_first_20, True_last_20], axis='columns')

In [None]:
# Average of the two counts, each expressed as a fraction of the number of dates.
n_dates = len(prices_target.index)
SecuritiesCode_weight = Count.div(n_dates).sum(axis=1).div(2)

In [None]:
# Two-column frame: the former index (securities code) plus a "Weight" column.
df_weight = SecuritiesCode_weight.to_frame(name="Weight").reset_index()

In [None]:
# Scatter plot of each security's weight (y) against the index of
# SecuritiesCode_weight (x).
plt.figure(figsize=(10, 6), dpi=80)
# Tick labels are coloured cyan -- presumably for readability on a dark
# notebook theme; confirm.
plt.xticks(color='cyan')
plt.yticks(color='cyan')
plt.scatter(x=SecuritiesCode_weight.index,y=SecuritiesCode_weight)

In [None]:
# Rank securities by their top-20 count (descending) and keep the first 20.
# The bare `Count.reset_index` / `Count.columns` expressions were removed:
# neither was called or assigned, so they had no effect.
top_ranked = Count.sort_values(by='first_20_True', ascending=False).head(20)
top_ranked

# Top 20 Highest Performing Stocks

In [None]:
# Keep only the top-20 count column and export the ranking to CSV.
# errors='ignore' lets the cell be re-run: after the first run the column is
# already gone and a plain drop() would raise KeyError.
top_ranked = top_ranked.drop(columns='last_20_True', errors='ignore')
top_ranked.to_csv('../Outputs/neural_prophet_logistic_top_ranked.csv')


### Kaggle Competition Code

In [None]:
# Sorted axis labels for the security-by-date matrices built below.
stock_ids = np.array(sorted(close_prices['SecuritiesCode'].unique()))
trading_dates = np.array(sorted(close_prices['Date'].unique()))

# All-NaN template: one row per security, one column per date.
temp_mat = pd.DataFrame(data=np.nan, index=stock_ids, columns=trading_dates)

def create_factor(item, temp_mat=temp_mat):
    """Pivot one column of ``close_prices`` into a security-by-date matrix.

    Parameters
    ----------
    item : str
        Name of the ``close_prices`` column used as the cell values.
    temp_mat : pandas.DataFrame, optional
        Template frame defining the output's rows and columns. Defaults to the
        module-level ``temp_mat`` object that existed when this function was
        defined.

    Returns
    -------
    pandas.DataFrame
        A copy of ``temp_mat`` with the pivoted values written in at the
        matching (SecuritiesCode, Date) labels; the template is not modified.
    """
    pivoted = close_prices.pivot_table(values=item,
                                       index='SecuritiesCode',
                                       columns='Date')

    factor = temp_mat.copy()
    factor.loc[pivoted.index, pivoted.columns] = pivoted.to_numpy()
    return factor

# Security-by-date matrix of closes, and its one-step percentage change
# computed along the date (column) axis.
close_factor = create_factor(item='Close')
rtn_mat = close_factor.pct_change(periods=1, axis=1)

In [None]:
# Negated 5-step rolling sum of log(1 + return) per security. The matrix is
# transposed so the rolling window runs along dates, then transposed back.
log_returns_by_date = np.log(rtn_mat.T + 1)
reversal_5d = -(log_returns_by_date.rolling(5).sum().T)

### Kaggle submission testing

In [None]:
# import jpx_tokyo_market_prediction
# env = jpx_tokyo_market_prediction.make_env()
# iter_test = env.iter_test()

# counter = 0


# for (prices, options, financials, trades, secondary_prices, sample_prediction) in iter_test:
#     pred_dt = prices['Date'][0]
#     print(" => {} Now generating ranking at date [{}]".format(counter+1, pred_dt))
#     pred_stocks = sample_prediction['SecuritiesCode'].values
#     pred_stocks = pred_stocks.fillna(method='bfill')
#     #### Generating the Ranking Signal ####
#     # access the predict 
#     signal = reversal_5d.loc[pred_stocks,pred_dt]
#     # convert to rank
#     pred_rank = signal.rank(method='first').astype(int)-1
    
#     # assign the rank score: 0 -> 1999
#     sample_prediction['Rank'] = sample_prediction['SecuritiesCode'].map(pred_rank.to_dict()) 
    
#     #### upload prediction ####
#     submission_df = sample_prediction[["Date","SecuritiesCode","Rank"]]
#     env.predict(sample_prediction)
#     ########################
    
#     counter+=1