In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import os.path

import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard

from sklearn.preprocessing import MinMaxScaler

KeyboardInterrupt: 

In [2]:
# Names of the columns used as model inputs. The cached table is built by
# selecting `indicators + predictors`, and create_sequences() treats the first
# len(indicators) columns as features, so the order here fixes the feature order.
# Note that 'prc' (the column the target is derived from) is itself a feature.
indicators = [
            'aliq_at', 'aliq_mat', 'ami_126d', 'at_be', 'at_gr1', 'at_me', 'at_turnover', 'be_gr1a', 'be_me', 'beta_60m', 'beta_dimson_21d', 'betabab_1260d', 
            'betadown_252d', 'bev_mev', 'bidaskhl_21d', 'capex_abn', 'capx_gr1', 'capx_gr2', 'capx_gr3', 'cash_at', 'chcsho_12m', 'coa_gr1a', 'col_gr1a', 'cop_at', 
            'cop_atl1', 'corr_1260d', 'coskew_21d', 'cowc_gr1a', 'dbnetis_at', 'debt_gr3', 'debt_me', 'dgp_dsale', 'div12m_me', 'dolvol_126d', 'dolvol_var_126d', 
            'dsale_dinv', 'dsale_drec', 'dsale_dsga', 'earnings_variability', 'ebit_bev', 'ebit_sale', 'ebitda_mev', 'emp_gr1', 'eq_dur', 'eqnetis_at', 'eqnpo_12m', 
            'eqnpo_me', 'eqpo_me', 'f_score', 'fcf_me', 'fnl_gr1a', 'gp_at', 'gp_atl1', 'intrinsic_value', 'inv_gr1', 'inv_gr1a', 'iskew_capm_21d', 'iskew_ff3_21d', 
            'iskew_hxz4_21d', 'ivol_capm_21d', 'ivol_capm_252d', 'ivol_ff3_21d', 'ivol_hxz4_21d', 'kz_index', 'lnoa_gr1a', 'lti_gr1a', 'market_equity', 'mispricing_mgmt', 
            'mispricing_perf', 'ncoa_gr1a', 'ncol_gr1a', 'netdebt_me', 'netis_at', 'nfna_gr1a', 'ni_ar1', 'ni_be', 'ni_inc8q', 'ni_ivol', 'ni_me', 'niq_at', 'niq_at_chg1', 
            'niq_be', 'niq_be_chg1', 'niq_su', 'nncoa_gr1a', 'noa_at', 'noa_gr1a', 'o_score', 'oaccruals_at', 'oaccruals_ni', 'ocf_at', 'ocf_at_chg1', 'ocf_me', 
            'ocfq_saleq_std', 'op_at', 'op_atl1', 'ope_be', 'ope_bel1', 'opex_at', 'pi_nix', 'ppeinv_gr1a', 'prc', 'prc_highprc_252d', 'qmj', 'qmj_growth', 
            'qmj_prof', 'qmj_safety', 'rd_me', 'rd_sale', 'rd5_at', 'resff3_12_1', 'resff3_6_1', 'ret_1_0', 'ret_12_1', 'ret_12_7', 'ret_3_1', 'ret_6_1', 
            'ret_60_12', 'ret_9_1', 'rmax1_21d', 'rmax5_21d', 'rmax5_rvol_21d', 'rskew_21d', 'rvol_21d', 'sale_bev', 'sale_emp_gr1', 'sale_gr1', 'sale_gr3', 
            'sale_me', 'saleq_gr1', 'saleq_su', 'seas_1_1an', 'seas_1_1na', 'seas_2_5an', 'seas_2_5na', 'sti_gr1a', 'taccruals_at', 'taccruals_ni', 
            'tangibility', 'tax_gr1a', 'turnover_126d', 'turnover_var_126d', 'z_score', 'zero_trades_126d', 'zero_trades_21d', 'zero_trades_252d'
        ]
# Despite the name, this is the *target* column list: 'Tomorrow' is the column
# the model is trained to predict (built as the per-ticker next-row 'prc').
# It is appended after the indicator columns when the cached table is written.
predictors = ['Tomorrow']

In [3]:
# Build the cached modelling table once; later runs reuse the parquet file.
if not os.path.isfile('sorted_with_tomorrow.parquet'):
    df = pd.read_csv("hackathon_sample_v2.csv")

    # Add the target column per ticker: 'Tomorrow' is the next row's 'prc'
    # within the same ticker, so the last row of every ticker gets NaN.
    #
    # A single groupby pass replaces re-filtering the whole frame once per
    # ticker. sort=False yields tickers in order of first appearance, so the
    # concatenated rows come out in the same order as a loop over
    # df.stock_ticker.unique(). assign() returns a new frame per ticker, which
    # avoids writing a column onto a filtered slice (SettingWithCopyWarning).
    #
    # NOTE(review): rows are shifted in file order; nothing here sorts by date,
    # so this assumes the CSV is already chronological within each ticker —
    # confirm.
    dfList = [
        tickerDf.assign(Tomorrow=tickerDf['prc'].shift(-1))
        for _, tickerDf in df.groupby('stock_ticker', sort=False)
    ]
    df = pd.concat(dfList)

    # Keep only the feature columns followed by the target column; the ticker
    # column is dropped at this point.
    df = df[indicators + predictors]
    df.to_parquet('sorted_with_tomorrow.parquet')

In [4]:
# Load the cached feature/target table and replace every missing value with 0.
# NOTE(review): the fill applies to all columns, so rows whose 'Tomorrow'
# target is missing (e.g. the last row of a ticker after shift(-1)) end up
# with a target of 0 — confirm that is intended.
data = pd.read_parquet('sorted_with_tomorrow.parquet').fillna(0)

In [5]:
# Positional hold-out split: the first 80% of rows train the model and the
# remaining rows are kept for testing. No shuffling is applied.
train_size = int(len(data) * 0.8)
train_data = data.iloc[:train_size]
test_data = data.iloc[train_size:]

In [6]:
# Min-max scale every column (features and target alike). The scaler learns
# its ranges from the training rows only and is then applied unchanged to the
# test rows, so test values may fall outside the training range.
# Both names are rebound from DataFrames to NumPy arrays by this cell.
scaler = MinMaxScaler()
scaler.fit(train_data)
train_data = scaler.transform(train_data)
test_data = scaler.transform(test_data)

In [7]:
def create_sequences(data, seq_length=10, n_features=None):
    """Slice a 2-D array into overlapping fixed-length windows and their targets.

    Window ``i`` holds rows ``i .. i + seq_length - 1`` restricted to the first
    ``n_features`` columns. Its target is the remaining columns of row
    ``i + seq_length``, i.e. the row immediately after the window.

    NOTE(review): if the target column is already a one-step-ahead shift of a
    feature (as 'Tomorrow' is built from 'prc' in this notebook), taking it
    from the row *after* the window looks two steps past the window's last
    row — confirm that this horizon is intended.

    Args:
        data: 2-D array-like of shape (rows, columns) with the feature columns
            first and the target column(s) last.
        seq_length: Number of consecutive rows in each window.
        n_features: Number of leading columns treated as features. When None
            (the default) it falls back to ``len(indicators)``, the notebook's
            module-level feature list.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. ``X`` has shape
        (windows, seq_length, n_features) and ``y`` has shape
        (windows, columns - n_features), where
        ``windows = max(rows - seq_length, 0)``. When there are not enough rows
        for a single window, correctly shaped empty arrays are returned so
        that callers can still read ``X.shape[1]`` and ``X.shape[2]``.
    """
    if n_features is None:
        n_features = len(indicators)

    data = np.asarray(data)
    n_windows = max(len(data) - seq_length, 0)

    if n_windows == 0:
        # Not enough rows for one window: keep the trailing dimensions so the
        # result still looks like (0, timesteps, features) / (0, targets).
        if data.ndim == 2:
            feature_width = data[:, :n_features].shape[1]
            target_width = data[:, n_features:].shape[1]
        else:
            feature_width, target_width = n_features, 0
        return (
            np.empty((0, seq_length, feature_width), dtype=data.dtype),
            np.empty((0, target_width), dtype=data.dtype),
        )

    # row_index[i, j] == i + j, so fancy-indexing gathers every window at once
    # (and yields a fresh array rather than a view of `data`).
    row_index = np.arange(n_windows)[:, None] + np.arange(seq_length)
    X = data[:, :n_features][row_index]

    # Targets are rows seq_length .. end, one per window; copy so the result
    # does not alias the input array.
    y = data[seq_length:, n_features:].copy()
    return X, y


In [8]:
# Window the scaled training rows into model inputs and targets.
X_train, y_train = create_sequences(train_data)

# Apply the same windowing to the held-out rows.
X_test, y_test = create_sequences(test_data)

# Dump the first training window and its target to CSV for manual inspection.
np.savetxt(fname='trial.csv', X=X_train[0], delimiter=",")
np.savetxt(fname='trial2.csv', X=y_train[0], delimiter=",")

In [9]:
# Where per-epoch weight checkpoints are written. The `.weights.h5` suffix is
# accepted by older tf.keras (any `.h5` path is saved as HDF5) and is required
# by Keras 3 whenever save_weights_only=True.
cp_path = "models/price_prediction.weights.h5"
cp_dir = os.path.dirname(cp_path)

log_path = "logs/models"
# NOTE(review): dirname() strips the last component, so TensorBoard logs are
# written to "logs" rather than "logs/models" — confirm which one is intended.
log_dir = os.path.dirname(log_path)

# Make sure the checkpoint directory exists before training starts.
os.makedirs(cp_dir, exist_ok=True)

# The checkpoint must be given the file path itself; passing the parent
# directory name made Keras use "models" as the checkpoint file name.
cp_callback = ModelCheckpoint(filepath=cp_path, save_weights_only=True, verbose=1)
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

In [None]:
# Single LSTM encoder over each (timesteps, features) window, followed by a
# small dense head that outputs one value per window.
# NOTE(review): none of the Dense layers specify an activation, so the three
# of them compose into a single linear map — confirm that is intended.
model = Sequential([
    LSTM(units=256, input_shape=X_train.shape[1:]),
    Dense(units=128),
    Dense(units=32),
    Dense(units=1),
])
model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

In [None]:
# Train for 10 epochs in batches of 16 without shuffling, so samples are fed in
# their original order. 10% of the training sequences are set aside for
# validation, and the checkpoint and TensorBoard callbacks run during training.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=16,
    epochs=10,
    shuffle=False,
    validation_split=0.1,
    callbacks=[cp_callback, tensorboard_callback],
    verbose=1,
)

In [12]:
# Persist the trained model to a single .keras file in the working directory.
model.save(filepath='price_prediction.keras')

In [None]:
# Run the trained model over every test window; the cell then displays the
# shape of the prediction array as a quick sanity check.
y_pred = model.predict(x=X_test)
y_pred.shape

In [None]:
# Plot the test targets on their own. Values are on the min-max scaled axis
# because y_test was cut from the scaled test array.
fig, ax = plt.subplots()
ax.plot(y_test)
ax.set(
    title="Actual target (test set)",
    xlabel="Test sequence index",
    ylabel="Scaled 'Tomorrow' value",
)
plt.show()

In [None]:
# Plot the model's predictions on their own. Values are on the same min-max
# scaled axis as the targets the model was trained on.
fig, ax = plt.subplots()
ax.plot(y_pred)
ax.set(
    title="Predicted target (test set)",
    xlabel="Test sequence index",
    ylabel="Scaled 'Tomorrow' value",
)
plt.show()

In [None]:
# Overlay predictions and actual targets for the test windows.
# The x-axis length is derived from the predictions instead of a hard-coded
# row count, so the cell keeps working when the dataset or split size changes.
x = np.arange(len(y_pred))

fig, ax = plt.subplots()
ax.plot(x, y_pred, label='prediction')
ax.plot(x, y_test, label='actual')
ax.set(
    title="Prediction vs. actual (test set)",
    xlabel="Test sequence index",
    ylabel="Scaled 'Tomorrow' value",
)
ax.legend()
plt.show()