In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.preprocessing import StandardScaler

In [2]:
sample_data = pd.read_csv('sample.csv')
sample_data

Unnamed: 0,Ticker,Date,Open,High,Low,Close,Volume
0,AB,2022-10-24,24.58,24.58,24.58,24.58,130
1,AB,2022-10-26,23.90,24.58,23.90,23.90,14301
2,AB,2022-10-27,24.58,24.58,24.57,24.58,505
3,AB,2022-10-28,24.58,24.58,24.09,24.58,5882
4,AB,2022-10-31,24.58,24.58,24.57,24.58,1240
...,...,...,...,...,...,...,...
187982,XABYT,2012-04-27,9.04,9.04,8.50,8.90,4618
187983,XABYT,2012-04-30,8.99,9.04,8.73,9.03,480
187984,XABYT,2012-05-02,8.80,9.03,8.76,8.98,4397
187985,XABYT,2012-05-03,8.89,8.90,8.75,8.80,4409


In [3]:
sample_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 187987 entries, 0 to 187986
Data columns (total 7 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Ticker  187987 non-null  object 
 1   Date    187987 non-null  object 
 2   Open    187965 non-null  float64
 3   High    187965 non-null  float64
 4   Low     187960 non-null  float64
 5   Close   187965 non-null  float64
 6   Volume  187987 non-null  int64  
dtypes: float64(4), int64(1), object(2)
memory usage: 10.0+ MB


In [4]:
df = sample_data.dropna()

In [5]:
def datetime_converter(data : pd.DataFrame, col : str):
    data[col] = pd.to_datetime(data[col], format='%Y-%m-%d')
    return data

df = datetime_converter(sample_data, 'Date')

In [6]:
df['weekday'] = df['Date'].dt.weekday + 1
df['monthday'] = df['Date'].dt.day
df['monthweek'] = round(df['Date'].dt.day / 7)
df['quarter'] = round(df['Date'].dt.month / 3)

df

Unnamed: 0,Ticker,Date,Open,High,Low,Close,Volume,weekday,monthday,monthweek,quarter
0,AB,2022-10-24,24.58,24.58,24.58,24.58,130,1,24,3.0,3.0
1,AB,2022-10-26,23.90,24.58,23.90,23.90,14301,3,26,4.0,3.0
2,AB,2022-10-27,24.58,24.58,24.57,24.58,505,4,27,4.0,3.0
3,AB,2022-10-28,24.58,24.58,24.09,24.58,5882,5,28,4.0,3.0
4,AB,2022-10-31,24.58,24.58,24.57,24.58,1240,1,31,4.0,3.0
...,...,...,...,...,...,...,...,...,...,...,...
187982,XABYT,2012-04-27,9.04,9.04,8.50,8.90,4618,5,27,4.0,1.0
187983,XABYT,2012-04-30,8.99,9.04,8.73,9.03,480,1,30,4.0,1.0
187984,XABYT,2012-05-02,8.80,9.03,8.76,8.98,4397,3,2,0.0,2.0
187985,XABYT,2012-05-03,8.89,8.90,8.75,8.80,4409,4,3,0.0,2.0


In [7]:
df['sin_weekday'] = np.sin((df['weekday'] / 7) * 2 * np.pi)
df['cos_weekday'] = np.cos((df['weekday'] / 7) * 2 * np.pi)

df['sin_monthday'] = np.sin((df['monthday'] / 31) * 2 * np.pi)
df['cos_monthday'] = np.cos((df['monthday'] / 31) * 2 * np.pi)

df['sin_monthweek'] = np.sin((df['monthweek'] / 4) * 2 * np.pi)
df['cos_monthweek'] = np.cos((df['monthweek'] / 4) * 2 * np.pi)

df['sin_quarter'] = np.sin((df['quarter'] / 4) * 2 * np.pi)
df['cos_quarter'] = np.cos((df['quarter'] / 4) * 2 * np.pi)

df['Gain'] = df['High'] - df['Low']

df

Unnamed: 0,Ticker,Date,Open,High,Low,Close,Volume,weekday,monthday,monthweek,quarter,sin_weekday,cos_weekday,sin_monthday,cos_monthday,sin_monthweek,cos_monthweek,sin_quarter,cos_quarter,Gain
0,AB,2022-10-24,24.58,24.58,24.58,24.58,130,1,24,3.0,3.0,0.781831,0.623490,-9.884683e-01,0.151428,-1.000000e+00,-1.836970e-16,-1.000000e+00,-1.836970e-16,0.00
1,AB,2022-10-26,23.90,24.58,23.90,23.90,14301,3,26,4.0,3.0,0.433884,-0.900969,-8.486443e-01,0.528964,-2.449294e-16,1.000000e+00,-1.000000e+00,-1.836970e-16,0.68
2,AB,2022-10-27,24.58,24.58,24.57,24.58,505,4,27,4.0,3.0,-0.433884,-0.900969,-7.247928e-01,0.688967,-2.449294e-16,1.000000e+00,-1.000000e+00,-1.836970e-16,0.01
3,AB,2022-10-28,24.58,24.58,24.09,24.58,5882,5,28,4.0,3.0,-0.974928,-0.222521,-5.712682e-01,0.820763,-2.449294e-16,1.000000e+00,-1.000000e+00,-1.836970e-16,0.49
4,AB,2022-10-31,24.58,24.58,24.57,24.58,1240,1,31,4.0,3.0,0.781831,0.623490,-2.449294e-16,1.000000,-2.449294e-16,1.000000e+00,-1.000000e+00,-1.836970e-16,0.01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187982,XABYT,2012-04-27,9.04,9.04,8.50,8.90,4618,5,27,4.0,1.0,-0.974928,-0.222521,-7.247928e-01,0.688967,-2.449294e-16,1.000000e+00,1.000000e+00,6.123234e-17,0.54
187983,XABYT,2012-04-30,8.99,9.04,8.73,9.03,480,1,30,4.0,1.0,0.781831,0.623490,-2.012985e-01,0.979530,-2.449294e-16,1.000000e+00,1.000000e+00,6.123234e-17,0.31
187984,XABYT,2012-05-02,8.80,9.03,8.76,8.98,4397,3,2,0.0,2.0,0.433884,-0.900969,3.943559e-01,0.918958,0.000000e+00,1.000000e+00,1.224647e-16,-1.000000e+00,0.27
187985,XABYT,2012-05-03,8.89,8.90,8.75,8.80,4409,4,3,0.0,2.0,-0.433884,-0.900969,5.712682e-01,0.820763,0.000000e+00,1.000000e+00,1.224647e-16,-1.000000e+00,0.15


In [8]:
features = ['Open','High','Low','Close','Volume','sin_weekday','cos_weekday','sin_monthday','cos_monthday',
            'sin_quarter','cos_quarter']

In [9]:
split_date = '2020-06-01'
df_train = df[df['Date'] < split_date]
df_test = df[df['Date'] >= split_date]

In [10]:
X_train, y_train = df_train[features], df_train['Gain']
X_test, y_test = df_test[features], df_test['Gain']

scaler = StandardScaler()
scaler.fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

BATCH_SIZE = 64
BUFFER_SIZE = 100
WINDOW_LENGTH = 24


def window_data(X, Y, window=7):
    x = []
    y = []
    for i in range(window-1, len(X)):
        x.append(X[i-window+1:i+1])
        y.append(Y[i])
    return np.array(x), np.array(y)


X_w = np.concatenate((X_train, X_test))
y_w = np.concatenate((y_train, y_test))

X_w, y_w = window_data(X_w, y_w, window=WINDOW_LENGTH)
X_train_w = X_w[:-len(X_test)]
y_train_w = y_w[:-len(X_test)]
X_test_w = X_w[-len(X_test):]
y_test_w = y_w[-len(X_test):]

train_data = tf.data.Dataset.from_tensor_slices((X_train_w, y_train_w))
train_data = train_data.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()

val_data = tf.data.Dataset.from_tensor_slices((X_test_w, y_test_w))
val_data = val_data.batch(BATCH_SIZE).repeat()

In [11]:
simple_lstm_model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(128, input_shape=X_train_w.shape[-2:], dropout=0.0),
    tf.keras.layers.Dense(128),
    tf.keras.layers.Dense(128),
    tf.keras.layers.Dense(1)
])

simple_lstm_model.compile(optimizer='adam', loss='mae')

In [12]:
EVALUATION_INTERVAL = 200
EPOCHS = 1

model_history = simple_lstm_model.fit(train_data, epochs=EPOCHS,
                                      steps_per_epoch=EVALUATION_INTERVAL,
                                      validation_data=val_data, validation_steps=50)

