In [9]:
import tensorflow as tf
import pandas as pd
import numpy as np
import glob
import datetime
from sklearn.preprocessing import robust_scale
from tqdm.auto import tqdm

from preprocessing_utils import *
from AdvModel import AdvModel

In [10]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [11]:
# Sequence length passed to generate_sequences (number of rows per sample).
T = 15

# Rows dated before this limit go to training, the rest to validation.
date_limit_train_validation = datetime.datetime(2016, 5, 1)


def _relative_to_close(column):
    """Return a feature function: `column` over the same row's Close, minus 1."""
    return lambda df: df[column] / df["Close"] - 1


def _daily_change(column):
    """Return a feature function: row-over-row percentage change of `column`."""
    return lambda df: df[column].pct_change()


def _moving_average_gap(window):
    """Return a feature function: `window`-row mean of Adj Close over Adj Close, minus 1."""
    return lambda df: (df["Adj Close"].rolling(window).mean() / df["Adj Close"]) - 1


# Maps each feature column name to a function of the raw price DataFrame.
# Insertion order matters: features.keys() is what selects the model's input columns.
features = {
    "preprocessed_open": _relative_to_close("Open"),
    "preprocessed_high": _relative_to_close("High"),
    "preprocessed_low": _relative_to_close("Low"),
    "preprocessed_close": _daily_change("Close"),
    "preprocessed_adj_close": _daily_change("Adj Close"),
    **{
        f"preprocessed_{window}-day": _moving_average_gap(window)
        for window in (5, 10, 15, 20, 25, 30)
    },
    "preprocessed_volume": lambda df: df["Volume"],
}



# Data: Gather & Preprocess

In [13]:
raw_data_path = "data/stocknet-dataset/price/raw/*.csv"
# Sorted so the per-stock processing order does not depend on the filesystem.
raw_data_pathes = sorted(glob.glob(raw_data_path))
if not raw_data_pathes:
    # Fail early with a clear message instead of passing None to shuffled_X_y.
    raise FileNotFoundError(f"No CSV file matches {raw_data_path!r}")

# Per-stock arrays are collected in lists and concatenated once after the loop;
# concatenating inside the loop re-copies everything accumulated so far at
# every iteration.
X_train_parts, y_train_parts = [], []
X_validation_parts, y_validation_parts = [], []

# Iterate through each stock's raw price file
for path in tqdm(raw_data_pathes):
    stock_df = pd.read_csv(path, parse_dates=["Date"], index_col="Date")

    ## Generate labels
    # NOTE(review): labelling() runs before sort_index(); if it relies on row
    # order, confirm the CSV files are already in chronological order.
    labelling(stock_df)

    ## Preprocessing
    stock_df.sort_index(inplace=True)
    stock_df.dropna(inplace=True)

    ### Apply features functions
    # NOTE(review): robust_scale is fitted on the stock's full history, i.e.
    # before the train/validation split below, so validation rows influence the
    # scaling of training rows. Confirm this is intended.
    for feature_key, feature_fn in features.items():
        stock_df[feature_key] = robust_scale(feature_fn(stock_df))
    # Drop rows that still hold NaN after feature computation
    # (e.g. the first rows of each rolling window).
    stock_df.dropna(inplace=True)

    # Chronological split: strictly before the limit -> train, otherwise validation.
    is_train_row = stock_df.index < date_limit_train_validation
    train_stock_df = stock_df[is_train_row]
    validation_stock_df = stock_df[~is_train_row]

    ## Generate sequences
    X_stock_train, y_stock_train = generate_sequences(
        df=train_stock_df, features_columns=features.keys(), T=T
    )
    X_stock_validation, y_stock_validation = generate_sequences(
        df=validation_stock_df, features_columns=features.keys(), T=T
    )

    X_train_parts.append(X_stock_train)
    y_train_parts.append(y_stock_train)
    X_validation_parts.append(X_stock_validation)
    y_validation_parts.append(y_stock_validation)

# Stack all stocks along the sample axis.
X_train = np.concatenate(X_train_parts, axis=0)
y_train = np.concatenate(y_train_parts, axis=0)
X_validation = np.concatenate(X_validation_parts, axis=0)
y_validation = np.concatenate(y_validation_parts, axis=0)

# Shuffle X and y
# NOTE(review): no random seed is set in this notebook; whether shuffled_X_y is
# reproducible depends on its implementation — confirm.
X_train, y_train = shuffled_X_y(X_train, y_train)
X_validation, y_validation = shuffled_X_y(X_validation, y_validation)
X_train.shape, y_train.shape, X_validation.shape, y_validation.shape

  0%|          | 0/87 [00:00<?, ?it/s]

((44268, 15, 12), (44268,), (14237, 15, 12), (14237,))

# LSTM Model

In [18]:
# Baseline classifier: one LSTM layer returning its last output, followed by a
# 3-unit softmax layer.
lstm_encoder = tf.keras.layers.LSTM(16, dropout=0.2, return_sequences=False)
softmax_head = tf.keras.layers.Dense(3, activation="softmax")
model = tf.keras.models.Sequential([lstm_encoder, softmax_head])

model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["acc"],
)

# Targets are rescaled with (y + 1) / 2 before being used as class indices.
# NOTE(review): presumably y is in {-1, +1}, which gives {0, 1}; the head has
# 3 units though — confirm against labelling() whether a third class exists
# (a 0 label would map to 0.5 here).
train_targets = (y_train + 1) / 2
validation_targets = (y_validation + 1) / 2

model.fit(
    x=X_train,
    y=train_targets,
    validation_data=(X_validation, validation_targets),
    batch_size=1024,
    epochs=30,
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7fb6342aa580>

In [23]:
# Adversarial model built from the project's AdvModel class, which takes:
# epsilon, beta, input_shape, model_latent_space, model_classifier.
# NOTE(review): epsilon / beta are presumably the perturbation size and the
# weight of the adversarial loss term — confirm against AdvModel.
adv_model = AdvModel(
    epsilon = 0.001, 
    beta = 0.05, 
    input_shape = X_train.shape[1:],
    model_latent_space = tf.keras.models.Sequential([
            tf.keras.layers.LSTM(64, return_sequences= False)
        ]),
    model_classifier = tf.keras.models.Sequential([
            tf.keras.layers.Dense(3, activation = "linear")
        ])
)
adv_model.compile(
    loss= "hinge",
    optimizer = tf.keras.optimizers.Adam(learning_rate = 1E-2),
    metrics = ["acc"]
)
# Train adv_model itself. The original cell called `model.fit`, which retrained
# the baseline LSTM on the raw labels and left adv_model untrained.
adv_model.fit(
    X_train, y_train,
    validation_data = (X_validation, y_validation),
    epochs = 30,
    batch_size = 1024
)


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7fb5601f56d0>