In [None]:
# Reload imported modules automatically before each cell executes, so edits to
# the local helper modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import datetime
import glob

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import RobustScaler, robust_scale
from tqdm.auto import tqdm

from preprocessing_utils import *
from AdvASLTM import *

In [3]:
# Window length handed to generate_sequences as `T`.
T = 15

# Rows dated strictly before this go to the training set; rows on or after it
# go to the validation set.
date_limit_train_validation = datetime.datetime(2016, 5, 1)


def _relative_to_close(column):
    """Return a feature function computing `column` / "Close" - 1."""
    def feature(df):
        return df[column] / df["Close"] - 1
    return feature


def _moving_average_gap(window):
    """Return a feature function computing the `window`-row rolling mean of
    "Adj Close" divided by the current "Adj Close", minus 1."""
    def feature(df):
        return (df["Adj Close"].rolling(window).mean() / df["Adj Close"]) - 1
    return feature


# Maps each output column name to a function that derives it from a raw price
# DataFrame. Insertion order is the order the columns are created in.
features = {
    "preprocessed_open": _relative_to_close("Open"),
    "preprocessed_high": _relative_to_close("High"),
    "preprocessed_low": _relative_to_close("Low"),
    "preprocessed_close": lambda df: df["Close"].pct_change(),
    "preprocessed_adj_close": lambda df: df["Adj Close"].pct_change(),
    "preprocessed_5-day": _moving_average_gap(5),
    "preprocessed_10-day": _moving_average_gap(10),
    "preprocessed_15-day": _moving_average_gap(15),
    "preprocessed_20-day": _moving_average_gap(20),
    "preprocessed_25-day": _moving_average_gap(25),
    "preprocessed_30-day": _moving_average_gap(30),
    "preprocessed_volume": lambda df: df["Volume"],
}



# Data: Gather & Preprocess

In [7]:
# Build the train / validation sequence arrays from every raw per-stock price CSV.
raw_data_path = "data/stocknet-dataset/price/raw/*.csv"
# glob order depends on the filesystem; sort so stocks are always processed in
# the same order.
raw_data_pathes = sorted(glob.glob(raw_data_path))
if not raw_data_pathes:
    raise FileNotFoundError(f"No raw price CSV matches {raw_data_path!r}")

# Per-stock arrays are collected in lists and concatenated once after the loop;
# concatenating inside the loop re-copies everything gathered so far each time.
X_train_parts, y_train_parts = [], []
X_validation_parts, y_validation_parts = [], []

# Iterate through each stock's raw data
for path in tqdm(raw_data_pathes):
    stock_df = pd.read_csv(path, parse_dates=["Date"], index_col="Date")

    # Put rows in chronological order before any order-dependent step runs
    # (pct_change / rolling below; presumably labelling too — TODO confirm
    # against preprocessing_utils).
    stock_df.sort_index(inplace=True)

    ## Generate labels (called for its in-place effect on stock_df)
    labelling(stock_df)

    ## Preprocessing
    stock_df.dropna(inplace=True)

    # Rows that will end up in the training split. The scalers below are fitted
    # on these rows only, so validation-period statistics never leak into the
    # features the model is trained on.
    is_train = stock_df.index < date_limit_train_validation
    if not is_train.any():
        raise ValueError(
            f"{path}: no rows before {date_limit_train_validation:%Y-%m-%d} "
            "to fit the feature scalers on"
        )

    ### Apply features functions
    for feature_key, feature_fn in features.items():
        raw_values = feature_fn(stock_df).to_numpy(dtype=float).reshape(-1, 1)
        # NaNs (warm-up rows of pct_change / rolling) are ignored when fitting
        # and passed through by transform; they are dropped just below.
        scaler = RobustScaler().fit(raw_values[is_train])
        stock_df[feature_key] = scaler.transform(raw_values).ravel()
    stock_df.dropna(inplace=True)

    train_stock_df = stock_df[stock_df.index < date_limit_train_validation]
    validation_stock_df = stock_df[stock_df.index >= date_limit_train_validation]

    ## Generate sequences (separately per split, so no window straddles the cutoff)
    X_stock_train, y_stock_train = generate_sequences(df = train_stock_df, features_columns= features.keys(), T = T )
    X_stock_validation, y_stock_validation = generate_sequences(df = validation_stock_df, features_columns= features.keys(), T = T)

    X_train_parts.append(X_stock_train)
    y_train_parts.append(y_stock_train)
    X_validation_parts.append(X_stock_validation)
    y_validation_parts.append(y_stock_validation)

X_train = np.concatenate(X_train_parts, axis=0)
y_train = np.concatenate(y_train_parts, axis=0)
X_validation = np.concatenate(X_validation_parts, axis=0)
y_validation = np.concatenate(y_validation_parts, axis=0)

# Shuffle X and y
X_train, y_train = shuffled_X_y(X_train, y_train)
X_validation, y_validation = shuffled_X_y(X_validation, y_validation)
X_train.shape, y_train.shape, X_validation.shape, y_validation.shape

  0%|          | 0/87 [00:00<?, ?it/s]

((44268, 15, 12), (44268,), (14237, 15, 12), (14237,))

# Tuner search

In [17]:
# Instantiate AdvALSTM with a single fixed hyperparameter set; the attention,
# hinge and adversarial_training switches are all turned on.
# NOTE(review): the exact meaning of epsilon / beta / l2 is defined in the
# AdvASLTM module — confirm there before tuning.
model = AdvALSTM(
    units=16,
    epsilon=0.001,
    beta=0.05,
    learning_rate=1e-2,
    l2=0.001,
    attention=True,
    hinge=True,
    adversarial_training=True,
)

# Fit on the shuffled training sequences and evaluate on the validation
# sequences each epoch.
model.fit(
    X_train,
    y=y_train,
    validation_data=(X_validation, y_validation),
    epochs=300,
    batch_size=1024,
)

Epoch 1/300


2023-03-22 18:10:28.990475: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:903] function_optimizer failed: INVALID_ARGUMENT: Input 0 of node Adam/gradients/zeros_like_4 was passed float from sequential_22/lstm_11/PartitionedCall:7 incompatible with expected variant.
2023-03-22 18:10:29.010923: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:903] shape_optimizer failed: OUT_OF_RANGE: src_output = 31, but num_outputs is only 30
2023-03-22 18:10:29.021932: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:903] layout failed: OUT_OF_RANGE: src_output = 31, but num_outputs is only 30
2023-03-22 18:10:29.049813: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:903] tfg_optimizer{} failed: INVALID_ARGUMENT: Input 0 of node Adam/gradients/zeros_like_4 was passed float from sequential_22/lstm_11/PartitionedCall:7 incompatible with expected variant.
	when importing GraphDef to MLIR module in GrapplerHook
2023-03-22 18:10:29.057674: E tensorflow/core/grappler/optimiz

Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78/300
Epoch 7