## Imports

In [1]:
import os
import gc
import random
import pickle
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from joblib import Parallel, delayed

from scipy.optimize import minimize
from sklearn.cluster import KMeans
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler, StandardScaler, LabelEncoder, QuantileTransformer

tqdm.pandas()
%matplotlib inline

import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras import Model, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import get_custom_objects
from tensorflow.keras.layers import Activation, Embedding, Concatenate, Dense, Flatten

import warnings
warnings.filterwarnings("ignore")

def seed_everything(seed):
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)


SEED = 2021
seed_everything(SEED)

In [2]:
# feature utils
def calculate_wap(df, rank="1"):
    return (df[f"bid_price{rank}"] * df[f"ask_size{rank}"] + df[f"bid_size{rank}"] * df[f"ask_price{rank}"]) / (
                df[f"bid_size{rank}"] + df[f"ask_size{rank}"])


def calculate_agg_wap(df):
    wap1 = df["bid_price1"] * df["ask_size1"] + df["bid_size1"] * df["ask_price1"]
    wap2 = df["bid_price2"] * df["ask_size2"] + df["bid_size2"] * df["ask_price2"]
    den = df["ask_size1"] + df["ask_size2"] + df["bid_size1"] + df["bid_size2"]
    
    return (wap1 + wap2)/den
    pass


def calculate_inter_wap(df, rank="1"):
    return (df[f"bid_price{rank}"] * df[f"bid_size{rank}"] + df[f"ask_size{rank}"] * df[f"ask_price{rank}"]) / (
            df[f"bid_size{rank}"] + df[f"ask_size{rank}"])
    pass

def calculate_agg_inter_wap(df):
    iwap1 = df["bid_price1"] * df["bid_size1"] + df["ask_size1"] * df["ask_price1"]
    iwap2 = df["bid_price2"] * df["bid_size2"] + df["ask_size2"] * df["ask_price2"]
    den = df["ask_size1"] + df["ask_size2"] + df["bid_size1"] + df["bid_size2"]
    
    return (iwap1 + iwap2)/den
    pass


def calc_depth(df):
    depth = df['bid_price1'] * df['bid_size1'] + df['ask_price1'] * df['ask_size1'] + df['bid_price2'] * df[
               'bid_size2'] + df['ask_price2'] * df['ask_size2']
    return depth


def calc_slope(df):
    v0 = (df['bid_size1']+df['ask_size1'])/2
    p0 = (df['bid_price1']+df['ask_price1'])/2
    slope_bid = ((df['bid_size1']/v0)-1)/abs((df['bid_price1']/p0)-1)+(
                (df['bid_size2']/df['bid_size1'])-1)/abs((df['bid_price2']/df['bid_price1'])-1)
    slope_ask = ((df['ask_size1']/v0)-1)/abs((df['ask_price1']/p0)-1)+(
                (df['ask_size2']/df['ask_size1'])-1)/abs((df['ask_price2']/df['ask_price1'])-1)
    return (slope_bid+slope_ask)/2, abs(slope_bid-slope_ask)


def calc_dispersion(df):
    bspread = df['bid_price1'] - df['bid_price2']
    aspread = df['ask_price2'] - df['ask_price1']
    bmid = (df['bid_price1'] + df['ask_price1'])/2  - df['bid_price1']
    bmid2 = (df['bid_price1'] + df['ask_price1'])/2  - df['bid_price2']
    amid = df['ask_price1'] - (df['bid_price1'] + df['ask_price1'])/2
    amid2 = df['ask_price2'] - (df['bid_price1'] + df['ask_price1'])/2
    bdisp = (df['bid_size1']*bmid + df['bid_size2']*bspread)/(df['bid_size1']+df['bid_size2'])
    bdisp2 = (df['bid_size1']*bmid + df['bid_size2']*bmid2)/(df['bid_size1']+df['bid_size2'])
    adisp = (df['ask_size1']*amid + df['ask_size2']*aspread)/(df['ask_size1']+df['ask_size2'])      
    adisp2 = (df['ask_size1']*amid + df['ask_size2']*amid2)/(df['ask_size1']+df['ask_size2'])
    return (bdisp + adisp)/2, (bdisp2 + adisp2)/2

def calc_price_impact(df):
    ask = (df['ask_price1'] * df['ask_size1'] + df['ask_price2'] * df['ask_size2'])/(df['ask_size1']+df['ask_size2'])
    bid = (df['bid_price1'] * df['bid_size1'] + df['bid_price2'] * df['bid_size2'])/(df['bid_size1']+df['bid_size2'])
    return (df['ask_price1'] - ask)/df['ask_price1'], (df['bid_price1'] - bid)/df['bid_price1']


def calc_ofi(df):
    a = df['bid_size1']*np.where(df['bid_price1'].diff()>=0,1,0)
    b = df['bid_size1'].shift()*np.where(df['bid_price1'].diff()<=0,1,0)
    c = df['ask_size1']*np.where(df['ask_price1'].diff()<=0,1,0)
    d = df['ask_size1'].shift()*np.where(df['ask_price1'].diff()>=0,1,0)
    return a - b - c + d


def calc_tt1(df):
    p1 = df['ask_price1'] * df['ask_size1'] + df['bid_price1'] * df['bid_size1']
    p2 = df['ask_price2'] * df['ask_size2'] + df['bid_price2'] * df['bid_size2']      
    return p2 - p1 


def calculate_log_return(series):
    return np.log(series).diff()


def calculate_rv(series):
    return np.sqrt(np.sum(np.square(series)))

    
# Calculate integrated quarticity
def calculate_rv_quarticity(series):
    return (series.count()/3)*np.sum(series**4)

# Calculate weighted volatility
def calculate_rv_vol_weighted(series):
    return np.sqrt(np.sum(series**2)/series.count())


def count_unique(series):
    return len(np.unique(series))


def get_stats_window(df, seconds_in_bucket, features_dict, add_suffix=False):
    df_feature = df[df["seconds_in_bucket"] >= seconds_in_bucket].groupby(["time_id"]).agg(features_dict).reset_index()
    df_feature.columns = ["_".join(col) for col in df_feature.columns]

    if add_suffix:
        df_feature = df_feature.add_suffix("_" + str(seconds_in_bucket))

    return df_feature
    pass


def window_stats(df, feature_dict, feature_dict_time, second_windows, additional_dfs=None):
    df_merged = get_stats_window(df, seconds_in_bucket=0, features_dict=feature_dict)

    if additional_dfs is not None:
        df_merged = df_merged.merge(additional_dfs, how='left', left_on='time_id_', right_on='time_id')

    temp_dfs = []
    for window in second_windows:
        temp_dfs.append(
            (window,
             get_stats_window(df, seconds_in_bucket=window, features_dict=feature_dict_time, add_suffix=True)
             )
        )

    for window, temp_df in temp_dfs:
        df_merged = df_merged.merge(temp_df, how="left", left_on="time_id_", right_on=f"time_id__{window}")
        df_merged.drop(columns=[f"time_id__{window}"], inplace=True)

    return df_merged
    pass


def tendency(price, vol):
    diff = np.diff(price)
    val = (diff / price[1:]) * 100
    power = np.sum(val * vol[1:])
    return power
    pass


def get_stock_clusters(df, n_clusters=6):
    pivoted_data = df.pivot(index="time_id", columns=["stock_id"], values="target")
    corr_pivoted = pivoted_data.corr()

    clusters = KMeans(n_clusters, random_state=cfg.random_state).fit(corr_pivoted.values)

    groups = []
    for i in range(n_clusters):
        groups.append([x-1] for x in (corr_pivoted.index+1)*(clusters.labels_ == i) if x > 0)
    return groups
    pass


def create_cluster_aggregations(df, groups):
    feats = []

    for i, idx in enumerate(groups):
        chunk_df = df.loc[df['stock_id'].isin(idx)]
        chunk_df = chunk_df.groupby(['time_id']).agg(np.nanmean)
        chunk_df.loc[:, 'stock_id'] = str(i) + 'c1'
        feats.append(chunk_df)

    feats = pd.concat(feats).reset_index()
    if "target" in feats.columns:
        feats.drop(columns=['target'], inplace=True)

    feats = feats.pivot(index='time_id', columns='stock_id')
    feats.columns = ["_".join(x) for x in feats.columns.ravel()]
    feats.reset_index(inplace=True)

    return pd.merge(df, feats, how="left", on="time_id")
    pass

In [3]:
# config
class cfg:
    
    paths = {
        # train path
        "train_csv"  : "../input/optiver-realized-volatility-prediction/train.csv",
        "train_book" : "../input/optiver-realized-volatility-prediction/book_train.parquet",
        "train_trade": "../input/optiver-realized-volatility-prediction/trade_train.parquet",

        # test path
        "test_csv"   : "../input/optiver-realized-volatility-prediction/test.csv",
        "test_book"  : "../input/optiver-realized-volatility-prediction/book_test.parquet",
        "test_trade" : "../input/optiver-realized-volatility-prediction/trade_test.parquet",
        
        # model paths
        "nnse": "./nnse"
    }

    feature_dict_book = {
        "seconds_in_bucket": [count_unique],
        "wap1":              [np.sum, np.mean, np.std, np.max],
        "wap2":              [np.sum, np.mean, np.std, np.max],
        "wap_agg":           [np.sum, np.mean, np.std, np.max],
        
        "iwap1":             [np.sum, np.mean, np.std, np.max],
        "iwap2":             [np.sum, np.mean, np.std, np.max],
        "iwap_agg":          [np.sum, np.mean, np.std, np.max],
        
        "log_return1":       [np.sum, calculate_rv, calculate_rv_quarticity, calculate_rv_vol_weighted, np.mean, np.std],
        "log_return2":       [np.sum, calculate_rv, calculate_rv_quarticity, calculate_rv_vol_weighted, np.mean, np.std],
        'log_return_agg':    [np.sum, calculate_rv, calculate_rv_quarticity, calculate_rv_vol_weighted, np.mean, np.std],
        
        "inter_log_return1": [np.sum, calculate_rv, calculate_rv_quarticity, calculate_rv_vol_weighted, np.mean, np.std],
        "inter_log_return2": [np.sum, calculate_rv, calculate_rv_quarticity, calculate_rv_vol_weighted, np.mean, np.std],
        'inter_log_return_agg': [np.sum, calculate_rv, calculate_rv_quarticity, calculate_rv_vol_weighted, np.mean, np.std],
        
        "wap_balance":       [np.sum, np.mean, np.std, np.max],
        "volume_imbalance":  [np.sum, np.mean, np.std, np.max],
        "total_volume":      [np.sum, np.mean, np.std, np.max],
        
        "price_spread1":     [np.sum, np.mean, np.std, np.max],
        "price_spread2":     [np.sum, np.mean, np.std, np.max],
        "bid_spread":        [np.sum, np.mean, np.std, np.max],
        "ask_spread":        [np.sum, np.mean, np.std, np.max],
        
        'depth':             [np.sum, np.mean, np.std, np.max],
        'slope':             [np.sum, np.mean, np.std, np.max],
        'dispersion':        [np.sum, np.mean, np.std, np.max],
        'price_impact':      [np.sum, np.mean, np.std, np.max],
        'ofi':               [np.sum, np.mean, np.std, np.max],
        'turn_over':         [np.sum, np.mean, np.std, np.max],
    }

    feature_dict_book_time = {        
        "log_return1":       [calculate_rv, calculate_rv_quarticity, calculate_rv_vol_weighted],
        "log_return2":       [calculate_rv, calculate_rv_quarticity, calculate_rv_vol_weighted],
        "log_return_agg":    [calculate_rv, calculate_rv_quarticity, calculate_rv_vol_weighted],
        
        "inter_log_return1": [calculate_rv, calculate_rv_quarticity, calculate_rv_vol_weighted],
        "inter_log_return2": [calculate_rv, calculate_rv_quarticity, calculate_rv_vol_weighted],
        "inter_log_return_agg": [calculate_rv, calculate_rv_quarticity, calculate_rv_vol_weighted],
    }

    feature_dict_trade = {
        'seconds_in_bucket': [count_unique],       
        'log_return':        [np.sum, calculate_rv, calculate_rv_quarticity, calculate_rv_vol_weighted, np.mean, np.std],
        'size':              [np.sum, np.mean, np.std, np.max],
        'order_count':       [np.sum, np.mean, np.std, np.max],
        'amount':            [np.sum, np.mean, np.std, np.max],
    }
    
    feature_dict_trade_time = {
        'log_return':        [calculate_rv, calculate_rv_quarticity, calculate_rv_vol_weighted],
        'seconds_in_bucket': [count_unique],
        'size':              [np.sum, np.std],
        'order_count':       [np.sum, np.std],
        'amount':            [np.sum, np.std],
    }
    
    bucket_windows = [100, 200, 300, 400, 500]
    random_state = SEED
    pass

In [4]:
# order book features
def get_book_features(file_path):
    book_df = pd.read_parquet(file_path)

    # calculate wap
    book_df['wap1'] = calculate_wap(book_df, rank="1")
    book_df['wap2'] = calculate_wap(book_df, rank="2")
    book_df["wap_agg"] = calculate_agg_wap(book_df)
    
    book_df['iwap1'] = calculate_inter_wap(book_df, rank="1")
    book_df['iwap2'] = calculate_inter_wap(book_df, rank="2")
    book_df["iwap_agg"] = calculate_agg_inter_wap(book_df)

    # calculate log return
    book_df["log_return1"] = book_df.groupby(["time_id"])["wap1"].apply(calculate_log_return)
    book_df["log_return2"] = book_df.groupby(["time_id"])["wap2"].apply(calculate_log_return)
    book_df["log_return_agg"] = book_df.groupby(["time_id"])["wap_agg"].apply(calculate_log_return)
    
    book_df["inter_log_return1"] = book_df.groupby(["time_id"])["iwap1"].apply(calculate_log_return)
    book_df["inter_log_return2"] = book_df.groupby(["time_id"])["iwap2"].apply(calculate_log_return)
    book_df["inter_log_return_agg"] = book_df.groupby(["time_id"])["iwap_agg"].apply(calculate_log_return)

    # calculate balance
    book_df["wap_balance"] = abs(book_df["wap1"] - book_df["wap2"])
    book_df["volume_imbalance"] = abs(
        (book_df["ask_size1"] + book_df["ask_size2"]) - (book_df["bid_size1"] + book_df["bid_size2"]))
    book_df["total_volume"] = book_df["ask_size1"] + book_df["ask_size2"] + book_df["bid_size1"] + book_df[
        "bid_size2"]

    # calculate spread
    book_df["price_spread1"] = (book_df["ask_price1"] - book_df["bid_price1"]) / (
            (book_df["ask_price1"] + book_df["bid_price1"]) / 2)
    book_df["price_spread2"] = (book_df["ask_price2"] - book_df["bid_price2"]) / (
            (book_df["ask_price2"] + book_df["bid_price2"]) / 2)
    book_df["bid_spread"] = book_df["bid_price1"] - book_df["bid_price2"]
    book_df["ask_spread"] = book_df["ask_price1"] - book_df["ask_price2"]
    
    book_df["depth"] = calc_depth(book_df)
    book_df["slope"], _ = calc_slope(book_df)
    book_df["dispersion"], _ = calc_dispersion(book_df)
    book_df["price_impact"], _ = calc_price_impact(book_df)
    book_df["ofi"] = calc_ofi(book_df)
    book_df["turn_over"] = calc_tt1(book_df)
    
    book_df_merged = window_stats(book_df, cfg.feature_dict_book, cfg.feature_dict_book_time, cfg.bucket_windows)

    book_df_merged["row_id"] = book_df_merged["time_id_"].apply(lambda x: f"{file_path.split('=')[1]}-{x}")
    book_df_merged.drop(["time_id_"], axis=1, inplace=True)

    return book_df_merged.bfill().ffill()
                                                                
# trade features
def get_trade_price_features(df):
    res = []
    for n_time_id in df['time_id'].unique():
        df_id = df[df['time_id'] == n_time_id]
        vol_tendency = tendency(df_id['price'].values, df_id['size'].values)
        f_max = np.sum(df_id['price'].values > np.mean(df_id['price'].values))
        f_min = np.sum(df_id['price'].values < np.mean(df_id['price'].values))
        df_max = np.sum(np.diff(df_id['price'].values) > 0)
        df_min = np.sum(np.diff(df_id['price'].values) < 0)
        abs_diff = np.median(np.abs(df_id['price'].values - np.mean(df_id['price'].values)))
        energy = np.mean(df_id['price'].values ** 2)
        iqr_p = np.percentile(df_id['price'].values, 75) - np.percentile(df_id['price'].values, 25)
        abs_diff_v = np.median(np.abs(df_id['size'].values - np.mean(df_id['size'].values)))
        energy_v = np.sum(df_id['size'].values ** 2)
        iqr_p_v = np.percentile(df_id['size'].values, 75) - np.percentile(df_id['size'].values, 25)

        res.append({'time_id': n_time_id,
                    'tendency': vol_tendency,
                    'f_max': f_max,
                    'f_min': f_min,
                    'df_max': df_max,
                    'df_min': df_min,
                    'abs_diff': abs_diff,
                    'energy': energy,
                    'iqr_p': iqr_p,
                    'abs_diff_v': abs_diff_v,
                    'energy_v': energy_v,
                    'iqr_p_v': iqr_p_v})

    return pd.DataFrame(res)
    pass


def tau_features(df, sec, weight):
    tau_feat = 'tau_' + str(sec)
    bucket_col = 'trade_seconds_in_bucket_count_unique_' + str(sec)
    df[tau_feat] = np.sqrt(weight/df[bucket_col])

    size_feat = 'size_' + str(sec)
    order_col = 'trade_order_count_sum_' + str(sec)
    df[size_feat] = np.sqrt(weight/df[order_col])

    return df
    pass


def get_trade_features(file_path, buck_windows=cfg.bucket_windows):
    trade_df = pd.read_parquet(file_path)

    trade_df["log_return"] = trade_df.groupby(["time_id"])["price"].apply(calculate_log_return)
    trade_df["amount"] = trade_df["size"] * trade_df["price"]

    price_features = get_trade_price_features(trade_df)
    trade_df_merged = window_stats(trade_df, cfg.feature_dict_trade, cfg.feature_dict_trade_time, buck_windows, additional_dfs=price_features)

    trade_df_merged = trade_df_merged.add_prefix("trade_")

    trade_df_merged["row_id"] = trade_df_merged["trade_time_id_"].apply(lambda x: f"{file_path.split('=')[1]}-{x}")
    trade_df_merged.drop(["trade_time_id_"], axis=1, inplace=True)

    for sec in buck_windows:
        trade_df_merged = tau_features(trade_df_merged, sec, weight=sec/600)
    return trade_df_merged.bfill().ffill() 

In [5]:
# create dataset
class GetData:
    def __init__(self, df, book_path, trade_path, is_train=True):
        self.df = df.copy(deep=True)
        self.order_book_path = book_path
        self.trade_path = trade_path
        self.is_train = is_train

        self._get_rowid()

    def _get_rowid(self):
        self.df["row_id"] = self.df["stock_id"].astype(str) + "-" + self.df["time_id"].astype(str)

    def get_time_stock(self, buck_windows=cfg.bucket_windows):
        vol_cols = []
        feat_set = ['log_return1_calculate_rv', 'log_return2_calculate_rv', 'log_return_agg_calculate_rv', 'trade_log_return_calculate_rv']
        for feat in feat_set:
            for sec in buck_windows:
                vol_cols.append(feat + f'_{sec}')
        vol_cols += feat_set

        df_stock_id = self.df.groupby(['stock_id'])[vol_cols].agg(['mean', 'std', 'max', 'min']).reset_index()
        df_stock_id.columns = ['_'.join(col) for col in df_stock_id.columns]
        df_stock_id = df_stock_id.add_suffix('_' + 'stock')

        df_time_id = self.df.groupby(['time_id'])[vol_cols].agg(['mean', 'std', 'max', 'min']).reset_index()
        df_time_id.columns = ['_'.join(col) for col in df_time_id.columns]
        df_time_id = df_time_id.add_suffix('_' + 'time')

        # Merge with original dataframe
        self.df = self.df.merge(df_stock_id, how='left', left_on=['stock_id'], right_on=['stock_id__stock'])
        self.df = self.df.merge(df_time_id, how='left', left_on=['time_id'], right_on=['time_id__time'])
        self.df.drop(['stock_id__stock', 'time_id__time'], axis=1, inplace=True)
        return self.df

    def process_features(self, list_stock_ids):
        def parallel_helper(stock_id):
            book_sample_path = os.path.join(self.order_book_path, f"stock_id={stock_id}")
            trade_sample_path = os.path.join(self.trade_path, f"stock_id={stock_id}")

            return pd.merge(get_book_features(book_sample_path), get_trade_features(trade_sample_path),
                            on="row_id",
                            how="left")

        df = Parallel(n_jobs=-1, verbose=1)(delayed(parallel_helper)(stock_id) for stock_id in list_stock_ids)
        df = pd.concat(df, ignore_index=True)

        return df

    def _get_features(self):
        features_df = self.process_features(self.df["stock_id"].unique())
        self.df = self.df.merge(features_df, on=["row_id"], how="left")

        return self.get_time_stock()
        pass

    def get_all_features(self, stock_groups):
        return create_cluster_aggregations(self._get_features(), stock_groups)
        pass

In [6]:
def swish(x, beta = 1):
    return (x * K.sigmoid(beta * x))

def rmspe(y_true, y_pred):
    return np.sqrt(np.mean(np.square((y_true-y_pred)/y_true)))
    pass

def root_mean_squared_per_error(y_true, y_pred):
         return K.sqrt(K.mean(K.square((y_true-y_pred)/y_true)))

# add as activation
get_custom_objects().update({'swish': Activation(swish)})


def base_model(inp_shape, n_cat_data, stock_embedding_size, hidden_units):
    stock_id_input = Input(shape=(1,), name='stock_id')
    num_input = Input(shape=(inp_shape,), name='num_data')

    stock_embedded = Embedding(n_cat_data+1,
                               stock_embedding_size, 
                               input_length=1,
                               name='stock_embedding')(stock_id_input)
    stock_flattened = Flatten()(stock_embedded)
    out = Concatenate()([stock_flattened, num_input])

    for n_hidden in hidden_units:
        out = Dense(n_hidden, activation='swish')(out)

    out = Dense(1, activation='linear', name='prediction')(out)
    
    model = Model(
        inputs = [stock_id_input, num_input],
        outputs = out
    )
    
    return model

def encode_stocks(df):
    stock_encoder = LabelEncoder()
    df["stock_id"] = stock_encoder.fit_transform(df["stock_id"])
    pickle.dump(stock_encoder, open("./label_stocks_global.pkl", "wb"))
    
    return df

def get_quantile_transform(train_df, test_df=None):
    print("[INFO] Applying Quantile Transformation...")
    for col in tqdm(train_df.columns):
        if col=="stock_id":
            continue
        qt = QuantileTransformer(random_state=21, n_quantiles=2000, output_distribution='normal')
        train_df[col] = qt.fit_transform(train_df[[col]])
        if test_df is not None: test_df[col] = qt.transform(test_df[[col]])
        
    return train_df, test_df
    pass


def get_transform(df, val_df, name, file_path=None):
    print(f"[INFO] Using {name} scaler...\n")
    if name=="mm":
        scaler = MinMaxScaler().fit(df.drop(["stock_id"], axis=1))
    elif name=="mm_11":
        scaler = MinMaxScaler(feature_range=(-1, 1)).fit(df.drop(["stock_id"], axis=1))
    else:
        scaler = StandardScaler().fit(df.drop(["stock_id"], axis=1))
        
    df.iloc[:, 1:] = scaler.transform(df.iloc[:, 1:])
    if val_df is not None: val_df.iloc[:, 1:] = scaler.transform(val_df.iloc[:, 1:])
    if file_path is not None:
        pickle.dump(scaler, open(file_path, "wb"))
    
    del scaler
    _ = gc.collect()
    
    return df, val_df
    pass

class TrainFer:
    def __init__(self, n_splits, model_path, random_state):
        self.n_splits = n_splits
        self.random_state = random_state
        self.model_path = model_path
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
            
    
    def train(self, X, y, callback_list, scaler_name="mm"):
        X = encode_stocks(X)
        
        oof_predictions = np.zeros(X.shape[0])
        kfold = KFold(n_splits=self.n_splits, random_state=0, shuffle=True)
        oof_scores = []

        for fold, (train_idx, val_idx) in enumerate(kfold.split(X)):
            print(f"\nFold - {fold}\n")

            x_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
            x_val, y_val = X.iloc[val_idx], y.iloc[val_idx]
            
            x_train, x_val = get_quantile_transform(x_train, x_val)
            x_train, x_val = get_transform(x_train, x_val, scaler_name)
            
            model = base_model(inp_shape=486, n_cat_data=112, stock_embedding_size=48, hidden_units=[196, 128, 64, 32])
            model.compile(
                tf.keras.optimizers.Adam(learning_rate=0.005),
                loss=root_mean_squared_per_error
            )
            
        
            cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=os.path.join(self.model_path, f"nnse_{fold}.h5"),
                                                             save_weights_only=True,
                                                             verbose=0)
            model.fit([x_train["stock_id"], x_train.drop(["stock_id"], axis=1)],
                      y_train,              
                      batch_size=2048,
                      epochs=200,
                      validation_data=([x_val["stock_id"], x_val.drop(["stock_id"], axis=1)], y_val),
                      callbacks=callback_list+[cp_callback],
                      validation_batch_size=len(y_val),
                      shuffle=True,
                      verbose=1)
            
            fold_preds = model.predict([x_val["stock_id"], x_val.drop(["stock_id"], axis=1)])[:, 0]
            oof_score = rmspe(y_val.values, fold_preds)
            print(f"\nRMSPE of fold {fold}: {oof_score}")
            
            oof_scores.append(oof_score)
            oof_predictions[val_idx] = fold_preds
            
            del x_train, x_val, y_train, y_val, model, fold_preds
            _ = gc.collect()
        
        print(f"\nOOF Scores: {oof_scores}\n")
        rmspe_score = rmspe(y, oof_predictions)
        print(f"OOF RMSPE: {rmspe_score}")
        
        return y, oof_predictions

In [7]:
if __name__ == "__main__":
    _ = gc.collect()
    
    train_feats = pickle.load(open("../input/processed-dataset-orvp/train_df.pkl", "rb"))
    train_feats.fillna(-1, inplace=True)
    reg = TrainFer(n_splits=5, model_path=cfg.paths["nnse"], random_state=cfg.random_state) 

    es = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=20,
        verbose=1,
        mode='min',
        restore_best_weights=True)

    plateau = tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.2,
        patience=7,
        verbose=1,
        mode='min')
    
    callback_list = [es, plateau]
    y_targets, oof_preds = reg.train(train_feats.drop(columns=["row_id", "target", "time_id"]), train_feats["target"], callback_list, "ss")
    pickle.dump(y_targets, open("./y_targets_ss.pkl", "wb"))
    pickle.dump(oof_preds, open("./oof_preds_ss.pkl", "wb"))
    pass


Fold - 0

[INFO] Applying Quantile Transformation...


  0%|          | 0/487 [00:00<?, ?it/s]

[INFO] Using ss scaler...

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200

Epoch 00029: ReduceLROnPlateau reducing learning rate to 0.0009999999776482583.
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200

Epoch 00044: ReduceLROnPlateau reducing learning rate to 0.0001999999862164259.
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
E

  0%|          | 0/487 [00:00<?, ?it/s]

[INFO] Using ss scaler...

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200

Epoch 00018: ReduceLROnPlateau reducing learning rate to 0.0009999999776482583.
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200

Epoch 00047: ReduceLROnPlateau reducing learning rate to 0.0001999999862164259.
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200

Epoch 00062: 

  0%|          | 0/487 [00:00<?, ?it/s]

[INFO] Using ss scaler...

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200

Epoch 00020: ReduceLROnPlateau reducing learning rate to 0.0009999999776482583.
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200

Epoch 00058: ReduceLROnPlateau reducing learning rate to 0.0001999999862164259.
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
E

  0%|          | 0/487 [00:00<?, ?it/s]

[INFO] Using ss scaler...

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200

Epoch 00021: ReduceLROnPlateau reducing learning rate to 0.0009999999776482583.
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200

Epoch 00036: ReduceLROnPlateau reducing learning rate to 0.0001999999862164259.
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200

Epoch 00046: ReduceLROnPlateau reducing learning rate to 3.9999996079131965e-05.
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/2

  0%|          | 0/487 [00:00<?, ?it/s]

[INFO] Using ss scaler...

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200

Epoch 00018: ReduceLROnPlateau reducing learning rate to 0.0009999999776482583.
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200

Epoch 00042: ReduceLROnPlateau reducing learning rate to 0.0001999999862164259.
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
E

EOF