# MATH7016 Deep Learning
### Coursework 2025

In [26]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from numpy.random import seed
import tensorflow as tf
from tensorflow import keras
from keras import backend as K

from sklearn.model_selection import KFold
from keras.layers import Activation
from keras.utils import get_custom_objects
from sklearn.preprocessing import StandardScaler

# Fix TensorFlow's global random seed (intended to make TF-side randomness
# repeatable between runs). Only TensorFlow is seeded in this cell; NumPy and
# Python's `random` module are not.
tf.random.set_seed(42)

In [3]:
# Read the training file (Data_A) and the test file (Data_B, shipped without labels).
train_df = pd.read_csv('DL-2025-CW-data/Data_A.csv')
test_df = pd.read_csv('DL-2025-CW-data/Data_B_nolabels.csv')

# Four order-book levels; each level contributes, in this order:
# ask price, ask size, bid price, bid size.
level_cols = [
    name
    for level in range(1, 5)
    for name in (
        f'ask_price{level}',
        f'ask_size{level}',
        f'bid_price{level}',
        f'bid_size{level}',
    )
]

# Five mid-price change indicator columns.
change_cols = [f'midprice_change{idx}' for idx in range(1, 6)]

# Full header for the training file: label first, then the 16 level columns,
# then the 5 change columns.
columns_names = ['target'] + level_cols + change_cols

train_df.columns = columns_names
# The test file has every column except the leading 'target'.
test_df.columns = columns_names[1:]
test_df.head()

Unnamed: 0,ask_price1,ask_size1,bid_price1,bid_size1,ask_price2,ask_size2,bid_price2,bid_size2,ask_price3,ask_size3,...,bid_size3,ask_price4,ask_size4,bid_price4,bid_size4,midprice_change1,midprice_change2,midprice_change3,midprice_change4,midprice_change5
0,696400.0,16,696000.0,12,696500.0,57,695900.0,118,696600.0,100,...,262,696700.0,150,695700.0,104,1,0,1,0,1
1,740800.0,2,740400.0,20,741000.0,60,740200.0,27,741200.0,156,...,31,741300.0,200,740000.0,170,0,1,0,0,1
2,730900.0,1,730200.0,230,731000.0,111,730100.0,86,731100.0,42,...,136,731200.0,100,729900.0,132,1,1,0,0,1
3,630600.0,100,630300.0,69,630700.0,110,630200.0,2,630800.0,219,...,1,630900.0,101,630000.0,104,0,1,0,0,0
4,851100.0,579,850300.0,25,851200.0,17,850100.0,287,851400.0,307,...,270,851500.0,223,849900.0,72,1,0,1,0,1


In [4]:
# Display the renamed training frame (pandas abbreviates long frames in the output).
train_df

Unnamed: 0,target,ask_price1,ask_size1,bid_price1,bid_size1,ask_price2,ask_size2,bid_price2,bid_size2,ask_price3,...,bid_size3,ask_price4,ask_size4,bid_price4,bid_size4,midprice_change1,midprice_change2,midprice_change3,midprice_change4,midprice_change5
0,0,650400.0,501,650200.0,106,650500.0,245,650100.0,259,650600.0,...,328,650700.0,141,649900.0,277,0,0,1,1,1
1,1,636200.0,153,635800.0,150,636300.0,100,635700.0,15,636400.0,...,110,636500.0,105,635500.0,201,0,1,0,1,0
2,0,724800.0,4,724500.0,14,724900.0,50,724300.0,312,725100.0,...,100,725200.0,379,724100.0,56,1,0,0,1,1
3,0,622900.0,110,622700.0,100,623000.0,523,622600.0,300,623100.0,...,249,623200.0,605,622400.0,200,1,0,1,1,0
4,1,620100.0,374,619900.0,110,620200.0,495,619800.0,210,620300.0,...,601,620400.0,310,619600.0,100,0,1,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
199994,0,429000.0,400,428800.0,500,429100.0,300,428700.0,700,429200.0,...,655,429300.0,300,428500.0,400,1,0,1,1,1
199995,1,413600.0,100,413500.0,500,413700.0,611,413400.0,457,413800.0,...,400,413900.0,300,413200.0,800,0,1,0,1,0
199996,0,381900.0,401,381800.0,100,382000.0,705,381700.0,800,382100.0,...,400,382200.0,400,381500.0,400,1,0,1,0,0
199997,1,443000.0,100,442900.0,29,443100.0,300,442800.0,100,443200.0,...,400,443300.0,800,442600.0,200,0,1,0,1,0


In [5]:
def calc_wap(df, bid_p, ask_p, bid_s, ask_s):
    """Return the size-weighted price where each price is weighted by the
    size on the *opposite* side of the book.

    Computes ``(bid_price * ask_size + ask_price * bid_size) / (bid_size + ask_size)``
    row-wise.

    Args:
        df: frame holding the four columns named below.
        bid_p, ask_p: names of the bid / ask price columns.
        bid_s, ask_s: names of the bid / ask size columns.

    Returns:
        A Series aligned with ``df``.
    """
    bid_size = df[bid_s]
    ask_size = df[ask_s]
    cross_weighted = df[bid_p] * ask_size + df[ask_p] * bid_size
    return cross_weighted / (bid_size + ask_size)

def calc_wmp(df, bid_p, ask_p, bid_s, ask_s):
    """Return the size-weighted price where each price is weighted by the
    size on its *own* side of the book.

    Computes ``(bid_price * bid_size + ask_price * ask_size) / (bid_size + ask_size)``
    row-wise.

    Args:
        df: frame holding the four columns named below.
        bid_p, ask_p: names of the bid / ask price columns.
        bid_s, ask_s: names of the bid / ask size columns.

    Returns:
        A Series aligned with ``df``.
    """
    bid_size = df[bid_s]
    ask_size = df[ask_s]
    own_weighted = df[bid_p] * bid_size + df[ask_p] * ask_size
    return own_weighted / (bid_size + ask_size)

def mid_price(df,  bid_p, ask_p):
    """Return the row-wise arithmetic mean of the bid and ask price columns.

    Args:
        df: frame holding the two price columns.
        bid_p, ask_p: names of the bid / ask price columns.

    Returns:
        A Series aligned with ``df``.
    """
    price_sum = df[bid_p] + df[ask_p]
    return price_sum / 2


In [47]:
def preprocessor(df):
    """Add engineered order-book features to ``df`` and return it.

    The new columns are assigned directly onto the frame passed in, so ``df``
    is modified in place and the returned object is that same frame.

    Expects the columns ``ask_price{i}``, ``ask_size{i}``, ``bid_price{i}`` and
    ``bid_size{i}`` for ``i`` in 1..4.

    Columns added, in order:
        * per level ``i``: ``wap{i}``, ``wmp{i}``, ``price_spread{i}``,
          ``midprice{i}``, ``order_imbalance{i}``, ``bid_ask_ratio{i}``
        * ``wap_balance``, ``bid_depth_ratio``, ``ask_depth_ratio``
        * ``total_volume``, ``volume_imbalance``
        * ``order_imbalance_total``, ``bid_vol_ratio``, ``ask_vol_ratio``

    The raw order-book columns are kept; no columns are dropped.
    """
    levels = range(1, 5)

    # Per-level price and size features.
    for i in levels:
        bid_p, ask_p = f"bid_price{i}", f"ask_price{i}"
        bid_s, ask_s = f"bid_size{i}", f"ask_size{i}"
        level_size = df[bid_s] + df[ask_s]

        df[f"wap{i}"] = calc_wap(df, bid_p, ask_p, bid_s, ask_s)
        df[f"wmp{i}"] = calc_wmp(df, bid_p, ask_p, bid_s, ask_s)
        # Spread expressed relative to the level's mid price.
        df[f"price_spread{i}"] = (df[ask_p] - df[bid_p]) / ((df[ask_p] + df[bid_p]) / 2)
        df[f'midprice{i}'] = mid_price(df, bid_p, ask_p)

        df[f"order_imbalance{i}"] = (df[bid_s] - df[ask_s]) / level_size
        df[f'bid_ask_ratio{i}'] = df[bid_s] / level_size

    # Gap between the level-1 and level-2 weighted prices.
    df["wap_balance"] = (df["wap1"] - df["wap2"]).abs()

    # Share of each side's four-level size that sits at level 1.
    bid_size_cols = [f'bid_size{i}' for i in levels]
    ask_size_cols = [f'ask_size{i}' for i in levels]
    df['bid_depth_ratio'] = df['bid_size1'] / df[bid_size_cols].sum(axis=1)
    df['ask_depth_ratio'] = df['ask_size1'] / df[ask_size_cols].sum(axis=1)

    # Aggregates over the top two levels of the book.
    bid1, bid2 = df["bid_size1"], df["bid_size2"]
    ask1, ask2 = df["ask_size1"], df["ask_size2"]

    df["total_volume"] = df[["ask_size1", "ask_size2", "bid_size1", "bid_size2"]].sum(axis=1)
    df["volume_imbalance"] = ((ask1 + ask2) - (bid1 + bid2)).abs()

    # Signed imbalance, positive when the bid side is larger.
    df["order_imbalance_total"] = (bid1 + bid2 - ask1 - ask2) / (bid1 + bid2 + ask1 + ask2)

    top2_bid_size = df[['bid_size1', 'bid_size2']].sum(axis=1)
    top2_all_size = df[['bid_size1', 'bid_size2', 'ask_size1', 'ask_size2']].sum(axis=1)
    df['bid_vol_ratio'] = top2_bid_size / top2_all_size
    df['ask_vol_ratio'] = 1 - df['bid_vol_ratio']

    # Dropping the raw order-book columns (`level_cols`) is currently disabled.
    return df


In [48]:
# Engineer features for both data sets. preprocessor() assigns its columns onto
# the frame it receives and returns that same object, so train_nn / test_nn
# refer to the same frames as train_df / test_df.
train_nn, test_nn = preprocessor(train_df), preprocessor(test_df)

In [30]:
# Bind the model-input names to train_df / test_df.
# NOTE(review): when run after the preprocessing cell this changes nothing,
# because preprocessor() already returned these same objects. If run on its
# own, the frames will not contain the engineered features.
train_nn, test_nn = train_df, test_df

In [44]:
# Display the frame that feeds the network (pandas abbreviates long/wide frames in the output).
train_nn

Unnamed: 0,target,ask_price1,ask_size1,bid_price1,bid_size1,ask_price2,ask_size2,bid_price2,bid_size2,ask_price3,...,order_imbalance4,bid_ask_ratio4,wap_balance,bid_depth_ratio,ask_depth_ratio,total_volume,volume_imbalance,order_imbalance_total,bid_vol_ratio,ask_vol_ratio
0,0,650400.0,501,650200.0,106,650500.0,245,650100.0,259,650600.0,...,0.325359,0.662679,70.629691,0.109278,0.484058,1111,381,-0.342934,0.328533,0.671467
1,1,636200.0,153,635800.0,150,636300.0,100,635700.0,15,636400.0,...,0.313725,0.656863,219.758932,0.315126,0.301181,418,88,-0.210526,0.394737,0.605263
2,0,724800.0,4,724500.0,14,724900.0,50,724300.0,312,725100.0,...,-0.742529,0.128736,83.793738,0.029046,0.006861,380,272,0.715789,0.857895,0.142105
3,0,622900.0,110,622700.0,100,623000.0,523,622600.0,300,623100.0,...,-0.503106,0.248447,49.430076,0.117786,0.065632,1033,233,-0.225557,0.387222,0.612778
4,1,620100.0,374,619900.0,110,620200.0,495,619800.0,210,620300.0,...,-0.512195,0.243902,26.305609,0.107738,0.269258,1189,549,-0.461733,0.269134,0.730866
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
199994,0,429000.0,400,428800.0,500,429100.0,300,428700.0,700,429200.0,...,0.142857,0.571429,68.888889,0.221729,0.210526,1900,500,0.263158,0.631579,0.368421
199995,1,413600.0,100,413500.0,500,413700.0,611,413400.0,457,413800.0,...,0.454545,0.727273,54.962547,0.231803,0.070872,1668,246,0.147482,0.573741,0.426259
199996,0,381900.0,401,381800.0,100,382000.0,705,381700.0,800,382100.0,...,0.000000,0.500000,39.508359,0.058824,0.179821,2006,206,-0.102692,0.448654,0.551346
199997,1,443000.0,100,442900.0,29,443100.0,300,442800.0,100,443200.0,...,-0.600000,0.200000,47.480620,0.039781,0.066667,529,271,-0.512287,0.243856,0.756144


In [53]:

def swish(x, beta = 1):
    """Swish activation: ``x * sigmoid(beta * x)``.

    Args:
        x: input tensor.
        beta: scale applied to ``x`` inside the sigmoid (default 1).

    Returns:
        Tensor produced by multiplying ``x`` with its sigmoid gate.
    """
    gate = K.sigmoid(beta * x)
    return x * gate


# Register an Activation layer wrapping swish() under the name 'swish' in
# Keras' custom-object registry.
get_custom_objects()['swish'] = Activation(swish)

# Widths of the hidden Dense layers, in the order they are stacked.
hidden_units = (128, 64, 32)

# Every column of train_nn except the label is used as a model input.
feature_cols = [col for col in train_nn.columns if col != 'target']
num_features = len(feature_cols)


def base_model():
    """Build the (uncompiled) feed-forward binary classifier.

    Architecture: an input of ``num_features`` values named 'num_data'; then,
    for each width in the module-level ``hidden_units``, a block of
    Dense(activation='swish') -> BatchNormalization -> Dropout(0.3); and
    finally a single sigmoid unit named 'prediction'.

    Reads the module-level names ``num_features`` and ``hidden_units``.

    Returns:
        keras.Model: a new model instance; compiling and fitting are left to
        the caller.
    """
    # `shape` must be a tuple. The previous `(num_features)` was a bare int
    # (no trailing comma), which not every Keras version accepts.
    num_input = keras.Input(shape=(num_features,), name='num_data')

    # The former Concatenate() over this single tensor was an identity layer,
    # so the input feeds the first hidden block directly.
    x = num_input

    for n_hidden in hidden_units:
        x = keras.layers.Dense(n_hidden, activation='swish')(x)
        x = keras.layers.BatchNormalization()(x)
        x = keras.layers.Dropout(0.3)(x)
    out = keras.layers.Dense(1, activation='sigmoid', name='prediction')(x)

    # Inputs stay a one-element list so callers can keep passing `[array]`.
    model = keras.Model(
        inputs=[num_input],
        outputs=out
    )
    return model





In [54]:
# Replace missing values in both frames with 0 before scaling / training.
# NOTE(review): fillna only replaces NaN/NA, not +/-inf — confirm none of the
# ratio features can produce an infinite value.
train_nn = train_nn.fillna(0)
test_nn = test_nn.fillna(0)

# Model inputs (all non-label columns) and the binary label.
X = train_nn[feature_cols]
y = train_nn['target']

# Accumulator for the test-set prediction: each fold adds its predicted
# probabilities divided by the number of folds, giving the fold average.
test_predictions_nn = np.zeros(test_nn.shape[0])
# Per-fold validation accuracy.
scores_folds = {'NN_model': []}

# 5-fold cross-validation with shuffled, seeded splits.
kfolds = 5
kf = KFold(n_splits=kfolds, shuffle=True, random_state=42)

# 1-based fold number, used only in the progress message.
counter = 1

for train_index, val_index in kf.split(X):

    X_train = X.iloc[train_index]
    y_train = y.iloc[train_index]
    X_val = X.iloc[val_index]
    y_val = y.iloc[val_index]
    
    # Fit the scaler on the training fold only; the validation fold (and later
    # the test set) is transformed with those training-fold statistics.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)

    # A fresh, untrained network for every fold.
    model = base_model()
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.005),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    # ---- callbacks ----
    # Early stopping: stop once val_loss has not improved for 20 epochs and
    # restore the weights from the best epoch.
    es = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=20, verbose=0,
        mode='min', restore_best_weights=True)

    # Learning-rate schedule: multiply the learning rate by 0.2 after 7 epochs
    # without val_loss improvement.
    plateau = tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss', factor=0.2, patience=7, verbose=0,
        mode='min')
    # NOTE(review): the validation fold drives both callbacks above and is also
    # the data the fold accuracy below is computed on, so the reported score is
    # likely optimistic.
    model.fit(
        [X_train_scaled],
        y_train,
        batch_size=512,
        epochs=200,
        validation_data=([X_val_scaled], y_val),
        callbacks=[es, plateau],
        shuffle=True,
        verbose=1
    )
    
    # Validation predictions as a flat vector of probabilities; `score` is the
    # misclassification rate after rounding them to 0/1, so accuracy is 1 - score.
    preds = model.predict([X_val_scaled]).reshape(-1)
    score = np.mean((preds.round() != y_val.values).astype(float)) 
    print(f'Fold {counter}: Accuracy = {1 - score:.5f}')
    scores_folds['NN_model'].append(1 - score)
    
    # Scale the test features with this fold's scaler before predicting.
    X_test = test_nn[feature_cols]
    X_test_scaled = scaler.transform(X_test)


    # Add this fold's share (1 / kfolds) of the averaged test prediction.
    test_predictions_nn += model.predict([X_test_scaled]).reshape(-1) / kfolds
    
    counter += 1


print("CV folds scores:", scores_folds['NN_model'])



Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Fold 1: Accuracy = 0.72425




Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78



Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78



Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78



Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78