# Import Libraries

In [1]:
import torch
import optuna
import numpy as np
import torch.nn.functional as F
from torch.nn.utils import clip_grad_norm_
from torch.optim import Adam
import pandas as pd
from network import TabNet
from dotenv import dotenv_values
from datetime import datetime

# Setup and Configuration

In [2]:
config = dotenv_values("../.env")
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using", device, "device")
BATCH_SIZE = 1024
PATIENCE = 3 # How many epochs will we wait until performance gets better or not?
SAVE_PATH = "./model_parameters/"
TIMEOUT = 12*60*60

Using cuda device


# Metric Used For Validation Step

In [3]:
def amex_metric_mod(y_true, y_pred):
    labels     = np.transpose(np.array([y_true, y_pred]))
    labels     = labels[labels[:, 1].argsort()[::-1]]
    weights    = np.where(labels[:,0]==0, 20, 1)
    cut_vals   = labels[np.cumsum(weights) <= int(0.04 * np.sum(weights))]
    top_four   = np.sum(cut_vals[:,0]) / np.sum(labels[:,0])

    gini = [0,0]
    for i in [1,0]:
        labels         = np.transpose(np.array([y_true, y_pred]))
        labels         = labels[labels[:, i].argsort()[::-1]]
        weight         = np.where(labels[:,0]==0, 20, 1)
        weight_random  = np.cumsum(weight / np.sum(weight))
        total_pos      = np.sum(labels[:, 0] *  weight)
        cum_pos_found  = np.cumsum(labels[:, 0] * weight)
        lorentz        = cum_pos_found / total_pos
        gini[i]        = np.sum((lorentz - weight_random) * weight)

    return 0.5 * (gini[1]/gini[0] + top_four)

# Optuna Setup and Training

In [4]:
def objective(trial):
    # Model Hyperparameters
    param = {
        "input_dim": 2319,
        "output_dim": 1,
        "n_d": trial.suggest_int("n_d", 4, 64),
        "n_a": trial.suggest_int("n_a", 4, 64),
        "n_steps": trial.suggest_int("n_steps", 3, 10),
        "gamma": trial.suggest_float("gamma", 1.0, 2.0, log=True),
        "cat_idxs": [],
        "cat_dims": [],
        "cat_emb_dim": 1,
        "n_independent": 2,
        "n_shared": 2,
        "epsilon": 1e-15,
        "vbs": 128,
        "momentum": trial.suggest_float("momentum", 0.02, 1.0, log=True)
    }
    model = TabNet(**param).to(device)
    optimizer = Adam(model.parameters(), lr=2e-2)

    first_pass = True
    oof_tensors = {}
    best_metric = 0.0 # Keeps track of best metric performance
    patience_count = 0
    for epoch in range(1,101): # Runs maximum of 100 epochs
        # Load Data
        labels = pd.read_csv(config["TRAIN_LABELS_PATH"], 
                             chunksize=BATCH_SIZE)
        df = pd.read_csv(config["WRANGLED_DATA"] + "scaled_train/train-0.csv.part", 
                         chunksize=BATCH_SIZE)
        total_loss = 0.0
        model.train()
        for i, (chunk, chunk_labels) in enumerate(zip(df, labels)):
            random = np.random.randint(5) # Determines which entries are going to be used in oof prediction
            x = torch.Tensor(chunk.values).to(device, non_blocking=True)
            y = torch.Tensor(chunk_labels["target"].values).reshape(-1, 1).to(device, non_blocking=True)
            if random == 0 and first_pass:
                # If it's the first pass create the validation set
                oof_tensors[i] = (x, y)
                continue
            if not first_pass and i in oof_tensors.keys():
                # If not the first pass then skip the training on current entry
                continue

            # Train Model
            y_hat, M_loss = model(x)
            loss = F.mse_loss(y_hat, y) - (1e-3*M_loss)
            loss.backward()
            clip_grad_norm_(model.parameters(), 1) # Clip gradient
            optimizer.step()
            total_loss += loss.cpu().detach().numpy().item()
            
            if i % 2 == 0:
                optimizer.zero_grad(set_to_none=True)
        # Validate 
        model.eval()
        preds = []
        labels = []
        for x, y in list(oof_tensors.values()):
            y_hat, _ = model(x)
            preds += y_hat.cpu().detach().numpy().flatten().tolist()
            labels += y.cpu().detach().numpy().flatten().tolist()
        metric = amex_metric_mod(labels, preds)
        print(f"Epoch {epoch} | train_loss: {total_loss / (i-len(oof_tensors)+1):.4f} | validation_metric: {metric:.4f}")
        first_pass = False

        # Saves model based on performance over time
        if metric > best_metric:
            best_metric = metric
            patience_count = 0
            id = datetime.now().strftime("%d-%m-%Y-%H%M%S")
            torch.save(model.state_dict(), SAVE_PATH+f"Optuna_TabNet_{best_metric:.4f}_{id}.pt")
        else:
            patience_count += 1
        
        # If model hasn't improved in given time training stops
        if patience_count >= PATIENCE:
            print("Early Stopping Activated!!!")
            break
    
    return best_metric

In [None]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, timeout=TIMEOUT)


print("Number of finished trials: ", len(study.trials))
print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

[32m[I 2022-11-05 11:28:53,633][0m A new study created in memory with name: no-name-4ee1d826-4c6c-46df-a94d-0367340360ef[0m


Epoch 1 | train_loss: 0.4185 | validation_metric: 0.6046
Epoch 2 | train_loss: 0.1027 | validation_metric: 0.6506
Epoch 3 | train_loss: 0.0913 | validation_metric: 0.6852
Epoch 4 | train_loss: 0.0834 | validation_metric: 0.7557
Epoch 5 | train_loss: 0.0782 | validation_metric: 0.7671
Epoch 6 | train_loss: 0.0757 | validation_metric: 0.7681
Epoch 7 | train_loss: 0.0739 | validation_metric: 0.7751
Epoch 8 | train_loss: 0.0728 | validation_metric: 0.7764
Epoch 9 | train_loss: 0.0716 | validation_metric: 0.7740
Epoch 10 | train_loss: 0.0707 | validation_metric: 0.7798
Epoch 11 | train_loss: 0.0701 | validation_metric: 0.7729
Epoch 12 | train_loss: 0.0708 | validation_metric: 0.7786


[32m[I 2022-11-05 12:27:08,914][0m Trial 0 finished with value: 0.7797502432084173 and parameters: {'n_d': 40, 'n_a': 55, 'n_steps': 7, 'gamma': 1.71575598678367, 'momentum': 0.0764338735749953}. Best is trial 0 with value: 0.7797502432084173.[0m


Epoch 13 | train_loss: 0.0770 | validation_metric: 0.7743
Early Stopping Activated!!!
Epoch 1 | train_loss: 0.6492 | validation_metric: 0.5245
Epoch 2 | train_loss: 0.1061 | validation_metric: 0.6629
Epoch 3 | train_loss: 0.0876 | validation_metric: 0.7270
Epoch 4 | train_loss: 0.0803 | validation_metric: 0.7476
Epoch 5 | train_loss: 0.0762 | validation_metric: 0.7643
Epoch 6 | train_loss: 0.0755 | validation_metric: 0.7696
Epoch 7 | train_loss: 0.0727 | validation_metric: 0.7754
Epoch 8 | train_loss: 0.0746 | validation_metric: 0.7541
Epoch 9 | train_loss: 0.0747 | validation_metric: 0.7737


[32m[I 2022-11-05 13:13:33,673][0m Trial 1 finished with value: 0.7754387018359095 and parameters: {'n_d': 29, 'n_a': 26, 'n_steps': 10, 'gamma': 1.1968457992546584, 'momentum': 0.03280130914370577}. Best is trial 0 with value: 0.7797502432084173.[0m


Epoch 10 | train_loss: 0.0721 | validation_metric: 0.7700
Early Stopping Activated!!!
Epoch 1 | train_loss: 2.3147 | validation_metric: 0.6358
Epoch 2 | train_loss: 0.0988 | validation_metric: 0.6667
Epoch 3 | train_loss: 0.0914 | validation_metric: 0.6843
Epoch 4 | train_loss: 0.0844 | validation_metric: 0.7299
Epoch 5 | train_loss: 0.0831 | validation_metric: 0.7521
Epoch 6 | train_loss: 0.0809 | validation_metric: 0.7533
Epoch 7 | train_loss: 0.0753 | validation_metric: 0.7671
Epoch 8 | train_loss: 0.0749 | validation_metric: 0.7688
Epoch 9 | train_loss: 0.0731 | validation_metric: 0.7724
Epoch 10 | train_loss: 0.0721 | validation_metric: 0.7743
Epoch 11 | train_loss: 0.0715 | validation_metric: 0.7767
Epoch 12 | train_loss: 0.0709 | validation_metric: 0.7753
Epoch 13 | train_loss: 0.0709 | validation_metric: 0.7747


[32m[I 2022-11-05 14:20:19,293][0m Trial 2 finished with value: 0.7766513931109005 and parameters: {'n_d': 47, 'n_a': 27, 'n_steps': 10, 'gamma': 1.8719326843827975, 'momentum': 0.042788662750223964}. Best is trial 0 with value: 0.7797502432084173.[0m


Epoch 14 | train_loss: 0.0698 | validation_metric: 0.7731
Early Stopping Activated!!!
Epoch 1 | train_loss: 0.1606 | validation_metric: 0.7275
Epoch 2 | train_loss: 0.0791 | validation_metric: 0.7508
Epoch 3 | train_loss: 0.0750 | validation_metric: 0.7700
Epoch 4 | train_loss: 0.0729 | validation_metric: 0.7742
Epoch 5 | train_loss: 0.0717 | validation_metric: 0.7693
Epoch 6 | train_loss: 0.0714 | validation_metric: 0.7796
Epoch 7 | train_loss: 0.0714 | validation_metric: 0.7752
Epoch 8 | train_loss: 0.0695 | validation_metric: 0.7768


[32m[I 2022-11-05 15:00:14,057][0m Trial 3 finished with value: 0.7795747148945578 and parameters: {'n_d': 53, 'n_a': 43, 'n_steps': 3, 'gamma': 1.6330216585133142, 'momentum': 0.5191392082414036}. Best is trial 0 with value: 0.7797502432084173.[0m


Epoch 9 | train_loss: 0.0689 | validation_metric: 0.7678
Early Stopping Activated!!!
Epoch 1 | train_loss: 0.5245 | validation_metric: 0.6387
Epoch 2 | train_loss: 0.1054 | validation_metric: 0.6841
Epoch 3 | train_loss: 0.0920 | validation_metric: 0.7196
Epoch 4 | train_loss: 0.0831 | validation_metric: 0.6392
Epoch 5 | train_loss: 0.0836 | validation_metric: 0.7579
Epoch 6 | train_loss: 0.0757 | validation_metric: 0.7702
Epoch 7 | train_loss: 0.0840 | validation_metric: 0.7552
Epoch 8 | train_loss: 0.0763 | validation_metric: 0.7720
Epoch 9 | train_loss: 0.0733 | validation_metric: 0.7776
Epoch 10 | train_loss: 0.0728 | validation_metric: 0.7777
Epoch 11 | train_loss: 0.0719 | validation_metric: 0.7810
Epoch 12 | train_loss: 0.0710 | validation_metric: 0.7819
Epoch 13 | train_loss: 0.0702 | validation_metric: 0.7808
Epoch 14 | train_loss: 0.0694 | validation_metric: 0.7783


[32m[I 2022-11-05 16:07:35,933][0m Trial 4 finished with value: 0.7818938545255564 and parameters: {'n_d': 30, 'n_a': 58, 'n_steps': 7, 'gamma': 1.616591063855307, 'momentum': 0.08451094264630266}. Best is trial 4 with value: 0.7818938545255564.[0m


Epoch 15 | train_loss: 0.0687 | validation_metric: 0.7769
Early Stopping Activated!!!
Epoch 1 | train_loss: 1.5053 | validation_metric: 0.5204
Epoch 2 | train_loss: 0.1060 | validation_metric: 0.6370
Epoch 3 | train_loss: 0.0999 | validation_metric: 0.6836
Epoch 4 | train_loss: 0.0898 | validation_metric: 0.7071
Epoch 5 | train_loss: 0.0860 | validation_metric: 0.7052
Epoch 6 | train_loss: 0.0811 | validation_metric: 0.7330
Epoch 7 | train_loss: 0.0818 | validation_metric: 0.7581
Epoch 8 | train_loss: 0.0750 | validation_metric: 0.7690
Epoch 9 | train_loss: 0.0743 | validation_metric: 0.7753
Epoch 10 | train_loss: 0.0727 | validation_metric: 0.7771
Epoch 11 | train_loss: 0.0718 | validation_metric: 0.7800
Epoch 12 | train_loss: 0.0711 | validation_metric: 0.7801
Epoch 13 | train_loss: 0.0704 | validation_metric: 0.7804
Epoch 14 | train_loss: 0.0698 | validation_metric: 0.7802
Epoch 15 | train_loss: 0.0692 | validation_metric: 0.7788


[32m[I 2022-11-05 17:21:45,556][0m Trial 5 finished with value: 0.7803673035102572 and parameters: {'n_d': 32, 'n_a': 60, 'n_steps': 10, 'gamma': 1.6547811503254615, 'momentum': 0.17393874629135292}. Best is trial 4 with value: 0.7818938545255564.[0m


Epoch 16 | train_loss: 0.0684 | validation_metric: 0.7742
Early Stopping Activated!!!
Epoch 1 | train_loss: 0.4933 | validation_metric: 0.6026
Epoch 2 | train_loss: 0.1025 | validation_metric: 0.6815
Epoch 3 | train_loss: 0.0862 | validation_metric: 0.7382
Epoch 4 | train_loss: 0.0780 | validation_metric: 0.7635
Epoch 5 | train_loss: 0.0757 | validation_metric: 0.7588
Epoch 6 | train_loss: 0.0819 | validation_metric: 0.7588
Epoch 7 | train_loss: 0.0751 | validation_metric: 0.7726
Epoch 8 | train_loss: 0.0731 | validation_metric: 0.7769
Epoch 9 | train_loss: 0.0733 | validation_metric: 0.7748
Epoch 10 | train_loss: 0.0721 | validation_metric: 0.7800
Epoch 11 | train_loss: 0.0713 | validation_metric: 0.7822
Epoch 12 | train_loss: 0.0724 | validation_metric: 0.7773
Epoch 13 | train_loss: 0.0716 | validation_metric: 0.7808


[32m[I 2022-11-05 18:25:56,862][0m Trial 6 finished with value: 0.7822471664786879 and parameters: {'n_d': 13, 'n_a': 9, 'n_steps': 9, 'gamma': 1.3077154313342185, 'momentum': 0.03072144877400083}. Best is trial 6 with value: 0.7822471664786879.[0m


Epoch 14 | train_loss: 0.0729 | validation_metric: 0.7808
Early Stopping Activated!!!
Epoch 1 | train_loss: 1.2040 | validation_metric: 0.6062
Epoch 2 | train_loss: 0.1011 | validation_metric: 0.6329
Epoch 3 | train_loss: 0.1074 | validation_metric: 0.6033
Epoch 4 | train_loss: 0.1052 | validation_metric: 0.6887
Epoch 5 | train_loss: 0.0912 | validation_metric: 0.7162
Epoch 6 | train_loss: 0.0807 | validation_metric: 0.7358
Epoch 7 | train_loss: 0.0799 | validation_metric: 0.7653
Epoch 8 | train_loss: 0.0749 | validation_metric: 0.7733
Epoch 9 | train_loss: 0.0738 | validation_metric: 0.7798
Epoch 10 | train_loss: 0.0729 | validation_metric: 0.7798
Epoch 11 | train_loss: 0.0715 | validation_metric: 0.7792
Epoch 12 | train_loss: 0.0709 | validation_metric: 0.7812
Epoch 13 | train_loss: 0.0701 | validation_metric: 0.7814
Epoch 14 | train_loss: 0.0750 | validation_metric: 0.7800
Epoch 15 | train_loss: 0.0710 | validation_metric: 0.7814
Epoch 16 | train_loss: 0.0700 | validation_metric: 0.

[32m[I 2022-11-05 19:49:28,969][0m Trial 7 finished with value: 0.7813892168267009 and parameters: {'n_d': 28, 'n_a': 55, 'n_steps': 10, 'gamma': 1.8124855419952668, 'momentum': 0.04553844226627447}. Best is trial 6 with value: 0.7822471664786879.[0m


Epoch 18 | train_loss: 0.0684 | validation_metric: 0.7706
Early Stopping Activated!!!
Epoch 1 | train_loss: 1.0007 | validation_metric: 0.5335
Epoch 2 | train_loss: 0.1063 | validation_metric: 0.6632
Epoch 3 | train_loss: 0.0869 | validation_metric: 0.6897
Epoch 4 | train_loss: 0.0851 | validation_metric: 0.7413
Epoch 5 | train_loss: 0.0773 | validation_metric: 0.7584
Epoch 6 | train_loss: 0.0740 | validation_metric: 0.7605
Epoch 7 | train_loss: 0.0733 | validation_metric: 0.5959
Epoch 8 | train_loss: 0.0770 | validation_metric: 0.7722
Epoch 9 | train_loss: 0.0729 | validation_metric: 0.7797
Epoch 10 | train_loss: 0.0734 | validation_metric: 0.7813
Epoch 11 | train_loss: 0.0758 | validation_metric: 0.7728
Epoch 12 | train_loss: 0.0797 | validation_metric: 0.7738


[32m[I 2022-11-05 20:49:24,294][0m Trial 8 finished with value: 0.781341239349128 and parameters: {'n_d': 35, 'n_a': 32, 'n_steps': 9, 'gamma': 1.3974897430745419, 'momentum': 0.8289930412147597}. Best is trial 6 with value: 0.7822471664786879.[0m


Epoch 13 | train_loss: 0.0723 | validation_metric: 0.7799
Early Stopping Activated!!!
Epoch 1 | train_loss: 0.6101 | validation_metric: 0.6342
Epoch 2 | train_loss: 0.0991 | validation_metric: 0.6915
Epoch 3 | train_loss: 0.0850 | validation_metric: 0.7400
Epoch 4 | train_loss: 0.0791 | validation_metric: 0.7343
Epoch 5 | train_loss: 0.0809 | validation_metric: 0.7615
Epoch 6 | train_loss: 0.0756 | validation_metric: 0.6872
Epoch 7 | train_loss: 0.0796 | validation_metric: 0.7578
Epoch 8 | train_loss: 0.0744 | validation_metric: 0.7712
Epoch 9 | train_loss: 0.0724 | validation_metric: 0.7717
Epoch 10 | train_loss: 0.0719 | validation_metric: 0.7743
Epoch 11 | train_loss: 0.0707 | validation_metric: 0.7769
Epoch 12 | train_loss: 0.0700 | validation_metric: 0.7782
Epoch 13 | train_loss: 0.0694 | validation_metric: 0.7747
Epoch 14 | train_loss: 0.0692 | validation_metric: 0.7759
Epoch 15 | train_loss: 0.0705 | validation_metric: 0.7794
Epoch 16 | train_loss: 0.0683 | validation_metric: 0.

[32m[I 2022-11-05 22:13:09,727][0m Trial 9 finished with value: 0.7794365913343463 and parameters: {'n_d': 50, 'n_a': 39, 'n_steps': 6, 'gamma': 1.5793391148488611, 'momentum': 0.022291812159909608}. Best is trial 6 with value: 0.7822471664786879.[0m


Epoch 18 | train_loss: 0.0741 | validation_metric: 0.7701
Early Stopping Activated!!!
Epoch 1 | train_loss: 0.1628 | validation_metric: 0.6785
Epoch 2 | train_loss: 0.0952 | validation_metric: 0.6994
Epoch 3 | train_loss: 0.0904 | validation_metric: 0.7031
Epoch 4 | train_loss: 0.0901 | validation_metric: 0.6967
Epoch 5 | train_loss: 0.0901 | validation_metric: 0.7021
Epoch 6 | train_loss: 0.0894 | validation_metric: 0.7053
Epoch 7 | train_loss: 0.0892 | validation_metric: 0.7092
Epoch 8 | train_loss: 0.0861 | validation_metric: 0.7295
Epoch 9 | train_loss: 0.0814 | validation_metric: 0.7503
Epoch 10 | train_loss: 0.0819 | validation_metric: 0.7380
Epoch 11 | train_loss: 0.0825 | validation_metric: 0.7478
