In [None]:
import os
import copy
import torch
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from itertools import product
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Seed shared by fix_random(), both train_test_split calls and the model config.
random_state = 42
# When True, features go through the StandardScaler + L2 Normalizer pipeline
# before training, and the run name / results CSV use the "-preproc" suffix
# instead of "-raw".
preproc = False

In [None]:
def fix_random(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global RNG and PyTorch (CPU and
    all CUDA devices), and switches cuDNN to deterministic mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # NumPy's global RNG must be seeded explicitly; seeding `random` or torch
    # does not affect it.
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Safe to call when CUDA is unavailable; covers every visible GPU.
    torch.cuda.manual_seed_all(seed)
    # Disable cuDNN auto-tuning and request deterministic kernels.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Seed the RNGs once up front, before any data is split or any model is built.
fix_random(random_state)

## Device

In [None]:
# Pick the PyTorch backend by priority: CUDA GPU, then Apple MPS, then CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print("Device: {}".format(device))

## Data Loading


In [None]:
# Read the training CSV from the working directory and report its dimensions.
df = pd.read_csv("train.csv")
num_rows = df.shape[0]
num_cols = df.shape[1]
print("Rows: ", num_rows)
print("Columns: ", num_cols)

In [None]:
# Data-quality report: rows containing at least one missing value, and rows
# that exactly duplicate an earlier row.
null_row_count = len(df) - len(df.dropna())
print("Null rows:", null_row_count)
duplicate_mask = df.duplicated()
print("Duplicated rows:", duplicate_mask.sum())
# Only duplicates are removed (in place); rows with nulls are reported but kept.
df.drop_duplicates(inplace=True)

### Train Test Split

In [None]:
# Split the rows into train / validation / test sets.
# Step 1 holds out 30% of the rows; step 2 sends one third of that hold-out to
# the test set and the rest to validation (roughly 70% / 20% / 10% overall).
# Both splits are stratified on "Year" so each subset keeps a similar
# distribution of target values.
train, holdout = train_test_split(df, stratify=df["Year"], test_size=0.3, random_state=random_state)
val, test = train_test_split(holdout, stratify=holdout["Year"], test_size=(1 / 3), random_state=random_state)

# Separate the features (X_*) from the "Year" target (y_*) for each subset.
X_train, y_train = train.drop(columns=["Year"]), train["Year"]
X_val, y_val = val.drop(columns=["Year"]), val["Year"]
X_test, y_test = test.drop(columns=["Year"]), test["Year"]

### Preprocessing 

In [None]:
from sklearn import preprocessing
from sklearn.pipeline import Pipeline

# Two-step feature pipeline: standardise each column, then rescale each row to
# unit L2 norm.
pipeline = Pipeline(
    steps=[
        ("std", preprocessing.StandardScaler()),
        ("l2", preprocessing.Normalizer(norm="l2")),
    ]
)

if preproc:
    # Remember the feature names: pipeline.transform() returns a bare array, and
    # without them the preprocessed frames would get integer column labels that
    # differ from the raw (preproc=False) branch.
    feature_names = list(X_train.columns)

    # Fit on the training features only; val/test are transformed with the
    # statistics learned from train.
    pipeline.fit(X_train, y_train)

    # Transformed frames get a fresh 0..n-1 index.
    X_train = pd.DataFrame(pipeline.transform(X_train), columns=feature_names)
    X_test = pd.DataFrame(pipeline.transform(X_test), columns=feature_names)
    X_val = pd.DataFrame(pipeline.transform(X_val), columns=feature_names)

    # Align the targets with the new positional index of the feature frames.
    y_train = y_train.reset_index(drop=True)
    y_test = y_test.reset_index(drop=True)
    y_val = y_val.reset_index(drop=True)

    # assign() returns a new frame, so the target is appended as the last column
    # of train/test/val without also being added to X_train/X_test/X_val
    # (pd.DataFrame(existing_frame) does not copy by default).
    train = X_train.assign(Year=y_train)
    test = X_test.assign(Year=y_test)
    val = X_val.assign(Year=y_val)

## Config


In [None]:
# Name of the regression target column (kept as a list, as passed to DataConfig).
target = ["Year"]
# Every non-target column is treated as a continuous feature. Columns are picked
# by name rather than by position, so the target can never end up among the
# features (and no real feature is dropped) if "Year" is not the last column.
continous_cols = [col for col in train.columns if col not in target]

In [None]:
from pytorch_tabular import TabularModel
from pytorch_tabular.models import TabNetModelConfig, TabTransformerConfig
from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig, ExperimentConfig
from pytorch_tabular.models.common.heads import LinearHeadConfig

In [None]:
# Data: which columns are the target and which are continuous features.
data_config = DataConfig(target=target, continuous_cols=continous_cols, num_workers=0)

# Optimiser: AdamW with a ReduceLROnPlateau scheduler.
scheduler_params = {"patience": 9, "threshold": 1, "threshold_mode": "abs"}
optimizer_config = OptimizerConfig(
    optimizer="AdamW",
    lr_scheduler="ReduceLROnPlateau",
    lr_scheduler_params=scheduler_params,
)

# Head: no extra hidden layers, just a mapping layer to output_dim.
# The config object is converted to a plain dict because OmegaConf does not
# accept objects in the model config.
linear_head = LinearHeadConfig(
    layers="",
    # dropout=0.2,
    initialization="kaiming",
)
head_config = vars(linear_head)

# Experiment logging: the run name records whether preprocessing was applied.
run_name = "TabTransformer-raw" if not preproc else "TabTransformer-preproc"
experiment_config = ExperimentConfig(
    project_name="TabTransformer",
    run_name=run_name,
    log_target="tensorboard",
)

## Train

### TabTransformer

In [None]:
# Hyper-parameter search space: one list of candidate values per axis.
virtual_batch_sizes = [64, 128]
batch_sizes = [256, 512]
n_epochs = [100]
learning_rates = [0.01]
num_heads = [8]  # default is 8
num_attn_blocks = [6]  # default is 6
transformer_activation = ['ReLU', 'LeakyReLU', 'GEGLU', 'ReGLU']

# Axis order defines the layout of every tuple in `params`:
# (learning_rate, batch_size, epochs, virtual_batch_size, num_heads,
#  num_attn_blocks, transformer_activation)
search_axes = (
    learning_rates,
    batch_sizes,
    n_epochs,
    virtual_batch_sizes,
    num_heads,
    num_attn_blocks,
    transformer_activation,
)

# Full Cartesian product of the axes; its length is the number of runs.
params = list(product(*search_axes))
comb = len(params)

print("Number of combinations: ", comb)

In [None]:
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error

# Grid search over `params`: train one TabTransformer per combination, score it
# on the test split, log the metrics in `results_tt`, and keep the model with
# the lowest test MSE in `best_model_tt` / `best_params_tt`.
best_mse_tt = float("inf")
best_model_tt = None
best_params_tt = None

result_columns = [
    "loss",
    "r2",
    "learning_rate",
    "epochs",
    "batch_size",
    "virtual_batch_size",
    "num_heads",
    "num_attn_blocks",
    "transformer_activation",
]
results_tt = pd.DataFrame(columns=result_columns)

# Column of the predict() output that holds the regression prediction
# (pytorch_tabular names it "<target>_prediction").
prediction_col = f"{target[0]}_prediction"

# Loop variables use names distinct from the search-space lists (num_heads,
# num_attn_blocks, transformer_activation) so those lists are not overwritten,
# and the run counter no longer shadows the builtin `iter`.
for run_idx, combo in enumerate(params, start=1):
    learning_rate, batch_size, epochs, virtual_batch_size, n_heads, n_attn_blocks, activation = combo
    print(f"\nIteration: {run_idx} of {comb}")
    trainer_config = TrainerConfig(batch_size=batch_size, max_epochs=epochs, early_stopping_patience=10, load_best=True)

    model_config = TabTransformerConfig(
        task="regression",
        head="LinearHead",  # Linear Head
        head_config=head_config,  # Linear Head Config
        loss="MSELoss",
        seed=random_state,
        learning_rate=learning_rate,
        virtual_batch_size=virtual_batch_size,
        num_heads=n_heads,
        num_attn_blocks=n_attn_blocks,
        ff_hidden_multiplier=64,
        transformer_activation=activation,
    )

    tabular_model = TabularModel(
        data_config=data_config,
        model_config=model_config,
        optimizer_config=optimizer_config,
        trainer_config=trainer_config,
        experiment_config=experiment_config,
    )

    tabular_model.fit(train=train, validation=val)
    tabular_model.evaluate(test)

    # predict() returns a DataFrame; score only the prediction column so the
    # metrics compare one prediction per sample against y_test, whether or not
    # the input features are echoed back in the output.
    y_pred = tabular_model.predict(X_test)[prediction_col]
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print("MSE: ", mse)
    print("MAE: ", mae)
    print("R2: ", r2)

    if mse < best_mse_tt:
        best_mse_tt = mse
        best_model_tt = copy.deepcopy(tabular_model)
        best_params_tt = (learning_rate, batch_size, epochs, virtual_batch_size, n_heads, n_attn_blocks, activation)
        print("Best model updated")

    # Key each value by its column name, then order by `result_columns`, so the
    # stored values cannot drift out of sync with the headers (previously the
    # "epochs" and "batch_size" values were written swapped).
    row = {
        "loss": mse,
        "r2": r2,
        "learning_rate": learning_rate,
        "epochs": epochs,
        "batch_size": batch_size,
        "virtual_batch_size": virtual_batch_size,
        "num_heads": n_heads,
        "num_attn_blocks": n_attn_blocks,
        "transformer_activation": activation,
    }
    # Appended per iteration so partial results survive an interrupted search.
    results_tt.loc[len(results_tt)] = [row[col] for col in result_columns]

In [None]:
results_tt.sort_values(by="r2", ascending=False).head()

In [None]:
if preproc==True:
    results_tt.sort_values(by="r2", ascending=False).to_csv('tabtransformer-preproc.csv')
else:
    results_tt.sort_values(by="r2", ascending=False).to_csv('tabtransformer-raw.csv')