In [114]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from tab_transformer_pytorch import TabTransformer, FTTransformer
from preprocessing import get_features_and_target
from sklearn.preprocessing import LabelEncoder
from RMSELoss import RMSELoss
import plotly.graph_objects as go

# Load the DataFrames

In [103]:
# Name of the column that holds the regression target.
target_column = "PullTest (N)"

# Read the training and development splits from disk.
train_df = pd.read_csv("data/train_data.csv")
dev_df = pd.read_csv("data/development_data.csv")

# Split each frame into (features, target) with the project helper
# `get_features_and_target` imported from `preprocessing`.
x_train, y_train = get_features_and_target(train_df, target_column)
x_dev, y_dev = get_features_and_target(dev_df, target_column)




# Encode Categorical_Features

In [104]:
# Columns that hold categorical values and must be turned into integer codes.
categorical_features = ["Material"]

# One fitted encoder per column, keyed by column name. A single shared
# LabelEncoder would be re-fitted on every iteration and would only remember
# the classes of the last column, making earlier columns impossible to decode.
label_encoders = {}

# `le` is kept for backward compatibility: after the loop it refers to the
# encoder of the last categorical column (unfitted if the list is empty).
le = LabelEncoder()

for feature in categorical_features:
    le = LabelEncoder()
    # Fit on the training split only; the development split reuses the same
    # mapping so identical labels receive identical codes in both splits.
    x_train[feature] = le.fit_transform(x_train[feature])
    x_dev[feature] = le.transform(x_dev[feature])
    label_encoders[feature] = le

# Split Categorical_Features

In [105]:
# Categorical columns only (selected with a list, so the result is a DataFrame).
x_train_categorical_features = x_train[categorical_features]
x_dev_categorical_features = x_dev[categorical_features]

# All remaining columns are treated as continuous features.
x_train_numerical_features = x_train.drop(columns=categorical_features)
x_dev_numerical_features = x_dev.drop(columns=categorical_features)

# Convert DataFrames to Tensors

In [106]:
from torch.utils.data import TensorDataset,DataLoader


def _to_tensor(frame, dtype=torch.float):
    """Copy the values of a pandas object into a new tensor of ``dtype``."""
    return torch.tensor(frame.to_numpy(), dtype=dtype)


# Complete feature matrices (categorical codes and continuous columns together).
train_tensor = _to_tensor(x_train)
dev_tensor = _to_tensor(x_dev)

# Continuous features as float tensors.
x_train_numer_tensor = _to_tensor(x_train_numerical_features)
x_dev_numer_tensor = _to_tensor(x_dev_numerical_features)

# Categorical codes as integer (long) tensors.
x_train_categorical_features_tensor = _to_tensor(x_train_categorical_features, dtype=torch.long)
x_dev_categorical_features_tensor = _to_tensor(x_dev_categorical_features, dtype=torch.long)

# Regression targets as float tensors.
y_train_tensor = _to_tensor(y_train)
y_dev_tensor = _to_tensor(y_dev)

# Each dataset item is a (categorical, continuous, target) triple.
train_ds = TensorDataset(
    x_train_categorical_features_tensor, x_train_numer_tensor, y_train_tensor
)
val_ds = TensorDataset(
    x_dev_categorical_features_tensor, x_dev_numer_tensor, y_dev_tensor
)

# A seeded generator makes the shuffling order of the training loader repeatable.
g = torch.Generator()
g.manual_seed(42)

train_loader = DataLoader(train_ds, batch_size=32, shuffle=True, generator=g)
val_loader = DataLoader(val_ds, batch_size=32)



# Check Tensors


In [6]:
#torch.set_printoptions(sci_mode=False, precision=3)
#print(x_train_numer_tensor)

# Define Model

In [236]:
# `categories` must contain, for every categorical column, the number of
# distinct values that column can take: it determines the size of the
# embedding table. Passing the number of categorical columns instead only
# works by coincidence when the single column has exactly one class; with more
# classes the encoded ids would index past the end of the embedding table.
# The columns were label-encoded above, so the number of unique codes in the
# training split is the number of classes.
num_unique_per_category = tuple(
    int(x_train_categorical_features[feature].nunique())
    for feature in categorical_features
)

# NOTE(review): a saved checkpoint only loads into a model built with the same
# `categories` and hyperparameters it was trained with. The training cell's
# `model_description` appears to encode dim as 8 while dim=9 is used here --
# confirm which value is intended.
model = FTTransformer(
    categories=num_unique_per_category,
    num_continuous=x_train_numerical_features.shape[1],  # one token per continuous column
    dim=9,
    dim_out=1,          # single regression output
    depth=4,
    heads=4,
    attn_dropout=0.1,
    ff_dropout=0.1
)

# (Alternative) Load a Saved Model


In [238]:
# The network built in the "Define Model" cell must use the same
# hyperparameters as the run that produced this file; otherwise the saved
# state dict will not match the model's parameters.
path = 'trained_models/regular_training/model_FTTransformer_lr0.00025_9_1_4_4_.1_0.1__epoch15600.pt'
saved_state = torch.load(path)
model.load_state_dict(saved_state)

<All keys matched successfully>

# Define Training Epoch

In [211]:
def train_one_epoch(train_loader):
    """Run one optimisation pass over ``train_loader``.

    Uses the module-level ``model``, ``optimizer`` and ``criterion``; they
    must be defined before this function is called.

    Args:
        train_loader: iterable with a length that yields
            ``(x_cat, x_cont, y)`` batches.

    Returns:
        The mean of the per-batch loss values (sum of batch losses divided by
        the number of batches).

    Raises:
        ZeroDivisionError: if ``train_loader`` contains no batches.
    """
    total_loss = 0.0

    for x_cat, x_cont, y in train_loader:
        optimizer.zero_grad()

        # Bring y to shape [B, 1]. Only add the trailing axis when it is
        # missing: unsqueezing a target that is already [B, 1] would give
        # [B, 1, 1], which broadcasts against the [B, 1] predictions and
        # silently produces a wrong loss.
        y = y.unsqueeze(-1) if y.dim() == 1 else y

        # forward + backward + step
        pred = model(x_cat, x_cont)
        loss = criterion(pred, y)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # average of the batch losses over ALL batches
    return total_loss / len(train_loader)

# Training

In [212]:
from datetime import datetime

criterion = RMSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.00025)

#Changeable Parameters
# ----------------------------------------------------#
# Model description, embedded in every checkpoint filename.
# NOTE(review): the "Define Model" cell builds the network with dim=9 while
# this label appears to encode dim as 8 -- confirm the label matches the model.
model_description    = 'FTTransformer_lr0.00025_8_1_4_4_.1_0.1'

# Number of epochs to train
EPOCHS = 16000
#-----------------------------------------------------#

# Lists to store per-epoch losses
train_losses = []
val_losses   = []

best_vloss   = float('inf')
# Path of the most recent "best so far" checkpoint (replaced on improvement)
last_ckpt  = None
# Path of the fixed epoch-1000 snapshot, which must never be deleted
pinned_ckpt = None
#timestamp for manual checkpoint selection
timestamp    = datetime.now().strftime('%Y%m%d_%H%M%S')


for epoch in range(EPOCHS):
    print(f'\nEPOCH {epoch+1}/{EPOCHS}')

    # --------------------
    # 1) TRAINING PHASE
    # --------------------
    model.train()
    avg_loss = train_one_epoch(train_loader)
    train_losses.append(avg_loss)
    print(f'train loss: {avg_loss:.4f}')

    # --------------------
    # 2) VALIDATION PHASE
    # --------------------
    model.eval()

    val_loss = 0.0
    with torch.no_grad():
        for x_cat, x_cont, y in val_loader:
            y = y.unsqueeze(-1)
            pred = model(x_cat, x_cont)
            val_loss += criterion(pred, y).item()

    # mean of the per-batch validation losses
    avg_vloss = val_loss / len(val_loader)
    val_losses.append(avg_vloss)

    print(f'valid loss: {avg_vloss:.4f}')

    # --------------------
    # 3) CHECKPOINTING
    # --------------------
    # to sort through the checkpoints manually, add timestamp to the filename
    # remove avg_total < best_vloss constraint and 2nd if condition

    #_date_{timestamp}
    ckpt_path = f'trained_models/regular_training/model_{model_description}__epoch{epoch+1}.pt'
    is_best_candidate = (epoch + 1) % 100 == 0 and avg_vloss < best_vloss
    is_pinned_epoch = (epoch + 1) == 1000

    if is_best_candidate or is_pinned_epoch:
        # torch.save does not create missing directories
        os.makedirs(os.path.dirname(ckpt_path), exist_ok=True)

    # Rolling "best" checkpoint: only considered every 100th epoch.
    if is_best_candidate:
        # Drop the previous best file, but never the pinned epoch-1000
        # snapshot: both can share the same path when epoch 1000 was itself
        # the best so far, and deleting it would lose the fixed snapshot.
        if last_ckpt is not None and last_ckpt != pinned_ckpt:
            os.remove(last_ckpt)

        best_vloss = avg_vloss

        torch.save(model.state_dict(), ckpt_path)
        last_ckpt = ckpt_path

    # Fixed snapshot at epoch 1000, kept regardless of validation loss.
    if is_pinned_epoch:
        if last_ckpt != ckpt_path:
            # not already written by the branch above during this epoch
            torch.save(model.state_dict(), ckpt_path)
        pinned_ckpt = ckpt_path





EPOCH 1/16000
train loss: 2974.3977
valid loss: 3021.9925

EPOCH 2/16000
train loss: 2966.8974
valid loss: 3021.8589

EPOCH 3/16000
train loss: 2962.7812
valid loss: 3021.7389

EPOCH 4/16000
train loss: 2964.6424
valid loss: 3021.6226

EPOCH 5/16000
train loss: 2964.2738
valid loss: 3021.5990

EPOCH 6/16000
train loss: 2963.0450
valid loss: 3021.5884

EPOCH 7/16000
train loss: 2960.6493
valid loss: 3021.5723

EPOCH 8/16000
train loss: 2963.3314
valid loss: 3021.4902

EPOCH 9/16000
train loss: 2963.9340
valid loss: 3021.3772

EPOCH 10/16000
train loss: 2969.5433
valid loss: 3021.2729

EPOCH 11/16000
train loss: 2965.0787
valid loss: 3021.1864

EPOCH 12/16000
train loss: 2964.0060
valid loss: 3021.1165

EPOCH 13/16000
train loss: 2970.4397
valid loss: 3021.0624

EPOCH 14/16000
train loss: 2975.3011
valid loss: 3021.0240

EPOCH 15/16000
train loss: 2964.9350
valid loss: 3020.9976

EPOCH 16/16000
train loss: 2961.5526
valid loss: 3020.9788

EPOCH 17/16000
train loss: 2963.2645
valid loss:

In [226]:
import plotly.graph_objects as go

# 1-based epoch numbers for the x axis
epochs = list(range(1, len(train_losses) + 1))

# Distance between x-axis ticks, in epochs. The tick range is derived from the
# number of recorded epochs so the axis stays correct for shorter or longer runs.
tick_step = 500
# default=0.0 keeps the cell runnable when no epoch has been recorded yet
largest_loss = max(train_losses + val_losses, default=0.0)

fig = go.Figure()

# Training loss trace
fig.add_trace(go.Scatter(
    x=epochs,
    y=train_losses,
    mode="lines",
    name="Training Loss",
    line=dict(color="royalblue", width=2)
))

# Validation loss trace
fig.add_trace(go.Scatter(
    x=epochs,
    y=val_losses,
    mode="lines",
    name="Validation Loss",
    line=dict(color="firebrick", width=2, dash="dash")
))

# Layout enhancements
fig.update_layout(
    title="Training & Validation Loss over 16 000 Epochs - lr0.00025 8_1_4_4_.1_0.1",
    xaxis_title="Epoch",
    yaxis_title="Loss",
    xaxis=dict(
        tickmode='array',
        tickvals=list(range(0, len(epochs) + tick_step, tick_step)),  # one tick every `tick_step` epochs
        tickfont=dict(size=10)
    ),
    yaxis=dict(
        # scientific notation for large losses, fixed-point otherwise
        tickformat=".2e" if largest_loss > 1e4 else ".4f",
        gridcolor="lightgray"
    ),
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
    ),
    template="plotly_white",
    margin=dict(t=60, b=40)
)

fig.show()


In [240]:
# Switch to inference mode: a freshly built or freshly loaded module is in
# training mode, in which dropout stays active and perturbs the predictions.
model.eval()

# Pass over val_loader and collect flattened targets and predictions per batch
all_true, all_pred = [], []

with torch.no_grad():
    for x_cat, x_cont, y in val_loader:
        y_p = model(x_cat, x_cont)

        all_true.append(y.reshape(-1))
        all_pred.append(y_p.reshape(-1))

# flatten into 1-D NumPy arrays (explicit conversion instead of relying on
# NumPy implicitly converting a list of tensors)
all_true = torch.cat(all_true).numpy()
all_pred = torch.cat(all_pred).numpy()

# plot per-sample
sample = np.arange(len(all_true))

fig = go.Figure()
fig.add_trace(go.Scatter(x=sample, y=all_true, mode="lines+markers",
                         name="True Data", marker=dict(color="black", size=6)))
fig.add_trace(go.Scatter(x=sample, y=all_pred, mode="lines+markers",
                         name="Prediction", marker=dict(color="blue", size=6)))

# NOTE(review): the title hard-codes an epoch count and hyperparameters that
# differ from the ones used elsewhere in this notebook -- confirm before sharing.
fig.update_layout(
    title="All Validation Samples: True vs Prediction Epochs 200 000 lr0.0001_8_1_4_4_.1_.1",
    xaxis_title="Sample Index",
    yaxis_title="Pull-Force",
    template="seaborn"
)
fig.show()

In [239]:
# Collect predictions and true values from the validation set
model.eval()  # inference mode (disables dropout)
all_preds = []
all_targets = []

with torch.no_grad():
    for x_cat, x_cont, y in val_loader:
        # targets as [B, 1] so they line up with the model output
        all_targets.append(y.unsqueeze(-1))
        # model output for this batch (these are predictions, not a loss)
        all_preds.append(model(x_cat, x_cont))

# Concatenate batches along the sample axis and convert to NumPy explicitly
y_true = torch.cat(all_targets).numpy()
y_pred = torch.cat(all_preds).numpy()

# Calculate MAE and RMSE and R2
mae  = mean_absolute_error(y_true, y_pred)
rmse = np.sqrt(mean_squared_error(y_true, y_pred))
R2   = r2_score(y_true, y_pred)

print(f"MAE:  {mae:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"R2: {R2:.2f}")

MAE:  176.10
RMSE: 287.58
R2: 0.64


In [113]:
import plotly.graph_objects as go

# Distance between x-axis ticks, in epochs. The tick range follows the number
# of recorded epochs instead of a hard-coded upper bound.
tick_step = 500

fig = go.Figure()

# Training loss trace (x defaults to the 0-based position in the list)
fig.add_trace(go.Scatter(
    y=train_losses,
    mode="lines+markers",
    name="Train Loss",
    line=dict(color="royalblue", width=2),
    marker=dict(size=4)
))

# Validation loss trace
fig.add_trace(go.Scatter(
    y=val_losses,
    mode="lines+markers",
    name="Validation Loss",
    line=dict(color="tomato", width=2),
    marker=dict(size=4)
))

# Layout
fig.update_layout(
    title="Training & Validation Loss over Epochs  - lr0.00025 8_1_4_4_.1_0.1",
    xaxis_title="Epoch",
    yaxis_title="Loss",
    xaxis=dict(
        tickmode='array',
        tickvals=list(range(0, len(train_losses) + tick_step, tick_step)),  # one tick every `tick_step` epochs
        tickfont=dict(size=10)
    ),
    template="plotly_white",
    legend=dict(x=0.05, y= -0.25, bgcolor="rgba(255,255,255,0)", borderwidth=0)
)

fig.show()


# Skip (experimental hyperparameter grid search, not part of the main run)

In [None]:
from sklearn.model_selection import ParameterGrid
import torch
import numpy as np

# Candidate values per hyperparameter; every combination forms one grid point.
param_grid = dict(
    dim=[2, 4, 8],
    depth=[2, 4],
    heads=[2, 4],
    attn_dropout=[0.1, 0.3],
    ff_dropout=[0.1, 0.3],
    lr=[0.00025, 0.0003],
    weight_decay=[0, 0.00001],
)

def run_experiment(params, train_loader, val_loader, epochs=1,
                   categories=(1,), num_continuous=8):
    """Train and evaluate one FTTransformer per hyperparameter combination.

    Args:
        params: mapping of hyperparameter name to candidate value(s). A scalar
            value is treated as a single candidate. ``None`` falls back to the
            module-level ``param_grid``. Expected keys: ``dim``, ``depth``,
            ``heads``, ``attn_dropout``, ``ff_dropout``, ``lr``,
            ``weight_decay``.
        train_loader: yields ``(x_cat, x_cont, y)`` training batches.
        val_loader: yields ``(x_cat, x_cont, y)`` validation batches.
        epochs: number of training passes per combination.
        categories: number of unique values per categorical column, passed to
            ``FTTransformer``.
        num_continuous: number of continuous input columns.

    Returns:
        A list with one dict per combination: the hyperparameters plus
        ``"val_loss"``, the mean per-batch validation loss after training.
    """
    grid_spec = param_grid if params is None else params
    # ParameterGrid needs a sequence of candidates per key; wrap bare scalars.
    grid_spec = {
        key: value if isinstance(value, (list, tuple, np.ndarray)) else [value]
        for key, value in grid_spec.items()
    }

    results = []
    # The loop variable has its own name so it no longer shadows `params`.
    for combo in ParameterGrid(grid_spec):
        # 1) Build model & optimizer
        model = FTTransformer(
            categories=categories,
            num_continuous=num_continuous,
            dim=combo["dim"],
            dim_out=1,
            depth=combo["depth"],
            heads=combo["heads"],
            attn_dropout=combo["attn_dropout"],
            ff_dropout=combo["ff_dropout"]
        )
        optimizer = torch.optim.AdamW(
            model.parameters(),
            lr=combo["lr"],
            weight_decay=combo["weight_decay"]
        )
        criterion = RMSELoss()

        # 2) Train. The loop is written inline because the notebook's
        #    train_one_epoch helper works on the module-level model, optimizer
        #    and criterion rather than on the local ones created above.
        for _ in range(epochs):
            model.train()
            for x_cat, x_cont, y in train_loader:
                y = y.unsqueeze(-1) if y.dim() == 1 else y
                optimizer.zero_grad()
                loss = criterion(model(x_cat, x_cont), y)
                loss.backward()
                optimizer.step()

        # 3) Evaluate on the validation loader
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for x_cat, x_cont, y in val_loader:
                y = y.unsqueeze(-1) if y.dim() == 1 else y
                val_loss += criterion(model(x_cat, x_cont), y).item()

        results.append({**combo, "val_loss": val_loss / len(val_loader)})

    return results

def train_one_epoch(train_loader):
    """Perform one training pass over ``train_loader``.

    Works on the module-level ``model``, ``optimizer`` and ``criterion``.

    Args:
        train_loader: iterable with a length that yields
            ``(x_cat, x_cont, y)`` batches.

    Returns:
        Sum of the per-batch loss values divided by the number of batches.
    """
    batch_losses = []

    for cat_batch, cont_batch, target in train_loader:
        # Targets must be [B, 1]; add the trailing axis only when it is missing.
        if target.dim() == 1:
            target = target.unsqueeze(-1)

        optimizer.zero_grad()
        batch_loss = criterion(model(cat_batch, cont_batch), target)
        batch_loss.backward()
        optimizer.step()

        batch_losses.append(batch_loss.item())

    # Average over all batches of the loader.
    return sum(batch_losses) / len(train_loader)

from datetime import datetime

# Loss and optimizer for the module-level `model`
criterion = RMSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.00025)


# Number of epochs to train
EPOCHS = 1

# Lists to store per-epoch losses
train_losses = []
val_losses   = []

best_vloss   = float('inf')
# Run timestamp, used in the checkpoint filename
timestamp    = datetime.now().strftime('%Y%m%d_%H%M%S')


for epoch in range(EPOCHS):
    print(f'\nEPOCH {epoch+1}/{EPOCHS}')

    # --------------------
    # 1) TRAINING PHASE
    # --------------------
    model.train()
    avg_loss = train_one_epoch(train_loader)
    train_losses.append(avg_loss)
    print(f'train loss: {avg_loss:.4f}')

    # --------------------
    # 2) VALIDATION PHASE
    # --------------------
    model.eval()

    val_loss = 0.0
    with torch.no_grad():
        for x_cat, x_cont, y in val_loader:
            # targets must be [B, 1]; add the trailing axis only when missing
            y = y.unsqueeze(-1) if y.dim()==1 else y
            pred = model(x_cat, x_cont)
            val_loss += criterion(pred, y).item()

    # mean of the per-batch validation losses
    avg_vloss = val_loss / len(val_loader)
    val_losses.append(avg_vloss)
    print(f'valid loss: {avg_vloss:.4f}')

    # --------------------
    # 3) CHECKPOINTING
    # --------------------
    # Test the current epoch number, not the constant EPOCHS: with EPOCHS the
    # condition had the same value on every iteration, so it either never
    # fired or fired on every improving epoch.
    if (epoch + 1) % 1000 == 0 and avg_vloss < best_vloss:
        best_vloss = avg_vloss
        ckpt_path = f'model_{timestamp}_epoch{epoch+1}.pt'
        torch.save(model.state_dict(), ckpt_path)


    


EPOCH 1/1
train loss: 384.0183
valid loss: 381.4391
