In [13]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeRegressor
from tab_transformer_pytorch import TabTransformer, FTTransformer
from torch.utils.data import Dataset, DataLoader, TensorDataset
from xgboost import XGBRegressor

from preprocessing import get_features_and_target
from RMSELoss import RMSELoss

# Getting Dataframe

In [14]:
# Name of the column that holds the regression target.
target_column = "PullTest (N)"

# Read the test split from disk, then let the project helper split it into
# the feature frame (x_test) and the target (y_test).
test_df = pd.read_csv("data/test_data.csv")
x_test, y_test = get_features_and_target(test_df, target_column)





# Encode Categorical_Features

In [15]:
# Columns that are label-encoded and later passed to the models as x_cat.
categorical_features = ["Material"]

# A single encoder instance; it is re-fitted for every column in the loop.
le = LabelEncoder()

# NOTE(review): the encoder is fitted on the test data itself. The integer
# codes only agree with the ones seen during training if both splits contain
# the same set of labels -- confirm against the training notebook.
for feature in categorical_features:
    encoded_column = le.fit_transform(x_test[feature])
    x_test[feature] = encoded_column

# Split Categorical_Features

In [16]:
# Separate the (already label-encoded) categorical columns ...
x_test_categorical_features = x_test.loc[:, categorical_features]

# ... from the remaining columns, which are treated as continuous features.
x_test_numerical_features = x_test.drop(columns=categorical_features)

# Change df into Tensors

In [17]:

# Full feature matrix (categorical + numerical columns) as one float tensor.
# It is not used by the loader below; kept for ad-hoc inspection.
test_tensor = torch.tensor(x_test.to_numpy(), dtype=torch.float)

# The models take categorical and continuous inputs separately:
# continuous values as float, categorical codes as long (integer indices).
x_test_numer_tensor = torch.tensor(
    x_test_numerical_features.to_numpy(), dtype=torch.float
)
x_test_categorical_features_tensor = torch.tensor(
    x_test_categorical_features.to_numpy(), dtype=torch.long
)
y_test_tensor = torch.tensor(y_test.to_numpy(), dtype=torch.float)

# Each dataset item is (x_cat, x_cont, y) -- the order the evaluation loops
# unpack. TensorDataset is imported in the notebook's top import cell.
test_ds = TensorDataset(
    x_test_categorical_features_tensor,
    x_test_numer_tensor,
    y_test_tensor,
)

# No shuffle argument is given, so DataLoader's default (sequential order)
# applies and predictions stay aligned with the rows of the test frame.
test_loader = DataLoader(test_ds, batch_size=32)



# Check Tensors


In [None]:
# Sanity check of the continuous-feature tensor. Print the shape and only the
# first rows instead of dumping the whole tensor into the notebook output.
torch.set_printoptions(sci_mode=False, precision=3)
print(x_test_numer_tensor.shape)
print(x_test_numer_tensor[:5])

8


# Define Model

In [61]:
# `categories` has to be a tuple, hence the trailing comma. "Material" is the
# only categorical column, so the expression below evaluates to (1,).
# NOTE(review): the tab_transformer_pytorch README describes `categories` as
# the number of unique values per categorical column, whereas this passes the
# number of categorical columns. The architecture must match the saved
# checkpoints, so it is left unchanged here -- confirm against the training
# notebook.
def _build_ft_transformer(dim, dropout):
    """Create one FTTransformer with a single output for the test features.

    dim:     value forwarded as the model's `dim` argument.
    dropout: rate used for both `attn_dropout` and `ff_dropout`.
    """
    return FTTransformer(
        categories=(x_test_categorical_features.shape[1],),
        num_continuous=x_test_numerical_features.shape[1],
        dim=dim,
        dim_out=1,
        depth=4,
        heads=4,
        attn_dropout=dropout,
        ff_dropout=dropout,
    )


# The hyperparameters below have to mirror the ones encoded in the checkpoint
# file names that are loaded in the next cell.
model_data_driven = _build_ft_transformer(dim=9, dropout=0.1)
model_physic_loss = _build_ft_transformer(dim=9, dropout=0.1)
model_error_loss = _build_ft_transformer(dim=8, dropout=0.3)

# Saved Models



In [62]:
# The architectures built in the model-definition cell must use the same
# hyperparameters as these checkpoints, otherwise load_state_dict fails.
# Selected models:

# Regular training
data_driven_model = 'trained_models/regular_training/model_FTTransformer_lr0.00025_9_1_4_4_.1_0.1__epoch15600.pt'

# Physical loss training
physical_loss_model = 'trained_models/physical_loss_training/model_lr0.00025_9_1_4_4_.1_.1PhysicsLambda0.2_epoch20000.pt'

# Physical error training
physical_error_model = 'trained_models/physical_error_training/model_lr0.00025_8_1_4_4_.3_.3Error_date_20250812_epoch9500.pt'


pathD = data_driven_model
pathL = physical_loss_model
pathE = physical_error_model

# This notebook only runs inference: eval mode disables dropout so that the
# predictions are deterministic. The mode is independent of the weights, so
# it can be set before loading them.
model_data_driven.eval()
model_physic_loss.eval()
model_error_loss.eval()

# map_location="cpu" lets checkpoints that were saved from a GPU session load
# on a CPU-only machine; the models and the test tensors live on the CPU.
model_data_driven.load_state_dict(torch.load(pathD, map_location="cpu"))
model_physic_loss.load_state_dict(torch.load(pathL, map_location="cpu"))
model_error_loss.load_state_dict(torch.load(pathE, map_location="cpu"))

<All keys matched successfully>

In [47]:
# Column positions (within the continuous features) read by the physics model.
idxA = 5
idxB = 6


def physics_pull_force(x_cont):
    """Closed-form pull-force estimate for a batch of continuous features.

    x_cont: tensor indexed as [row, feature]; columns idxA and idxB are read.

    Returns one value per row:
        (pi / 4) * (4 * sqrt(t))**2 * (0.8 * 365)
    where t is the element-wise minimum of the two columns.
    """
    # NOTE(review): the comment previously attached to this formula quoted the
    # factors 5 and 0.6; the code uses 4 and 0.8 -- confirm which is intended.
    min_ab = torch.minimum(x_cont[:, idxA], x_cont[:, idxB])
    scaled_root = 4 * torch.sqrt(min_ab)
    return (torch.pi / 4) * scaled_root.pow(2) * (0.8 * 365)

In [73]:
# Run the three models and the physics formula over test_loader and plot the
# per-sample predictions against the true values.

# Inference only: without eval() the dropout layers stay active and the
# predictions become noisy and non-reproducible.
model_data_driven.eval()
model_error_loss.eval()
model_physic_loss.eval()

all_true, all_pred, all_pred_error, all_pred_phyloss, all_phys = [], [], [], [], []

with torch.no_grad():
    for x_cat, x_cont, y in test_loader:
        # Physics-only estimate, with a trailing axis added by unsqueeze(-1).
        f_p = physics_pull_force(x_cont).unsqueeze(-1)

        r_p = model_data_driven(x_cat, x_cont)
        e_p = model_error_loss(x_cat, x_cont)
        l_p = model_physic_loss(x_cat, x_cont)

        y_p = r_p           # purely data-driven prediction
        # The error model's output is added on top of the physics estimate.
        # NOTE(review): assumes e_p has the same [batch, 1] shape as f_p;
        # otherwise the sum would broadcast -- confirm.
        ye_p = f_p + e_p
        yphy_p = l_p        # model trained with the physics loss term

        # Convert explicitly to flat NumPy arrays for concatenation/plotting.
        all_true.append(y.ravel().cpu().numpy())
        all_pred.append(y_p.ravel().cpu().numpy())
        all_pred_phyloss.append(yphy_p.ravel().cpu().numpy())
        all_pred_error.append(ye_p.ravel().cpu().numpy())
        all_phys.append(f_p.ravel().cpu().numpy())

# Flatten the per-batch arrays into one array per series.
all_true = np.concatenate(all_true)
all_pred = np.concatenate(all_pred)
all_pred_phyloss = np.concatenate(all_pred_phyloss)
all_pred_error = np.concatenate(all_pred_error)
all_phys = np.concatenate(all_phys)

# Plot per sample.
sample = np.arange(len(all_true))

# (legend name, values, marker colour) for every trace, in drawing order.
trace_specs = [
    ("True Data", all_true, "black"),
    ("Data Driven Prediction", all_pred, "blue"),
    ("Phyisical Loss Prediction", all_pred_phyloss, "orange"),
    ("Error Loss Prediction", all_pred_error, "green"),
    ("Physics Only", all_phys, "firebrick"),
]

fig = go.Figure()
for trace_name, trace_values, trace_color in trace_specs:
    fig.add_trace(go.Scatter(x=sample, y=trace_values, mode="lines+markers",
                             name=trace_name,
                             marker=dict(color=trace_color, size=6)))

fig.update_layout(
    title="Test Dataset",
    xaxis_title="Sample Index",
    yaxis_title="Pull-Force",
    legend_title="Series",
    legend=dict(
        x=1.005,        # push legend past the right edge
        y=1,
        xanchor="left",
        borderwidth=1
    ),
    margin=dict(r=100),  # give extra room on right for the legend
    template="seaborn"
)

fig.show()

In [77]:
# Collect predictions of the physics-loss model and the true values over the
# test set (test_loader), then report MAE, RMSE and R2.
model_physic_loss.eval()  # disable dropout for evaluation
all_preds = []
all_targets = []

with torch.no_grad():
    for x_cat, x_cont, y in test_loader:
        # Add a trailing axis so the targets stack the same way as the
        # model outputs appended below.
        y = y.unsqueeze(-1)
        all_targets.append(y)

        prediction = model_physic_loss(x_cat, x_cont)
        all_preds.append(prediction)

# Concatenate the batches and convert explicitly to NumPy for scikit-learn.
y_true = torch.cat(all_targets).cpu().numpy()
y_pred = torch.cat(all_preds).cpu().numpy()

# Calculate MAE, RMSE and R2.
mae  = mean_absolute_error(y_true, y_pred)
rmse = np.sqrt(mean_squared_error(y_true, y_pred))
R2   = r2_score(y_true, y_pred)

print(f"MAE:  {mae:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"R2: {R2:.2f}")

MAE:  174.98
RMSE: 327.19
R2: 0.54
