### Model 3 Tabnet Volume prediction

In [1]:
import math, pickle, warnings
from pathlib import Path
import os, random, time, re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import (accuracy_score, classification_report,
                             confusion_matrix, precision_recall_fscore_support,
                             mean_absolute_error, mean_squared_error, r2_score, precision_score, recall_score,f1_score)

import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from pytorch_tabnet.tab_model import TabNetRegressor
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

In [None]:
SEED                = 1
BATCH               = 64
PATIENCE            = 15
MIN_ROWS_PER_CLASS  = 3      
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

warnings.filterwarnings("ignore")
np.random.seed(SEED); torch.manual_seed(SEED)

DATA_PATH = Path("./data/processed_csv.csv")
assert DATA_PATH.exists(), "processed_csv.csv not found!"


In [4]:
CSV_PATH   = "./data/processed_csv.csv"
BASE_FEATS = ["a","b","c","alpha","beta","gamma"]
TARGET     = "vol"

In [5]:
df = pd.read_csv(CSV_PATH).dropna(subset=BASE_FEATS+[TARGET]).copy()

def analytic_volume(a,b,c,alpha,beta,gamma):
    alpha,beta,gamma = np.deg2rad(alpha),np.deg2rad(beta),np.deg2rad(gamma)
    cos_a,cos_b,cos_c = np.cos(alpha),np.cos(beta),np.cos(gamma)
    rad = np.clip(1 - cos_a**2 - cos_b**2 - cos_c**2 + 2*cos_a*cos_b*cos_c, 0, None)
    return a*b*c*np.sqrt(rad)

df["v_formula"] = analytic_volume(df["a"],df["b"],df["c"],
                                  df["alpha"],df["beta"],df["gamma"])
FEATS = BASE_FEATS + ["v_formula"]

X = df[FEATS].values.astype(np.float32)
y = np.log1p(df[TARGET].values.astype(np.float32)).reshape(-1,1)

scaler = StandardScaler().fit(X[:, :6])
X[:, :6] = scaler.transform(X[:, :6])

X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=42)


In [6]:
reg = TabNetRegressor(
    n_d=64, n_a=64, n_steps=5, gamma=1.5,
    n_independent=2, n_shared=2, lambda_sparse=1e-4,
    optimizer_fn=torch.optim.Adam, optimizer_params=dict(lr=1e-3),
    mask_type="entmax", seed=42, verbose=10,
    scheduler_fn=torch.optim.lr_scheduler.StepLR,
    scheduler_params={"step_size":50, "gamma":0.9},
)

reg.fit(
    X_tr, y_tr,
    eval_set=[(X_te, y_te)],
    eval_name=["val"],
    eval_metric=["rmse"],
    max_epochs=400, patience=30,
    batch_size=1024, virtual_batch_size=128,
    num_workers=0, drop_last=False,
    loss_fn=torch.nn.MSELoss()
)


epoch 0  | loss: 5.39955 | val_rmse: 1.4062700271606445|  0:00:03s
epoch 10 | loss: 0.11155 | val_rmse: 0.2486400008201599|  0:00:43s
epoch 20 | loss: 0.04678 | val_rmse: 0.18647000193595886|  0:01:23s
epoch 30 | loss: 0.02938 | val_rmse: 0.22452999651432037|  0:03:06s
epoch 40 | loss: 0.02146 | val_rmse: 0.1903499960899353|  0:04:09s
epoch 50 | loss: 0.02066 | val_rmse: 0.17824000120162964|  0:04:51s
epoch 60 | loss: 0.01417 | val_rmse: 0.10523000359535217|  0:05:40s
epoch 70 | loss: 0.01182 | val_rmse: 0.15897999703884125|  0:06:39s
epoch 80 | loss: 0.0106  | val_rmse: 0.08816000074148178|  0:07:21s
epoch 90 | loss: 0.01069 | val_rmse: 0.10780999809503555|  0:08:06s
epoch 100| loss: 0.00929 | val_rmse: 0.09359999746084213|  0:08:58s
epoch 110| loss: 0.00795 | val_rmse: 0.10200999677181244|  0:09:45s

Early stopping occurred at epoch 110 with best_epoch = 80 and best_val_rmse = 0.08816000074148178


In [7]:
y_pred_log = reg.predict(X_te).flatten()
y_pred     = np.expm1(y_pred_log)
y_true     = np.expm1(y_te.flatten())

mse, mae, r2 = (mean_squared_error(y_true,y_pred),
                mean_absolute_error(y_true,y_pred),
                r2_score(y_true,y_pred))
print(f"\nTabNet  •  MSE={mse:.3f}  MAE={mae:.3f}  R²={r2:.4f}")



TabNet  •  MSE=20.097  MAE=0.154  R²=-11.3015


In [None]:
hist_dict = reg.history.history
train_key = next(k for k in hist_dict if k.startswith("loss"))
val_key   = next(k for k in hist_dict if k.startswith("val_"))

plt.figure(figsize=(12, 8))
plt.plot(hist_dict[train_key], label="Train")
plt.plot(hist_dict[val_key],   label="Val")
plt.yscale("log")
plt.title("RMSE (log‑scale)")
plt.xlabel("Epoch")
plt.ylabel("RMSE")
plt.legend()
plt.tight_layout()
plt.show()

plt.figure(figsize=(12, 8))
sns.scatterplot(x=y_true, y=y_pred, s=15, alpha=0.6)
mx = max(y_true.max(), y_pred.max())
plt.plot([0, mx], [0, mx], 'r--')
plt.xlim(0, 8)
plt.ylim(0, 8)
plt.title("True vs Predicted")
plt.xlabel("True")
plt.ylabel("Predicted")
plt.tight_layout()
plt.show()

res = y_true - y_pred
plt.figure(figsize=(12, 8))
sns.histplot(res, kde=True, bins=30, color="steelblue")
plt.title("Residual Distribution")
plt.xlabel("Residual")
plt.ylabel("Frequency")
plt.xlim(-12, 12)
plt.tight_layout()
plt.show()

plt.figure(figsize=(12, 8))
sns.scatterplot(x=y_pred, y=res, s=15, alpha=0.6)
plt.axhline(0, ls='--', c='red')
plt.title("Residuals vs Predicted")
plt.xlabel("Predicted")
plt.ylabel("Residual")
plt.xlim(-1, 6)
plt.ylim(-4, 4)
plt.tight_layout()
plt.show()

fi = reg.feature_importances_
plt.figure(figsize=(12, 8))
ax = sns.barplot(x=fi, y=FEATS, palette="crest")
plt.title("TabNet Feature Importance")
plt.xlabel("Importance Score")
plt.ylabel("Feature")
plt.xlim(0, max(fi) * 1.1)
for i, val in enumerate(fi):
    ax.text(val + 0.01 * max(fi), i, f"{val:.2f}", va='center', fontsize=9)
plt.tight_layout()
plt.show()


In [9]:
parent_dir    = "Models"
model_name    = "Model 3"
model_dir     = os.path.join(parent_dir, model_name)

try:
    os.mkdir(parent_dir)
    print(f"Directory '{parent_dir}' created successfully.")
except FileExistsError:
    print(f"Directory '{parent_dir}' already exists.")
except PermissionError:
    print(f"Permission denied: Unable to create '{parent_dir}'.")
except Exception as e:
    print(f"An error occurred: {e}")

try:
    os.mkdir(model_dir)
    print(f"Directory '{model_dir}' created successfully.")
except FileExistsError:
    print(f"Directory '{model_dir}' already exists.")
except PermissionError:
    print(f"Permission denied: Unable to create '{model_dir}'.")
except Exception as e:
    print(f"An error occurred: {e}")

reg.save_model(os.path.join(model_dir, f"{model_name} tabnet_volume"))
print("Model saved to tabnet_volume.zip")

Directory 'Models' already exists.
Directory 'Models\Model 3' created successfully.
Successfully saved model at Models\Model 3\Model 3 tabnet_volume.zip
Model saved to tabnet_volume.zip
