In [None]:
!pip install pytorch-tabnet

Collecting pytorch-tabnet
  Downloading pytorch_tabnet-4.1.0-py3-none-any.whl.metadata (15 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.3->pytorch-tabnet)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.3->pytorch-tabnet)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.3->pytorch-tabnet)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.3->pytorch-tabnet)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.3->pytorch-tabnet)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
from pytorch_tabnet.tab_model import TabNetClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [None]:
# ================== 1️⃣ Load Dataset ==================
# The file is read from /content/drive/MyDrive (presumably a mounted Google
# Drive in Colab — confirm); replace the path if the CSV lives elsewhere.
dataset_path = "/content/drive/MyDrive/SMOTE4.csv"
df = pd.read_csv(dataset_path)

In [None]:
# Columns used as model inputs, and the column holding the class label
feature_cols = ["durata", "DistTOT", "HSR", "ACC", "DEC", "RPE", "sRPE"]
target_col = "phase"

# Map the raw phase labels to consecutive integer codes; the original labels
# stay available via label_encoder.classes_.
# Note: the target column is overwritten in place, so re-running this cell
# fits the encoder on the already-encoded values instead of the raw labels.
label_encoder = LabelEncoder()
encoded_phase = label_encoder.fit_transform(df[target_col])
df[target_col] = encoded_phase

# Stratified 70/30 hold-out split, so each class keeps the same share of rows
# in the train and test partitions.
X_all = df[feature_cols]
y_all = df[target_col]
X_train, X_test, y_train, y_test = train_test_split(
    X_all,
    y_all,
    test_size=0.3,
    random_state=42,
    stratify=y_all,
)

In [None]:
# ================== 2️⃣ Feature Scaling ==================
# Learn the standardisation statistics from the training split only, then
# apply that same fitted scaler to both splits so no test statistics leak in.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# ================== 3️⃣ Train TabNet Model ==================
tabnet = TabNetClassifier(
    lambda_sparse=1e-3,
    momentum=0.98,
    verbose=10,
    seed=42
)

# Carve a stratified validation subset out of the *training* split.
# The eval set drives early stopping (patience) and best-epoch selection, so
# using the held-out test split here would tune the model on the very data
# that is later reported as "Test Accuracy".
X_fit, X_valid, y_fit, y_valid = train_test_split(
    X_train_scaled, y_train.values,
    test_size=0.2,
    random_state=42,
    stratify=y_train.values
)

# Train TabNet, monitoring accuracy on the validation subset only
tabnet.fit(
    X_fit, y_fit,
    eval_set=[(X_valid, y_valid)],
    eval_name=['valid'],
    eval_metric=['accuracy'],
    max_epochs=30,  # training budget is passed to fit(), not the constructor
    patience=10
)



epoch 0  | loss: 1.61482 | val_0_accuracy: 0.31393 |  0:00:00s
epoch 10 | loss: 0.8418  | val_0_accuracy: 0.7082  |  0:00:02s
epoch 20 | loss: 0.56257 | val_0_accuracy: 0.7918  |  0:00:04s
Stop training because you reached max_epochs = 30 with best_epoch = 24 and best_val_0_accuracy = 0.86557




In [None]:
# ================== 4️⃣ Extract TabNet Feature Embeddings ==================
# The "embeddings" passed downstream are the outputs of tabnet.predict_proba
# for each split, computed for the training split first and the test split second.
embeddings_train, embeddings_test = (
    tabnet.predict_proba(scaled_split)
    for scaled_split in (X_train_scaled, X_test_scaled)
)

In [None]:
# ================== 5️⃣ Train Random Forest on TabNet Embeddings ==================
# Second-stage model: a depth-limited forest fitted on the TabNet outputs of
# the training split, with a fixed seed for repeatable results.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    max_features="sqrt",
    random_state=42,
)
rf = RandomForestClassifier(**rf_params)

rf.fit(embeddings_train, y_train)


In [None]:
# ================== 6️⃣ Evaluate Hybrid Model ==================
# Accuracy on the split the forest was fitted on (an optimistic reference)
train_preds = rf.predict(embeddings_train)
train_acc = accuracy_score(y_train, train_preds)

# Accuracy on the held-out split; test_preds is reused by the report below
test_preds = rf.predict(embeddings_test)
test_acc = accuracy_score(y_test, test_preds)

print(f"Train Accuracy: {train_acc:.4f}")
print(f"Test Accuracy: {test_acc:.4f}")


Train Accuracy: 0.9582
Test Accuracy: 0.9000


In [None]:
# Classification Report
# Per-class precision / recall / F1 on the held-out split
report_text = classification_report(y_test, test_preds)
print("\nClassification Report:\n", report_text)

# ================== 7️⃣ Cross-Validation for Stability ==================
# 5-fold cross-validation of the forest on the training-split TabNet outputs.
# NOTE(review): only the forest is refit per fold; the TabNet outputs were
# produced by a model already trained on these rows, so this score may be
# optimistic — confirm whether per-fold TabNet training is wanted.
cv_scores = cross_val_score(
    estimator=rf,
    X=embeddings_train,
    y=y_train,
    cv=5,
)
print(f"\nCross-Validation Accuracy: {np.mean(cv_scores):.4f}")


Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.90      0.91       305
           1       0.87      0.93      0.90       305
           2       0.89      0.88      0.88       305
           3       0.93      0.90      0.91       305

    accuracy                           0.90      1220
   macro avg       0.90      0.90      0.90      1220
weighted avg       0.90      0.90      0.90      1220


Cross-Validation Accuracy: 0.8938
