In [21]:
%pip install pytorch-tabnet scikit-learn pandas

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Note: you may need to restart the kernel to use updated packages.


In [22]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer

In [23]:
data = pd.read_csv('../Dataset/Thyroid_Diff.csv')

X = data.drop('Recurred', axis=1)
y = data['Recurred']

if y.dtype == 'object':
    le = LabelEncoder()
    y = le.fit_transform(y)

categorical_cols = X.select_dtypes(include=['object']).columns
numerical_cols = X.select_dtypes(include=[np.number]).columns

numerical_transformer = StandardScaler()
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_cols),
        ('cat', categorical_transformer, categorical_cols)
    ])

X = preprocessor.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [24]:
from pytorch_tabnet.tab_model import TabNetClassifier
import torch

tabnet_clf = TabNetClassifier(optimizer_fn=torch.optim.Adam,
                              optimizer_params=dict(lr=2e-2),
                              scheduler_params={"step_size":10, "gamma":0.9},
                              scheduler_fn=torch.optim.lr_scheduler.StepLR, 
                              mask_type='sparsemax')  # "sparsemax" or "entmax" are the options

tabnet_clf.fit(
    X_train=X_train, y_train=y_train,
    eval_set=[(X_train, y_train), (X_test, y_test)],
    eval_name=['train', 'valid'],
    eval_metric=['accuracy'],
    max_epochs=100, patience=20,
    batch_size=1024, virtual_batch_size=128,
    num_workers=0,
    drop_last=False
)

saving_path_name = "./TabNetModel/tabnet_model.pth"
tabnet_clf.save_model(saving_path_name)

epoch 0  | loss: 0.86117 | train_accuracy: 0.64052 | valid_accuracy: 0.66234 |  0:00:00s




epoch 1  | loss: 0.80141 | train_accuracy: 0.70588 | valid_accuracy: 0.75325 |  0:00:00s
epoch 2  | loss: 0.66793 | train_accuracy: 0.71569 | valid_accuracy: 0.75325 |  0:00:00s
epoch 3  | loss: 0.63027 | train_accuracy: 0.71242 | valid_accuracy: 0.75325 |  0:00:00s
epoch 4  | loss: 0.62346 | train_accuracy: 0.70915 | valid_accuracy: 0.75325 |  0:00:00s
epoch 5  | loss: 0.5593  | train_accuracy: 0.70915 | valid_accuracy: 0.75325 |  0:00:00s
epoch 6  | loss: 0.51439 | train_accuracy: 0.70915 | valid_accuracy: 0.75325 |  0:00:00s
epoch 7  | loss: 0.47156 | train_accuracy: 0.70915 | valid_accuracy: 0.75325 |  0:00:00s
epoch 8  | loss: 0.43915 | train_accuracy: 0.70915 | valid_accuracy: 0.75325 |  0:00:00s
epoch 9  | loss: 0.44085 | train_accuracy: 0.70915 | valid_accuracy: 0.75325 |  0:00:00s
epoch 10 | loss: 0.40845 | train_accuracy: 0.70915 | valid_accuracy: 0.75325 |  0:00:00s
epoch 11 | loss: 0.3848  | train_accuracy: 0.70915 | valid_accuracy: 0.75325 |  0:00:00s
epoch 12 | loss: 0.35



'./TabNetModel/tabnet_model.pth.zip'

In [25]:
preds = tabnet_clf.predict(X_test)

accuracy = (preds == y_test).mean()
print(f'Accuracy: {accuracy*100:.2f}%')

Accuracy: 96.10%


TabNet Accuracy: 0.9610