In [67]:
import torch
import numpy as np
import random
import os

# Function to set all random seeds for reproducibility
def set_all_seeds(seed):
    
    os.environ['PYTHONHASHSEED'] = str(seed)
    
    # Setting seed for built-in random module
    random.seed(seed)
    
    # Setting seed for numpy
    np.random.seed(seed)
    
    # Setting seed for PyTorch on CPU
    torch.manual_seed(seed)
    
    # setting seeds for CUDA and enforce deterministic behavior
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)           # For multi-GPU setups
        torch.backends.cudnn.deterministic = True  # Ensuring deterministic convolution algorithms
        torch.backends.cudnn.benchmark = False     # Disabling auto-tuner to ensure reproducibility

# Setting a fixed random seed
MY_RANDOM_SEED = 42 
set_all_seeds(MY_RANDOM_SEED)


In [68]:
# Check if CUDA-enabled GPU is available
print(torch.cuda.is_available())  

# Print the number of GPUs detected by PyTorch
print(torch.cuda.device_count())  

# printing the name of the available GPU
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))  
    # Index 0 refers to the first GPU
    # This helps confirm which GPU the notebook is using


True
1
NVIDIA GeForce RTX 4060 Laptop GPU


In [69]:
import torch
import pytorch_lightning as pl
import pytorch_tabular
import tensorboard
import google.protobuf

print("torch:", torch.__version__)
print("pytorch-lightning:", pl.__version__)
print("pytorch-tabular:", pytorch_tabular.__version__)
print("tensorboard:", tensorboard.__version__)
print("protobuf:", google.protobuf.__version__)


torch: 2.7.1+cu118
pytorch-lightning: 2.4.0
pytorch-tabular: 1.1.1
tensorboard: 2.19.0
protobuf: 3.20.3


In [70]:
# Importing Libraries
import pandas as pd
import joblib
from sklearn.preprocessing import StandardScaler
from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig
from pytorch_tabular.models.tab_transformer.config import TabTransformerConfig
from pytorch_tabular import TabularModel

In [71]:
# Loading Train dataset 
df = pd.read_csv("C:/Users/Sreelakshmi/00_Final_Project_MENG/Approach 1/Dataset_1/train_data.csv")


In [72]:
# Total number of columns and rows in the training DataFrame
print(f"Train DataFrame shape: {df.shape}")


Train DataFrame shape: (1499, 1873)


In [73]:
# Display first 5 rows
df.head(5)

Unnamed: 0,abdominal_and_pelvic_pain,abdominal_aortic_aneurysm__without_rupture,abdominal_distension__gaseous_,abn_findings_on_dx_imaging_of_abd_regions__inc_retroperiton,abnormal_and_inconclusive_findings_on_dx_imaging_of_breast,abnormal_blood_pressure_reading__without_diagnosis,abnormal_coagulation_profile,abnormal_electrocardiogram__ecg___ekg_,abnormal_finding_of_blood_chemistry__unspecified,abnormal_findings_on_diagnostic_imaging_and_in_function_studies__without_diagnosis,...,vomiting__unspecified,vomiting_without_nausea,weakness,wheezing,white_matter_disease__unspecified,wtrcraft_fall_nos_crew,xerosis_cutis,zoster__herpes_zoster_,zoster_without_complications,__target__
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,1,1,0
2,1,0,0,0,0,0,1,1,0,1,...,0,0,1,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,0,1,0,0,0,0,1,0,1,...,0,0,1,0,0,0,0,0,0,1


In [74]:
target_col = '__target__'
features = df.drop(columns=[target_col]).columns.tolist()

print(f"Features: {features[:10]} ...")
print(f"Target: {target_col}")

Features: ['abdominal_and_pelvic_pain', 'abdominal_aortic_aneurysm__without_rupture', 'abdominal_distension__gaseous_', 'abn_findings_on_dx_imaging_of_abd_regions__inc_retroperiton', 'abnormal_and_inconclusive_findings_on_dx_imaging_of_breast', 'abnormal_blood_pressure_reading__without_diagnosis', 'abnormal_coagulation_profile', 'abnormal_electrocardiogram__ecg___ekg_', 'abnormal_finding_of_blood_chemistry__unspecified', 'abnormal_findings_on_diagnostic_imaging_and_in_function_studies__without_diagnosis'] ...
Target: __target__


In [75]:
# Standardize
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Save scaler
joblib.dump(scaler, 'C:/Users/Sreelakshmi/00_Final_Project_MENG/Approach 1/TabTransformer/tabTransformer_scaler.sav')


['C:/Users/Sreelakshmi/00_Final_Project_MENG/Approach 1/TabTransformer/tabTransformer_scaler.sav']

In [76]:
# Preparing scaled DataFrame

X_scaled_df = pd.DataFrame(X_scaled, columns=features)  # Convert scaled array back to DataFrame
train_df_scaled = X_scaled_df.copy()
train_df_scaled[target_col] = df[target_col]           # Add target column

In [77]:
# Defining the data configuration
data_config = DataConfig(
    target=[target_col], # This tells PyTorch-Tabular which column to predict
    continuous_cols=features, # List of feature column names that are continuous/numeric
    # categorical_cols=[]  # Add if you have categorical features
)

In [78]:
# Training configuration
trainer_config = TrainerConfig(
    max_epochs=50,                  # Maximum number of training epochs
    batch_size=512,                 # Number of samples per batch
    progress_bar="none",            # Disable progress bar 
    early_stopping="valid_loss",    # Stop training if validation loss stops improving
    checkpoints="valid_loss",       # Save the best model based on validation loss
    
)

# Optimizer configuration

optimizer_config = OptimizerConfig()
# Default optimizer is usually Adam. You can customize learning rate, weight decay, etc.
# Example: OptimizerConfig(optimizer="Adam", lr=1e-3, weight_decay=1e-5)


# Model configuration (TabTransformer)

model_config = TabTransformerConfig(
    task="classification",             # "classification" or "regression"
    learning_rate=1e-3,                # Learning rate for optimizer
    input_embed_dim=64,                # Dimension of embeddings for each feature
    num_heads=8,                       # Number of attention heads in transformer
    num_attn_blocks=6,                 # Number of transformer blocks/layers
    embedding_dropout=0.1,             # Dropout for embeddings (prevents overfitting)
    batch_norm_continuous_input=True,  # Apply batch normalization to continuous features
)


In [79]:
# Initialize TabularModel

tabular_model = TabularModel(
    data_config=data_config,           # Specifies target, continuous, and categorical columns
    model_config=model_config,         # Specifies TabTransformer architecture and hyperparameters
    optimizer_config=optimizer_config, # Optimizer settings (learning rate, type, etc.)
    trainer_config=trainer_config,     # Training behavior (epochs, batch size, early stopping)
)

In [80]:
# Safe context for model.fit

import torch
import omegaconf
import typing
import collections

# PyTorch-Tabular uses OmegaConf objects internally for configs.
# `torch.serialization.safe_globals` ensures that certain object types are treated as safe during serialization (e.g., when saving/loading checkpoints).
with torch.serialization.safe_globals([
    omegaconf.dictconfig.DictConfig,        # OmegaConf dictionary config
    omegaconf.base.ContainerMetadata,       # Metadata for OmegaConf objects
    omegaconf.listconfig.ListConfig,        # OmegaConf list config
    omegaconf.nodes.AnyNode,                # Any node in OmegaConf
    omegaconf.base.Metadata,                # More OmegaConf metadata
    typing.Any,                             # Python typing
    dict, list, int,                        # Basic Python types
    collections.defaultdict                  # For default dictionaries
]):
    
    # Fit the model
    tabular_model.fit(
        train=train_df_scaled,  # Training DataFrame (scaled/processed)
    )

Seed set to 42


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


C:\Users\Sreelakshmi\miniconda3\envs\tabular_final_env\lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:654: Checkpoint directory C:\Users\Sreelakshmi\00_Final_Project_MENG\Approach 1\TabTransformer-Notebooks\saved_models exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name             | Type                   | Params | Mode 
--------------------------------------------------------------------
0 | _backbone        | TabTransformerBackbone | 1.1 M  | train
1 | _embedding_layer | Embedding2dLayer       | 0      | train
2 | _head            | LinearHead             | 3.7 K  | train
3 | loss             | CrossEntropyLoss       | 0      | train
--------------------------------------------------------------------
1.1 M     Trainable params
0         Non-trainable params
1.1 M     Total params
4.363     Total estimated model params size (MB)
120       Modules in train mode
0         Modules in eval mode
C:\Users\Sreelakshmi\miniconda3\envs\tabular_fi

AttributeError: '_SpecialForm' object has no attribute '__qualname__'

In [82]:
tabular_model.save_model('C:/Users/Sreelakshmi/00_Final_Project_MENG/Approach 1/TabTransformer/tabtransformer_model')
