In [1]:
import sys
sys.path.insert(0, '/home/warin/projects/CAT-Transformer/model')
from updatedModel import CATTransformer, Combined_Dataset, train, test
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from rff.layers import GaussianEncoding #pip install random-fourier-features-pytorch
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, recall_score, confusion_matrix
from sklearn.preprocessing import StandardScaler, LabelEncoder
import os
import optuna
from optuna.trial import TrialState

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Always run this cell, with or without a GPU: every later cell uses
# `device_in_use` to decide where the model is placed.
device_in_use = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(
    "GPU is available and being used"
    if device_in_use.type == "cuda"
    else "GPU is not available, using CPU instead"
)

GPU is not available, using CPU instead


In [3]:
# Directory holding the Helena train/test/validation CSV splits.
# The default is the original author's checkout; set the
# CAT_TRANSFORMER_DATA_DIR environment variable to point at your own copy
# instead of editing (or commenting out) hard-coded paths in this cell.
data_dir = os.environ.get(
    'CAT_TRANSFORMER_DATA_DIR',
    '/home/warin/projects/CAT-Transformer/datasets/helena',
)

# One CSV per split, all read from the same directory.
df_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
df_test = pd.read_csv(os.path.join(data_dir, 'test.csv'))
df_val = pd.read_csv(os.path.join(data_dir, 'validation.csv'))


# Feature schema: 27 continuous columns named V1 ... V27 and one label column.
cont_columns = [f'V{i}' for i in range(1, 28)]
target = ['class']

# Sanity check (no need to edit): the declared columns must match the columns
# of the training frame exactly, ignoring order.
yourlist = sorted(cont_columns + target)
oglist = sorted(df_train.columns)

assert yourlist == oglist, "You may of spelled feature name wrong or you forgot to put one of them in the list"

# One-element list holding the largest number of distinct target values seen
# in any of the three splits.
target_classes = [
    max(len(frame[target].value_counts()) for frame in (df_train, df_val, df_test))
]
print("target classes",target_classes)

# Fit the scaler on the training split only, then apply that same fitted
# transform to every split so validation/test statistics never leak in.
scaler = StandardScaler().fit(df_train[cont_columns])

for _frame in (df_train, df_test, df_val):
    _frame[cont_columns] = scaler.transform(_frame[cont_columns])

# Wrap each split in the project's Dataset class. There are no categorical
# features in this dataset, so cat_columns is empty.
train_dataset = Combined_Dataset(df_train, cat_columns=[], num_columns=cont_columns, task1_column='class')
val_dataset = Combined_Dataset(df_val, cat_columns=[], num_columns=cont_columns, task1_column='class')
test_dataset = Combined_Dataset(df_test, cat_columns=[], num_columns=cont_columns, task1_column='class')

# This is a hyperparameter that is not tuned. Maybe mess with what makes sense here
batch_size = 256

# Wrap with DataLoader for easy batch extraction.
# Only the training loader is shuffled: shuffling matters for SGD, whereas the
# validation/test loaders are only read for evaluation, so a fixed order keeps
# batch composition (and therefore any per-batch metric averaging) repeatable.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

target classes [100]


# RUN EXPERIMENTS

1. Using Optuna to optimize CAT-Transformer's hyperparameters for your dataset

In [4]:
# Define the early stopping mechanism
class EarlyStopping:
    """Signal when a maximised metric has stopped improving.

    Call the instance once per epoch with the latest metric value. The call
    returns ``True`` once the metric has failed to strictly exceed its best
    value for ``patience`` consecutive calls, so it can be used directly as
    ``if early_stopping(metric): break``.

    Attributes:
        patience: number of consecutive non-improving calls tolerated.
        counter: current run of consecutive non-improving calls.
        best_metric: highest metric value seen so far.
        early_stop: latched flag; stays ``True`` once patience is exhausted.
    """

    def __init__(self, patience=5):
        self.patience = patience
        self.counter = 0
        self.best_metric = float('-inf')
        self.early_stop = False

    def __call__(self, metric):
        """Record ``metric`` and return whether training should stop.

        Args:
            metric: latest value of the monitored metric (higher is better).

        Returns:
            bool: ``True`` if patience has been exhausted, else ``False``.
        """
        if metric > self.best_metric:
            # Strict improvement: remember it and restart the patience window.
            self.best_metric = metric
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        # Previously nothing was returned, so callers testing the call's result
        # always saw None and never stopped.
        return self.early_stop

# Function to log results to a text file
def log_to_file(filename, text):
    """Append ``text`` followed by a newline to the file at ``filename``.

    The file is opened in append mode, so it is created if missing and
    existing content is preserved.
    """
    with open(filename, mode='a') as log_handle:
        line = text + '\n'
        log_handle.write(line)

def objective(trial):
    """Optuna objective: train one sampled CATTransformer and score it.

    Samples a hyperparameter configuration from ``trial``, trains a fresh
    model on ``train_dataloader`` for up to ``num_epochs`` epochs, evaluates
    on ``val_dataloader`` after every epoch, and stops early once validation
    accuracy has not improved for 5 consecutive epochs. A one-line summary is
    appended to ``all_trials_log.txt``.

    Args:
        trial: the Optuna trial used to sample hyperparameters.

    Returns:
        The validation accuracy of the last epoch that was run (the value
        the study maximises).
    """
    trial_number = trial.number

    # Define hyperparameters to search over.
    # NOTE: the 'classificiation_dropout' key is misspelled, but the cell that
    # rebuilds the best model reads it back under the same spelling, so it
    # must not be renamed here alone.
    alpha = trial.suggest_float('alpha', 0.1, 5, step=0.1)
    embed_size = trial.suggest_int('embed_size', 100, 320, step=20)
    num_layers = trial.suggest_int('num_layers', 1,2)
    heads = trial.suggest_categorical('heads', [1,2,5,10,20])
    forward_expansion = trial.suggest_int('forward_expansion', 2,8)
    decoder_dropout = trial.suggest_float('decoder_dropout', 0, .5, step=.1)
    classification_dropout = trial.suggest_float('classificiation_dropout', 0, 0.5, step=0.1)
    pre_norm_on = trial.suggest_categorical('pre_norm_on', [True, False])
    mlp_scale_classification = trial.suggest_int('mlp_scale_classification', 2, 8)
    learning_rate = trial.suggest_float('learning_rate', 0.00001, 0.1, log=True)

    # Upper bound on epochs; early stopping normally ends the trial sooner.
    num_epochs = 100

    # Create the model with the sampled hyperparameters
    model = CATTransformer(alpha=alpha,
                           embed_size=embed_size,
                           n_cont=len(cont_columns),
                           cat_feat=[],
                           num_layers=num_layers,
                           heads=heads,
                           forward_expansion=forward_expansion,
                           decoder_dropout=decoder_dropout,
                           classification_dropout=classification_dropout,
                           pre_norm_on=pre_norm_on,
                           mlp_scale_classification=mlp_scale_classification,
                           regression_on=False,
                           targets_classes=target_classes).to(device_in_use)

    # Define loss function and optimizer
    loss_function = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Initialize early stopping
    early_stopping = EarlyStopping(patience=5)  # Adjust patience as needed

    # Training loop: one pass over the training data, then one validation pass.
    for epoch in range(num_epochs):
        train_loss, train_acc = train(regression_on=False,
                                      dataloader=train_dataloader,
                                      model=model,
                                      loss_function=loss_function,
                                      optimizer=optimizer,
                                      device_in_use=device_in_use)
        val_loss, val_acc = test(regression_on=False,
                                 dataloader=val_dataloader,
                                 model=model,
                                 loss_function=loss_function,
                                 device_in_use=device_in_use)

        # Update the early-stopping state, then read the flag explicitly.
        # Testing the call's return value is not reliable here (it was always
        # None), which meant every trial silently ran all num_epochs epochs.
        early_stopping(val_acc)
        if early_stopping.early_stop:
            break

    # Log the final validation accuracy for this trial to a shared log file
    final_log = f"Trial {trial_number} completed. Validation Accuracy = {val_acc:.4f}"
    log_to_file('all_trials_log.txt', final_log)

    # Return the validation accuracy as the objective to optimize
    return val_acc

In [5]:
# How many hyperparameter configurations Optuna should try.
num_trials = 50

# The objective returns validation accuracy, so the study maximises it.
study = optuna.create_study(direction='maximize')

# Run the search, showing a progress bar while trials execute.
study.optimize(objective, n_trials=num_trials, show_progress_bar=True)

# Pull out the winning configuration and the score it achieved.
best_params, best_val_accuracy = study.best_params, study.best_value

print("Best Hyperparameters:", best_params)
print("Best Validation Accuracy (at Early Stopping):", best_val_accuracy)

[I 2023-12-03 21:11:22,047] A new study created in memory with name: no-name-e28e01a7-87e3-44e7-b852-7b2df797bfef
Best trial: 0. Best value: 0.0738317: 100%|██████████| 1/1 [00:27<00:00, 27.55s/it]

[I 2023-12-03 21:11:49,595] Trial 0 finished with value: 0.07383168013089272 and parameters: {'alpha': 4.7, 'embed_size': 320, 'num_layers': 2, 'heads': 5, 'forward_expansion': 3, 'decoder_dropout': 0.2, 'classificiation_dropout': 0.4, 'pre_norm_on': False, 'mlp_scale_classification': 7, 'learning_rate': 0.0035939638737664367}. Best is trial 0 with value: 0.07383168013089272.
Best Hyperparameters: {'alpha': 4.7, 'embed_size': 320, 'num_layers': 2, 'heads': 5, 'forward_expansion': 3, 'decoder_dropout': 0.2, 'classificiation_dropout': 0.4, 'pre_norm_on': False, 'mlp_scale_classification': 7, 'learning_rate': 0.0035939638737664367}
Best Validation Accuracy (at Early Stopping): 0.07383168013089272





In [6]:
# Retrain a fresh model with the best hyperparameters found by the study and
# track its loss/accuracy on the train and test splits after every epoch.

model = CATTransformer(alpha=best_params['alpha'],
                       embed_size=best_params['embed_size'],
                       n_cont=len(cont_columns),
                       cat_feat=[],
                       num_layers=best_params['num_layers'],
                       heads=best_params['heads'],
                       forward_expansion=best_params['forward_expansion'],
                       decoder_dropout=best_params['decoder_dropout'],
                       classification_dropout=best_params['classificiation_dropout'],
                       pre_norm_on=best_params['pre_norm_on'],
                       mlp_scale_classification=best_params['mlp_scale_classification'],
                       regression_on=False,
                       targets_classes=target_classes).to(device_in_use)
loss_functions = nn.CrossEntropyLoss()
# Maybe try messing around with optimizers. try other torch optimizers with different configurations.
optimizer = torch.optim.Adam(params=model.parameters(), lr=best_params['learning_rate'])
# NOTE(review): this stopper is created but never consulted in the loop below,
# so the run always lasts the full `epochs` epochs.
early_stopping = EarlyStopping(patience=5)
epochs = 100

train_losses = []
train_accuracies_1 = []
test_losses = []
test_accuracies_1 = []

for t in range(epochs):
    train_loss, train_acc = train(regression_on=False,
                                  dataloader=train_dataloader,
                                  model=model,
                                  loss_function=loss_functions,
                                  optimizer=optimizer,
                                  device_in_use=device_in_use)
    test_loss, test_acc = test(regression_on=False,
                               dataloader=test_dataloader,
                               model=model,
                               loss_function=loss_functions,
                               device_in_use=device_in_use)
    train_losses.append(train_loss)
    train_accuracies_1.append(train_acc)
    test_losses.append(test_loss)
    test_accuracies_1.append(test_acc)

    epoch_str = f"Epoch [{t+1:2}/{epochs}]"
    train_metrics = f"Train: Loss {(train_loss)}, Accuracy {(train_acc)}"
    test_metrics = f"Test: Loss {(test_loss)}, Accuracy {(test_acc)}"
    print(f"{epoch_str:15} | {train_metrics:65} | {test_metrics:65}")


# Derive the x-axis from what was actually recorded rather than from `epochs`,
# so the curves still plot if the history lists hold fewer entries.
epochs_ran = range(1, len(train_losses) + 1)

fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(15, 5))

# Plotting the loss curves
ax_loss.plot(epochs_ran, train_losses, label='Train Loss')
ax_loss.plot(epochs_ran, test_losses, label='Test Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.set_title('Training and Test Loss Curve')
ax_loss.legend()

# Plotting the accuracy curves
ax_acc.plot(epochs_ran, train_accuracies_1, label='Train Accuracy')
ax_acc.plot(epochs_ran, test_accuracies_1, label='Test Accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_title('Training and Test Accuracy Curve')
ax_acc.legend()

# NOTE(review): reporting the maximum test accuracy over epochs selects the
# epoch using the test set itself, so this figure is optimistic.
if test_accuracies_1:  # max() would raise on an empty history
    best_index = test_accuracies_1.index(max(test_accuracies_1))
    # Use a context manager so the log file handle is closed (and flushed)
    # instead of being left open by an inline open() call.
    with open("log_2pi.txt", 'a') as log_file:
        print(f"Best accuracy {test_accuracies_1[best_index]}\n", file=log_file)


Epoch [ 1/100]  | Train: Loss 4.205244916777371, Accuracy 0.07163047527225716       | Test: Loss 4.107066875849014, Accuracy 0.08517382413087934       


KeyboardInterrupt: 