In [None]:
# Train a set of models on different OpenXAI datasets

In [13]:
# go up a directory
import os
os.chdir('..')

In [14]:
# Utils
import torch
import torch.nn as nn
import numpy as np
from sklearn.metrics import f1_score, accuracy_score, roc_auc_score
import datetime
import sklearn.metrics as metrics
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import confusion_matrix
import seaborn as sn

# Data loaders
from openxai.dataloader import return_loaders
from openxai.ML_Models.LR.model import LogisticRegression
import openxai.ML_Models.ANN.MLP as model_MLP

In [25]:
plt.style.use('default')
SEED = 3407
torch.manual_seed(SEED)

<torch._C.Generator at 0x7f9bc8356d50>

In [26]:
def getExperimentID():
    date_info = datetime.datetime.now()
    testID    = '%d%02d%02d_%02d%02d' % (date_info.year, date_info.month, date_info.day, date_info.hour, date_info.minute)
    return testID

In [27]:
def training(model, loader_train, loader_test, ml_model, dir_name, learning_rate, epochs, dataset, exp_id, layer_info_str, use_class_weighting):
    loaders = {'train': loader_train, 'test': loader_test}

    if use_class_weighting: 
        # Compute class weights
        class_counts  = torch.bincount(torch.tensor(loader_train.dataset.targets.to_numpy().astype(np.compat.long)))
        total_samples = len(loader_train.dataset)
        class_weights = total_samples / (class_counts.float())

    # model collector
    best_auc_roc = 0

    # Use GPU if available
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model  = model.to(device)

    # declaring optimizer and loss
    if use_class_weighting:
        criterion = nn.CrossEntropyLoss(weight=class_weights)
    else:
        criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # training
    for e in range(epochs):
        print('Epoch {}/{}'.format(e, epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'test']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluation mode

            all_preds  = []
            all_labels = []
            for i, (inputs, labels) in enumerate(loaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device).type(torch.long)

                optimizer.zero_grad()
                with torch.set_grad_enabled(phase == 'train'):
                    y_pred = model(inputs.float())
                    loss   = criterion(y_pred.float(), labels.long())

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                all_preds.append(y_pred)
                all_labels.append(labels)
            # statistics
            preds = torch.cat(all_preds, dim=0)
            labels = torch.cat(all_labels, dim=0)
            
            raw_preds      = preds.data[:, 1]
            raw_preds_np   = raw_preds.view(-1).numpy()
            class_preds    = raw_preds >= 0.5
            class_preds_np = class_preds.view(-1).long().numpy()
            
            epoch_loss     = loss.item()
            epoch_acc      = accuracy_score(labels.numpy(), class_preds_np)
            epoch_f1       = f1_score(labels.numpy(), class_preds_np)
            epoch_auc_roc  = roc_auc_score(labels.numpy(), raw_preds_np)

            print(f'{phase}: Loss: {epoch_loss:.4f} | F1-score: {epoch_f1:.4f} | AUC ROC: {epoch_auc_roc:.4f} | Accuracy: {epoch_acc:.4f}')

            # deep copy the model
            if phase == 'test' and epoch_auc_roc > best_auc_roc:
                best_auc_roc = epoch_auc_roc
                best_model_name = '{}_{}_{}_{}_{}_auc_roc_{:.2f}.pt'.format(exp_id, layer_info_str, dataset, ml_model, learning_rate, best_auc_roc)
                print('new best model', e)
                if use_class_weighting:
                    fpth = 'models/ClassWeighted/' + dir_name
                else:
                    fpth = 'models/NotClassWeighted/' + dir_name
                if not os.path.isdir(fpth):  # If folder doesn't exist, then create it.
                    os.makedirs(fpth)
                output_file_path = fpth + '/' + best_model_name
                torch.save(model.state_dict(), output_file_path)

    return best_model_name

In [28]:
def PlotROC(output_dir, labels, preds, addtlNameInfo=''):
    fpr, tpr, _ = metrics.roc_curve(labels, preds)
    auc         = roc_auc_score(labels, preds)
    plt.figure(dpi=100)
    plt.plot([0,1],[0,1], color='k')
    plt.plot([0,0],[0,1], color='k', linestyle='dashed')
    plt.plot([0,1],[1,1], color='k', linestyle='dashed')
    plt.plot([1,1],[1,0], color='k', linestyle='dashed')
    plt.plot([1,0],[0,0], color='k', linestyle='dashed')
    plt.plot(fpr, tpr, color='b', label="AUC: " + str(round(auc,2)))
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend(loc='lower right')
    plt.title('ROC Curve - ' + addtlNameInfo)
    plt.savefig(output_dir + 'ROC_'+addtlNameInfo+'.png', bbox_inches='tight')
    plt.close('all')

In [29]:
def EvaluateNetwork(best_model_name, model_name, dim_per_layer, activation_per_layer, loader_train, loader_val, 
                    use_class_weighting):
    print('Evaluating', best_model_name)

    model = DefineModel(model_name, dim_per_layer, activation_per_layer)

    if use_class_weighting:
        fpth = 'models/ClassWeighted/'
    else:
        fpth = 'models/NotClassWeighted/'
        
    model.load_state_dict(torch.load(fpth + model_name.upper() + '/' + best_model_name))

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model  = model.to(device)
    model.eval()   # Set model to evaluation mode

    all_preds  = []
    all_labels = []
    for i, (inputs, labels) in enumerate(loader_val):
        inputs = inputs.to(device)
        labels = labels.to(device).type(torch.long)

        with torch.set_grad_enabled(False):
            y_pred = model(inputs.float())
        all_preds.append(y_pred)
        all_labels.append(labels)
    # statistics
    preds  = torch.cat(all_preds, dim=0)
    labels = torch.cat(all_labels, dim=0)
    
    raw_preds      = preds.data[:, 1]
    raw_preds_np   = raw_preds.view(-1).numpy()
    class_preds    = raw_preds >= 0.5
    class_preds_np = class_preds.view(-1).long().numpy()
    
    total_acc     = accuracy_score(labels.numpy(), class_preds_np)
    total_f1      = f1_score(labels.numpy(), class_preds_np)
    total_auc_roc = roc_auc_score(labels.numpy(), raw_preds_np)
    
    y_preds_test = []
    for i, (inputs, labels) in enumerate(loader_val):
        inputs = inputs
        y_pred = model(inputs.float()).data
        y_pred = y_pred[:, 1]# >= 0.5  # True if bigger than 0.5
        y_preds_test.append(y_pred)  # convert true and false to 1 and 0, respectively.
    y_preds_test_flat = np.array([item for sublist in y_preds_test for item in sublist])

    y_preds_train = []
    for i, (inputs, labels) in enumerate(loader_train):
        inputs = inputs
        y_pred = model(inputs.float()).data
        y_pred = y_pred[:, 1]# >= 0.5  # True if bigger than 0.5
        y_preds_train.append(y_pred)  # convert true and false to 1 and 0, respectively.
    y_preds_train_flat = np.array([item for sublist in y_preds_train for item in sublist])

    return total_acc, total_f1, total_auc_roc, y_preds_test_flat, y_preds_train_flat

In [30]:
def PlotConfusionMatrix(labels, preds, num_of_classes, output_dir, addtlNameInfo=''):
    preds      = preds >= 0.5
    conf_mat   = confusion_matrix(labels, preds, normalize=None)
    pathToSave = os.path.join(output_dir + 'ConfusionMatrix_'+addtlNameInfo+'.png')
    df_cm      = pd.DataFrame(conf_mat, range(num_of_classes), range(num_of_classes))
    
    plt.figure(figsize=(12, 9), dpi=100)
    plt.title('Confusion Matrix - ' + addtlNameInfo)
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    # sn.set(font_scale=2) # for label size
    sn.heatmap(df_cm, annot=True, annot_kws={"size": 18}, cmap="Blues", fmt='d') # font size
    plt.savefig(pathToSave, format = 'png')
    plt.close('all')

In [31]:
def SaveModelInfo(model_name, data_name, model, exp_id, epochs, learning_rate, X_train, y_train, X_val, y_val, X_test, y_test, total_f1, total_acc, total_auc_roc, loader_val, y_preds_test_flat, loader_train, y_preds_train_flat, layer_info_str, use_class_weighting, dim_per_layer='', activation_per_layer=''):
    # Save to .txt
    if use_class_weighting:
        output_dir = 'models/ClassWeighted/' + model_name.upper() + '/'
    else:
        output_dir = 'models/NotClassWeighted/' + model_name.upper() + '/'
    file_name  =  exp_id + '_' + layer_info_str + model_name.upper() + '_' + data_name + '_summary'

    fpth = os.path.join(output_dir, file_name+'.txt')
    paramTxt = open(fpth, 'w')

    paramTxt.write(file_name)
    paramTxt.write('Hyperparameters')
    paramTxt.write('exp_id:\t' + exp_id + '\n')
    paramTxt.write('data_name:\t' + data_name + '\n')
    paramTxt.write('model_name:\t' + model_name + '\n')
    paramTxt.write('epochs:\t\t' + str(epochs) + '\n')
    paramTxt.write('learning_rate:\t' + str(learning_rate) + '\n\n')
    paramTxt.write('X_train.shape:\t' + str(X_train.shape) + '\n')
    paramTxt.write('y_train.shape:\t' + str(y_train.shape) + '\n\n')
    paramTxt.write('X_val.shape:\t' + str(X_val.shape) + '\n')
    paramTxt.write('y_val.shape:\t' + str(y_val.shape) + '\n\n')
    paramTxt.write('X_test.shape:\t' + str(X_test.shape) + '\n')
    paramTxt.write('y_test.shape:\t' + str(y_test.shape) + '\n\n')
    paramTxt.write('dim_per_layer:\t' + str(dim_per_layer) + '\n')
    if 'ann' in model_name:
        paramTxt.write('activation_per_layer:\t' + str(activation_per_layer) + '\n')
    paramTxt.write('\nF1-score: '+ str(round(total_f1,4)) +' | Accuracy: ' + str(round(total_acc, 4)) + ' | AUC-ROC: ' + str(round(total_auc_roc, 4)) + '\n\n')
    paramTxt.write("Proportion of ones in test set: " + str(round(np.mean(loader_val.dataset.targets),3)) + '\n')
    paramTxt.write("Proportion of ones predicted in test set: " + str(round(np.mean(y_preds_test_flat),3)) + '\n')
    paramTxt.write("Proportion of ones in train set: " + str(round(np.mean(loader_train.dataset.targets),3)) + '\n')
    paramTxt.write("Proportion of ones predicted in train set: " + str(round(np.mean(y_preds_train_flat),3)) + '\n')
    paramTxt.write('\nArchitecture:\n')
    paramTxt.write(str(model.__dict__['_modules']))
    paramTxt.close()
    
    plt.style.use('default')
    PlotROC(fpth, y_val, y_preds_test_flat, addtlNameInfo = model_name.upper() + '_' + data_name)
    
    PlotConfusionMatrix(y_val, y_preds_test_flat, 2, output_dir, addtlNameInfo = model_name.upper() + '_' + data_name)

    print(f'F1-score: {total_f1:.4f} | Accuracy: {total_acc:.4f} | AUC-ROC: {total_auc_roc:.4f}')
    print("Proportion of ones in test set:", round(np.mean(loader_val.dataset.targets),3))
    print("Proportion of ones predicted in test set: ", round(np.mean(y_preds_test_flat),3))
    print("Proportion of ones in train set:", round(np.mean(loader_train.dataset.targets),3))
    print("Proportion of ones predicted in train set: ", round(np.mean(y_preds_train_flat),3), '\n')

In [32]:
def DefineModel(model_name, dim_per_layer=None, activation_per_layer=None):
    input_size = loader_train.dataset.get_number_of_features()

    if 'ann' in model_name:
        dim_per_layer = [input_size] + dim_per_layer
        model         = model_MLP.MLP(dim_per_layer, activation_per_layer)
    elif model_name == 'lr':
        dim_per_layer = [input_size] + dim_per_layer
        model         = LogisticRegression(dim_per_layer[0], dim_per_layer[1])

    return model

In [34]:
dim_per_layer_per_MLP = {'ann_s':  [16, 2],
                         'ann_m':  [32, 16, 2],
                         'ann_l':  [64, 32, 16, 2],
                         'ann_xl': [256, 128, 64, 32, 16, 2],
                         'lr':     [2]
                         }  # dimension for each layer for each network to train, ignoring input layer size
activation_per_layer_per_MLP = {'ann_s':  [nn.ReLU(), None],
                                'ann_m':  [nn.ReLU(), nn.ReLU(), None],
                                'ann_l':  [nn.ReLU(), nn.ReLU(), nn.ReLU(), None],
                                'ann_xl': [nn.ReLU(), nn.ReLU(), nn.ReLU(), nn.ReLU(), nn.ReLU(), None],
                                'lr':     [None]
                                } # ignore input layer size

data_names          = ['blood'] #'adult', 'compas', 'credit', 'german', 'heloc', 'credit']#, 'rcdv', 'student'] # 'lending-club',
model_names         = ['lr', 'ann_s', 'ann_m', 'ann_l', 'ann_xl']
epochs              = 100
learning_rate       = 0.001
use_class_weighting = True

for data_name in data_names:
    for model_name in model_names:
        print("Training:", model_name, "on", data_name)
        # Define hyperparameters
        exp_id   = getExperimentID()
        dir_name = model_name.upper()

        # Get the data for training and evaluation
        if data_name in ['compas', 'blood']:
            download_data = False
        else:
            download_data = True
        loader_train, loader_val, loader_test = return_loaders(data_name=data_name, download=download_data, batch_size=64, scaler='minmax')

        X_train, y_train = loader_train.dataset.data, loader_train.dataset.targets.to_numpy()
        X_val, y_val     = loader_val.dataset.data, loader_val.dataset.targets.to_numpy()
        X_test, y_test   = loader_test.dataset.data, loader_test.dataset.targets.to_numpy()

        # Define the model
        layer_info_str = '' #empty for lr, fill for ann
        for d in dim_per_layer_per_MLP[model_name]:
            layer_info_str += str(d) + '_'

        model = DefineModel(model_name, dim_per_layer_per_MLP[model_name], activation_per_layer_per_MLP[model_name])

        # Train the model
        best_model_name = training(model, loader_train, loader_val, model_name, dir_name,
                                   learning_rate, epochs, data_name, exp_id, layer_info_str, use_class_weighting)

        # Evaluate the model
        total_acc, total_f1, total_auc_roc, y_preds_test_flat, y_preds_train_flat = \
            EvaluateNetwork(best_model_name, model_name, dim_per_layer_per_MLP[model_name],
                            activation_per_layer_per_MLP[model_name], loader_train, loader_val, use_class_weighting)

        # Save model info
        SaveModelInfo(model_name, data_name, model, exp_id, epochs, learning_rate,
                      X_train, y_train, X_val, y_val, X_test, y_test, total_f1, total_acc, total_auc_roc,
                      loader_val, y_preds_test_flat, loader_train, y_preds_train_flat, layer_info_str,
                      use_class_weighting, dim_per_layer_per_MLP[model_name], activation_per_layer_per_MLP[model_name])

Training: lr on blood
Epoch 0/99
----------
train: Loss: 0.7354 | F1-score: 0.0000 | AUC ROC: 0.5085 | Accuracy: 0.7537
test: Loss: 0.7089 | F1-score: 0.0000 | AUC ROC: 0.4781 | Accuracy: 0.7983
new best model 0
Epoch 1/99
----------
train: Loss: 0.7342 | F1-score: 0.0000 | AUC ROC: 0.5111 | Accuracy: 0.7537
test: Loss: 0.7081 | F1-score: 0.0000 | AUC ROC: 0.4855 | Accuracy: 0.7983
new best model 1
Epoch 2/99
----------
train: Loss: 0.6829 | F1-score: 0.0000 | AUC ROC: 0.5163 | Accuracy: 0.7537
test: Loss: 0.7072 | F1-score: 0.0000 | AUC ROC: 0.4904 | Accuracy: 0.7983
new best model 2
Epoch 3/99
----------
train: Loss: 0.6683 | F1-score: 0.0000 | AUC ROC: 0.5186 | Accuracy: 0.7537
test: Loss: 0.7063 | F1-score: 0.0000 | AUC ROC: 0.4952 | Accuracy: 0.7983
new best model 3
Epoch 4/99
----------
train: Loss: 0.6759 | F1-score: 0.0000 | AUC ROC: 0.5246 | Accuracy: 0.7537
test: Loss: 0.7055 | F1-score: 0.0000 | AUC ROC: 0.4956 | Accuracy: 0.7983
new best model 4
Epoch 5/99
----------
train:

In [None]:
# Datasets:
# LR, ANN_S, ANN_M, ANN_L Compas
# LR, ANN_S, ANN_M, ANN_L german
# LR, ANN_S, ANN_M, ANN_L heloc
# LR, ANN_S, ANN_M, ANN_L adult income
# synthetic dataset
# LR, ANN_S, ANN_M, ANN_L give me some credit (credit)

In [33]:
# gather all the scores and save them
import os
import csv

for model_name in model_names:
    folder_path = './models/ClassWeighted/' + model_name.upper() +'/' # Replace with the actual folder path containing the text files
    output_file = 'parsed_values.csv'  # Replace with the desired output file name
        
    # Define the fieldnames for the CSV file
    fieldnames = ['data_name', 'F1-score', 'Accuracy', 'AUC-ROC', 'Proportion of ones in test set',
                  'Proportion of ones predicted in test set', 'Proportion of ones in train set',
                  'Proportion of ones predicted in train set']
    
    # Create a list to store the parsed values
    parsed_values = []
    
    # Iterate over the text files in the folder
    for filename in os.listdir(folder_path):
        if filename.endswith('.txt'):  # Process only text files
            with open(os.path.join(folder_path, filename), 'r') as file:
                content = file.read()
    
                # Extract the required values using string manipulation or regular expressions
                data_name = content.split('data_name:')[1].split('\n')[0]
                f1_score = content.split('F1-score: ')[1].split(' |')[0]
                accuracy = content.split('Accuracy: ')[1].split(' |')[0]
                auc_roc = content.split('AUC-ROC: ')[1].split('\n')[0]
                ones_test = content.split('Proportion of ones in test set: ')[1].split('\n')[0]
                ones_predicted_test = content.split('Proportion of ones predicted in test set: ')[1].split('\n')[0]
                ones_train = content.split('Proportion of ones in train set: ')[1].split('\n')[0]
                ones_predicted_train = content.split('Proportion of ones predicted in train set: ')[1].split('\n')[0]
    
                # Append the parsed values to the list
                parsed_values.append({
                    'data_name': data_name,
                    'F1-score': f1_score,
                    'Accuracy': accuracy,
                    'AUC-ROC': auc_roc,
                    'Proportion of ones in test set': ones_test,
                    'Proportion of ones predicted in test set': ones_predicted_test,
                    'Proportion of ones in train set': ones_train,
                    'Proportion of ones predicted in train set': ones_predicted_train
                })
    
            # sort the rows by data_name
            parsed_values = sorted(parsed_values, key=lambda k: k['data_name'])
            # Write the parsed values to the CSV file
            with open(folder_path+output_file, 'w', newline='') as csvfile:
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()
                writer.writerows(parsed_values)
            
            print(f"Parsed values saved to {output_file}.")


Parsed values saved to parsed_values.csv.
Parsed values saved to parsed_values.csv.
Parsed values saved to parsed_values.csv.
Parsed values saved to parsed_values.csv.
Parsed values saved to parsed_values.csv.
Parsed values saved to parsed_values.csv.
Parsed values saved to parsed_values.csv.
Parsed values saved to parsed_values.csv.
Parsed values saved to parsed_values.csv.
Parsed values saved to parsed_values.csv.
Parsed values saved to parsed_values.csv.
Parsed values saved to parsed_values.csv.
Parsed values saved to parsed_values.csv.
Parsed values saved to parsed_values.csv.
Parsed values saved to parsed_values.csv.
Parsed values saved to parsed_values.csv.
Parsed values saved to parsed_values.csv.
Parsed values saved to parsed_values.csv.
Parsed values saved to parsed_values.csv.
Parsed values saved to parsed_values.csv.
Parsed values saved to parsed_values.csv.
Parsed values saved to parsed_values.csv.
Parsed values saved to parsed_values.csv.
Parsed values saved to parsed_valu

In [31]:
content

"20230629_0057_2_LR_german_summaryHyperparametersexp_id:\t20230629_0057\ndata_name:\tgerman\nmodel_name:\tlr\nepochs:\t\t100\nlearning_rate:\t0.001\n\nX_train.shape:\t(640, 60)\ny_train.shape:\t(640,)\n\nX_val.shape:\t(160, 60)\ny_val.shape:\t(160,)\n\nX_test.shape:\t(200, 60)\ny_test.shape:\t(200,)\n\ndim_per_layer:\t[2]\n\nF1-score: 0.6732 | Accuracy: 0.5812 | AUC-ROC: 0.6143\n\nProportion of ones in test set: 0.762\nProportion of ones predicted in test set: 0.517\nProportion of ones in train set: 0.686\nProportion of ones predicted in train set: 0.521\n\nArchitecture:\nOrderedDict([('linear', Linear(in_features=60, out_features=2, bias=True))])"