In [5]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import pickle
from helpers import TrainingAttnScoresLog
import matplotlib.pyplot as plt

import sys
sys.path.insert(0, '/home/wdwatson2/projects/CAT-Transformer/model')
from testingModel import CATTransformer, MyFTTransformer, Combined_Dataset, train, test, EarlyStopping

# Run on the GPU when PyTorch reports one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device_in_use = 'cuda'
else:
    device_in_use = 'cpu'
print(device_in_use)

cuda


In [2]:
# Use the income dataset: it has few features and a binary target.

# All three splits live in the same directory, so the location is stated once.
INCOME_DATA_DIR = '/home/wdwatson2/projects/CAT-Transformer/datasets/income'

df_train = pd.read_csv(f'{INCOME_DATA_DIR}/train.csv')
df_test = pd.read_csv(f'{INCOME_DATA_DIR}/test.csv')
df_val = pd.read_csv(f'{INCOME_DATA_DIR}/validation.csv')

cont_columns = ['age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss',
       'hours-per-week']
cat_columns = ['workclass', 'education', 'marital-status', 'occupation',
       'relationship', 'race', 'sex', 'native-country']
target = ['income']

# Sanity check (no need to touch): the three lists together must name exactly
# the columns present in the training frame.
yourlist = sorted(cont_columns + cat_columns + target)
oglist = sorted(df_train.columns)

assert(yourlist == oglist), "You may of spelled feature name wrong or you forgot to put on of them in the list"

# One entry per column in cat_columns, in the same order.
# NOTE(review): presumably the number of categories of each column -- confirm
# against what CATTransformer / MyFTTransformer expect for `cat_feat`.
cat_features = (10,16,7,16,6,5,2,43)

# Number of distinct target values, taken as the largest count seen in any split.
target_classes = [max(len(df_train[target].value_counts()), len(df_val[target].value_counts()),len(df_test[target].value_counts()))]
print(target_classes)

# Fit the scaler on the training split only, then apply the same transform to
# every split so validation/test statistics never influence preprocessing.
scaler = StandardScaler()
scaler.fit(df_train[cont_columns])

df_train[cont_columns] = scaler.transform(df_train[cont_columns])
df_test[cont_columns] = scaler.transform(df_test[cont_columns])
df_val[cont_columns] = scaler.transform(df_val[cont_columns])

# Wrap each split in the project's Dataset class.
train_dataset = Combined_Dataset(df_train, cat_columns=cat_columns, num_columns=cont_columns, task1_column=target[0])
val_dataset = Combined_Dataset(df_val, cat_columns=cat_columns, num_columns=cont_columns, task1_column=target[0])
test_dataset = Combined_Dataset(df_test, cat_columns=cat_columns, num_columns=cont_columns, task1_column=target[0])

batch_size = 256

# Only the training loader is shuffled. Evaluation loaders keep a fixed order so
# that repeated evaluations see identical batches.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

[2]


In [43]:
# Train a CAT-Transformer until early stopping triggers (or `epochs` is reached).

model_cat = CATTransformer(n_cont=len(cont_columns),
                           cat_feat=cat_features,
                           targets_classes=target_classes,
                           get_attn=True,
                           num_layers=10).to(device_in_use)

loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model_cat.parameters(), lr=0.0001)

early_stopping = EarlyStopping(patience=10, verbose=True)

# Per-epoch histories for this model.
train_losses = []
train_accuracies_1 = []
test_losses = []
test_accuracies_1 = []

epochs = 200

for t in range(epochs):
    train_loss, train_acc, train_attn = train(regression_on=False,
                                              get_attn=True,
                                              dataloader=train_dataloader,
                                              model=model_cat,
                                              loss_function=loss_function,
                                              optimizer=optimizer,
                                              device_in_use=device_in_use)
    # The validation split drives early stopping so that the test split stays
    # untouched by model selection.
    val_loss, val_acc, val_attn = test(regression_on=False,
                                       get_attn=True,
                                       dataloader=val_dataloader,
                                       model=model_cat,
                                       loss_function=loss_function,
                                       device_in_use=device_in_use)
    # The test split is evaluated for reporting only.
    test_loss, test_acc, attn = test(regression_on=False,
                                     get_attn=True,
                                     dataloader=test_dataloader,
                                     model=model_cat,
                                     loss_function=loss_function,
                                     device_in_use=device_in_use)
    train_losses.append(train_loss)
    train_accuracies_1.append(train_acc)
    test_losses.append(test_loss)
    test_accuracies_1.append(test_acc)

    epoch_str = f"Epoch [{t+1:2}/{epochs}]"
    train_metrics = f"Train: Loss {(train_loss)}, Accuracy {(train_acc)}"
    test_metrics = f"Test: Loss {(test_loss)}, Accuracy {(test_acc)}"
    print(f"{epoch_str:15} | {train_metrics:65} | {test_metrics:65}")

    # NOTE(review): an accuracy (higher is better) is passed here, as in the
    # original code -- confirm EarlyStopping treats its argument that way.
    early_stopping(val_acc)

    if early_stopping.early_stop:
        print("Early stopping")
        break

Epoch [ 1/200]  | Train: Loss 0.4820755975904749, Accuracy 0.7776770306238848       | Test: Loss 0.37563256706510273, Accuracy 0.8312101910828026      
Epoch [ 2/200]  | Train: Loss 0.34359556658944085, Accuracy 0.840299511538799       | Test: Loss 0.3371277987957001, Accuracy 0.8478161965423112       
Epoch [ 3/200]  | Train: Loss 0.32336301756883734, Accuracy 0.849045014478341       | Test: Loss 0.325788733788899, Accuracy 0.8525932666060054        
Epoch [ 4/200]  | Train: Loss 0.31620153873714046, Accuracy 0.8522331744128229      | Test: Loss 0.32153202295303346, Accuracy 0.8546405823475887      
Epoch [ 5/200]  | Train: Loss 0.3129150497157182, Accuracy 0.8551288426101963       | Test: Loss 0.3199089241879327, Accuracy 0.8577115559599636       
Epoch [ 6/200]  | Train: Loss 0.3086722001655778, Accuracy 0.8565328029483167       | Test: Loss 0.31687322173799787, Accuracy 0.8577115559599636      
Epoch [ 7/200]  | Train: Loss 0.3067282975387217, Accuracy 0.8577027698967504       | Te

In [42]:
# Train an FT-Transformer with the same settings as the CAT run.

model_ft = MyFTTransformer(n_cont=len(cont_columns),
                           cat_feat=cat_features,
                           targets_classes=target_classes,
                           get_attn=True,
                           num_layers=10).to(device_in_use)

loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model_ft.parameters(), lr=0.0001)

early_stopping = EarlyStopping(patience=10, verbose=True)

# Per-epoch histories for this model (these names are re-created by each
# training cell, so they always describe the most recently trained model).
train_losses = []
train_accuracies_1 = []
test_losses = []
test_accuracies_1 = []

epochs = 200

for t in range(epochs):
    train_loss, train_acc, train_attn = train(regression_on=False,
                                              get_attn=True,
                                              dataloader=train_dataloader,
                                              model=model_ft,
                                              loss_function=loss_function,
                                              optimizer=optimizer,
                                              device_in_use=device_in_use)
    # The validation split drives early stopping so that the test split stays
    # untouched by model selection.
    val_loss, val_acc, val_attn = test(regression_on=False,
                                       get_attn=True,
                                       dataloader=val_dataloader,
                                       model=model_ft,
                                       loss_function=loss_function,
                                       device_in_use=device_in_use)
    # The test split is evaluated for reporting only.
    test_loss, test_acc, attn = test(regression_on=False,
                                     get_attn=True,
                                     dataloader=test_dataloader,
                                     model=model_ft,
                                     loss_function=loss_function,
                                     device_in_use=device_in_use)
    train_losses.append(train_loss)
    train_accuracies_1.append(train_acc)
    test_losses.append(test_loss)
    test_accuracies_1.append(test_acc)

    epoch_str = f"Epoch [{t+1:2}/{epochs}]"
    train_metrics = f"Train: Loss {(train_loss)}, Accuracy {(train_acc)}"
    test_metrics = f"Test: Loss {(test_loss)}, Accuracy {(test_acc)}"
    print(f"{epoch_str:15} | {train_metrics:65} | {test_metrics:65}")

    # NOTE(review): an accuracy (higher is better) is passed here, as in the
    # original code -- confirm EarlyStopping treats its argument that way.
    early_stopping(val_acc)

    if early_stopping.early_stop:
        print("Early stopping")
        break

Epoch [ 1/200]  | Train: Loss 0.4919486457287376, Accuracy 0.7702477405013308       | Test: Loss 0.3597981427397047, Accuracy 0.8372383985441311       
Epoch [ 2/200]  | Train: Loss 0.3388649360902274, Accuracy 0.8404750065810641       | Test: Loss 0.33316743799618315, Accuracy 0.8489535941765242      
Epoch [ 3/200]  | Train: Loss 0.3236610382350523, Accuracy 0.8489280177834976       | Test: Loss 0.3331290479217257, Accuracy 0.8529344858962693       
Epoch [ 4/200]  | Train: Loss 0.3160080879704276, Accuracy 0.852379420281377        | Test: Loss 0.3176516762801579, Accuracy 0.8549818016378526       
Epoch [ 5/200]  | Train: Loss 0.3117789940825149, Accuracy 0.8546316066571119       | Test: Loss 0.3146205587046487, Accuracy 0.857484076433121        
Epoch [ 6/200]  | Train: Loss 0.31015400733075926, Accuracy 0.8556845769107023      | Test: Loss 0.3127829509122031, Accuracy 0.8588489535941766       
Epoch [ 7/200]  | Train: Loss 0.30704670054699057, Accuracy 0.8568837930328468      | Te

In [5]:
# Evaluation helper: accuracy plus the attention returned by the model.
def evaluate(model, dataloader, device_in_use):
    """Run ``model`` over ``dataloader`` without gradients.

    Args:
        model: called as ``model(cat_x, cont_x)`` and expected to return a
            ``(predictions, attention)`` pair. It is switched to evaluation
            mode and left in that mode.
        dataloader: iterable yielding ``(cat_x, cont_x, labels)`` batches.
        device_in_use: device the batch tensors are moved to before the
            forward pass.

    Returns:
        ``(accuracy, all_attentions)``. ``accuracy`` is the number of correct
        predictions divided by the number of samples seen, so a smaller final
        batch is weighted by its size. It is ``nan`` when no samples were
        seen. ``all_attentions`` is the per-batch attention arrays
        concatenated along axis 0, or ``None`` when there were no batches.
    """
    model.eval()  # Set model to evaluation mode
    correct = 0
    total = 0
    attentions = []

    with torch.no_grad():
        for (cat_x, cont_x, labels) in dataloader:
            cat_x = cat_x.to(device_in_use)
            cont_x = cont_x.to(device_in_use)
            labels = labels.to(device_in_use)

            predictions, attention = model(cat_x, cont_x)

            # Index of the highest score along dim 1 is the predicted class.
            _, predicted = torch.max(predictions, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

            # Concatenation below assumes every batch's attention agrees on all
            # axes except the first -- TODO confirm against the model.
            attentions.append(attention.cpu().numpy())

    avg_accuracy = correct / total if total else float('nan')
    all_attentions = np.concatenate(attentions, axis=0) if attentions else None

    return avg_accuracy, all_attentions

In [6]:
# Compare the attention each trained model produces for the two income classes.

# Split the validation frame by label value.
income_labels = df_val['income']
poor_samples = df_val.loc[income_labels == 0]
rich_samples = df_val.loc[income_labels == 1]

# Both subsets are wrapped with identical column settings.
dataset_kwargs = dict(cat_columns=cat_columns, num_columns=cont_columns, task1_column=target[0])
poor_dataset = Combined_Dataset(poor_samples, **dataset_kwargs)
rich_dataset = Combined_Dataset(rich_samples, **dataset_kwargs)

# A single batch holding the whole subset.
poor_dataloader = DataLoader(poor_dataset, batch_size=len(poor_dataset))
rich_dataloader = DataLoader(rich_dataset, batch_size=len(rich_dataset))

poor_acc_cat, poor_attn_cat = evaluate(model_cat, poor_dataloader, device_in_use)
print("poor_cat_acc", poor_acc_cat)

rich_acc_cat, rich_attn_cat = evaluate(model_cat, rich_dataloader, device_in_use)
print("rich cat acc", rich_acc_cat)

poor_acc_ft, poor_attn_ft = evaluate(model_ft, poor_dataloader, device_in_use)
print("poor ft acc", poor_acc_ft)

rich_acc_ft, rich_attn_ft = evaluate(model_ft, rich_dataloader, device_in_use)
print("rich ft acc", rich_acc_ft)


# Collapse axis 0 to get one attention score per position.
# NOTE(review): the original author describes this as averaging over heads --
# confirm what axis 0 of the model's attention output actually is.
poor_attn_cat = np.mean(poor_attn_cat, axis=0)
rich_attn_cat = np.mean(rich_attn_cat, axis=0)
poor_attn_ft = np.mean(poor_attn_ft, axis=0)
rich_attn_ft = np.mean(rich_attn_ft, axis=0)

print("\nsize of the attn array I am working with: ", poor_attn_cat.shape)
poor_attn_cat

poor_cat_acc 0.9341155234657039
rich cat acc 0.6151154653603919
poor ft acc 0.9483303249097473
rich ft acc 0.56962911126662

size of the attn array I am working with:  (14,)


array([0.07298017, 0.06870462, 0.07084739, 0.07104348, 0.06818973,
       0.0726997 , 0.07216004, 0.07220282, 0.07305511, 0.07131346,
       0.07429491, 0.07112397, 0.06871252, 0.07267208], dtype=float32)

In [1]:
def entropy(distribution):
    """Return the base-2 Shannon entropy of ``distribution``.

    The input is first rescaled so that its entries sum to one. A constant of
    1e-12 is added inside the logarithm so that zero entries do not produce
    ``log(0)``.
    """
    total_mass = np.sum(distribution)
    probs = np.divide(distribution, total_mass)  # normalise to sum to 1
    log_probs = np.log2(probs + 1e-12)  # small offset guards against log(0)
    return -np.sum(probs * log_probs)

In [8]:
# Entropy of each averaged attention distribution, per model and per class.
poor_cat_entropy, rich_cat_entropy, poor_ft_entropy, rich_ft_entropy = (
    entropy(attn_scores)
    for attn_scores in (poor_attn_cat, rich_attn_cat, poor_attn_ft, rich_attn_ft)
)

# Report the two models side by side, one line per class.
print(f"CAT class 0 Entropy: {poor_cat_entropy}, FT class 0 Entropy: {poor_ft_entropy}")
print(f"CAT class 1 Entropy: {rich_cat_entropy}, FT class 1 Entropy: {rich_ft_entropy}")

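# NOTE: in the recorded run, CAT's averaged attention has lower entropy than FT's only for class 0 (3.806914 vs 3.806958); for class 1 it is slightly higher (3.806682 vs 3.806593). All four values are within about 1e-3 bits of the uniform maximum log2(14) ≈ 3.8074, so these numbers alone do not show that CAT's attention is less entropic (i.e. more informative -> more interpretable).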

CAT class 0 Entropy: 3.8069140911102295, FT class 0 Entropy: 3.806957960128784
CAT class 1 Entropy: 3.8066823482513428, FT class 1 Entropy: 3.8065927028656006


# For a dataset:
1. Train CAT and FT models with various numbers of layers
2. Split the Train/Test/Val datasets by class into separate dataloaders
3. Evaluate each model on these per-class subsets, gather the attention distributions, and calculate the entropy of each
4. Store the distributions and entropies for each

## {Model : {Split : {Class : {Attn : [], Entropy : val}}}}

In [10]:
class DataStructure:
    """Nested dictionary log: {model: {split: {class: {'Attn': ..., 'Entropy': ...}}}}."""

    def __init__(self):
        # Top-level mapping from model name to that model's splits.
        self.data = dict()

    def add_model(self, model_name):
        """Create an empty entry for ``model_name`` unless one already exists."""
        self.data.setdefault(model_name, {})

    def add_split(self, model_name, split_name):
        """Create an empty entry for ``split_name`` under ``model_name``.

        The model entry is created first when it is missing; existing entries
        are left untouched.
        """
        self.data.setdefault(model_name, {}).setdefault(split_name, {})

    def add_class_data(self, model_name, split_name, class_name, attn_distributions, entropies):
        """Store attention and entropy for one class, replacing any prior record.

        Missing model and split entries are created on the way.
        """
        split_entry = self.data.setdefault(model_name, {}).setdefault(split_name, {})
        split_entry[class_name] = {
            'Attn': attn_distributions,
            'Entropy': entropies,
        }

    def get_data(self):
        """Return the underlying nested dictionary (the same object, not a copy)."""
        return self.data

In [40]:
def attn_entropy_get(log:DataStructure, trained_model, model_name, traindf:pd.DataFrame, testdf:pd.DataFrame, target, cat_columns, num_columns, device_in_use):
    """Record per-class attention distributions and their entropy in ``log``.

    For every distinct value of ``traindf[target]``, the matching rows of
    ``traindf`` and ``testdf`` are evaluated with ``trained_model`` as a single
    batch. The returned attention is averaged over axis 0 and stored together
    with its entropy under ``log[model_name]["train" | "test"]["class_<value>"]``.

    Args:
        log: DataStructure that receives the results via ``add_class_data``.
        trained_model: model passed to ``evaluate``.
        model_name: key under which the results are stored.
        traindf: training frame; also defines the set of classes.
        testdf: test frame.
        target: name of the label column (a single column name, not a list).
        cat_columns: categorical column names forwarded to Combined_Dataset.
        num_columns: numerical column names forwarded to Combined_Dataset.
        device_in_use: device forwarded to ``evaluate``.

    A class with no rows in a split is skipped for that split, because an empty
    subset cannot form a batch.
    """
    # Use the frames passed in by the caller rather than notebook globals.
    splits = (("train", traindf), ("test", testdf))

    for class_value in np.unique(traindf[target]):
        class_key = "class_" + str(class_value)

        for split_name, split_df in splits:
            class_samples = split_df.loc[split_df[target] == class_value]
            if class_samples.empty:
                continue

            class_dataset = Combined_Dataset(class_samples, cat_columns=cat_columns, num_columns=num_columns, task1_column=target)
            # One batch containing the whole subset.
            class_dataloader = DataLoader(class_dataset, batch_size=len(class_dataset))

            _, class_attn = evaluate(trained_model, class_dataloader, device_in_use)
            class_attn = class_attn.mean(0)

            log.add_class_data(model_name, split_name, class_key, class_attn, entropy(class_attn))

In [41]:
# Collect per-class attention distributions and entropies for the trained CAT model.
attn_log = DataStructure()

model_name = 'CAT'
# NOTE(review): this rebinds `target` to a plain string, while the data-loading
# cell defines it as the list ['income'] and earlier cells index it as target[0].
# Re-running those cells after this one would therefore see a different type.
target = 'income'

attn_entropy_get(
    attn_log,
    model_cat,
    model_name,
    df_train,
    df_test,
    target,
    cat_columns=cat_columns,
    num_columns=cont_columns,
    device_in_use=device_in_use,
)

# Show the nested {model: {split: {class: {...}}}} result.
data = attn_log.get_data()
data


{'CAT': {'train': {'class_0': {'Attn': array([0.07304911, 0.06869813, 0.0708304 , 0.07112652, 0.06817421,
           0.07270043, 0.07213499, 0.07222321, 0.07304791, 0.07126234,
           0.07427512, 0.07110429, 0.06870748, 0.07266589], dtype=float32),
    'Entropy': 3.8069122},
   'class_1': {'Attn': array([0.07013594, 0.06839065, 0.07032982, 0.07563202, 0.06845   ,
           0.07148976, 0.07168393, 0.07244731, 0.07478277, 0.07081585,
           0.07438321, 0.0708281 , 0.06841263, 0.07221805], dtype=float32),
    'Entropy': 3.8066483}},
  'test': {'class_0': {'Attn': array([0.07299901, 0.06870215, 0.07083803, 0.07106112, 0.06819244,
           0.07269025, 0.07214098, 0.07221694, 0.07305422, 0.07131784,
           0.07430075, 0.07111096, 0.06871327, 0.07266206], dtype=float32),
    'Entropy': 3.806914},
   'class_1': {'Attn': array([0.07009649, 0.0683968 , 0.07033315, 0.07574085, 0.06843449,
           0.07145561, 0.07172981, 0.07242835, 0.07471254, 0.07085246,
           0.07437216, 