# Self Attention Classifier Training
Training a classifier using a pretrained autoencoder as a base

In [1]:
import torch
from torch import nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from tqdm import tqdm
import numpy as np
from sklearn.metrics import f1_score

### Importing the data

In [2]:
X_train = np.load("data/Regular_processed/X_train.npy")
X_valid = np.load("data/Regular_processed/X_valid.npy")
y_train = np.load("data/Regular_processed/y_train.npy")
y_valid = np.load("data/Regular_processed/y_valid.npy")

for arr in [X_train, X_valid, y_train, y_valid]:
    print(arr.shape)

(988, 66)
(247, 66)
(988,)
(247,)


In [3]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cpu


### Defining the model

In [4]:
PATH_TO_MODELS = "models/Self_Attention_Classifier"
MODEL_NAME = "AtClass_66_24_8_ReLU_Sigmoid_10"  #InputColumns_HiddenSize0_HiddenSize1_ActivationFunction(Encoder&Hidden)_ActivationFunctionFinal_Dropout%
    
class Attention_Classifier(nn.Module):
    def __init__(self, input_columns=66, hidden_size=[24, 8], output_size=3, dropout_p=0.1, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.attention = nn.MultiheadAttention(embed_dim=1, num_heads=1, batch_first=True, dropout=dropout_p)
        self.classifier = nn.Sequential(
            nn.Linear(input_columns, hidden_size[0]),
            nn.ReLU(),
            nn.Dropout(dropout_p),
            nn.Linear(hidden_size[0], hidden_size[1]),
            nn.ReLU(),
            nn.Dropout(dropout_p),
            nn.Linear(hidden_size[1], output_size),
            nn.Sigmoid()
        )
    
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x_unsqueeze = torch.unsqueeze(x, dim=2)
        self_att_output, _ = self.attention(x_unsqueeze, x_unsqueeze, x_unsqueeze)
        self_att_output_squeezed = torch.squeeze(self_att_output)
        return self.classifier(self_att_output_squeezed)

In [5]:
class Horse_Health_Dataset(Dataset):
    def __init__(self, x: np.ndarray, y: np.ndarray):
        if x.shape[0] != y.shape[0]:
            raise Exception("Dataset Error: Sizes of X and y dont match")
        
        x_tensor = torch.from_numpy(x)
        y_tensor = torch.from_numpy(y)
        self.X = x_tensor.to(device)
        self.y = y_tensor.to(device)
        self.length = x.shape[0]

    def __len__(self):
        return self.length
    
    def __getitem__(self, index):
        return self.X[index], self.y[index]
    
def create_dataloader(X, y, batch_size=128):
    """Returns a torch dataloader for the given dataset and batch_size"""
    dataset = Horse_Health_Dataset(X, y)
    dataloaders = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)

    return dataloaders

### Training Pipeline

In [6]:
class EarlyStopper:
    """Implements Early Stoppage of training when there is not progress in validation set"""
    def __init__(self, patience=10, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.min_validation_loss = np.inf

    def early_stop(self, validation_loss):
        if validation_loss < self.min_validation_loss:
            self.min_validation_loss = validation_loss
            self.counter = 0
        elif validation_loss > (self.min_validation_loss + self.min_delta):
            self.counter += 1
            if self.counter >= self.patience:
                return True
        return False

In [7]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    train_loss, train_correct, train_f1 = 0, 0, 0
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        y = y.type(torch.LongTensor).to(device)
        X = X.type(torch.FloatTensor).to(device)
        pred = model(X)
        # print(f"pred size = {str(pred.size())}")
        # print(f"y size = {y.size()}")
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        train_correct += (torch.argmax(pred, dim=1) == y).sum().item()
        train_f1 += f1_score(y_pred=torch.argmax(pred, dim=1), y_true=y, average='micro')

    train_loss /= num_batches
    train_correct /= size
    train_f1 /= num_batches

    return train_loss, train_correct, train_f1

def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, test_correct, test_f1 = 0, 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            y = y.type(torch.LongTensor).to(device)
            X = X.type(torch.FloatTensor).to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            test_correct += (torch.argmax(pred, dim=1) == y).sum().item()
            test_f1 += f1_score(y_pred=torch.argmax(pred, dim=1), y_true=y, average='micro')
            
    test_loss /= num_batches
    test_correct /= size
    test_f1 /= num_batches
    
    return test_loss, test_correct, test_f1

def train(train_dataloader: DataLoader, validation_dataloader: DataLoader, model: nn.Module, loss_fn, optimizer, epochs=100, patience=5):
    early_stopper = EarlyStopper(patience=patience)
    tr_loss, tr_accuracy, tr_f1 = [], [], []
    va_loss, va_accuracy, va_f1 = [], [], []
    for t in range(epochs):
        train_loss, train_correct, train_f1 = train_loop(train_dataloader, model, loss_fn, optimizer)
        valid_loss, valid_correct, valid_f1 = test_loop(validation_dataloader, model, loss_fn)

        tr_loss.append(train_loss), tr_accuracy.append(train_correct), tr_f1.append(train_f1)
        va_loss.append(valid_loss), va_accuracy.append(valid_correct), va_f1.append(valid_f1)

        print(f"Epoch {t+1}: Train_accuracy: {(100*train_correct):>0.2f}%, Train_loss: {train_loss:>8f} Train_F1_batchwise: {train_f1:>0.2f}, Validation_accuracy: {(100*valid_correct):>0.2f}%, Validation_loss: {valid_loss:>8f}, Validation_F1_batchwise :{valid_f1:>0.2f}")

        if (t + 1) % 5 == 0:
            torch.save(model.state_dict(), f"{PATH_TO_MODELS}/{MODEL_NAME}_epoch_{t+1}.pt")

        if early_stopper.early_stop(valid_loss):
            print("Early Stopping Cutoff!")
            break

    return tr_accuracy, tr_loss, tr_f1, va_accuracy, va_loss, va_f1

### Training the architecture

In [8]:
# Create the dataloaders
train_dataloader = create_dataloader(batch_size=128, X=X_train, y=y_train)
valid_dataloader = create_dataloader(batch_size=128, X=X_valid, y=y_valid)

# Create the model
model = Attention_Classifier()
model.to(device)

# Define the optimizer and loss function
LEARNING_RATE = 0.01
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
loss_fn = nn.CrossEntropyLoss()  # Cannot use Cross Entropy Loss with Softmax

# Train the model
train_accuracy, train_loss, train_f1, valid_accuracy, valid_loss, valid_f1 = train(train_dataloader=train_dataloader, validation_dataloader=valid_dataloader,
                                                                                   model=model, loss_fn=loss_fn, optimizer=optimizer, epochs=100, patience=10)

Epoch 1: Train_accuracy: 34.21%, Train_loss: 1.091768 Train_F1_batchwise: 0.35, Validation_accuracy: 48.99%, Validation_loss: 1.059820, Validation_F1_batchwise :0.49
Epoch 2: Train_accuracy: 45.85%, Train_loss: 1.056583 Train_F1_batchwise: 0.46, Validation_accuracy: 48.58%, Validation_loss: 1.039802, Validation_F1_batchwise :0.49
Epoch 3: Train_accuracy: 45.85%, Train_loss: 1.051872 Train_F1_batchwise: 0.46, Validation_accuracy: 48.99%, Validation_loss: 1.043078, Validation_F1_batchwise :0.49
Epoch 4: Train_accuracy: 45.34%, Train_loss: 1.055975 Train_F1_batchwise: 0.45, Validation_accuracy: 48.58%, Validation_loss: 1.039779, Validation_F1_batchwise :0.49
Epoch 5: Train_accuracy: 45.55%, Train_loss: 1.050688 Train_F1_batchwise: 0.46, Validation_accuracy: 48.99%, Validation_loss: 1.033206, Validation_F1_batchwise :0.49
Epoch 6: Train_accuracy: 45.85%, Train_loss: 1.048824 Train_F1_batchwise: 0.46, Validation_accuracy: 48.99%, Validation_loss: 1.037905, Validation_F1_batchwise :0.49
Epoc

In [9]:
# Check final F1 Score
final_valid_dataloader = create_dataloader(batch_size=len(X_valid), X=X_valid, y=y_valid)
test_loss, test_correct, test_f1 = test_loop(final_valid_dataloader, model, loss_fn)
print(f"Final Validation F1 Score is: {test_f1}")

Final Validation F1 Score is: 0.6842105263157895
