# Setup

In [None]:
import torch
from transformers import AutoModel, AutoTokenizer
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd

# Hyperparameters
batch_size = 32
epochs = 10
learning_rate = 2e-3
seed = 42  # fixed RNG seed so a Restart & Run All gives repeatable results

# Seed PyTorch before any model is built: classifier weight initialisation,
# dropout masks and DataLoader shuffling all draw from this generator.
torch.manual_seed(seed)

# Check if GPU is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_check_point = 'bert-base-uncased'

# Initialise base model and tokenizer
base_model = AutoModel.from_pretrained(model_check_point).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_check_point)

# Freeze base model parameters: the encoder is used purely as a fixed feature
# extractor, so none of its weights should receive gradients.
base_model.requires_grad_(False)

# Make inference mode explicit (dropout disabled) rather than relying on the
# state the loader happens to return the model in.
base_model.eval()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

# Classification Head

In [None]:
class MLPClassifier(nn.Module):
  """Feed-forward binary classification head.

  Every hidden stage is Linear -> Dropout -> BatchNorm1d -> ReLU, followed by a
  final Linear layer with a single output unit and a Sigmoid. The output is
  therefore a probability in [0, 1], not a raw logit.

  Args:
      input_dim: Size of the input feature vector. Defaults to 768.
      hidden_dims: Output width of each hidden stage, in order. Defaults to
          (256, 128).
      dropout: Dropout probability used in every hidden stage. Defaults to 0.3.

  With the default arguments the network is layer-for-layer identical to the
  previous hard-coded version, including the ``layers.N.*`` state-dict keys.
  """

  def __init__(self, input_dim=768, hidden_dims=(256, 128), dropout=0.3):
      super().__init__()
      modules = []
      in_features = input_dim
      # Modules are created in forward order so that, for a given RNG seed, the
      # weight initialisation matches the original fixed-size definition.
      for out_features in hidden_dims:
          modules.extend([
            nn.Linear(in_features, out_features),
            nn.Dropout(dropout),
            nn.BatchNorm1d(out_features),
            nn.ReLU(),
          ])
          in_features = out_features
      modules.extend([nn.Linear(in_features, 1), nn.Sigmoid()])
      self.layers = nn.Sequential(*modules)

  def forward(self, x):
      """Return one probability per row of ``x``, with shape (batch, 1)."""
      return self.layers(x)

# Training Data Preprocessing

In [None]:
# Load data
# keep_default_na=False stops pandas from turning cells such as "NA", "None" or
# an empty string into NaN floats, which the tokenizer cannot accept as text.
# The dev split is read with the same option, so both splits are parsed alike.
df = pd.read_csv('./train.csv', keep_default_na=False)
premises = df['premise'].tolist()
hypotheses = df['hypothesis'].tolist()
labels = df['label'].tolist()

# Tokenise input sequence
# Each premise/hypothesis pair is encoded as one sequence, truncated or padded
# to exactly 128 tokens so the whole split stacks into fixed-size tensors.
tr_encoded_pair = tokenizer(premises, hypotheses, padding='max_length', truncation=True, max_length=128, return_tensors='pt')

token_ids = tr_encoded_pair['input_ids']
attention_masks = tr_encoded_pair['attention_mask'] # "0" for padded values and "1" for the other values
token_type_ids = tr_encoded_pair['token_type_ids']  # "0" for the first sentence and "1" for the second sentence

# Convert labels to tensors
# float32 with shape (N, 1), matching the (batch, 1) output of the classifier
# that nn.BCELoss compares against.
label_tensors = torch.tensor(labels, dtype=torch.float32).reshape(-1, 1)

# Create TensorDataset
train_dataset = TensorDataset(token_ids, attention_masks, token_type_ids, label_tensors)

# Define data loader
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Training

In [None]:
# Initialise classifier
classifier = MLPClassifier().to(device)

# Define loss function and optimiser
# The classifier ends in a Sigmoid, so it emits probabilities and is paired
# with nn.BCELoss (binary cross entropy on probabilities).
criterion = nn.BCELoss()
# Only the classifier's parameters are optimised; the base model stays fixed.
optimizer = torch.optim.Adam(classifier.parameters(), lr=learning_rate)

# Training loop
base_model.eval()   # frozen feature extractor: keep its dropout disabled
classifier.train()
for epoch in range(epochs):
  epoch_loss = 0.0
  num_batches = 0
  # Batch variables get their own names so they do not overwrite the
  # module-level `attention_masks`, `token_type_ids` and `labels`.
  for batch_ids, batch_mask, batch_types, batch_labels in train_loader:
    # BatchNorm1d cannot compute batch statistics from a single sample in
    # training mode and raises, so a trailing batch of size 1 is skipped.
    if batch_labels.size(0) < 2:
      continue

    batch_ids = batch_ids.to(device)
    batch_mask = batch_mask.to(device)
    batch_types = batch_types.to(device)
    batch_labels = batch_labels.to(device)

    # Forward pass
    # The base model is never updated (it is not in the optimiser), so its
    # forward pass needs no autograd bookkeeping.
    with torch.no_grad():
      outputs = base_model(input_ids=batch_ids, attention_mask=batch_mask, token_type_ids=batch_types)  # Generate word embeddings
      # Average pooling of the last hidden states over the sequence dimension.
      # NOTE(review): this mean is unmasked, so padded positions (inputs are
      # padded to max_length) contribute to the average. A mask-weighted mean
      # is probably preferable, but the evaluation cell must pool in exactly
      # the same way, so the behaviour is left unchanged here.
      pooled_output = outputs[0].mean(dim=1)
    probs = classifier(pooled_output)

    # Compute loss
    loss = criterion(probs, batch_labels)

    # Backward pass and optimisation
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    epoch_loss += loss.item()
    num_batches += 1

  # max(..., 1) avoids a ZeroDivisionError when no batch was processed.
  mean_loss = epoch_loss / max(num_batches, 1)
  print(f"Epoch {epoch+1}, Training Loss: {mean_loss}")

# Evaluation

In [None]:
from sklearn.metrics import confusion_matrix, f1_score, recall_score, precision_score, accuracy_score

# Load dev dataset
# keep_default_na=False keeps cells such as "NA" or "" as strings, not NaN.
df = pd.read_csv('./dev.csv', keep_default_na=False)
dev_premises = df['premise'].tolist()
dev_hypotheses = df['hypothesis'].tolist()
dev_labels = df['label'].tolist()

# Tokenise input sequence (same padding / truncation settings as for training)
dev_encoded_pair = tokenizer(dev_premises, dev_hypotheses, padding='max_length', truncation=True, max_length=128, return_tensors='pt')
dev_token_ids = dev_encoded_pair['input_ids']
dev_attention_masks = dev_encoded_pair['attention_mask']
dev_token_type_ids = dev_encoded_pair['token_type_ids']
dev_label_tensors = torch.tensor(dev_labels, dtype=torch.float32).reshape(-1,1)

# Data loader (no shuffling, so predictions stay in file order)
dev_dataset = TensorDataset(dev_token_ids, dev_attention_masks, dev_token_type_ids, dev_label_tensors)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)

# Flat Python lists with one scalar per example; sklearn expects 1-D targets.
y_pred = []
y_true = []

base_model.eval()
classifier.eval()   # disables dropout and makes BatchNorm use running statistics
with torch.no_grad():
  # Batch variables get their own names so they do not overwrite the
  # module-level tensors and lists created by earlier cells.
  for batch_ids, batch_mask, batch_types, batch_labels in dev_loader:
    batch_ids = batch_ids.to(device)
    batch_mask = batch_mask.to(device)
    batch_types = batch_types.to(device)

    # Forward pass
    outputs = base_model(input_ids=batch_ids, attention_mask=batch_mask, token_type_ids=batch_types)
    # Average pooling; unmasked, to match the pooling used during training.
    pooled_output = outputs[0].mean(dim=1)
    probs = classifier(pooled_output)  # sigmoid outputs in [0, 1]

    # Round each probability to a hard label. 1: entailment; 0: non-entailment
    y_pred.extend(torch.round(probs).reshape(-1).cpu().tolist())

    # Ground truth labels (never moved to the device; they are only collected)
    y_true.extend(batch_labels.reshape(-1).tolist())

cf_matrix = confusion_matrix(y_true, y_pred)
# NOTE: the heading below says "Test Set", but the data evaluated here is
# dev.csv. The string is kept unchanged so it matches the recorded output.
print("Confusion Matrix of the Test Set")
print("-----------")
print(cf_matrix)
print('F1-Score macro: ',f1_score(y_true, y_pred, average='macro'))
print('Recall-Score macro: ',recall_score(y_true, y_pred, average='macro'))
print('Precision-Score macro: ',precision_score(y_true, y_pred, average='macro'))
print('Accuracy-Score: ',accuracy_score(y_true, y_pred))

Confusion Matrix of the Test Set
-----------
[[2172 1087]
 [1028 2450]]
F1-Score macro:  0.6855265688712298
Recall-Score macro:  0.6854449685137861
Precision-Score macro:  0.685713705117331
Accuracy-Score:  0.6860620454208104
