#Student Numbers:

18071472

18017060

21104272

21047887

# Download Related Packages/Files

Lecturers and TAs, you should run this to get the essential packages and trained models for testing.

      1. Upgrade gdown (For downloading 3 and 4)
      2. Download HuggingFace's Transformers package
      3. Download the Pre-processed Data
      4. Download the Trained Models

In [None]:
# Upgrade gdown
!pip install --upgrade --no-cache-dir gdown

# Download HuggingFace's package
!pip install transformers

# Download Data
!gdown 1nWNc8QmJBb3oMvUUjTkLbxM7RnaqL4da

# If the above one doesn't work, try this one
# !gdown 1hD2hO8qMaHFzjK2_vZb7eF61oSW7hV_9

!unzip data.zip

# Download trained models
!gdown 1mSRIFcseKq3Xje3CFGWFVyD82Vzr8D-m

# If the above one doesn't work, try this one
# !gdown 1uMRUVn_Aif-FMpLOa1TxfjRubHi3Us0M

!unzip models.zip

# Mount Google Drive

For Lecturers and TAs, you don't have to run this part.

In [None]:
from google.colab import drive
drive.mount('/content/gdrive')

# Import

Lecturers and TAs, you should run this to import important packages.

In [None]:
import torch.nn as nn
from transformers import AutoModel
from transformers import BertTokenizerFast
import pandas as pd
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.optim as optim
import torch
import os
import numpy as np
import matplotlib.pyplot as plt

# DataLoader

For Lecturers and TAs, you don't have to run this part, since the testing script doesn't use the dataloader. If you want to test other parts like *model training*, *model evaluation* and *Identify Successful/Fail Cases for Each Algorithm*, you can run this.

      How data is loaded can be seen in this part.

In [None]:
class SST2_Dataset(Dataset):
    """Map-style dataset backed by a headerless, tab-separated file.

    Plain mode (``is_QA=False``): column 0 is the label, column 1 the input.
    QA mode (``is_QA=True``): columns 0 and 1 form the input pair and
    column 2 is the label.
    """

    def __init__(self, file_path, is_QA=False):
        # The whole table is read up front and kept as a NumPy array so that
        # samples can be fetched by (row, column) indexing.
        table = pd.read_csv(file_path, sep='\t', header=None)
        self.file = table.to_numpy()
        self.length = self.file.shape[0]
        self.is_QA = is_QA

    def __len__(self):
        """Return the number of rows read from the file."""
        return self.length

    def __getitem__(self, idx):
        """Return ``(x, y)`` for row ``idx``; ``x`` is a two-item list in QA mode."""
        if not self.is_QA:
            return self.file[idx, 1], self.file[idx, 0]

        pair = [self.file[idx, 0], self.file[idx, 1]]
        return pair, self.file[idx, 2]

# Model Implementation

Lecturers and TAs, you should run this to get the structure of the Models. Otherwise, the model cannot be loaded.

      How the models are implemented can be seen in this part.

In [None]:
class BertClassifier(nn.Module):
    """Pretrained encoder followed by a single linear classification head.

    Args:
        num_classes: Output width of the linear head.
        mode: ``'plain'`` or ``'qa'`` feeds the second encoder output
            (``[1]``) to the head; ``'all'`` feeds the ReLU of the mean over
            dim 1 of the first encoder output (``[0]``).
        BertModel: Name or path passed to ``AutoModel.from_pretrained``.
    """

    def __init__(self, num_classes, mode, BertModel='bert-base-uncased'):
        super().__init__()

        # Load Pretrained Bert Model
        self.BertModel = AutoModel.from_pretrained(BertModel)
        # Size the head from the encoder's config so that checkpoints whose
        # hidden size is not 768 also work; 768 remains the fallback.
        hidden_size = getattr(self.BertModel.config, 'hidden_size', 768)
        self.classifier = nn.Linear(hidden_size, num_classes)
        self.mode = mode

    def forward(self, x):
        """Return the logits for the tokenised batch ``x``.

        Args:
            x: Mapping of keyword arguments forwarded to the encoder.

        Raises:
            ValueError: If ``self.mode`` is not 'plain', 'qa' or 'all'.
        """
        if self.mode in ('plain', 'qa'):
            features = self.BertModel(**x)[1]
        elif self.mode == 'all':
            # NOTE: the mean runs over every position along dim 1; no
            # attention mask is applied here.
            features = torch.relu(torch.mean(self.BertModel(**x)[0], dim=1))
        else:
            raise ValueError('Mode doesn\'t exist!')
        return self.classifier(features)

    def predict(self, x, tokeniser, device, candidate_answers=None):
        """Predict the class index for a single, unbatched input ``x``.

        Args:
            x: The raw input handed to ``tokeniser``.
            tokeniser: Callable producing the model inputs; it is called with
                ``return_tensors='pt'`` and its result is moved to ``device``.
            device: Device the tokenised inputs are moved to.
            candidate_answers: Exactly two answers, required when the mode is
                not 'plain' or 'all'. ``x`` is scored against each one and
                the index of the higher score is returned.

        Returns:
            A 0-d tensor with the arg-max index, or ``None`` (after printing
            a message) when ``candidate_answers`` is missing or does not have
            exactly two entries.
        """
        # Inference only: no autograd graph is needed for an arg-max.
        with torch.no_grad():
            if self.mode in ('plain', 'all'):
                encoded = tokeniser(x, return_tensors='pt').to(device)
                return torch.argmax(self.forward(encoded).reshape(-1), 0)

            if candidate_answers is None:
                print('No candidate answers have been provided')
                return None
            if len(candidate_answers) != 2:
                print("In this version, only binary candidate answers are supported")
                return None

            scores = []
            for candidate_answer in candidate_answers:
                # Use the tokeniser that was passed in, not a module-level global.
                encoded = tokeniser(x, candidate_answer, return_tensors='pt').to(device)
                scores.append(self.forward(encoded).reshape(-1))

            # Concatenate the per-answer scores on their own device instead of
            # rebuilding a tensor from a Python list of tensors.
            return torch.argmax(torch.cat(scores), 0)

# Model Training

For Lecturers and TAs, you don't have to run this part.

      1. Partial fine-tuning and complete fine-tuning are in this part.
      2. Two-stage training is in this part.
      3. The logic for training all models is in this part.
      4. Saving of the best models and checkpoints is in this part.

## Training Block

In [None]:
def _qa_answer_text(answer):
    """Render one collated QA answer as plain text."""
    # If the collated answer is a 0-d tensor, str() would give its repr
    # (e.g. 'tensor(True)') instead of the value, so unwrap it first.
    if torch.is_tensor(answer):
        answer = answer.item()
    return str(answer)


def _batch_loss(model, tokenizer, criterion, batch, device, is_QA):
    """Tokenise one collated batch, run the model and return the loss."""
    inputs, labels = batch
    if is_QA:
        pairs = [[text, _qa_answer_text(answer)] for text, answer in zip(inputs[0], inputs[1])]
        x_args = tokenizer(pairs, return_tensors='pt', padding=True).to(device)
        labels = labels.double().to(device)
        # The QA head's output is flattened to line up with the 1-d labels.
        outputs = model(x_args).reshape(-1)
    else:
        x_args = tokenizer(list(inputs), return_tensors='pt', padding=True).to(device)
        labels = labels.to(device)
        outputs = model(x_args)
    return criterion(outputs, labels)


def _write_losses(path, losses):
    """Overwrite ``path`` with the loss history, each value followed by a space."""
    with open(path, 'w') as f:
        f.write(''.join(f'{value} ' for value in losses))


def train_bert_classifier(
    tokenizer,
    model,
    model_name,
    bert_learning_rate,
    classifier_learning_rate,
    train_set,
    val_set,
    batch_size,
    num_epochs,
    device,
    criterion,
    checkpoint_path,
    resume_training,
    save_epoch,
    last_backup,
    train_part,
    is_QA = False
):
    """Fine-tune ``model`` and keep the weights with the lowest validation loss.

    Args:
        tokenizer: Callable that turns a batch of raw inputs into model inputs.
        model: Module exposing ``BertModel`` and ``classifier`` sub-modules.
        model_name: Path prefix for ``{model_name}.pt`` (best weights) and the
            ``_train_loss.txt`` / ``_val_loss.txt`` history files.
        bert_learning_rate: Adam learning rate for ``model.BertModel``.
        classifier_learning_rate: Adam learning rate for ``model.classifier``.
        train_set: Training dataset.
        val_set: Validation dataset.
        batch_size: Batch size of both loaders.
        num_epochs: Total number of epochs, counting epochs already completed
            by a resumed checkpoint.
        device: Device the model and batches are moved to.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        checkpoint_path: Path prefix of the two alternating checkpoint files
            ``{checkpoint_path}_0.pt`` and ``{checkpoint_path}_1.pt``.
        resume_training: If true, restore model, optimisers and loss history
            from a checkpoint before training.
        save_epoch: Write a checkpoint every ``save_epoch`` epochs.
        last_backup: Index (0 or 1) of the checkpoint to try first on resume.
        train_part: If true, only the last encoder layer, the pooler and the
            classifier are trained; otherwise every parameter is trained.
        is_QA: If true, each input is a pair, labels are cast to double and
            the model output is flattened before the loss.

    Returns:
        ``(model, train_loss, valid_loss)`` where the two lists hold the mean
        batch loss of every epoch.

    Raises:
        ValueError: If ``train_set`` or ``val_set`` is empty.
    """
    if len(train_set) == 0 or len(val_set) == 0:
        raise ValueError('train_set and val_set must both be non-empty')

    trainloader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    # Validation batches are kept in a fixed order so that the mean of the
    # batch losses does not depend on how samples fall into the last batch.
    validloader = DataLoader(val_set, batch_size=batch_size, shuffle=False)

    if train_part:
        for param in model.parameters():
            param.requires_grad = False
        trainable = (
            model.BertModel.encoder.layer[-1],
            model.BertModel.pooler,
            model.classifier,
        )
        for module in trainable:
            for param in module.parameters():
                param.requires_grad = True
    else:
        for param in model.parameters():
            param.requires_grad = True

    bert_optimizer = optim.Adam(model.BertModel.parameters(), lr=bert_learning_rate)
    classifier_optimizer = optim.Adam(model.classifier.parameters(), lr=classifier_learning_rate)

    train_loss = []
    valid_loss = []
    prev_epoch = 0
    min_valid_loss = float('inf')
    backup = 0

    if resume_training:

        loaded_backup = last_backup
        try:
            checkpoint = torch.load(f'{checkpoint_path}_{last_backup}.pt',map_location=device)
        except Exception:
            # The preferred checkpoint is missing or unreadable: fall back to
            # the other slot. KeyboardInterrupt/SystemExit still propagate.
            loaded_backup = 1 - last_backup
            print(f'Could not load {checkpoint_path}_{last_backup}.pt, trying {checkpoint_path}_{loaded_backup}.pt')
            checkpoint = torch.load(f'{checkpoint_path}_{1-last_backup}.pt',map_location=device)

        model.load_state_dict(checkpoint['model_state_dict'])
        bert_optimizer.load_state_dict(checkpoint['bert_optimizer_state_dict'])
        classifier_optimizer.load_state_dict(checkpoint['classifier_optimizer_state_dict'])
        prev_epoch = checkpoint['epoch']
        train_loss = checkpoint['training_loss']
        valid_loss = checkpoint['validation_loss']
        min_valid_loss = checkpoint['min_valid_loss']
        del checkpoint

        # Write the next checkpoint to the slot that was NOT just loaded, so
        # the checkpoint we resumed from survives a failed save.
        backup = 1 - loaded_backup

    model.to(device)

    counter = 0
    for epoch in range(prev_epoch, num_epochs):

        # Training mode
        model.train()
        running_loss = 0
        for train_data in trainloader:
            bert_optimizer.zero_grad()
            classifier_optimizer.zero_grad()
            loss = _batch_loss(model, tokenizer, criterion, train_data, device, is_QA)
            loss.backward()
            bert_optimizer.step()
            classifier_optimizer.step()
            running_loss += loss.item()

        epoch_train_loss = running_loss / len(trainloader)
        print(f'epoch {epoch+1}, training loss = {epoch_train_loss}')
        train_loss.append(epoch_train_loss)

        # Evaluation Mode; gradients are not needed, so skip graph building.
        model.eval()
        running_loss = 0
        with torch.no_grad():
            for val_data in validloader:
                loss = _batch_loss(model, tokenizer, criterion, val_data, device, is_QA)
                running_loss += loss.item()
        epoch_valid_loss = running_loss / len(validloader)

        # Save the best model
        if epoch_valid_loss < min_valid_loss:
            print(f'epoch {epoch+1}, validation loss = {epoch_valid_loss}, lowest validation loss = True, save model')
            torch.save(model.state_dict(), f'{model_name}.pt')
            min_valid_loss = epoch_valid_loss
        else:
            print(f'epoch {epoch+1}, validation loss = {epoch_valid_loss}, lowest validation loss = False, do not save model')
        valid_loss.append(epoch_valid_loss)

        counter += 1

        # Regularly save models between save_epoch epochs, for resuming training
        if counter == save_epoch:
            counter = 0
            torch.save({
                'epoch': epoch+1,
                'model_state_dict': model.state_dict(),
                'bert_optimizer_state_dict': bert_optimizer.state_dict(),
                'classifier_optimizer_state_dict': classifier_optimizer.state_dict(),
                'training_loss': train_loss,
                'validation_loss': valid_loss,
                'min_valid_loss': min_valid_loss
            }, f'{checkpoint_path}_{backup}.pt')
            print(f'Model checkpoint has been saved to {checkpoint_path}_{backup}.pt')
            # Alternate between the two checkpoint slots.
            backup = 1 - backup

        # The full loss history is rewritten after every epoch.
        _write_losses(f'{model_name}_train_loss.txt', train_loss)
        _write_losses(f'{model_name}_val_loss.txt', valid_loss)
        print('Loss has been updated.')

    return model, train_loss, valid_loss

## Set Hyperparameters

In [None]:
# Runtime resources shared by every training run below.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

# Schedule: epochs, batch size and the two Adam learning rates
# (encoder and classifier head are optimised separately).
num_epochs = 10
batch_size = 70
bert_learning_rate = 2e-5
classifier_learning_rate = 1e-3

# Losses: `criterion` is passed to the two-class models,
# `criterion_QA` to the single-logit QA models.
criterion = nn.CrossEntropyLoss()
criterion_QA = nn.BCEWithLogitsLoss()

## Binary Classification Model Training

### Load Binary Dataset for BertPlain and BertAll

In [None]:
binary_train_set = SST2_Dataset('data/binary_train.csv')
binary_val_set = SST2_Dataset('data/binary_val.csv')
# binary_test_set = SST2_Dataset('data/binary_test.csv')

### BertPlain (Complete fine-tuning)

In [None]:
binaryBertPlain = BertClassifier(num_classes=2, mode='plain')

In [None]:
binaryBertPlain, binaryBertPlain_train_loss, binaryBertPlain_valid_loss = train_bert_classifier(
    tokenizer=tokenizer,
    model=binaryBertPlain, 
    model_name='/content/gdrive/MyDrive/binaryBertPlain', 
    bert_learning_rate=bert_learning_rate,
    classifier_learning_rate=classifier_learning_rate,
    train_set=binary_train_set, 
    val_set=binary_val_set, 
    batch_size=batch_size, 
    num_epochs=num_epochs, 
    device=device, 
    criterion=criterion, 
    checkpoint_path='/content/gdrive/MyDrive/binaryBertPlain_checkpoint',
    resume_training=False, # Set this to True if you want to restore training
    save_epoch=10,
    last_backup=0,
    train_part=False
)

### BertPlain (Partial fine-tuning)

In [None]:
binaryBertPlainPart = BertClassifier(num_classes=2, mode='plain')

In [None]:
binaryBertPlainPart, binaryBertPlainPart_train_loss, binaryBertPlainPart_valid_loss = train_bert_classifier(
    tokenizer=tokenizer,
    model=binaryBertPlainPart, 
    model_name='/content/gdrive/MyDrive/binaryBertPlainPart', 
    bert_learning_rate=bert_learning_rate,
    classifier_learning_rate=classifier_learning_rate,
    train_set=binary_train_set, 
    val_set=binary_val_set, 
    batch_size=batch_size, 
    num_epochs=num_epochs, 
    device=device, 
    criterion=criterion, 
    checkpoint_path='/content/gdrive/MyDrive/binaryBertPlainPart_checkpoint',
    resume_training=False, # Set this to True if you want to restore training
    save_epoch=10,
    last_backup=0,
    train_part=True
)

### BertALL (Complete fine-tuning)

In [None]:
binaryBertALL = BertClassifier(num_classes=2, mode='all')

In [None]:
binaryBertALL, binaryBertALL_train_loss, binaryBertALL_valid_loss = train_bert_classifier(
    tokenizer=tokenizer, 
    model=binaryBertALL, 
    model_name='/content/gdrive/MyDrive/binaryBertALL', 
    bert_learning_rate=bert_learning_rate,
    classifier_learning_rate=classifier_learning_rate,
    train_set=binary_train_set, 
    val_set=binary_val_set, 
    batch_size=batch_size, 
    num_epochs=num_epochs, 
    device=device, 
    criterion=criterion, 
    checkpoint_path='/content/gdrive/MyDrive/binaryBertALL_checkpoint',
    resume_training=False, # Set this to True if you want to restore training
    save_epoch=10,
    last_backup=0,
    train_part=False
)

### BertALL (Partial fine-tuning)

In [None]:
binaryBertALLPart = BertClassifier(num_classes=2, mode='all')

In [None]:
binaryBertALLPart, binaryBertALLPart_train_loss, binaryBertALLPart_valid_loss = train_bert_classifier(
    tokenizer=tokenizer, 
    model=binaryBertALLPart, 
    model_name='/content/gdrive/MyDrive/binaryBertALLPart', 
    bert_learning_rate=bert_learning_rate,
    classifier_learning_rate=classifier_learning_rate,
    train_set=binary_train_set, 
    val_set=binary_val_set, 
    batch_size=batch_size, 
    num_epochs=num_epochs, 
    device=device, 
    criterion=criterion, 
    checkpoint_path='/content/gdrive/MyDrive/binaryBertALLPart_checkpoint',
    resume_training=False, # Set this to True if you want to restore training
    save_epoch=10,
    last_backup=0,
    train_part=True
)

### Load Binary Dataset for BertQA with True/False Answer

In [None]:
TF_binary_train_set = SST2_Dataset('data/binary_train_QA_TF.csv', True)
TF_binary_val_set = SST2_Dataset('data/binary_val_QA_TF.csv', True)
# TF_binary_test_set = SST2_Dataset('data/binary_test_QA_TF.csv', True)

### BertQA with TF (Complete fine-tuning)

In [None]:
binaryBertQATF = BertClassifier(num_classes=1, mode='qa')

In [None]:
binaryBertQATF, binaryBertQATF_train_loss, binaryBertQATF_valid_loss = train_bert_classifier(
    tokenizer=tokenizer, 
    model=binaryBertQATF, 
    model_name='/content/gdrive/MyDrive/binaryBertQATF', 
    bert_learning_rate=bert_learning_rate,
    classifier_learning_rate=classifier_learning_rate,
    train_set=TF_binary_train_set, 
    val_set=TF_binary_val_set, 
    batch_size=batch_size, 
    num_epochs=num_epochs, 
    device=device, 
    criterion=criterion_QA, 
    checkpoint_path='/content/gdrive/MyDrive/binaryBertQATF_checkpoint',
    resume_training=False, # Set this to True if you want to restore training
    save_epoch=10,
    last_backup=0,
    train_part=False,
    is_QA=True
)

### BertQA with TF (Partial fine-tuning)

In [None]:
binaryBertQATFPart = BertClassifier(num_classes=1, mode='qa')

In [None]:
binaryBertQATFPart, binaryBertQATFPart_train_loss, binaryBertQATFPart_valid_loss = train_bert_classifier(
    tokenizer=tokenizer, 
    model=binaryBertQATFPart, 
    model_name='/content/gdrive/MyDrive/binaryBertQATFPart', 
    bert_learning_rate=bert_learning_rate,
    classifier_learning_rate=classifier_learning_rate,
    train_set=TF_binary_train_set, 
    val_set=TF_binary_val_set, 
    batch_size=batch_size, 
    num_epochs=num_epochs, 
    device=device, 
    criterion=criterion_QA, 
    checkpoint_path='/content/gdrive/MyDrive/binaryBertQATFPart_checkpoint',
    resume_training=False, # Set this to True if you want to restore training
    save_epoch=10,
    last_backup=0,
    train_part=True,
    is_QA=True
)

### Load Binary Dataset for BertQA with Positive/Negative Answer

In [None]:
PN_binary_train_set = SST2_Dataset('data/binary_train_QA_PN.csv', True)
PN_binary_val_set = SST2_Dataset('data/binary_val_QA_PN.csv', True)
# PN_binary_test_set = SST2_Dataset('data/binary_test_QA_PN.csv', True)

### BertQA with PN (Complete fine-tuning)

In [None]:
binaryBertQAPN = BertClassifier(num_classes=1, mode='qa')

In [None]:
binaryBertQAPN, binaryBertQAPN_train_loss, binaryBertQAPN_valid_loss = train_bert_classifier(
    tokenizer=tokenizer, 
    model=binaryBertQAPN, 
    model_name='/content/gdrive/MyDrive/binaryBertQAPN', 
    bert_learning_rate=bert_learning_rate,
    classifier_learning_rate=classifier_learning_rate,
    train_set=PN_binary_train_set, 
    val_set=PN_binary_val_set, 
    batch_size=batch_size, 
    num_epochs=num_epochs, 
    device=device, 
    criterion=criterion_QA, 
    checkpoint_path='/content/gdrive/MyDrive/binaryBertQAPN_checkpoint',
    resume_training=False, # Set this to True if you want to restore training
    save_epoch=10,
    last_backup=0,
    train_part=False,
    is_QA=True
)

### BertQA with PN (Partial fine-tuning)

In [None]:
binaryBertQAPNPart = BertClassifier(num_classes=1, mode='qa')

In [None]:
binaryBertQAPNPart, binaryBertQAPNPart_train_loss, binaryBertQAPNPart_valid_loss = train_bert_classifier(
    tokenizer=tokenizer, 
    model=binaryBertQAPNPart, 
    model_name='/content/gdrive/MyDrive/binaryBertQAPNPart', 
    bert_learning_rate=bert_learning_rate,
    classifier_learning_rate=classifier_learning_rate,
    train_set=PN_binary_train_set, 
    val_set=PN_binary_val_set, 
    batch_size=batch_size, 
    num_epochs=num_epochs, 
    device=device, 
    criterion=criterion_QA, 
    checkpoint_path='/content/gdrive/MyDrive/binaryBertQAPNPart_checkpoint',
    resume_training=False, # Set this to True if you want to restore training
    save_epoch=10,
    last_backup=0,
    train_part=True,
    is_QA=True
)

# Model Evaluation

For Lecturers and TAs, you don't have to run this part.

      1. This part is used to get the test results, as well as the loss plot.
      2. Each Model is deleted from memory after its evaluation. Otherwise the memory will not be enough.

In [None]:
def retrieve_prediction(model, test_set, batch_size, tokenizer, device, is_QA=False, QA_keywords=None):
    """Run ``model`` over ``test_set`` and collect predictions and labels.

    Args:
        model: Module to evaluate; it is moved to ``device`` and put in eval mode.
        test_set: Dataset yielding ``(input, label)`` samples.
        batch_size: Batch size of the (unshuffled) loader.
        tokenizer: Callable that turns a batch of raw inputs into model inputs.
        device: Device used for the forward passes.
        is_QA: If true, every input is paired with each entry of
            ``QA_keywords`` and the prediction is the index of the keyword
            with the highest model output.
        QA_keywords: Candidate answers; required when ``is_QA`` is true.

    Returns:
        ``(y_pred, y_true)`` as CPU tensors in dataset order. Both are empty
        tensors when ``test_set`` yields no batches.

    Raises:
        ValueError: If ``is_QA`` is true and ``QA_keywords`` is missing or empty.
    """
    if is_QA and not QA_keywords:
        raise ValueError('QA_keywords must be provided when is_QA is True')

    model.to(device)
    model.eval()
    testloader = DataLoader(test_set, batch_size)
    pred_batches = []
    true_batches = []

    # Evaluation only: skip building the autograd graph.
    with torch.no_grad():
        for inputs, labels in testloader:
            if is_QA:
                keyword_scores = []
                for keyword in QA_keywords:
                    x_args = tokenizer([[sentence, keyword] for sentence in inputs],return_tensors='pt',padding=True).to(device)
                    keyword_scores.append(model(x_args))
                # One column per keyword; the winning column is the prediction.
                batch_pred = torch.argmax(torch.cat(keyword_scores, dim=1), dim=1)
            else:
                x_args = tokenizer(list(inputs),return_tensors='pt',padding=True).to(device)
                batch_pred = torch.argmax(model(x_args), dim=1)

            pred_batches.append(batch_pred.cpu())
            true_batches.append(labels.cpu())

    if not pred_batches:
        return torch.tensor([]), torch.tensor([])

    return torch.cat(pred_batches), torch.cat(true_batches)

In [None]:
def get_metrics(y_pred, y_true):
    """Compute precision, recall, accuracy and F1 with label 1 as the positive class.

    Args:
        y_pred: Tensor of predicted labels.
        y_true: Tensor of reference labels, same shape as ``y_pred``.

    Returns:
        ``(precision, recall, accuracy, F_1)`` as 0-d tensors. A metric whose
        denominator is zero comes out as ``nan``.
    """
    true_pos = torch.sum(y_pred[y_true == 1] == 1)
    false_pos = torch.sum(y_pred[y_true == 0] == 1)
    actual_pos = torch.sum(y_true == 1)

    accuracy = torch.sum(y_pred == y_true)/((torch.sum(y_pred == y_true))+torch.sum(y_pred != y_true))
    # Precision: share of predicted positives that are correct, TP / (TP + FP).
    precision = true_pos/(true_pos + false_pos)
    # Recall: share of actual positives that were found, TP / (TP + FN).
    recall = true_pos/actual_pos
    F_1 = 2*precision*recall/(precision+recall)
    return precision, recall, accuracy, F_1

In [None]:
def report_result(model_name, precision, recall, accuracy, F_1):
    """Print the model name and its four metrics on a single line."""
    fields = (
        f'Model = {model_name}',
        f'Precision = {precision}',
        f'Recall = {recall}',
        f'Accuracy = {accuracy}',
        f'F1 Score = {F_1}',
    )
    print(', '.join(fields))

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
batch_size = 70
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

## Binary

In [None]:
binary_test_set = SST2_Dataset('data/binary_test.csv')

In [None]:
model_names = ['binaryBertPlain','binaryBertALL','binaryBertQATF','binaryBertQAPN','binaryBertPlainPart', 'binaryBertALLPart', 'binaryBertQATFPart', 'binaryBertQAPNPart']

### Loss

In [None]:
def plot_loss(train_losses, valid_losses, model_names):
    """Plot the training (solid) and validation (dashed) loss of every model.

    Entry ``i`` of ``train_losses`` / ``valid_losses`` belongs to
    ``model_names[i]``. Both curves of a model share one colour, and only the
    training curve gets a legend entry: the model name without its first six
    characters.
    """
    plt.figure(figsize=(7,7))
    for idx, name in enumerate(model_names):
        colour = f'C{idx}'
        plt.plot(train_losses[idx], linestyle='-', c=colour, label=f'{name[6:]}')
        plt.plot(valid_losses[idx], linestyle='--', c=colour)
    plt.xlabel('# of Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

In [None]:
# Read the saved loss histories: the first line of each file holds
# whitespace-separated values, and at most the first 10 are kept.
train_losses = []
valid_losses = []
for model_name in model_names:
    train_path = f'models/{model_name}_train_loss.txt'
    valid_path = f'models/{model_name}_val_loss.txt'
    for path, history in ((train_path, train_losses), (valid_path, valid_losses)):
        with open(path) as f:
            first_line = f.readline()
        history.append([float(value) for value in first_line.split()][:10])

In [None]:
plot_loss(train_losses, valid_losses, model_names)

### BertPlain (Complete fine-tuning)

In [None]:
binaryBertPlain = BertClassifier(num_classes=2, mode='plain').to(device)
binaryBertPlain.load_state_dict(torch.load('models/binaryBertPlain.pt',map_location=device))
y_pred, y_true = retrieve_prediction(binaryBertPlain, binary_test_set, batch_size, tokenizer, device)
del binaryBertPlain

In [None]:
precision, recall, accuracy, F_1 = get_metrics(y_pred, y_true)
report_result('binaryBertPlain', precision, recall, accuracy, F_1)

### BertPlain (Partial fine-tuning)

In [None]:
binaryBertPlainPart = BertClassifier(num_classes=2, mode='plain').to(device)
binaryBertPlainPart.load_state_dict(torch.load('models/binaryBertPlainPart.pt',map_location=device))
y_pred, y_true = retrieve_prediction(binaryBertPlainPart, binary_test_set, batch_size, tokenizer, device)
del binaryBertPlainPart

In [None]:
precision, recall, accuracy, F_1 = get_metrics(y_pred, y_true)
report_result('binaryBertPlainPart', precision, recall, accuracy, F_1)

### BertALL (Complete fine-tuning)

In [None]:
binaryBertALL = BertClassifier(num_classes=2, mode='all').to(device)
binaryBertALL.load_state_dict(torch.load('models/binaryBertALL.pt',map_location=device))
y_pred, y_true = retrieve_prediction(binaryBertALL, binary_test_set, batch_size, tokenizer, device)
del binaryBertALL

In [None]:
precision, recall, accuracy, F_1 = get_metrics(y_pred, y_true)
report_result('binaryBertALL', precision, recall, accuracy, F_1)

### BertALL (Partial fine-tuning)

In [None]:
binaryBertALLPart = BertClassifier(num_classes=2, mode='all').to(device)
binaryBertALLPart.load_state_dict(torch.load('models/binaryBertALLPart.pt',map_location=device))
y_pred, y_true = retrieve_prediction(binaryBertALLPart, binary_test_set, batch_size, tokenizer, device)
del binaryBertALLPart

In [None]:
precision, recall, accuracy, F_1 = get_metrics(y_pred, y_true)
report_result('binaryBertALLPart', precision, recall, accuracy, F_1)

### BertQA with TF (Complete fine-tuning)

In [None]:
binaryBertQATF = BertClassifier(num_classes=1, mode='qa').to(device)
binaryBertQATF.load_state_dict(torch.load('models/binaryBertQATF.pt',map_location=device))
y_pred, y_true = retrieve_prediction(binaryBertQATF, binary_test_set, batch_size, tokenizer, device, True, ['False', 'True'])
del binaryBertQATF

In [None]:
precision, recall, accuracy, F_1 = get_metrics(y_pred, y_true)
report_result('binaryBertQATF', precision, recall, accuracy, F_1)

### BertQA with TF (Partial fine-tuning)

In [None]:
binaryBertQATFPart = BertClassifier(num_classes=1, mode='qa').to(device)
binaryBertQATFPart.load_state_dict(torch.load('models/binaryBertQATFPart.pt',map_location=device))
y_pred, y_true = retrieve_prediction(binaryBertQATFPart, binary_test_set, batch_size, tokenizer, device, True, ['False', 'True'])
del binaryBertQATFPart

In [None]:
precision, recall, accuracy, F_1 = get_metrics(y_pred, y_true)
report_result('binaryBertQATFPart', precision, recall, accuracy, F_1)

### BertQA with PN (Complete fine-tuning)

In [None]:
binaryBertQAPN = BertClassifier(num_classes=1, mode='qa').to(device)
binaryBertQAPN.load_state_dict(torch.load('models/binaryBertQAPN.pt',map_location=device))
y_pred, y_true = retrieve_prediction(binaryBertQAPN, binary_test_set, batch_size, tokenizer, device, True, ['Negative','Positive'])
del binaryBertQAPN

In [None]:
precision, recall, accuracy, F_1 = get_metrics(y_pred, y_true)
report_result('binaryBertQAPN', precision, recall, accuracy, F_1)

### BertQA with PN (Partial fine-tuning)

In [None]:
binaryBertQAPNPart = BertClassifier(num_classes=1, mode='qa').to(device)
binaryBertQAPNPart.load_state_dict(torch.load('models/binaryBertQAPNPart.pt',map_location=device))
y_pred, y_true = retrieve_prediction(binaryBertQAPNPart, binary_test_set, batch_size, tokenizer, device, True, ['Negative','Positive'])
del binaryBertQAPNPart

In [None]:
precision, recall, accuracy, F_1 = get_metrics(y_pred, y_true)
report_result('binaryBertQAPNPart', precision, recall, accuracy, F_1)

# Identify Successful/Fail Cases for Each Algorithm

For Lecturers and TAs, you don't have to run this part.

      This part shows how the samples for the qualitative analysis are retrieved.

In [None]:
binary_test_set = SST2_Dataset('data/binary_test.csv')
testloader = DataLoader(binary_test_set, 1)

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
binaryBertPlain = BertClassifier(num_classes=2, mode='plain').to(device)
binaryBertPlain.load_state_dict(torch.load('models/binaryBertPlain.pt',map_location=device))
binaryBertPlain.eval()
binaryBertALL = BertClassifier(num_classes=2, mode='all').to(device)
binaryBertALL.load_state_dict(torch.load('models/binaryBertALL.pt',map_location=device))
binaryBertALL.eval()
binaryBertQATF = BertClassifier(num_classes=1, mode='qa').to(device)
binaryBertQATF.load_state_dict(torch.load('models/binaryBertQATF.pt',map_location=device))
binaryBertQATF.eval()
binaryBertQAPN = BertClassifier(num_classes=1, mode='qa').to(device)
binaryBertQAPN.load_state_dict(torch.load('models/binaryBertQAPN.pt',map_location=device))
binaryBertQAPN.eval()

In [None]:
# Collect one row per test sentence and build the frame once at the end;
# concatenating inside the loop copies the growing frame on every iteration,
# and seeding it with empty float columns can turn the integer labels into floats.
columns = ['Sentence', 'Label', 'BERT-Plain-C', 'BERT-ALL-C', 'BERT-QA-TF-C', 'BERT-QA-PN-C']
rows = []
# Inference only: no autograd graph is needed.
with torch.no_grad():
    for x, y in testloader:
        x = x[0]  # the loader uses batch size 1, so unwrap the single sentence
        rows.append({
            'Sentence': x,
            'Label': int(y),
            'BERT-Plain-C': int(binaryBertPlain.predict(x, tokenizer, device)),
            'BERT-ALL-C': int(binaryBertALL.predict(x, tokenizer, device)),
            'BERT-QA-TF-C': int(binaryBertQATF.predict(x, tokenizer, device, ['False', 'True'])),
            'BERT-QA-PN-C': int(binaryBertQAPN.predict(x, tokenizer, device, ['Negative', 'Positive'])),
        })
pred = pd.DataFrame(rows, columns=columns)

In [None]:
pred.to_csv('pred.csv',index=False)

In [None]:
del binaryBertPlain
del binaryBertALL
del binaryBertQATF
del binaryBertQAPN
del pred

# Model Testing

Lecturers and TAs, in the following cells you can test the model performance by writing a sentence.

      1. The implementation is not batched, so only one sentence can be passed in at a time. An example of testing each of the models is given in the cells below.
      2. In this training set, 0 represents positive sentiment while 1 represents negative sentiment. It is important to supply the candidate answers for BERT-QA in the order that corresponds to these labels.
      3. It is crucial to release the memory if you want to test many models.


In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

In [None]:
def report_sentiment(sentence, sentiment):
    """Print whether ``sentence`` is positive (``sentiment == 0``) or negative (anything else)."""
    message = (
        f'Sentence \"{sentence}\" shows a positive sentiment.'
        if sentiment == 0
        else f'Sentence \"{sentence}\" shows a negative sentiment.'
    )
    print(message)

### BertPlain (Complete fine-tuning)

In [None]:
binaryBertPlain = BertClassifier(num_classes=2, mode='plain').to(device)
binaryBertPlain.load_state_dict(torch.load('models/binaryBertPlain.pt',map_location=device))
binaryBertPlain.eval()

In [None]:
sentence = "NLP is interesting!"
sentiment = binaryBertPlain.predict(sentence, tokenizer, device)

In [None]:
report_sentiment(sentence, sentiment)

In [None]:
del binaryBertPlain

### BertPlain (Partial fine-tuning)

In [None]:
binaryBertPlainPart = BertClassifier(num_classes=2, mode='plain').to(device)
binaryBertPlainPart.load_state_dict(torch.load('models/binaryBertPlainPart.pt',map_location=device))
binaryBertPlainPart.eval()

In [None]:
sentence = "NLP is interesting!"
sentiment = binaryBertPlainPart.predict(sentence, tokenizer, device)

In [None]:
report_sentiment(sentence, sentiment)

In [None]:
del binaryBertPlainPart

### BertALL (Complete fine-tuning)

In [None]:
binaryBertALL = BertClassifier(num_classes=2, mode='all').to(device)
binaryBertALL.load_state_dict(torch.load('models/binaryBertALL.pt',map_location=device))
binaryBertALL.eval()

In [None]:
sentence = "NLP is interesting!"
sentiment = binaryBertALL.predict(sentence, tokenizer, device)

In [None]:
report_sentiment(sentence, sentiment)

In [None]:
del binaryBertALL

### BertALL (Partial fine-tuning)

In [None]:
binaryBertALLPart = BertClassifier(num_classes=2, mode='all').to(device)
binaryBertALLPart.load_state_dict(torch.load('models/binaryBertALLPart.pt',map_location=device))
binaryBertALLPart.eval()

In [None]:
sentence = "NLP is interesting!"
sentiment = binaryBertALLPart.predict(sentence, tokenizer, device)

In [None]:
report_sentiment(sentence, sentiment)

In [None]:
del binaryBertALLPart

### BertQA with TF (Complete fine-tuning)

In [None]:
binaryBertQATF = BertClassifier(num_classes=1, mode='qa').to(device)
binaryBertQATF.load_state_dict(torch.load('models/binaryBertQATF.pt',map_location=device))
binaryBertQATF.eval()

In [None]:
sentence = "NLP is interesting!"
sentiment = binaryBertQATF.predict(sentence, tokenizer, device, ['False', 'True'])

In [None]:
report_sentiment(sentence, sentiment)

In [None]:
del binaryBertQATF

### BertQA with TF (Partial fine-tuning)

In [None]:
binaryBertQATFPart = BertClassifier(num_classes=1, mode='qa').to(device)
binaryBertQATFPart.load_state_dict(torch.load('models/binaryBertQATFPart.pt',map_location=device))
binaryBertQATFPart.eval()

In [None]:
sentence = "NLP is interesting!"
sentiment = binaryBertQATFPart.predict(sentence, tokenizer, device, ['False', 'True'])

In [None]:
report_sentiment(sentence, sentiment)

In [None]:
del binaryBertQATFPart

### BertQA with PN (Complete fine-tuning)

In [None]:
binaryBertQAPN = BertClassifier(num_classes=1, mode='qa').to(device)
binaryBertQAPN.load_state_dict(torch.load('models/binaryBertQAPN.pt',map_location=device))
binaryBertQAPN.eval()

In [None]:
sentence = "NLP is interesting!"
sentiment = binaryBertQAPN.predict(sentence, tokenizer, device, ['Negative', 'Positive'])

In [None]:
report_sentiment(sentence, sentiment)

In [None]:
del binaryBertQAPN

### BertQA with PN (Partial fine-tuning)

In [None]:
binaryBertQAPNPart = BertClassifier(num_classes=1, mode='qa').to(device)
binaryBertQAPNPart.load_state_dict(torch.load('models/binaryBertQAPNPart.pt',map_location=device))
binaryBertQAPNPart.eval()

In [None]:
sentence = "NLP is interesting!"
sentiment = binaryBertQAPNPart.predict(sentence, tokenizer, device, ['Negative', 'Positive'])

In [None]:
report_sentiment(sentence, sentiment)

In [None]:
del binaryBertQAPNPart