In [1]:
!pip install transformers #installing because we want to use pre_trained models, both from huggingface and our own models

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.28.0-py3-none-any.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m28.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m38.8 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.13.4-py3-none-any.whl (200 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m200.1/200.1 kB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.13.4 tokenizers-0.13.3 transformers-4.28.0


In [2]:
import pickle
import torch
import numpy as np
import matplotlib.pyplot as plt
from transformers import DebertaForSequenceClassification, RobertaForSequenceClassification
from tqdm import tqdm

In [3]:
# Mount Google Drive at /content/drive so the pickled dataloaders and saved
# models can be read from it. NOTE(review): google.colab is presumably only
# importable inside a Colab runtime - confirm before running elsewhere.
from google.colab import drive
drive.mount('/content/drive')
# Folder prefix that later cells prepend to pickle and saved-model names.
# The trailing slash matters: paths are built with plain string concatenation.
base_dir = '/content/drive/My Drive/ESC324projectdrive/dav/'

Mounted at /content/drive


In [4]:
def load_pickle(file_path):
    """Read one pickled object from ``file_path`` and return it.

    The file is opened in binary mode and closed again before returning.
    Unpickling can execute arbitrary code, so only point this at files you
    created or otherwise trust.
    """
    with open(file_path, 'rb') as pickle_file:
        return pickle.load(pickle_file)

# Each pickle is unpacked into a (train, val, test) triple of dataloaders,
# one triple per model family.
train_dataloader_roberta, val_dataloader_roberta, test_dataloader_roberta = load_pickle(
    base_dir + 'dataloaders_roberta_final.pickle'
)
train_dataloader_deberta, val_dataloader_deberta, test_dataloader_deberta = load_pickle(
    base_dir + 'dataloaders_deberta_final.pickle'
)

In [6]:
# Seed NumPy and PyTorch (CPU generator and the current CUDA device) with one shared value.
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

# Load the two fine-tuned sequence classifiers saved under base_dir.
roberta_model = RobertaForSequenceClassification.from_pretrained(base_dir + 'trained_roberta_model')
deberta_model = DebertaForSequenceClassification.from_pretrained(base_dir + 'trained_deberta_model')

# Prefer the GPU when one is visible; otherwise stay on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
roberta_model.to(device)
deberta_model.to(device)
print("Using Cuda" if device.type == 'cuda' else "Using CPU")


Using Cuda


In [7]:
def accuracy(roberta_model_output, deberta_model_output, ground_truth_labels, rel_freq_heuristic):
  """Accuracy of the RoBERTa/DeBERTa confidence ensemble on one batch.

  Each model's logits are turned into probabilities with a softmax over
  dim 1 and multiplied by ``rel_freq_heuristic``. For every sample the
  prediction of the model with the higher maximum score is kept; when
  RoBERTa is not strictly more confident, DeBERTa's prediction is used.

  The choice is made per sample. Comparing only the first sample's
  confidence would let that one sample pick the model for the whole batch.

  Args:
    roberta_model_output: RoBERTa logits on the CPU; softmax/argmax run over
      dim 1, so a (batch, num_classes) layout is assumed.
    deberta_model_output: DeBERTa logits, same layout as above.
    ground_truth_labels: tensor of class indices, one per sample (any device).
    rel_freq_heuristic: tensor of multiplicative weights that must broadcast
      against the softmax output (any device; moved to the CPU here).

  Returns:
    Fraction of samples whose ensemble prediction equals the label.
  """
  heuristic = rel_freq_heuristic.detach().cpu()

  roberta_scores = torch.softmax(roberta_model_output, dim=1) * heuristic
  deberta_scores = torch.softmax(deberta_model_output, dim=1) * heuristic

  roberta_pred = torch.argmax(roberta_scores, dim=1)
  deberta_pred = torch.argmax(deberta_scores, dim=1)

  # Per-sample confidence = highest weighted score of each model.
  roberta_conf = torch.max(roberta_scores, dim=1).values
  deberta_conf = torch.max(deberta_scores, dim=1).values

  # Element-wise model selection; ties go to DeBERTa.
  final_output = torch.where(roberta_conf > deberta_conf, roberta_pred, deberta_pred)
  final_output = final_output.detach().numpy()

  ground_truth_labels = np.array(ground_truth_labels.detach().cpu())

  # Count predictions that match their corresponding ground-truth label.
  num_correct = np.sum(final_output == ground_truth_labels)
  accuracy_val = num_correct / len(ground_truth_labels)
  return accuracy_val

In [8]:
roberta_batches = []
deberta_batches = []

# Materialise the RoBERTa test dataloader: one list entry per batch it yields.
for batch in test_dataloader_roberta:
  roberta_batches.append(batch)

# Split every DeBERTa batch into per-sample entries of the form
# [input_ids, attention_mask, labels, rel_freq_heuristic].
# The sample loop must run over the batch dimension (len(batch[0])), not over
# len(batch), which is the number of fields. Iterating over the field count
# only works when the batch size happens to equal 4, and it appends empty
# entries for a short final batch that then have to be trimmed by hand.
# NOTE(review): assumes all four fields of a batch share the same first dimension.
for batch in test_dataloader_deberta:
  samples_in_batch = len(batch[0])
  for k in range(samples_in_batch):
    deberta_batches.append([batch[j][k] for j in range(4)])

# The dataloaders are no longer needed once their contents are held in lists.
del test_dataloader_roberta, test_dataloader_deberta

In [9]:
# Sanity check: the two lists are zipped together later, so their lengths should match.
print(len(roberta_batches), len(deberta_batches), sep="\n")

430
430


In [10]:
# Test loop: score the RoBERTa/DeBERTa confidence ensemble on the test set.
roberta_model.eval()
deberta_model.eval()

total_test_acc = 0
num_batches_evaluated = 0

for batch_roberta, batch_deberta in zip(roberta_batches, deberta_batches):
    input_ids_roberta, attention_mask_roberta, labels_roberta, rel_freq_heuristic_roberta = batch_roberta

    # DeBERTa entries are single samples, so a batch dimension is added below.
    # Only the RoBERTa labels and heuristic are used for scoring.
    # NOTE(review): this assumes both test sets list the same samples in the
    # same order - confirm against the code that built the dataloaders.
    input_ids_deberta, attention_mask_deberta = batch_deberta[0], batch_deberta[1]

    with torch.no_grad():
        # No labels are passed: only the logits are needed, and feeding one
        # model's labels to the other just to obtain a discarded loss is error-prone.
        roberta_outputs = roberta_model(
            input_ids_roberta.to(device),
            attention_mask=attention_mask_roberta.to(device),
        )
        roberta_output_data = roberta_outputs.logits.detach().cpu()

        deberta_outputs = deberta_model(
            input_ids_deberta.unsqueeze(0).to(device),
            attention_mask=attention_mask_deberta.unsqueeze(0).to(device),
        )
        deberta_output_data = deberta_outputs.logits.detach().cpu()

    # Accumulate the per-batch accuracy of the ensemble.
    total_test_acc += accuracy(roberta_output_data, deberta_output_data, labels_roberta, rel_freq_heuristic_roberta)
    num_batches_evaluated += 1

    # Drop references to the model outputs so their device memory can be reclaimed.
    del roberta_outputs, deberta_outputs

# Average over the batches that were actually evaluated; zip() stops at the
# shorter list, so dividing by len(roberta_batches) could under-report.
avg_test_acc = total_test_acc / num_batches_evaluated
print(f"Test Accuracy: {avg_test_acc}")

Test Accuracy: 0.79176749495809666
