In [None]:
import numpy as np
import matplotlib.pyplot as plt
import glob
import cv2
import pandas as pd
import re
import torch
import transformers
import os

import nltk
from nltk.tokenize import RegexpTokenizer
from nltk.stem import WordNetLemmatizer

nltk.download('wordnet')
nltk.download('vader_lexicon')

  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\surfy\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\surfy\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [None]:
# Toggle for the runtime: set to True to mount Google Drive and work from the Drive folder below.
googleColab = False

if googleColab:
  # Imported only on this branch, so a local run never needs the google.colab package.
  from google.colab import drive
  drive.mount('/content/drive')
  # CHANGE THIS PATH IF USING COLAB
  # IPython magic: changes the working directory, so the relative ./datasetBothModels and
  # ./models paths used by the later cells resolve inside this folder.
  %cd "/content/drive/MyDrive/Msc Artificial Intelligence/Semester 1/Applied Artificial Intelligence/Assignment"

In [None]:
# Feed-forward binary classifier applied to pre-computed sentence embeddings
class Model(torch.nn.Module):
  """Three hidden linear layers with leaky ReLU, followed by a single sigmoid output unit."""

  def __init__(self, miniLMInput=False):
    """Builds the layers.

    miniLMInput: when True the first layer accepts 384 features (mini lm embeddings),
                 otherwise 768 features (distill bert embeddings).
    """
    super().__init__()

    # Width of the incoming embedding vector
    inputSize = 384 if miniLMInput else 768

    # Layer stack: inputSize -> 128 -> 64 -> 32 -> 1
    self.linear1 = torch.nn.Linear(inputSize, 128)
    self.linear2 = torch.nn.Linear(128, 64)
    self.linear3 = torch.nn.Linear(64, 32)
    self.output = torch.nn.Linear(32, 1)

    # When True, forward() prints every intermediate activation
    self.debug = False

  def forward(self, x):
    """Runs a forward pass and returns the sigmoid of the output layer."""
    activation = torch.nn.functional.leaky_relu
    hiddenLayers = (
      ("L1", self.linear1),
      ("L2", self.linear2),
      ("L3", self.linear3),
    )

    # Each hidden layer is a linear map followed by leaky ReLU
    for tag, layer in hiddenLayers:
      x = activation(layer(x))
      if self.debug:
        print(f"{tag}: {x}")

    x = self.output(x)
    if self.debug:
      print(f"x: {x}")

    return torch.nn.functional.sigmoid(x)

  def setDebug(self, value):
    """Switches the per-layer debug printing on or off."""
    self.debug = value

## MiniLM Training

In [None]:
# Gets mini lm test batches from files
# (the result must be bound to `paths`: the two filters below read that name)
paths = glob.glob("./datasetBothModels/miniLM/testbatches/*.npy")
# Files whose path contains "labels" hold the targets, every other .npy file holds the features
# NOTE(review): zip(testBatches, testLabels) later pairs the two lists by position, which relies on
# glob returning feature and label files in matching order - confirm against the file naming.
testBatches = [path for path in paths if "labels" not in path]
testLabels = [path for path in paths if "labels" in path]

In [None]:
# Finds every saved mini lm training array, then separates label files from feature files
paths = glob.glob("./datasetBothModels/miniLM/batches/*.npy")
labels = list(filter(lambda p: "labels" in p, paths))
batches = list(filter(lambda p: "labels" not in p, paths))

In [None]:
# Hyper-parameters; they are also written into the saved model's name after training
params = dict(learningRate=0.001, optimizer="Adam")

# Loss function (Binary Cross Entropy)
criterion = torch.nn.BCELoss()

# Classifier with the mini lm input size, moved to the gpu for training
miniLMModel = Model(miniLMInput=True).cuda()

# Adam optimizer over the classifier's parameters
optimizer = torch.optim.Adam(miniLMModel.parameters(), lr=params["learningRate"])

In [None]:
def testModel():
  # This function runs the models on a validation dataset to assess acurracy

  miniLMModel.eval()

  predicted = []
  truth = []

  # Runs through test batches and labels
  for batchPath, labelPath in zip(testBatches, testLabels):

    # Loads the bathes into numpy arrays
    x2 = np.load(batchPath)

    y2 = np.load(labelPath)
    y2 = y2.reshape(-1, 1)

    # Splits the minilm batches into smaller batches of 256
    miniX = int(x2.shape[0] / 4)

    for j in range(4):
      # Gets the 256 batches in seperate arrays
      idx = j*miniX
      y = torch.tensor(y2[idx:idx + miniX]).float()
      y = y.cuda()

      x = torch.tensor(x2[idx:idx + miniX])
      x = x.cuda()

      # Runs the models in evaluation mode on these batches
      with torch.no_grad():
        y_pred = miniLMModel(x)

      y = y.cpu()
      y_pred = y_pred.cpu()

      # Converts results to clamp to 0 or 1 to compare to the truth values
      y_pred = torch.where(y_pred <= 0.5,  0, 1)
      predicted = torch.cat((torch.tensor(predicted), y_pred))
      truth = torch.cat((torch.tensor(truth), y))

  # Calculates accuracy
  acc = (truth == predicted).sum().float()/len(truth)

  return acc

In [None]:
epochs = 50

# Mean training loss of every epoch, saved next to the model at the end
epochLosses = []

# Tensors are sent to whichever device the classifier's weights are on
trainDevice = next(miniLMModel.parameters()).device

for i in range(epochs):

  # These accumulators are reset at the start of every epoch so that the reported
  # training accuracy describes this epoch only, not a running total over all epochs.
  losses = []
  predictedChunks = []
  truthChunks = []

  miniLMModel.train()
  for batchPath, labelPath in zip(batches, labels):

    x2 = np.load(batchPath)
    y2 = np.load(labelPath).reshape(-1, 1)

    # Each saved file is split into 4 mini-batches; np.array_split keeps every row
    # even when the row count is not a multiple of 4.
    for xChunk, yChunk in zip(np.array_split(x2, 4), np.array_split(y2, 4)):
      if len(xChunk) == 0:
        continue

      x = torch.tensor(xChunk).to(trainDevice)
      y = torch.tensor(yChunk).float().to(trainDevice)

      # Runs model on training batch
      y_pred = miniLMModel(x)

      # Calculates loss based on difference between prediction and actual truth value
      loss = criterion(y_pred, y)

      # Appends loss value for evaluation
      losses.append(loss.cpu().detach().numpy())

      # Takes optimizer step in loss space (updates model weights)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      # Clamps y hat values to 0 or 1 for comparison with truth values; results are
      # collected in lists and concatenated once per epoch.
      predictedChunks.append(torch.where(y_pred.detach().cpu() <= 0.5, 0, 1))
      truthChunks.append(y.cpu())

  if not truthChunks:
    raise ValueError("No training samples were loaded - check the training batch paths")

  predicted = torch.cat(predictedChunks)
  truth = torch.cat(truthChunks)

  # Calculates training accuracy for this epoch
  acc = (truth == predicted).sum().float()/len(truth)

  # Validation test (switches the model to eval mode; train() is called again at the top of the loop)
  valAcc = testModel()

  # Appends epochs average loss for evaluation
  epochLosses.append(np.array(losses).mean())

  # Reports the epoch's mean loss, the same value that is stored in epochLosses
  print(f"Epoch {i} | Loss {epochLosses[-1]} | Accuracy {acc} | Validation Accuracy {valAcc}")

# Saves model with unique name to load later and evaluate
modelName = f"MINILM-{round(acc.item(), 3)}-{round(valAcc.item(), 3)}-{round(epochLosses[-1].item(), 3)}-{epochs}-{params['learningRate']}-{params['optimizer']}"

folderPath = f"./models/{modelName}/"

os.makedirs(folderPath, exist_ok=True)

# NOTE: torch.save on the module object pickles the whole model, so loading it later
# requires the Model class to be defined/importable under the same name.
torch.save(miniLMModel, f"{folderPath}{modelName}.pt")
np.save(os.path.join(folderPath, "losses.npy"), np.array(epochLosses))

  predicted = torch.cat((torch.tensor(predicted), y_pred))
  truth = torch.cat((torch.tensor(truth), y))
  predicted = torch.cat((torch.tensor(predicted), y_pred))
  truth = torch.cat((torch.tensor(truth), y))


Epoch 0 | Loss 0.664980947971344 | Accuracy 0.6379973292350769 | Validation Accuracy 0.7190755009651184
Epoch 1 | Loss 0.6216778755187988 | Accuracy 0.6469676494598389 | Validation Accuracy 0.7438151240348816
Epoch 2 | Loss 0.6051216125488281 | Accuracy 0.6512933373451233 | Validation Accuracy 0.7445746660232544
Epoch 3 | Loss 0.6023092269897461 | Accuracy 0.6537482142448425 | Validation Accuracy 0.7379557490348816
Epoch 4 | Loss 0.5936565399169922 | Accuracy 0.6552110314369202 | Validation Accuracy 0.7418619990348816
Epoch 5 | Loss 0.5970460772514343 | Accuracy 0.6566124558448792 | Validation Accuracy 0.7345920205116272
Epoch 6 | Loss 0.5988903641700745 | Accuracy 0.6578119397163391 | Validation Accuracy 0.7293837070465088
Epoch 7 | Loss 0.605821967124939 | Accuracy 0.6590312123298645 | Validation Accuracy 0.6955295205116272
Epoch 8 | Loss 0.6273400783538818 | Accuracy 0.6603162288665771 | Validation Accuracy 0.6214192509651184
Epoch 9 | Loss 0.6338008642196655 | Accuracy 0.6613250970

## DistilBERT Training

In [None]:
# Same discovery step as the mini lm section, pointed at the distill bert test arrays
paths = glob.glob("./datasetBothModels/distillBert/testbatches/*.npy")
testLabels = list(filter(lambda p: "labels" in p, paths))
testBatches = list(filter(lambda p: "labels" not in p, paths))

In [None]:
# Finds every saved distill bert training array, then separates label files from feature files
paths = glob.glob("./datasetBothModels/distillBert/batches/*.npy")
labels = list(filter(lambda p: "labels" in p, paths))
batches = list(filter(lambda p: "labels" not in p, paths))

In [None]:
# Hyper-parameters; they are also written into the saved model's name after training
params = dict(learningRate=0.01, optimizer="Adamax")

# Loss function (Binary Cross Entropy)
criterion = torch.nn.BCELoss()

# Classifier with the default (distill bert) input size, moved to the gpu for training
distillModel = Model().cuda()

# Adamax optimizer over the classifier's parameters
optimizer = torch.optim.Adamax(distillModel.parameters(), lr=params["learningRate"])

In [None]:
def testModelBert():

  distillModel.eval()

  predicted = []
  truth = []

  for batchPath, labelPath in zip(testBatches, testLabels):

    x2 = np.load(batchPath)

    y2 = np.load(labelPath)
    y2 = y2.reshape(-1, 1)

    y = torch.tensor(y2).float()
    y = y.cuda()

    x = torch.tensor(x2)
    x = x.cuda()

    with torch.no_grad():
      y_pred = distillModel(x)

    y = y.cpu()
    y_pred = y_pred.cpu()

    y_pred = torch.where(y_pred <= 0.5,  0, 1)
    predicted = torch.cat((torch.tensor(predicted), y_pred))
    truth = torch.cat((torch.tensor(truth), y))

  acc = (truth == predicted).sum().float()/len(truth)

  return acc

In [None]:
epochs = 50

# Mean training loss of every epoch, saved next to the model at the end
epochLosses = []

# Tensors are sent to whichever device the classifier's weights are on
trainDevice = next(distillModel.parameters()).device

for i in range(epochs):

  # These accumulators are reset at the start of every epoch so that the reported
  # training accuracy describes this epoch only, not a running total over all epochs.
  losses = []
  predictedChunks = []
  truthChunks = []

  distillModel.train()
  for batchPath, labelPath in zip(batches, labels):
    # Batches are no longer split into 4 as distill bert was saved in batches of 256 already

    x2 = np.load(batchPath)
    y2 = np.load(labelPath).reshape(-1, 1)

    # Nothing to train on in an empty file
    if len(x2) == 0:
      continue

    x = torch.tensor(x2).to(trainDevice)
    y = torch.tensor(y2).float().to(trainDevice)

    # Runs model on training batch
    y_pred = distillModel(x)

    # Calculates loss based on difference between prediction and actual truth value
    loss = criterion(y_pred, y)

    # Appends loss value for evaluation
    losses.append(loss.cpu().detach().numpy())

    # Takes optimizer step in loss space (updates model weights)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Clamps y hat values to 0 or 1 for comparison with truth values; results are
    # collected in lists and concatenated once per epoch.
    predictedChunks.append(torch.where(y_pred.detach().cpu() <= 0.5, 0, 1))
    truthChunks.append(y.cpu())

  if not truthChunks:
    raise ValueError("No training samples were loaded - check the training batch paths")

  predicted = torch.cat(predictedChunks)
  truth = torch.cat(truthChunks)

  # Calculates training accuracy for this epoch
  acc = (truth == predicted).sum().float()/len(truth)

  # Validation test (switches the model to eval mode; train() is called again at the top of the loop)
  valAcc = testModelBert()

  # Appends epochs average loss for evaluation
  epochLosses.append(np.array(losses).mean())

  # Reports the epoch's mean loss, the same value that is stored in epochLosses
  print(f"Epoch {i} | Loss {epochLosses[-1]} | Accuracy {acc} | Validation Accuracy {valAcc}")

# Saves model with unique name to load later and evaluate
modelName = f"DISTILLBERT-{round(acc.item(), 3)}-{round(valAcc.item(), 3)}-{round(epochLosses[-1].item(), 3)}-{epochs}-{params['learningRate']}-{params['optimizer']}"

folderPath = f"./models/{modelName}/"

os.makedirs(folderPath, exist_ok=True)

# NOTE: torch.save on the module object pickles the whole model, so loading it later
# requires the Model class to be defined/importable under the same name.
torch.save(distillModel, f"{folderPath}{modelName}.pt")
np.save(os.path.join(folderPath, "losses.npy"), np.array(epochLosses))

  predicted = torch.cat((torch.tensor(predicted), y_pred))
  truth = torch.cat((torch.tensor(truth), y))
  predicted = torch.cat((torch.tensor(predicted), y_pred))
  truth = torch.cat((torch.tensor(truth), y))


Epoch 0 | Loss 0.45294272899627686 | Accuracy 0.7359274625778198 | Validation Accuracy 0.6012369990348816
Epoch 1 | Loss 0.4427000880241394 | Accuracy 0.747971773147583 | Validation Accuracy 0.6047092080116272
Epoch 2 | Loss 0.4259592890739441 | Accuracy 0.7533921003341675 | Validation Accuracy 0.6073133945465088
Epoch 3 | Loss 0.39586180448532104 | Accuracy 0.7570412755012512 | Validation Accuracy 0.6057942509651184
Epoch 4 | Loss 0.41417789459228516 | Accuracy 0.7600300312042236 | Validation Accuracy 0.6064453125
Epoch 5 | Loss 0.4263767600059509 | Accuracy 0.7623080015182495 | Validation Accuracy 0.6048176884651184
Epoch 6 | Loss 0.41418543457984924 | Accuracy 0.7642327547073364 | Validation Accuracy 0.6115451455116272
Epoch 7 | Loss 0.4214744567871094 | Accuracy 0.7659767866134644 | Validation Accuracy 0.6117621660232544
Epoch 8 | Loss 0.3938171863555908 | Accuracy 0.7673934102058411 | Validation Accuracy 0.6116536259651184
Epoch 9 | Loss 0.40710151195526123 | Accuracy 0.7686608433