### Objective: transfer learning with pooled output
Run fragment-level classification with all three CAMeLBERT variants (CA, MSA, and Mix).

In [None]:

import pandas as pd
from transformers import AutoModel, AutoTokenizer
import numpy as np
import torch
import torch.nn as nn
from sklearn.metrics import classification_report
from sklearn.utils.class_weight import compute_class_weight
import pytorch_lightning as pl
from torch.utils.data import DataLoader, TensorDataset
import math
from torch.optim.lr_scheduler import ReduceLROnPlateau
from pytorch_lightning.callbacks import EarlyStopping
# Module-level early-stopping callback watching 'val_loss' (lower is better, patience of 8).
# NOTE(review): run_experiment builds its own EarlyStopping instance, so this
# object looks unused in the visible code -- confirm before removing it.
early_stopping = EarlyStopping(monitor='val_loss', patience=8, verbose=True, mode='min')
import torch.nn.functional as F
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.decomposition import PCA

### Data and CAMeLBERT setup

In [None]:
base_path = '../../data/'


def _load_levelled(csv_name):
  """Read one split from `base_path` and keep only rows whose 'text' is a str."""
  frame = pd.read_csv(base_path + csv_name)
  # Column-wise isinstance check; avoids building a Series per row as a
  # row-wise DataFrame.apply would. astype(bool) keeps the mask boolean even
  # when the file has no rows.
  has_text = frame['text'].map(lambda value: isinstance(value, str)).astype(bool)
  # Return an independent copy: a 'level_norm' column is added to these frames
  # later, and assigning into a filtered selection can trigger pandas'
  # SettingWithCopyWarning.
  return frame[has_text].copy()


dev_levelled = _load_levelled('dev_levelled.csv')
train_levelled = _load_levelled('train_levelled.csv')
test_levelled = _load_levelled('test_levelled.csv')


# Hugging Face model identifiers of the three CAMeLBERT variants under test.
camelbert_msa = 'CAMeL-Lab/bert-base-arabic-camelbert-msa'
camelbert_ca = 'CAMeL-Lab/bert-base-arabic-camelbert-ca'
camelbert_mix = 'CAMeL-Lab/bert-base-arabic-camelbert-mix'

#### Normalize Levels, Dataloader Function

In [None]:
# Shift the raw 'level' values down by 3 to obtain the class index used as the
# training target (presumably levels 3-5 -> classes 0-2; verify against the data).
# Vectorised column arithmetic replaces the row-wise apply(axis=1).
train_levelled['level_norm'] = train_levelled['level'] - 3
test_levelled['level_norm'] = test_levelled['level'] - 3
dev_levelled['level_norm'] = dev_levelled['level'] - 3

In [None]:
def _make_loader(frame, tokenizer, batch_size, max_length, shuffle):
  """Tokenize `frame['text']` and wrap ids, masks and labels in a DataLoader."""
  encoded = tokenizer(
      frame['text'].tolist(),
      max_length=max_length,
      # `padding='max_length'` replaces the deprecated `pad_to_max_length=True`.
      padding='max_length',
      truncation=True,
  )
  dataset = TensorDataset(
      torch.tensor(encoded['input_ids']),
      torch.tensor(encoded['attention_mask']),
      # NLLLoss expects integer class indices, so force an integer dtype.
      torch.tensor(frame['level_norm'].tolist(), dtype=torch.long),
  )
  return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=2)


def get_all_dataloaders(train, test, dev, tokenizer, batch_size=32, max_length=20):
  """Build the train/dev/test DataLoaders and the balanced class weights.

  Args:
    train, test, dev: DataFrames with a 'text' column and a 'level_norm'
      column holding the class index.
    tokenizer: callable tokenizer returning 'input_ids' and 'attention_mask'
      for a list of strings (e.g. a Hugging Face tokenizer).
    batch_size: batch size used by all three loaders.
    max_length: every text is padded or truncated to this many tokens.

  Returns:
    (train_loader, dev_loader, test_loader, weights). Only the training
    loader shuffles. `weights` is a float tensor of 'balanced' class weights
    computed from the training labels, ordered by sorted unique label.
  """
  train_loader = _make_loader(train, tokenizer, batch_size, max_length, shuffle=True)
  dev_loader = _make_loader(dev, tokenizer, batch_size, max_length, shuffle=False)
  test_loader = _make_loader(test, tokenizer, batch_size, max_length, shuffle=False)

  train_labels = train['level_norm']
  class_weights = compute_class_weight(
      class_weight='balanced', classes=np.unique(train_labels), y=train_labels
  )
  weights = torch.tensor(class_weights, dtype=torch.float)

  return train_loader, dev_loader, test_loader, weights

### Experimental setup

#### Model Architecture (pooler)

In [None]:
class SimplerReadabilityBertModel(pl.LightningModule):
  """Linear classifier on top of a BERT encoder's pooled output.

  The encoder's `pooler_output` is fed to a single linear layer followed by
  LogSoftmax, and trained with a class-weighted NLLLoss.

  Args:
    bert: encoder whose output exposes `pooler_output`.
    class_weights: per-class weight tensor passed to NLLLoss.
    lr: learning rate for Adam.

  Attributes:
    all_pred / all_gt: per-batch log-probabilities and labels collected
      during testing (reset at the start of each test epoch).
    all_train_loss / all_dev_loss: detached per-step loss tensors.
  """

  def __init__(self, bert, class_weights, lr = 1e-7):
    super().__init__()
    self.bert = bert
    # 768 inputs -> 3 classes; presumably 768 is the hidden size of the
    # BERT-base encoders used here -- confirm if another encoder is plugged in.
    self.layer_1 = torch.nn.Linear(768, 3)
    self.activation = nn.LogSoftmax(dim=1)

    self.lossFn = nn.NLLLoss(weight = class_weights)
    self.lr = lr

    self.all_pred = []
    self.all_gt = []
    self.all_train_loss = []
    self.all_dev_loss = []
    self.initialize_weights()

  def forward(self, tokens, mask):
    """Return per-class log-probabilities for a batch of token ids."""
    bert_output = self.bert(tokens, attention_mask = mask)
    x = self.layer_1(bert_output.pooler_output)
    x = self.activation(x)

    return x

  def loss(self, probs, labels):
    """Weighted negative log-likelihood of `labels` under log-probs `probs`."""
    return self.lossFn(probs, labels)

  def training_step(self, train, i):
    """Compute, log and return the training loss for one batch."""
    x, mask, y = train
    probs = self.forward(x, mask)
    loss = self.loss(probs, y)
    self.log('train_loss', loss)
    # Keep only a detached copy: storing the live tensor would keep every
    # step's autograd graph alive and grow memory over training.
    self.all_train_loss.append(loss.detach())
    return loss

  def validation_step(self, val, i):
    """Compute, log ('val_loss') and return the validation loss for one batch."""
    x, mask, y = val
    probs = self.forward(x, mask)
    loss = self.loss(probs, y)
    self.log('val_loss', loss)
    self.all_dev_loss.append(loss.detach())
    return loss

  def on_test_epoch_start(self):
    """Drop predictions from any earlier test run so results are not mixed."""
    self.all_pred = []
    self.all_gt = []

  def test_step(self, test, i):
    """Record log-probabilities and labels for one batch and log 'test_loss'."""
    x, mask, y = test
    probs = self(x, mask)

    # Stored on the CPU and detached so collected outputs do not pin
    # accelerator memory or autograd state.
    self.all_pred.append(probs.detach().cpu())
    self.all_gt.append(y.detach().cpu())

    loss = self.loss(probs, y)
    self.log('test_loss', loss)

  def initialize_weights(self):
    """Xavier-uniform init for the classifier weights, zeros for its bias."""
    nn.init.xavier_uniform_(self.layer_1.weight)
    nn.init.zeros_(self.layer_1.bias)

  def configure_optimizers(self):
    """Adam over all parameters, with ReduceLROnPlateau driven by 'val_loss'."""
    optimizer = torch.optim.Adam(self.parameters(), lr = self.lr)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=4, verbose=True)
    return {"optimizer": optimizer, "lr_scheduler": scheduler, "monitor": "val_loss"}

#### Experimental protocol

In [None]:
def train(model, train_loader, dev_loader, trainer):
  """Fit `model` by calling `trainer.fit` with `train_loader` and `dev_loader`."""
  trainer.fit(model, train_loader, dev_loader)

def benchmark(model, test_loader, trainer):
  """Run the test loop and score the collected predictions.

  Args:
    model: module whose test step appends per-batch outputs to
      `model.all_pred` and labels to `model.all_gt`.
    test_loader: DataLoader passed to `trainer.test`.
    trainer: object providing `test(model, dataloader)`.

  Returns:
    A two-element list: the predicted class indices (NumPy array) and the
    `classification_report` dictionary for those predictions.
  """
  # Start from empty buffers so an earlier test run cannot leak into this report.
  model.all_pred.clear()
  model.all_gt.clear()

  trainer.test(model, test_loader)

  all_pred_test = np.concatenate([x.detach().cpu().numpy() for x in model.all_pred])
  all_gt_test = np.concatenate([x.detach().cpu().numpy() for x in model.all_gt])
  # Highest-scoring class per row.
  pred_labels = all_pred_test.argmax(axis=1)
  return [pred_labels, classification_report(all_gt_test, pred_labels, output_dict = True)]

def run_experiment(model, train_set, test_set, dev_set):
  """Fine-tune one pretrained encoder and evaluate it on the test split.

  Args:
    model: name or path passed to `AutoModel.from_pretrained` and
      `AutoTokenizer.from_pretrained`.
    train_set, test_set, dev_set: DataFrames forwarded to
      `get_all_dataloaders`.

  Returns:
    The result of `benchmark`: [predicted labels, classification report dict].
  """
  print('Importing model and tokenizer...')
  bert_model = AutoModel.from_pretrained(model)
  tokenizer = AutoTokenizer.from_pretrained(model)
  # Use the GPU when one is present instead of failing on CPU-only hosts.
  # No manual `.to(device)` is needed: the Trainer places the LightningModule
  # (and the encoder it owns) on the selected accelerator.
  accelerator = "gpu" if torch.cuda.is_available() else "cpu"
  print('Setting up data...')
  train_dl, dev_dl, test_dl, weights = get_all_dataloaders(train_set, test_set, dev_set, tokenizer)
  early_stopping = EarlyStopping(monitor='val_loss', patience=8, verbose=True, mode='min')
  print('Setting up architecture...')
  arch = SimplerReadabilityBertModel(bert_model, weights, 5e-5)
  trainer = pl.Trainer(callbacks=[early_stopping], accelerator=accelerator, max_epochs = 10)
  print('Training start')
  train(arch, train_dl, dev_dl, trainer)
  return benchmark(arch, test_dl, trainer)

## EXPERIMENTS

In [None]:
# Run the full train/evaluate cycle once per CAMeLBERT variant (CA, MSA, Mix order).
all_res = [
    run_experiment(variant, train_levelled, test_levelled, dev_levelled)
    for variant in (camelbert_ca, camelbert_msa, camelbert_mix)
]

# Each experiment returns [predicted labels, classification report].
class_reports = [outcome[1] for outcome in all_res]
results = [outcome[0] for outcome in all_res]

In [None]:
def results_to_csv(result_arr, labels=('0', '1', '2')):
  """Flatten classification-report dicts into one numeric row each.

  Despite the name, nothing is written to disk; the rows are returned.

  Args:
    result_arr: iterable of report dicts, each holding per-class entries with
      'f1-score', 'precision' and 'recall', plus 'accuracy' and 'macro avg'.
    labels: class keys to read from each report, in output order.

  Returns:
    A list with one 1-D NumPy array per report, laid out as
    [f1, precision, recall] for each label, then accuracy, then macro-avg F1.
  """
  metrics = ('f1-score', 'precision', 'recall')
  all_rows = []
  for report in result_arr:
    row = [report[label][metric] for label in labels for metric in metrics]
    row.append(report['accuracy'])
    row.append(report['macro avg']['f1-score'])
    all_rows.append(np.array(row, dtype=float))

  return all_rows



In [None]:
# One flattened metrics row per experiment, in the order the experiments ran.
all_rows = results_to_csv(class_reports)

In [None]:
# Column order mirrors the rows from results_to_csv: (f1, precision, recall)
# per class, then overall accuracy and macro-averaged F1.
df_results = pd.DataFrame(
    all_rows,
    columns=[
        'f1_3', '3_prec', '3_recall',
        'f1_4', '4_prec', '4_recall',
        'f1_5', '5_prec', '5_recall',
        'accuracy', 'f1_macro',
    ],
)

In [None]:
# Bare expression: shows the results table as the notebook cell's output.
df_results