# Imports

In [None]:
!pip install transformers



In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the training CSV read further down lives under this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import numpy as np
import sklearn
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras 
import torch
import transformers
import seaborn as sns
import random

# Data Loading

In [None]:
def to_bool(col):
  """Convert the strings 'True' / 'False' in ``col`` to real booleans.

  The input is never modified. The result is always a fresh NumPy array of
  dtype ``object``, so the converted values stay genuine ``bool`` objects
  even when ``col`` is a NumPy string array (where assigning ``True`` in
  place would silently be coerced back to the string ``'True'``).

  Args:
    col: 1-D sequence (list, NumPy array, ...) of label values.

  Returns:
    A new 1-D ``object`` array of the same length. Entries equal to
    ``'True'`` become ``True``, entries equal to ``'False'`` become
    ``False``, and every other entry is passed through unchanged.
  """
  values = list(col)
  converted = np.empty(len(values), dtype=object)
  for position, value in enumerate(values):
    if value == 'True':
      converted[position] = True
    elif value == 'False':
      converted[position] = False
    else:
      # Already a bool, a missing value, or some other token: keep as is.
      converted[position] = value
  return converted

In [None]:
# Column names are supplied explicitly via `names`, so every row of the file is read as data.
TRAINING_CSV_PATH = '/content/drive/MyDrive/dev/f_correct_training.csv'
df = pd.read_csv(TRAINING_CSV_PATH, names=['text', 'humor'])

# Normalise the labels in two steps: 'True'/'False' strings -> booleans -> 0/1 integers.
humor_flags = to_bool(np.asanyarray(df['humor']))
df['humor'] = humor_flags
df['humor'] = df['humor'].astype(int)

In [None]:
from sklearn.model_selection import train_test_split

# Fixed seed so that Restart & Run All reproduces the same split and the same training run.
# 918 is the value that was drawn for the run whose outputs are recorded in this notebook.
RANDOM_SEED = 918
print('random seed: ', RANDOM_SEED)
# Seed every random number generator the notebook imports.
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

# 75% of the rows go to training; the remaining 25% hold-out is split 30/70 into
# validation and test.
df_train, df_holdout = train_test_split(df, test_size=0.25, random_state=RANDOM_SEED)
df_val, df_test = train_test_split(df_holdout, test_size=0.7, random_state=RANDOM_SEED)
df_train.shape, df_val.shape, df_test.shape

random seed:  918


((1001, 2), (100, 2), (234, 2))

# Model Creation & Setup

In [None]:
from transformers import BertTokenizer, BertModel

# Name of the pre-trained checkpoint; used for the tokenizer here and for the BERT
# encoder loaded inside HumorClassifier.
PRE_TRAINED_MODEL_NAME = 'bert-base-cased'
# Tokenizer loaded from the same checkpoint; it is handed to the data loaders further down.
tokenizer = BertTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)

## The Dataset for humor detection

*  Every tokenized text is padded (or truncated) to exactly `MAX_LEN` tokens



In [None]:
class HumorDetectionDataset(torch.utils.data.Dataset):
  """Map-style dataset that tokenizes one text per item for humor classification.

  Args:
    texts: Indexable collection of raw texts (each item is passed through ``str``).
    labels: Indexable collection of integer class labels, aligned with ``texts``.
    tokenizer: Callable tokenizer; it is invoked as ``tokenizer(text, ...)`` and must
      return a mapping with ``'input_ids'`` and ``'attention_mask'`` tensors.
    max_length: Length every encoded text is padded / truncated to.
    batch_size: Unused. Kept (now optional) only so existing callers that pass it
      keep working; batching is the DataLoader's job.
  """

  def __init__(self, texts, labels, tokenizer, max_length, batch_size=None):
    self.texts = texts
    self.labels = labels
    self.tokenizer = tokenizer
    self.max_len = max_length

  def __len__(self):
    """Return the number of texts in the dataset."""
    return len(self.texts)

  def __getitem__(self, item):
    """Tokenize the text at position ``item``.

    Returns:
      A dict with the raw ``'text'``, 1-D ``'input_ids'`` and ``'attention_mask'``
      tensors, and the label as a ``torch.long`` scalar tensor under ``'labels'``.
    """
    text = str(self.texts[item])
    label = self.labels[item]

    # Calling the tokenizer directly replaces the deprecated `encode_plus` method.
    encoding = self.tokenizer(
        text,
        add_special_tokens=True,
        max_length=self.max_len,
        return_token_type_ids=False,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt'
    )

    return {
        'text' : text,
        # The tokenizer output carries a leading batch dimension; flatten drops it.
        'input_ids' : encoding['input_ids'].flatten(),
        'attention_mask' : encoding['attention_mask'].flatten(),
        'labels' : torch.tensor(label, dtype=torch.long)
    }

## The DataLoader for the HumorDetection dataset

In [None]:
from torch.utils.data import DataLoader

def create_data_loader(df, tokenizer, max_length, batch_size, shuffle=False, num_workers=2):
  """Wrap a DataFrame in a HumorDetectionDataset and return a DataLoader over it.

  Args:
    df: DataFrame with a ``text`` column (raw sentences) and a ``humor`` column (labels).
    tokenizer: Tokenizer forwarded to the dataset.
    max_length: Padded / truncated sequence length forwarded to the dataset.
    batch_size: Number of examples per batch.
    shuffle: Whether the loader reshuffles the examples every epoch. Defaults to
      ``False``, which matches the behaviour before this parameter existed.
    num_workers: Number of worker processes used for loading. Defaults to 2, the
      previously hard-coded value.

  Returns:
    A ``torch.utils.data.DataLoader`` yielding dict batches.
  """
  # Bracket access avoids clashing with DataFrame attributes of the same name.
  ds = HumorDetectionDataset(
      texts=df['text'].to_numpy(),
      labels=df['humor'].to_numpy(),
      tokenizer=tokenizer,
      max_length=max_length,
      batch_size=batch_size
  )

  return DataLoader(
      ds,
      batch_size=batch_size,
      shuffle=shuffle,
      num_workers=num_workers
  )

In [None]:
MAX_LEN = 50      # every encoded text is padded / truncated to this many tokens
BATCH_SIZE = 16   # examples per batch, identical for all three splits

# One loader per split, built with identical settings, in train / val / test order.
# NOTE(review): no shuffling is requested here, so the training batches follow
# df_train's row order in every epoch.
train_data_loader, val_data_loader, test_data_loader = (
    create_data_loader(split_df, tokenizer, MAX_LEN, BATCH_SIZE)
    for split_df in (df_train, df_val, df_test)
)

## The classifier consists of:

*   Pre-Trained BERT Model
*   Dropout Layer (*p=0.1*)
*   Fully Connected Layer



In [None]:
class HumorClassifier(torch.nn.Module):
  """Pre-trained BERT encoder followed by dropout and a linear layer that emits class logits."""

  def __init__(self, n_classes):
    """Load the pre-trained encoder and create the classification head.

    Args:
      n_classes: Number of output logits produced per example.
    """
    super().__init__()
    encoder = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
    self.bert = encoder
    self.drop = torch.nn.Dropout(p=0.1)
    # The head maps the encoder's hidden size onto the requested number of classes.
    self.out = torch.nn.Linear(encoder.config.hidden_size, n_classes)

  def forward(self, input_ids, attention_mask):
    """Return the logits computed from the encoder's pooled output."""
    encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
    regularised = self.drop(encoded.pooler_output)
    return self.out(regularised)

In [None]:
# Binary classifier: two output logits (not humorous / humorous).
model = HumorClassifier(2)
# Prefer the first CUDA GPU, but fall back to the CPU so the notebook still runs
# on a runtime without an accelerator instead of failing in `.to(device)`.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


## Initialization of parameters for the model and training

*  Learning rate = 2e-5
*  Adam epsilon = 1e-6
*  Bias correction = True
*  Weight decay = 0.01
*  Warmup = 10% of total steps


In [None]:
from torch.nn import CrossEntropyLoss
# AdamW now comes from PyTorch: the implementation in `transformers` is deprecated,
# and its deprecation notice points to torch.optim.AdamW as the replacement.
from torch.optim import AdamW
from transformers import get_linear_schedule_with_warmup

EPOCHS = 6

# torch.optim.AdamW always applies Adam's bias correction, so there is no
# `correct_bias` switch to pass.
optimizer = AdamW(model.parameters(),
                  lr=2e-5,
                  eps=1e-6,
                  weight_decay=0.01)

# The scheduler is stepped once per batch, so the schedule length is batches * epochs.
total_steps = len(train_data_loader) * EPOCHS
# Linear warm-up over the first 10% of the steps, then linear decay.
warmup_steps = int(0.1 * total_steps)

scheduler = get_linear_schedule_with_warmup(
  optimizer,
  num_warmup_steps=warmup_steps,
  num_training_steps=total_steps
)

loss_fn = CrossEntropyLoss().to(device)

In [None]:
def train_epoch(model, data_loader, loss_fn, optimizer, device, scheduler, n_examples):
  """Run one training pass over ``data_loader``.

  Args:
    model: Module called as ``model(input_ids, attention_mask)`` and returning logits.
    data_loader: Iterable of dict batches with ``'input_ids'``, ``'attention_mask'``
      and ``'labels'`` tensors.
    loss_fn: Callable ``loss_fn(logits, labels)`` returning a scalar loss tensor.
    optimizer: Optimizer stepped once per batch.
    device: Device the batch tensors are moved to.
    scheduler: Learning-rate scheduler stepped once per batch, after the optimizer.
    n_examples: Denominator used for the accuracy (number of examples in the split).

  Returns:
    ``(accuracy, mean_loss)``: ``accuracy`` is a float64 scalar tensor on ``device``
    and ``mean_loss`` is the mean of the per-batch losses (NaN when the loader
    yields no batches).
  """
  model = model.train()

  losses = []
  # Tensor accumulator rather than a Python int, so the `.double()` below also
  # works when the loader yields no batches.
  correct_predictions = torch.zeros((), dtype=torch.long, device=device)

  for batch in data_loader:
    b_input_ids = batch['input_ids'].to(device)
    b_att_mask = batch['attention_mask'].to(device)
    b_labels = batch['labels'].to(device)

    model.zero_grad()

    logits = model(b_input_ids, b_att_mask)

    # The predicted class is the index of the largest logit.
    _, preds = torch.max(logits, dim=1)
    loss = loss_fn(logits, b_labels)

    correct_predictions += torch.sum(preds == b_labels)
    losses.append(loss.item())

    loss.backward()
    # Clip the global gradient norm to 1.0 before the update.
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
    optimizer.step()
    scheduler.step()
    optimizer.zero_grad()

  # np.mean([]) would emit a RuntimeWarning; return NaN explicitly instead.
  mean_loss = np.mean(losses) if losses else np.float64('nan')
  return correct_predictions.double() / n_examples, mean_loss

In [None]:
def eval_model(model, data_loader, loss_fn, device, n_examples):
  """Evaluate ``model`` on ``data_loader`` without updating any weights.

  Args:
    model: Module called as ``model(input_ids, attention_mask)`` and returning logits.
      It is switched to eval mode.
    data_loader: Iterable of dict batches with ``'input_ids'``, ``'attention_mask'``
      and ``'labels'`` tensors.
    loss_fn: Callable ``loss_fn(logits, labels)`` returning a scalar loss tensor.
    device: Device the batch tensors are moved to.
    n_examples: Denominator used for the accuracy (number of examples in the split).

  Returns:
    ``(accuracy, mean_loss)``: ``accuracy`` is a float64 scalar tensor on ``device``
    and ``mean_loss`` is the mean of the per-batch losses (NaN when the loader
    yields no batches).
  """
  model = model.eval()

  losses = []
  # Tensor accumulator rather than a Python int, so the `.double()` calls below
  # also work when the loader yields no batches.
  correct_predictions = torch.zeros((), dtype=torch.long, device=device)

  with torch.no_grad():
    for batch in data_loader:
      b_input_ids = batch['input_ids'].to(device)
      b_att_mask = batch['attention_mask'].to(device)
      b_labels = batch['labels'].to(device)

      logits = model(b_input_ids, b_att_mask)

      # The predicted class is the index of the largest logit.
      _, preds = torch.max(logits, dim=1)

      loss = loss_fn(logits, b_labels)

      losses.append(loss.item())
      correct_predictions += torch.sum(preds == b_labels)

  # Despite the label, the printed value is the count of correct predictions.
  print('Final preds: ', correct_predictions.double())
  # np.mean([]) would emit a RuntimeWarning; return NaN explicitly instead.
  mean_loss = np.mean(losses) if losses else np.float64('nan')
  return correct_predictions.double() / n_examples, mean_loss

In [None]:
def predict_labels(model, data_loader, device=None):
  """Collect the true and the predicted labels for every example in ``data_loader``.

  The model is put into eval mode for the duration of the call (so dropout cannot
  perturb the predictions) and its previous train/eval mode is restored afterwards.

  Args:
    model: Module called as ``model(input_ids, attention_mask)`` and returning logits.
    data_loader: Iterable of dict batches with ``'input_ids'``, ``'attention_mask'``
      and ``'labels'`` tensors.
    device: Device the batch tensors are moved to. When omitted, the device of the
      model's first parameter is used (CPU for a model without parameters).

  Returns:
    ``(real_labels, predicted_labels)``: two 1-D CPU tensors of equal length, in
    loader order. Both are empty when the loader yields no batches.
  """
  if device is None:
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

  predicted_batches = []
  real_batches = []

  was_training = model.training
  model.eval()
  try:
    with torch.no_grad():
      for batch in data_loader:
        b_input_ids = batch['input_ids'].to(device)
        b_att_mask = batch['attention_mask'].to(device)
        b_labels = batch['labels'].to(device)

        logits = model(b_input_ids, b_att_mask)

        # The predicted class is the index of the largest logit.
        _, preds = torch.max(logits, dim=1)

        predicted_batches.append(preds.cpu())
        real_batches.append(b_labels.cpu())
  finally:
    # Leave the model in whatever mode the caller had it in.
    model.train(was_training)

  if not predicted_batches:
    # torch.cat cannot concatenate an empty list.
    return torch.empty(0, dtype=torch.long), torch.empty(0, dtype=torch.long)

  return torch.cat(real_batches), torch.cat(predicted_batches)

# Training Loop

In [None]:
%%time

separator = '-' * 20

for epoch in range(EPOCHS):
  # Banner showing the 1-based epoch number.
  print(separator)
  print('Epoch: ', epoch+1)
  print(separator)

  # One full pass over the training split; accuracy is relative to the split size.
  train_acc, train_loss = train_epoch(
      model=model,
      data_loader=train_data_loader,
      loss_fn=loss_fn,
      optimizer=optimizer,
      device=device,
      scheduler=scheduler,
      n_examples=len(df_train),
  )

  print('Train loss: ', train_loss)
  print('Train acc: ', train_acc.item())

--------------------
Epoch:  1
--------------------
Train loss:  0.6745135576005966
Train acc:  0.6033966033966034
--------------------
Epoch:  2
--------------------
Train loss:  0.4975763942514147
Train acc:  0.7832167832167832
--------------------
Epoch:  3
--------------------
Train loss:  0.32147454955275095
Train acc:  0.8841158841158842
--------------------
Epoch:  4
--------------------
Train loss:  0.17788400205354843
Train acc:  0.949050949050949
--------------------
Epoch:  5
--------------------
Train loss:  0.12058858339866949
Train acc:  0.968031968031968
--------------------
Epoch:  6
--------------------
Train loss:  0.09693766955936713
Train acc:  0.973026973026973
CPU times: user 1min 5s, sys: 2.38 s, total: 1min 7s
Wall time: 1min 8s


# Evaluation

In [None]:
# Accuracy on the held-out test split; the mean loss returned alongside it is discarded.
n_test_examples = len(df_test)
test_acc, _ = eval_model(model, test_data_loader, loss_fn, device, n_test_examples)

test_acc.item()

Final preds:  tensor(183., device='cuda:0', dtype=torch.float64)


0.7820512820512822

In [None]:
from sklearn.metrics import f1_score, precision_recall_fscore_support

# Ground-truth and predicted labels for the whole test split.
real_labels, predicted_labels = predict_labels(model, test_data_loader)

# With average='binary' the second call reports (precision, recall, F1, support),
# so its third entry repeats the F1 score computed by the first call.
f1 = f1_score(y_true=real_labels, y_pred=predicted_labels)
prec_rec_f1 = precision_recall_fscore_support(
    y_true=real_labels,
    y_pred=predicted_labels,
    average='binary',
)

print('F1-Score: ', f1)
print(prec_rec_f1)

F1-Score:  0.7999999999999999
(0.8360655737704918, 0.7669172932330827, 0.7999999999999999, None)
