# Imports

In [None]:
!pip install transformers



In [None]:
from google.colab import drive

# Mount Google Drive into the Colab filesystem; the dataset CSV is read from
# a path underneath this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import numpy as np
import sklearn
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras 
import torch
import transformers
import seaborn as sns
import random

# Data Loading

In [None]:
# Location of the dataset CSV on the mounted Google Drive.
DATA_PATH = "/content/drive/MyDrive/dev/sentiment.csv"

# Column names are supplied explicitly; `category` is cast to integers.
df = (
    pd.read_csv(DATA_PATH, names=['text', 'humor', 'category'])
    .astype({'category': int})
)

In [None]:
from sklearn.model_selection import train_test_split

# Fixed seed so that Restart & Run All reproduces the same splits and the same
# torch/numpy random streams. 596 is the value that was drawn in the recorded
# run of this notebook; change it here to experiment with other seeds.
RANDOM_SEED = 596
print('random seed: ', RANDOM_SEED)
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

# 75% of the rows go to training; the remaining 25% hold-out is then divided
# 30/70 into validation and test.
df_train, df_holdout = train_test_split(df, test_size=0.25, random_state=RANDOM_SEED)
df_val, df_test = train_test_split(df_holdout, test_size=0.7, random_state=RANDOM_SEED)
df_train.shape, df_val.shape, df_test.shape

random seed:  596


((1001, 3), (100, 3), (234, 3))

# Model Creation & Setup

In [None]:
from transformers import BertTokenizer
# NOTE(review): the following cell repeats these same assignments verbatim;
# one of the two cells is redundant.
# Checkpoint name used for the tokenizer here and for the BERT encoder in the classifier.
PRE_TRAINED_MODEL_NAME = 'bert-base-cased'
tokenizer = BertTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)

In [None]:
# BertModel is imported here for the classifier defined further down.
from transformers import BertTokenizer, BertModel

# Name of the pre-trained checkpoint; the classifier loads its BERT encoder from the same name.
PRE_TRAINED_MODEL_NAME = 'bert-base-cased'
# Tokenizer loaded from that checkpoint name.
tokenizer = BertTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)

## The Dataset for sentiment analysis

*  Every tokenized text is padded or truncated to exactly `MAX_LEN` tokens



In [None]:
class SentimentAnalysisDataset(torch.utils.data.Dataset):
  """Map-style dataset that tokenizes one text per item for BERT classification.

  Args:
    texts: Sequence of raw texts, indexed positionally with ``texts[i]``.
    labels: Sequence of integer class ids aligned with ``texts``.
    tokenizer: Callable Hugging Face style tokenizer; it is invoked as
      ``tokenizer(text, ...)`` and must return a mapping with ``input_ids``
      and ``attention_mask`` tensors.
    max_length: Every encoded text is padded or truncated to this many tokens.
    batch_size: Ignored. Batching is done by the ``DataLoader``; the parameter
      is kept (now optional) only so existing callers keep working.
  """

  def __init__(self, texts, labels, tokenizer, max_length, batch_size=None):
    self.texts = texts
    self.labels = labels
    self.tokenizer = tokenizer
    self.max_len = max_length

  def __len__(self):
    """Return the number of examples."""
    return len(self.texts)

  def __getitem__(self, item):
    """Tokenize example ``item`` and return it as a dict of tensors.

    Returns:
      dict with keys ``text`` (the raw string), ``input_ids`` and
      ``attention_mask`` (1-D tensors after flattening the tokenizer output)
      and ``labels`` (scalar ``torch.long`` tensor).
    """
    text = str(self.texts[item])
    label = self.labels[item]

    # Calling the tokenizer directly is the supported replacement for the
    # deprecated `encode_plus`; the keyword arguments are unchanged.
    encoding = self.tokenizer(
        text,
        add_special_tokens=True,
        max_length=self.max_len,
        return_token_type_ids=False,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt'
    )

    return {
        'text' : text,
        # The tokenizer returns a leading batch axis of size 1; flatten drops it.
        'input_ids' : encoding['input_ids'].flatten(),
        'attention_mask' : encoding['attention_mask'].flatten(),
        'labels' : torch.tensor(label, dtype=torch.long)
    }

## The DataLoader for the SentimentAnalysis dataset

In [None]:
from torch.utils.data import DataLoader

def create_data_loader(df, tokenizer, max_length, batch_size, shuffle=False, num_workers=2):
  """Wrap a DataFrame split in a ``SentimentAnalysisDataset`` and a ``DataLoader``.

  Args:
    df: DataFrame with a ``text`` column (inputs) and a ``humor`` column (labels).
    tokenizer: Tokenizer handed to the dataset.
    max_length: Token length every example is padded or truncated to.
    batch_size: Number of examples per batch.
    shuffle: Reshuffle the examples every epoch. Defaults to ``False``, which
      keeps the previous fixed-order behaviour; pass ``True`` for a training
      loader.
    num_workers: Number of loader worker processes (previously fixed at 2).

  Returns:
    A ``torch.utils.data.DataLoader`` over the split.
  """
  # NOTE(review): labels are taken from `humor`, while the loading cell casts
  # `category` to int -- confirm `humor` is the intended target column.
  ds = SentimentAnalysisDataset(
      texts=df.text.to_numpy(),
      labels=df.humor.to_numpy(),
      tokenizer=tokenizer,
      max_length=max_length,
      batch_size=batch_size
  )

  return DataLoader(
      ds,
      batch_size=batch_size,
      shuffle=shuffle,
      num_workers=num_workers
  )

In [None]:
# Token length and mini-batch size shared by all three loaders.
MAX_LEN, BATCH_SIZE = 50, 16

# One loader per split, built with identical settings.
train_data_loader, val_data_loader, test_data_loader = (
    create_data_loader(split_df, tokenizer, MAX_LEN, BATCH_SIZE)
    for split_df in (df_train, df_val, df_test)
)

## The classifier consists of:

*   Pre-Trained BERT Model
*   Dropout Layer (*p=0.1*)
*   Fully Connected Layer



In [None]:
class SentimentClassifier(torch.nn.Module):
  """Pre-trained BERT encoder followed by dropout and a linear classification head."""

  def __init__(self, n_classes):
    """Load the encoder named by PRE_TRAINED_MODEL_NAME and size the head for ``n_classes``."""
    super().__init__()
    self.bert = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
    self.drop = torch.nn.Dropout(p=0.1)
    self.out = torch.nn.Linear(self.bert.config.hidden_size, n_classes)

  def forward(self, input_ids, attention_mask):
    """Return the classification logits for a batch of encoded inputs."""
    bert_outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
    # Pooled encoder output -> dropout -> linear head.
    return self.out(self.drop(bert_outputs.pooler_output))

In [None]:
# Number of target classes predicted by the classification head.
N_CLASSES = 3

# Use the first CUDA GPU when one is present, otherwise fall back to the CPU
# so the notebook still runs on a runtime without a GPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = SentimentClassifier(N_CLASSES).to(device)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
from torch.nn import CrossEntropyLoss
from torch.optim import AdamW
from transformers import get_linear_schedule_with_warmup

EPOCHS = 6

# `transformers.AdamW` is deprecated in favour of the PyTorch implementation
# and is no longer shipped by recent transformers releases, so the optimizer
# comes from `torch.optim`. PyTorch's AdamW always applies bias correction,
# which is what `correct_bias=True` requested before.
optimizer = AdamW(model.parameters(),
                  lr=2e-5,
                  eps=1e-6,
                  weight_decay=0.01)

# The scheduler is stepped once per batch, so the schedule length is the
# number of batches per epoch times the number of epochs; the first 10% of
# those steps are warm-up.
total_steps = len(train_data_loader) * EPOCHS
warmup_steps = int(0.1 * total_steps)

scheduler = get_linear_schedule_with_warmup(
  optimizer,
  num_warmup_steps=warmup_steps,
  num_training_steps=total_steps
)

loss_fn = CrossEntropyLoss().to(device)

In [None]:
def train_epoch(model, data_loader, loss_fn, optimizer, device, scheduler, n_examples):
  """Run one training pass over ``data_loader``.

  For every batch: forward pass, loss, backward pass, gradient-norm clipping
  to 1.0, one optimizer step and one scheduler step.

  Args:
    model: Module called as ``model(input_ids, attention_mask)`` returning logits.
    data_loader: Iterable of dicts with ``input_ids``, ``attention_mask`` and
      ``labels`` tensors.
    loss_fn: Callable ``loss_fn(logits, labels)`` returning a scalar tensor.
    optimizer: Optimizer updating the model parameters.
    device: Device the batch tensors are moved to.
    scheduler: Learning-rate scheduler, stepped once per batch.
    n_examples: Denominator used for the accuracy.

  Returns:
    Tuple ``(accuracy, mean_loss)``: ``accuracy`` is a float64 tensor equal to
    correct predictions / ``n_examples``; ``mean_loss`` is the mean of the
    per-batch losses, or NaN when the loader produced no batches.
  """
  model = model.train()

  losses = []
  # A tensor accumulator (instead of the int 0) keeps `.double()` valid even
  # when the loader yields no batches.
  correct_predictions = torch.zeros((), dtype=torch.long, device=device)

  for batch in data_loader:
    b_input_ids = batch['input_ids'].to(device)
    b_att_mask = batch['attention_mask'].to(device)
    b_labels = batch['labels'].to(device)

    model.zero_grad()

    logits = model(b_input_ids, b_att_mask)

    # Predicted class = index of the largest logit.
    _, preds = torch.max(logits, dim=1)
    loss = loss_fn(logits, b_labels)

    correct_predictions += torch.sum(preds == b_labels)
    losses.append(loss.item())

    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
    optimizer.step()
    scheduler.step()
    optimizer.zero_grad()

  mean_loss = np.mean(losses) if losses else float('nan')
  return correct_predictions.double() / n_examples, mean_loss

In [None]:
def eval_model(model, data_loader, loss_fn, device, n_examples):
  """Evaluate ``model`` on ``data_loader`` without computing gradients.

  Args:
    model: Module called as ``model(input_ids, attention_mask)`` returning logits.
    data_loader: Iterable of dicts with ``input_ids``, ``attention_mask`` and
      ``labels`` tensors.
    loss_fn: Callable ``loss_fn(logits, labels)`` returning a scalar tensor.
    device: Device the batch tensors are moved to.
    n_examples: Denominator used for the accuracy.

  Returns:
    Tuple ``(accuracy, mean_loss)``: ``accuracy`` is a float64 tensor equal to
    correct predictions / ``n_examples``; ``mean_loss`` is the mean of the
    per-batch losses, or NaN when the loader produced no batches.

  Side effects:
    Switches the model to eval mode and prints the number of correct predictions.
  """
  model = model.eval()

  losses = []
  # A tensor accumulator (instead of the int 0) keeps `.double()` valid even
  # when the loader yields no batches.
  correct_predictions = torch.zeros((), dtype=torch.long, device=device)

  with torch.no_grad():
    for batch in data_loader:
      b_input_ids = batch['input_ids'].to(device)
      b_att_mask = batch['attention_mask'].to(device)
      b_labels = batch['labels'].to(device)

      logits = model(b_input_ids, b_att_mask)

      # Predicted class = index of the largest logit.
      _, preds = torch.max(logits, dim=1)

      loss = loss_fn(logits, b_labels)

      losses.append(loss.item())
      correct_predictions += torch.sum(preds == b_labels)

  print('Final preds: ', correct_predictions.double())
  mean_loss = np.mean(losses) if losses else float('nan')
  return correct_predictions.double() / n_examples, mean_loss

In [None]:
def predict_labels(model, data_loader, device=None):
  """Collect true and predicted class ids for every example in ``data_loader``.

  Args:
    model: Module called as ``model(input_ids, attention_mask)`` returning logits.
    data_loader: Iterable of dicts with ``input_ids``, ``attention_mask`` and
      ``labels`` tensors.
    device: Device the batch tensors are moved to. Defaults to the device of
      the model's first parameter (CPU if the model has no parameters).

  Returns:
    Tuple ``(real_labels, predicted_labels)`` of 1-D CPU tensors with one
    entry per example; both are empty when the loader yields no batches.
  """
  if device is None:
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

  predicted_batches = []
  real_batches = []

  # Predictions must be made in eval mode (dropout off) to be deterministic;
  # the caller's train/eval mode is restored afterwards.
  was_training = model.training
  model.eval()
  try:
    with torch.no_grad():
      for batch in data_loader:
        b_input_ids = batch['input_ids'].to(device)
        b_att_mask = batch['attention_mask'].to(device)
        b_labels = batch['labels'].to(device)

        logits = model(b_input_ids, b_att_mask)

        # Predicted class = index of the largest logit.
        _, preds = torch.max(logits, dim=1)

        predicted_batches.append(preds.cpu())
        real_batches.append(b_labels.cpu())
  finally:
    model.train(was_training)

  if not predicted_batches:
    return torch.empty(0, dtype=torch.long), torch.empty(0, dtype=torch.long)

  return torch.cat(real_batches), torch.cat(predicted_batches)

# Training Loop

In [None]:
%%time

for epoch in range(EPOCHS):
  print('-' * 20)
  print('Epoch: ', epoch+1)
  print('-' * 20)

  train_acc, train_loss = train_epoch(model,
                                      train_data_loader,
                                      loss_fn,
                                      optimizer,
                                      device,
                                      scheduler,
                                      len(df_train))

  print('Train loss: ', train_loss)
  print('Train acc: ', train_acc.item())

  # Score the validation split after every epoch so over-fitting is visible
  # while training; previously the validation loader was built but never used.
  # eval_model also prints its own 'Final preds' line.
  val_acc, val_loss = eval_model(model,
                                 val_data_loader,
                                 loss_fn,
                                 device,
                                 len(df_val))

  print('Val loss: ', val_loss)
  print('Val acc: ', val_acc.item())

--------------------
Epoch:  1
--------------------
Train loss:  0.8486177240099225
Train acc:  0.5724275724275725
--------------------
Epoch:  2
--------------------
Train loss:  0.4819904636769068
Train acc:  0.7832167832167832
--------------------
Epoch:  3
--------------------
Train loss:  0.3164437946582597
Train acc:  0.8841158841158842
--------------------
Epoch:  4
--------------------
Train loss:  0.1801791617439853
Train acc:  0.9370629370629371
--------------------
Epoch:  5
--------------------
Train loss:  0.0804999265967617
Train acc:  0.981018981018981
--------------------
Epoch:  6
--------------------
Train loss:  0.06113733179749004
Train acc:  0.985014985014985
CPU times: user 1min 3s, sys: 2.35 s, total: 1min 6s
Wall time: 1min 7s


# Evaluation

In [None]:
# Score the trained model on the test split; only the accuracy is kept.
test_acc, _ = eval_model(model, test_data_loader, loss_fn, device, n_examples=len(df_test))

# Shown as the cell's output.
test_acc.item()

Final preds:  tensor(174., device='cuda:0', dtype=torch.float64)


0.7435897435897436

In [None]:
from sklearn.metrics import f1_score, precision_recall_fscore_support

true_labels, predicted_labels = predict_labels(model, test_data_loader)

# F1 under the three averaging schemes, computed in the order weighted, micro, macro.
f1_w, f1_micro, f1_macro = (
    f1_score(true_labels, predicted_labels, average=averaging)
    for averaging in ('weighted', 'micro', 'macro')
)
# Weighted precision / recall / F1 / support in a single tuple.
prec_rec_f1 = precision_recall_fscore_support(
    true_labels,
    predicted_labels,
    average='weighted',
)

print('F1-Score Weighted: ', f1_w)
print('F1-Score Micro: ', f1_micro)
print('F1-Score Macro: ', f1_macro)
print(prec_rec_f1)

F1-Score Weighted:  0.7414613845355712
F1-Score Micro:  0.7435897435897437
F1-Score Macro:  0.7398651152449418
(0.746448023767221, 0.7435897435897436, 0.7414613845355712, None)
