# Test running pretrained BERT on reddit data
- Maybe start adding multi level attention blocks at the end?

## Imports

In [1]:
try:
    import transformers
except ImportError as e:
    print('transformers not installed')
    print('Installing now...')
    !pip install -q git+https://github.com/huggingface/transformers.git
    pass

In [2]:
import reddit_bert_functions as fun
from bert_sarcasm_model import bert_for_sarcasm

In [3]:
import numpy as np
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt
from torch.utils.data import Dataset,DataLoader,TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,accuracy_score
import transformers
import json
from tqdm.notebook import tqdm
from transformers.utils.dummy_pt_objects import AutoModelForSequenceClassification
from transformers import AutoModelForTokenClassification,AutoConfig, AutoModel,AutoTokenizer,BertModel,BertConfig,AdamW, get_constant_schedule,BertForSequenceClassification,get_linear_schedule_with_warmup
import random
import torch
import torch.nn as nn
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


## Import reddit dataset, split and create PyTorch data class objects

### Test data import

In [5]:
csv_path = 'train-balanced-sarcasm.csv'
x_train, y_train, x_val, y_val, x_test, y_test = fun.split_reddit_data(csv_path)

In [None]:
#word count per each sample
count = x_train.str.split().str.len()
plt.hist(count, bins=30, range=(0, 100))

In [None]:
#check if even split between labels
import seaborn as sns
fig, axes = plt.subplots(1, 3, figsize=(16, 7))
fig.suptitle('Distribution of Classes Reddit\n0:Non-Sarcastic vs. 1:Sarcastic accross data splits')
sns.countplot(ax=axes[0], x=y_train)
axes[0].set_title('Training Set')
sns.countplot(ax=axes[1], x=y_val)
axes[1].set_title('Validation Set')
sns.countplot(ax=axes[2], x=y_test)
axes[2].set_title('Test Set')


### Tokenize

In [6]:
max_length = 35  #based on word count bar plot above, 35 is reasonable

#tokenizer = AutoTokenizer.from_pretrained('bert-large-uncased')
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

reddit_train = fun.Reddit(x_train, y_train, tokenizer, max_length)
reddit_val = fun.Reddit(x_val, y_val, tokenizer, max_length)
reddit_test = fun.Reddit(x_test, y_test, tokenizer, max_length)

In [7]:
batch_size = 64
num_workers = 2
trainloader, validationloader, testloader = fun.get_data_loaders(reddit_train, reddit_val, reddit_test, batch_size, num_workers)


## Create BERT model

In [8]:
bertconfig = BertConfig()
#bert_large = BertModel.from_pretrained("bert-large-uncased")
bert = BertModel.from_pretrained("bert-base-uncased")
#freeze params
for param in bert.parameters():
    param.requires_grad = False

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [9]:
recover = False
model_save_dir = "/projectnb/dl523/students/nannkat/Project/training/cp_b128.ckpt"

sarcasm_model = bert_for_sarcasm(bert)

if recover:
    #load weights from checkpoint if applicable
    checkpoint = torch.load(model_save_dir)
    sarcasm_model.load_state_dict(checkpoint['model_state_dict'])

sarcasm_model.to(device)

bert_for_sarcasm(
  (input_model): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_a

In [16]:
print(sum(p.numel() for p in sarcasm_model.parameters() if p.requires_grad))
print(sum(p.numel() for p in sarcasm_model.parameters()))

295425
335437313


## Training

In [None]:
losses, val_losses = fun.train_reddit(sarcasm_model, trainloader, validationloader, 25, batch_size, device, lr = 1e-5)

Epoch:  1
Elapsed [0:00:00], Iteration [1/12635]Loss: 0.6896
Elapsed [0:01:19], Iteration [2001/12635]Loss: 0.6850
Elapsed [0:02:38], Iteration [4001/12635]Loss: 0.6773
Elapsed [0:03:57], Iteration [6001/12635]Loss: 0.6336
Elapsed [0:05:16], Iteration [8001/12635]Loss: 0.6103
Elapsed [0:06:35], Iteration [10001/12635]Loss: 0.5783


In [None]:
# Losses
epochs = range(1, len(losses) + 1)
plt.plot(epochs, losses, label='Training loss')
plt.plot(epochs, val_lossess, label='Validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and validation loss for Reddit Simple Bert')
plt.legend()
plt.show()

## Testing

In [14]:
def test_reddit(sarcasm_model, testloader, device):
    
    final_pred = []
    final_lab = []
    
    with torch.no_grad():
        test_correct = 0
        test_total = 0
        sarcasm_model.eval()
        for encodings, labels in testloader:
            inputs = encodings['input_ids']
            attention_mask = encodings['attention_mask']
            inputs, attention_mask = inputs.to(device), attention_mask.to(device)
            output = sarcasm_model(inputs,attention_mask).cpu()
            
            preds = torch.flatten(output)
            preds[preds<0.5] = 0
            preds[preds>=0.5] = 1
            test_correct += (preds == labels).float().sum().item()
            test_total += len(labels)
            
            preds = preds.numpy()
            l = labels.cpu().numpy()
            comp = l == preds
            final_lab.extend(l)
            final_pred.extend(preds)
    
    test_acc = round(test_correct/test_total,4)            
    print("Testing accuracy: {}".format(test_acc))
    
    return final_pred, final_lab, test_acc

In [16]:
recover = True
model_save_dir = "/projectnb/dl523/students/nannkat/Project/training/cp.ckpt"

sarcasm_model = bert_for_sarcasm(bert)

if recover:
    #load weights from checkpoint if applicable
    checkpoint = torch.load(model_save_dir)
    sarcasm_model.load_state_dict(checkpoint['model_state_dict'])

sarcasm_model.to(device)

bert_for_sarcasm(
  (input_model): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_a

In [17]:
final_pred, final_lab, test_acc = test_reddit(sarcasm_model, testloader, device)

Testing accuracy: 0.6797


### Stats

In [18]:
r_words = ["Sarcastic","Not Sarcastic"]
class_report = classification_report(final_lab,final_pred,target_names =r_words)
print('\033[1m'+'Precision, Recall and Accuracy for Reddit Data:\n')
print(class_report)

[1mPrecision, Recall and Accuracy for Reddit Data:

               precision    recall  f1-score   support

    Sarcastic       0.67      0.72      0.69     50540
Not Sarcastic       0.70      0.64      0.66     50537

     accuracy                           0.68    101077
    macro avg       0.68      0.68      0.68    101077
 weighted avg       0.68      0.68      0.68    101077



### Examples?