## Imports

In [11]:
try:
    import transformers
except ImportError as e:
    print('transformers not installed')
    print('Installing now...')
    !pip install -q git+https://github.com/huggingface/transformers.git
    pass

In [12]:
import reddit_bert_functions as fun
from bert_sarcasm_model import bert_for_sarcasm

In [13]:
import numpy as np
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt
from torch.utils.data import Dataset,DataLoader,TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,accuracy_score
import transformers
import json
from tqdm.notebook import tqdm
from transformers.utils.dummy_pt_objects import AutoModelForSequenceClassification
from transformers import AutoModelForTokenClassification,AutoConfig, AutoModel,AutoTokenizer,BertModel,BertConfig,AdamW, get_constant_schedule,BertForSequenceClassification,get_linear_schedule_with_warmup
import random
import torch
import torch.nn as nn
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay

## Params

In [4]:
# Checkpoint destination used during training. freeze_tuning reads this
# module-level name and forwards it to fun.train_reddit as `model_save_dir`.
# NOTE(review): absolute, user-specific cluster path -- change it before running
# this notebook in another environment.
checkpoint_save_dir = "/projectnb/dl523/students/nannkat/Project/training/cp_freeze.ckpt"

## Load data

In [5]:
# fun.split_reddit_data returns six values, bound here as the train / validation /
# test inputs and labels.
csv_path = 'train-balanced-sarcasm.csv'
x_train, y_train, x_val, y_val, x_test, y_test = fun.split_reddit_data(csv_path)

# Length limit handed to the dataset wrapper (presumably the per-comment token cap).
# 35 was picked from a word-count bar plot.
# NOTE(review): that plot is not part of this notebook -- presumably it lives in a
# separate exploration notebook; confirm.
max_length = 35

tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

# Wrap each split in the project's Reddit dataset, in train / val / test order.
reddit_train, reddit_val, reddit_test = (
    fun.Reddit(texts, labels, tokenizer, max_length)
    for texts, labels in ((x_train, y_train), (x_val, y_val), (x_test, y_test))
)

# Loader settings shared by all three splits.
batch_size = 64
num_workers = 2
trainloader, validationloader, testloader = fun.get_data_loaders(
    reddit_train, reddit_val, reddit_test, batch_size, num_workers
)

## Freeze tuning

In [6]:
def freeze_tuning(trainloader, validationloader, layer_counts, epoch_count, batch_size, learning_rate, device,
                  model_save_dir=None):
    """Compare fine-tuning runs that unfreeze different numbers of BERT layers.

    For every entry in ``layer_counts`` a fresh ``bert-base-uncased`` encoder is
    loaded, ``fun.freeze_by_children`` is applied with that entry, the encoder is
    wrapped in ``bert_for_sarcasm`` and trained with ``fun.train_reddit``. The
    smallest validation loss reported by each run is recorded.

    Args:
        trainloader: training data loader, forwarded to ``fun.train_reddit``.
        validationloader: validation data loader, forwarded to ``fun.train_reddit``.
        layer_counts: sized sequence of values passed one at a time to
            ``fun.freeze_by_children`` (the number of layers to unfreeze).
        epoch_count: number of epochs, forwarded to ``fun.train_reddit``.
        batch_size: batch size, forwarded to ``fun.train_reddit``.
        learning_rate: forwarded to ``fun.train_reddit`` as ``lr``.
        device: ``torch.device`` or device string to train on. ``None`` selects
            CUDA when available and CPU otherwise.
        model_save_dir: checkpoint path forwarded to ``fun.train_reddit``. ``None``
            (the default) falls back to the module-level ``checkpoint_save_dir``.

    Returns:
        Tuple ``(freeze_losses, best_loss, best_count)``: the per-configuration
        minimum validation losses in ``layer_counts`` order, the smallest of
        them, and the ``layer_counts`` entry that produced it.

    Raises:
        ValueError: if a training run reports no validation losses.

    Note:
        The same ``model_save_dir`` is passed for every configuration, so runs
        share one checkpoint location.
    """
    # Honour the caller's device; only auto-detect when none was given.
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    else:
        device = torch.device(device)
    if model_save_dir is None:
        model_save_dir = checkpoint_save_dir
    print(device)

    freeze_losses = []
    best_loss = float('inf')
    best_count = 0
    for i, num_layers in enumerate(layer_counts):

        print("Test {}/{}".format(i+1, len(layer_counts)))
        print("Number of layers to be unfrozen: {}".format(num_layers))
        print()

        # Start every configuration from the same pretrained weights.
        bert = BertModel.from_pretrained("bert-base-uncased")
        fun.freeze_by_children(bert, num_layers)
        sarcasm_model = bert_for_sarcasm(bert)

        # Train for the requested number of epochs.
        print(device)
        sarcasm_model.to(device)
        losses, val_losses = fun.train_reddit(sarcasm_model, trainloader, validationloader, epoch_count,
                                              batch_size, device, lr = learning_rate,
                                              model_save_dir = model_save_dir)

        if len(val_losses) == 0:
            raise ValueError(
                "train_reddit returned no validation losses for {} unfrozen layers".format(num_layers))
        curr_loss = min(val_losses)

        # Record this configuration's loss and update the best one seen so far.
        freeze_losses.append(curr_loss)
        if curr_loss < best_loss:
            best_loss = curr_loss
            best_count = num_layers

        print("Training done!")
        print("Loss for {} layers is {}. Best loss is {} for {} layers".format(num_layers, round(curr_loss, 4),
                                                                               round(best_loss, 4), best_count))
        print()

        # Drop this run's model so its memory can be reclaimed before the next
        # configuration builds a new one.
        del sarcasm_model, bert
        if device.type == 'cuda':
            torch.cuda.empty_cache()

    return freeze_losses, best_loss, best_count
        

In [7]:
# Sweep settings: every configuration trains with the same epoch count and
# learning rate; only the number of unfrozen layers varies.
epochs = 10
lr = 1e-5
layer_counts = [3, 6, 12]
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Run the sweep and keep the per-configuration losses plus the best result.
freeze_losses, best_loss, best_count = freeze_tuning(
    trainloader=trainloader,
    validationloader=validationloader,
    layer_counts=layer_counts,
    epoch_count=epochs,
    batch_size=batch_size,
    learning_rate=lr,
    device=device,
)

cuda
Test 1/3
Number of layers to be unfrozen: 3



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


The input model has 12 encoding layers
The model has 1 pooling layers
Bert layer 10 has been unfrozen
Bert layer 11 has been unfrozen
Bert layer 12 has been unfrozen
Pooling layer has been unfrozen
cuda
Epoch:  1
Elapsed [0:00:00], Iteration [1/12635]Loss: 0.6962
Elapsed [0:03:03], Iteration [2001/12635]Loss: 0.5439
Elapsed [0:06:05], Iteration [4001/12635]Loss: 0.5215
Elapsed [0:09:07], Iteration [6001/12635]Loss: 0.5708
Elapsed [0:12:10], Iteration [8001/12635]Loss: 0.4953
Elapsed [0:15:13], Iteration [10001/12635]Loss: 0.4374
Elapsed [0:18:15], Iteration [12001/12635]Loss: 0.6449
Validating.....
Decrease in validation loss. Early stop counter reset to 0.
New lowest loss, saving model...
Model checkpoint saved to /projectnb/dl523/students/nannkat/Project/training/cp_freeze.ckpt
Epoch 1. Training accuracy: 0.73. Validation accuracy: 0.7543.

Epoch:  2
Elapsed [0:20:17], Iteration [1/12635]Loss: 0.4831
Elapsed [0:23:19], Iteration [2001/12635]Loss: 0.5980
Elapsed [0:26:22], Iteration [

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


The input model has 12 encoding layers
The model has 1 pooling layers
Bert layer 7 has been unfrozen
Bert layer 8 has been unfrozen
Bert layer 9 has been unfrozen
Bert layer 10 has been unfrozen
Bert layer 11 has been unfrozen
Bert layer 12 has been unfrozen
Pooling layer has been unfrozen
cuda
Epoch:  1
Elapsed [0:00:00], Iteration [1/12635]Loss: 0.6856
Elapsed [0:03:23], Iteration [2001/12635]Loss: 0.5485
Elapsed [0:06:47], Iteration [4001/12635]Loss: 0.4537
Elapsed [0:10:10], Iteration [6001/12635]Loss: 0.5322
Elapsed [0:13:34], Iteration [8001/12635]Loss: 0.5506
Elapsed [0:16:57], Iteration [10001/12635]Loss: 0.4415
Elapsed [0:20:21], Iteration [12001/12635]Loss: 0.5106
Validating.....
Decrease in validation loss. Early stop counter reset to 0.
New lowest loss, saving model...
Model checkpoint saved to /projectnb/dl523/students/nannkat/Project/training/cp_freeze.ckpt
Epoch 1. Training accuracy: 0.7437. Validation accuracy: 0.7664.

Epoch:  2
Elapsed [0:22:30], Iteration [1/12635]Lo

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


The input model has 12 encoding layers
The model has 1 pooling layers
Bert layer 1 has been unfrozen
Bert layer 2 has been unfrozen
Bert layer 3 has been unfrozen
Bert layer 4 has been unfrozen
Bert layer 5 has been unfrozen
Bert layer 6 has been unfrozen
Bert layer 7 has been unfrozen
Bert layer 8 has been unfrozen
Bert layer 9 has been unfrozen
Bert layer 10 has been unfrozen
Bert layer 11 has been unfrozen
Bert layer 12 has been unfrozen
Pooling layer has been unfrozen
cuda
Epoch:  1
Elapsed [0:00:00], Iteration [1/12635]Loss: 0.6825
Elapsed [0:04:08], Iteration [2001/12635]Loss: 0.4932
Elapsed [0:08:16], Iteration [4001/12635]Loss: 0.4355
Elapsed [0:12:24], Iteration [6001/12635]Loss: 0.4586
Elapsed [0:16:32], Iteration [8001/12635]Loss: 0.4297
Elapsed [0:20:41], Iteration [10001/12635]Loss: 0.5614
Elapsed [0:24:49], Iteration [12001/12635]Loss: 0.3751
Validating.....
Decrease in validation loss. Early stop counter reset to 0.
New lowest loss, saving model...
Model checkpoint saved

In [10]:
# Print the recorded loss next to each layer count, in sweep order.
report_line = "Layers unfroze: {} Loss: {}"
for position, unfrozen_count in enumerate(layer_counts):
    print(report_line.format(unfrozen_count, freeze_losses[position]))

Layers unfroze: 3 Loss: 0.47319089902352685
Layers unfroze: 6 Loss: 0.464794858384736
Layers unfroze: 12 Loss: 0.4671874969065944
