In [321]:
# ! pip install sentence-transformers

In [322]:
import torch
from torch import nn
from datasets import load_dataset
from transformers import (
    RobertaTokenizer,
    RobertaForSequenceClassification,
    TrainingArguments,
    Trainer,
    AutoConfig,
)
from torch.optim.lr_scheduler import ReduceLROnPlateau
# from sklearnex import patch_sklearn
# patch_sklearn()
# from transformers import T5Tokenizer, T5ForConditionalGeneration, Trainer, TrainingArguments
from torch.utils.data import Dataset, DataLoader

import gc
import json
import pickle
import numpy as np    
import pandas as pd
# from tqdm.notebook import tqdm
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score, classification_report
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

In [323]:
# /kaggle/input/emotion-dataset/train_file.json

In [324]:
# PATH="/kaggle/input/"
# OUTPATH='/kaggle/working/'
# PATH="D:\\ghd\\NLP-Assignments\\Assignment4\\data\\"
PATH="data/"
OUTPATH='output/'
BATCH_SIZE=1024
MAX_LENGTH=256
MAX_UTTERANCES=10
ROBERTA_LABELS=100
EPOCHS=10
EOS='</s>'
SEP='[SEP]'
# SOS='o'

torch.manual_seed(0)

model_id = "roberta-base"

In [325]:
device = ""
if torch.cuda.is_available():
    device = "cuda"
    print(torch.cuda.get_device_name(0))
else:
    device = "cpu"

device = torch.device(device)
device

NVIDIA GeForce RTX 3060 Laptop GPU


device(type='cuda')

In [326]:

def get_train(data):
    x=[]
    y=[]
    for row in data:
        speakers=row['speakers']
        emotions=row['emotions']
        utterances=row['utterances']
        triggers=row['triggers']
        # print(speakers,emotions,utterances)
        # trim in front
        while len(utterances)>MAX_UTTERANCES:
            utterances.pop(0)
            speakers.pop(0)
            emotions.pop(0)
            triggers.pop(0)
        
        # print(speakers,emotions,utterances)
        # pad
        while len(utterances)<MAX_UTTERANCES:
            utterances.insert(0,EOS)
            speakers.insert(0,EOS)
            emotions.insert(0,EOS)
            triggers.insert(0,0)
        # print(speakers[1],emotions[1],utterances[1])
        t=""
        s1=[]
        s2=[]        
        for i in range(10):
            if(utterances[i]==EOS):
                s1.append(utterances[i])
                s2.append(utterances[i])
                t=EOS
                continue
            if(t==EOS):
                t=""
            # text.append([t,f"{speakers[-1]}:{utterances[-1]}:{emotions[-1]}"])
            s1.append(t)
            s2.append(f"{speakers[-1]}:{utterances[-1]}:{emotions[-1]}")

            t+=f"{speakers[i]}:{utterances[i]}:{emotions[i]} "
        x.append([s1,s2])
        trigs=[]
        for i in triggers:
            if(i is None):
                trigs.append(0)
            else:
                trigs.append(int(i))
        y.append(trigs)
    
    return x,y

def get_eval(data):
    x=[]
    y=[]
    max_len=max([len(i["triggers"]) for i in data])
    # left pad triggers

    for row in data:
        speakers=row['speakers']
        emotions=row['emotions']
        utterances=row['utterances']
        triggers=row['triggers']
        # trim in front
        while len(triggers)>max_len:
            triggers.pop(0)
        # left pad triggers
        while len(triggers)<max_len:
            triggers.insert(0,0)


        # print(speakers,emotions,utterances)
        # trim in front
        while len(utterances)>MAX_UTTERANCES:
            utterances.pop(0)
            speakers.pop(0)
            emotions.pop(0)
        
        # print(speakers,emotions,utterances)
        # pad
        while len(utterances)<MAX_UTTERANCES:
            utterances.insert(0,EOS)
            speakers.insert(0,EOS)
            emotions.insert(0,EOS)
        # print(speakers[1],emotions[1],utterances[1])
        s1=[]
        s2=[]
        t=""
        for i in range(10):
            if(utterances[i]==EOS):
                s1.append(utterances[i])
                s2.append(utterances[i])
                t=EOS
                continue
            if(t==EOS):
                t=""
            # text.append([t,f"{speakers[-1]}:{utterances[-1]}:{emotions[-1]}"])
            s1.append(t)
            s2.append(f"{speakers[-1]}:{utterances[-1]}:{emotions[-1]}")
            t+=f"{speakers[i]}:{utterances[i]}:{emotions[i]} "
        x.append([s1,s2])
        trigs=[]
        for i in triggers:
            if(i is None):
                trigs.append(0)
            else:
                trigs.append(int(i))
        y.append(trigs)
    
    return x,y


def load_data(PATH):
    with open(PATH) as f:
        data = json.load(f)
        return data
    
label_encoding = {
    "S":0,
    "surprise":1,
    "fear":2,
    "neutral":3,
    "sadness":4,
    "disgust":5,
    "anger":6,
    "joy":7
}


In [327]:

data = load_data(PATH+"train_file.json")
data_val = load_data(PATH+"val_file.json")
x_train,y_train = get_train(data)
x_val,y_val = get_eval(data_val)

In [328]:
x_train[0],y_train[0]

([['</s>',
   '</s>',
   '</s>',
   '</s>',
   '</s>',
   '',
   'Phoebe:You-you\x85you had sex with Ursula?!:surprise ',
   'Phoebe:You-you\x85you had sex with Ursula?!:surprise Eric:Uh, a little bit. She-she-she walked in and I thought she was you and I kissed her and:fear ',
   "Phoebe:You-you\x85you had sex with Ursula?!:surprise Eric:Uh, a little bit. She-she-she walked in and I thought she was you and I kissed her and:fear Phoebe:You didn't notice she was wearing different clothes?!:surprise ",
   "Phoebe:You-you\x85you had sex with Ursula?!:surprise Eric:Uh, a little bit. She-she-she walked in and I thought she was you and I kissed her and:fear Phoebe:You didn't notice she was wearing different clothes?!:surprise Eric:Well I was just so excited to see you.:sadness "],
  ['</s>',
   '</s>',
   '</s>',
   '</s>',
   '</s>',
   "Phoebe:Oh. Ew! Ew! Ew! Ugh! Y'know what? This is too weird.:disgust",
   "Phoebe:Oh. Ew! Ew! Ew! Ugh! Y'know what? This is too weird.:disgust",
   "Phoebe:

In [329]:
max_len=0
for i in range(len(x_train)):
    for j in range(len(x_train[i][0])):
        max_len=max(max_len,len(x_train[i][0][j].split())+len(x_train[i][1][j].split()))
print(max_len)


168


In [330]:
x_val[0],y_val[0]

([['',
   "Joey:Y'know what you should do, you should get her one of those um, barium enemas.:neutral ",
   "Joey:Y'know what you should do, you should get her one of those um, barium enemas.:neutral Joey:Those are dead serious.:neutral ",
   "Joey:Y'know what you should do, you should get her one of those um, barium enemas.:neutral Joey:Those are dead serious.:neutral Chandler:All right. Look, I'm gonna go in here, and you don't buy me anything ever.:anger ",
   "Joey:Y'know what you should do, you should get her one of those um, barium enemas.:neutral Joey:Those are dead serious.:neutral Chandler:All right. Look, I'm gonna go in here, and you don't buy me anything ever.:anger Joey:No, no, you can't, you can't, okay, you can't, you can't buy her pearls, you just can't, you can't, you can't.:fear ",
   "Joey:Y'know what you should do, you should get her one of those um, barium enemas.:neutral Joey:Those are dead serious.:neutral Chandler:All right. Look, I'm gonna go in here, and you d

In [331]:
# np.array(x_train).shape

In [332]:
len(x_train),len(y_train),len(x_val),len(y_val)

(6740, 6740, 843, 843)

In [333]:
tokenizer = RobertaTokenizer.from_pretrained(model_id)

In [334]:
# class

In [335]:
# len(x_train)
# len(x_train[0])
# x_train = np.array(x_train)
# x_train
y_train[0],y_train[4]

([0, 0, 0, 0, 0, 1, 1, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 1])

In [336]:
class Dataset(Dataset):
    def __init__(self, data, tokenizer,labels):
        self.data = data
        self.tokenizer = tokenizer
        self.labels = labels
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self,idx):
#         print(self.data[idx])
#         dat = "".join(self.data[idx])
        s1,s2 = self.data[idx][0],self.data[idx][1]
        s1_tokenized = self.tokenizer(s1,max_length=MAX_LENGTH,truncation=True,return_tensors="pt",padding="max_length")
        s2_tokenized = self.tokenizer(s2,max_length=MAX_LENGTH,truncation=True,return_tensors="pt",padding="max_length")
        input_ids = torch.cat((s1_tokenized["input_ids"],s2_tokenized["input_ids"]),dim=1).squeeze(0)
        # x_tokenized = self.tokenizer(self.data[idx],max_length=MAX_LENGTH,truncation=True,return_tensors="pt",padding="max_length")
        # input_ids = x_tokenized["input_ids"].squeeze(0)
        # attention_mask = x_tokenized["attention_mask"].squeeze(0)
        return input_ids,torch.tensor(self.labels[idx]).squeeze(0)

In [337]:
train_dataset = Dataset(x_train,tokenizer,y_train)
val_dataset = Dataset(x_val,tokenizer,y_val)

In [338]:
train_dataset[0][0].shape

torch.Size([10, 512])

In [339]:
from torch.utils.data import DataLoader
train_dataloader=DataLoader(train_dataset,batch_size=BATCH_SIZE,shuffle=False)
val_dataloader=DataLoader(val_dataset,batch_size=BATCH_SIZE,shuffle=False)

In [340]:
# class my_model(nn.Module):
#     def __init__(self):
#         super(my_model,self).__init__()
# #         self.lstm_model = torch.nn.
# #         self.roberta_model = roberta_model
# #         self.num_labels = num_labels
#         self.LSTM = nn.LSTM(input_size=128,hidden_size=2, batch_first=True)
#         self.linear = nn.Linear(2,8)
#         self.softmax = nn.Softmax()
#     def forward(self,input_ids):
# #         self.lstm = torch.nn
#         lstm_output,_    =self.LSTM(input_ids)
# #         print(lstm_output)
# #         linear_output = self.linear(lstm_output)
#         linear_outputs = []
#         for i in range(lstm_output.size(1)):
#             linear_output = self.linear(lstm_output[:,i,:])
#             softmax_output = self.softmax(linear_output)
#             softmax_outputs.append(softmax_output.unsqueeze(1))
        
        
        
#         # Concatenate the list of linear outputs along the time step dimension
# #         linear_outputs_tensor = torch.cat(linear_outputs, dim=1)
#         return torch.tensor(softmax_outputs,device=device)

In [341]:
# class MyModel(nn.Module):
#     def __init__(self, input_size=MAX_LENGTH, hidden_size=MAX_UTTERANCES, num_linear_layers=MAX_UTTERANCES, linear_size=2):
#         super(MyModel, self).__init__()
        

#         self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True, bidirectional=True, num_layers=3)
        
#         self.linear_layers1 = nn.ModuleList([nn.Linear(hidden_size*2, 128) for _ in range(num_linear_layers)])
#         self.relu = nn.ReLU()
#         self.linear_layers2 = nn.ModuleList([nn.Linear(128, linear_size) for _ in range(num_linear_layers)])
#         # self.softmax_layers = nn.ModuleList([nn.Softmax(dim=1) for _ in range(num_linear_layers)])
#         self.sigmoid = nn.Sigmoid()
#     def forward(self, x):
 
#         lstm_out, _ = self.lstm(x)
#         # print("Output shape of lstm = ",lstm_out.shape)

#         softmax_outputs = []
#         for i in range(len(self.linear_layers1)):
#             linear_output1 = self.linear_layers1[i](lstm_out[:, -1, :])

#             relu_output = self.relu(linear_output1)

#             linear_output2= self.linear_layers2[i](relu_output)


#             # softmax_output = self.softmax_layers[i](linear_output2)
#             sigmoid_output = self.sigmoid(linear_output2)
#             softmax_outputs.append(sigmoid_output.float())
           
#         stacked_tensor = torch.stack(softmax_outputs, dim=1)
#         return stacked_tensor
# #         return stacked_tensor.clone().detach().requires_grad_(True)


In [342]:
import torch
import torch.nn as nn

class MyModel(nn.Module):
    def __init__(self, input_size=MAX_LENGTH*2, lstm_hidden_size=256, transformer_hidden_size=512, num_linear_layers=MAX_UTTERANCES, linear_size=2):
        super(MyModel, self).__init__()
        
        self.lstm = nn.LSTM(input_size, lstm_hidden_size, batch_first=True, bidirectional=True, num_layers=3)
        
        self.transformer_layer = nn.TransformerEncoderLayer(d_model=lstm_hidden_size*2, nhead=4, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(self.transformer_layer, num_layers=2)
        
        self.linear_layers1 = nn.ModuleList([nn.Linear(lstm_hidden_size*2, 128) for _ in range(num_linear_layers)])
        self.relu = nn.Tanh()
        self.linear_layers2 = nn.ModuleList([nn.Linear(128, linear_size) for _ in range(num_linear_layers)])
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        
        # Permute to fit transformer input shape
        lstm_out = lstm_out.permute(1, 0, 2)
        
        transformer_out = self.transformer_encoder(lstm_out)
        
        # Permute back to original shape
        transformer_out = transformer_out.permute(1, 0, 2)
        
        softmax_outputs = []
        for i in range(len(self.linear_layers1)):
            linear_output1 = self.linear_layers1[i](transformer_out[:, -1, :])
            relu_output = self.relu(linear_output1)
            linear_output2 = self.linear_layers2[i](relu_output)
            sigmoid_output = self.sigmoid(linear_output2)
            softmax_outputs.append(sigmoid_output.float())
           
        stacked_tensor = torch.stack(softmax_outputs, dim=1)
        return stacked_tensor


In [343]:
model = MyModel()
model.to(device)


MyModel(
  (lstm): LSTM(512, 256, num_layers=3, batch_first=True, bidirectional=True)
  (transformer_layer): TransformerEncoderLayer(
    (self_attn): MultiheadAttention(
      (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
    )
    (linear1): Linear(in_features=512, out_features=2048, bias=True)
    (dropout): Dropout(p=0.1, inplace=False)
    (linear2): Linear(in_features=2048, out_features=512, bias=True)
    (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
    (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
    (dropout1): Dropout(p=0.1, inplace=False)
    (dropout2): Dropout(p=0.1, inplace=False)
  )
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-1): 2 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
        )
        (linear1): Linear(in_features=512, out_fea

In [344]:
import torch.nn as nn

criterion = nn.functional.cross_entropy

def metrics(y_true, y_pred):
    acc=accuracy_score(y_true, y_pred)
    f1_macro=f1_score(y_true, y_pred, average='macro')  
    f1_scores=f1_score(y_true, y_pred, average="weighted")
    return acc, f1_macro, f1_scores


In [345]:
predicted=torch.tensor([[0,1],[0.2,0.8],[0.7,0.3],[0.1,0.9],[1,0]]).to(device)
labels=torch.tensor([1,1,0,0,0]).to(device)
criterion(predicted, labels, weight=torch.tensor([0.1,0.9]).to(device)),criterion(predicted, labels)

(tensor(0.4169, device='cuda:0'), tensor(0.5496, device='cuda:0'))

In [346]:
def train_epoch(model, optimizer,epoch,val_dataloader,train_dataloader,metrics,weights,scheduler):
    model.train()
    losses = 0
    preds = []
    actuals = []
    for batch in tqdm(train_dataloader, desc=f"Epoch:{epoch}",total=len(train_dataloader), leave=False):

        input_ids = batch[0].to(device)
        # attention_mask = batch[1].to(device)
        labels = batch[1].to(device)

        batch_size = input_ids.size(0)
        utt_size = input_ids.size(1)

        outputs  = model(input_ids.float())

        outputs=outputs.view(batch_size*utt_size,-1)
        labels=labels.view(batch_size*utt_size)
        
        # print(outputs.shape,labels.shape)
#         print(outputs)
        loss = criterion(outputs, labels, weight=weights)

        loss = loss.to(device)
#         print("Loss = ",loss.item)
        losses+=loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # for name, param in model.named_parameters():
        #     if param.grad is not None:
        #         print(f'Gradient - {name}: {param.grad.norm()}')
    
        _, pred = torch.max(outputs, 1)

        # Flatten the predictions and targets
        predicted_flat = pred.view(-1)
        targets_flat = labels.view(-1)

        preds.extend(predicted_flat.cpu().numpy())
        actuals.extend(targets_flat.cpu().numpy())

        del input_ids
        # del attention_mask
        del labels
        del outputs

        gc.collect()
        torch.cuda.empty_cache()
        # break
#     print("BATCH FINISHED")
    x = losses /len(train_dataloader)
    tqdm.write(f"Epoch:{epoch}, Avg Train Loss: {x}")
    acc,macro,f1 = metrics(actuals,preds)
    tqdm.write(f"Avg Training Accuracy: {acc}, F1 Macro: {macro}, F1 Scores: {f1}")
    val_loss,acc,macro,f1,_ = evaluate(model,val_dataloader,"Validation",metrics,weights)
    scheduler.step(val_loss)
    gc.collect()
    torch.cuda.empty_cache()
    return x




def evaluate(model,val_dataloader,name,metric,weights):

    with torch.no_grad():
        model.eval()
        losses = 0

        preds = []
        actuals = []
        for batch in tqdm(val_dataloader, desc=name,total=len(val_dataloader), leave=False):

                input_ids = batch[0].to(device)
                # attention_mask = batch[1].to(device)
                labels = batch[1].to(device)

                batch_size = input_ids.size(0)
                utt_size = labels.size(1)

                outputs  = model(input_ids.float())

                size_diff = labels.size(1) - outputs.size(1)

                paddings=torch.zeros(batch_size,size_diff,2).to(device)


                # print(outputs.shape,paddings.shape)
                predicted = torch.cat((outputs,paddings),dim=1)

                predicted=predicted.view(batch_size*utt_size,-1)
                labels=labels.view(batch_size*utt_size)
                
                # print(outputs.shape,labels.shape)
                # loss = criterion(predicted, labels)
                loss = criterion(predicted, labels, weight=weights)


                loss = loss.to(device)
        #         print("Loss = ",loss.item)
                losses+=loss.item()
    
                _, pred = torch.max(predicted, 1)

                # Flatten the predictions and targets
                predicted_flat = pred.view(-1)
                targets_flat = labels.view(-1)

                preds.extend(predicted_flat.cpu().numpy())
                actuals.extend(targets_flat.cpu().numpy())

                del input_ids
                # del attention_mask
                del labels
                del predicted
                # del predicted_labels
                gc.collect()
                torch.cuda.empty_cache()
            # break
        # print(losses,len(val_dataloader))
        x = losses / len(val_dataloader)
        tqdm.write(f"Avg {name} Loss: {x}")
        gc.collect()
        torch.cuda.empty_cache()
        acc,macro,f1 = metric(actuals,preds)
        tqdm.write(f"Avg {name} Accuracy: {acc}, F1 Macro: {macro}, F1 Scores: {f1}")
        return x,acc,macro,f1,preds

In [347]:
for name, param in model.named_parameters():
    print(f'Parameter - {name}: {param.shape}, grad = {param.requires_grad}')

Parameter - lstm.weight_ih_l0: torch.Size([1024, 512]), grad = True
Parameter - lstm.weight_hh_l0: torch.Size([1024, 256]), grad = True
Parameter - lstm.bias_ih_l0: torch.Size([1024]), grad = True
Parameter - lstm.bias_hh_l0: torch.Size([1024]), grad = True
Parameter - lstm.weight_ih_l0_reverse: torch.Size([1024, 512]), grad = True
Parameter - lstm.weight_hh_l0_reverse: torch.Size([1024, 256]), grad = True
Parameter - lstm.bias_ih_l0_reverse: torch.Size([1024]), grad = True
Parameter - lstm.bias_hh_l0_reverse: torch.Size([1024]), grad = True
Parameter - lstm.weight_ih_l1: torch.Size([1024, 512]), grad = True
Parameter - lstm.weight_hh_l1: torch.Size([1024, 256]), grad = True
Parameter - lstm.bias_ih_l1: torch.Size([1024]), grad = True
Parameter - lstm.bias_hh_l1: torch.Size([1024]), grad = True
Parameter - lstm.weight_ih_l1_reverse: torch.Size([1024, 512]), grad = True
Parameter - lstm.weight_hh_l1_reverse: torch.Size([1024, 256]), grad = True
Parameter - lstm.bias_ih_l1_reverse: torch

In [348]:
# optim = torch.optim.AdamW(model.parameters(),lr=1e-1,weight_decay=0.01)
optim = torch.optim.SGD(model.parameters(),lr=1e1,weight_decay=0.01)
scheduler = ReduceLROnPlateau(optim,mode='min', patience=3, factor=0.5)
# scheduler = None

In [349]:
a=0.15
weights=torch.tensor([a,1-a]).to(device)
for i in range(1,EPOCHS+1):
    train_epoch(model,optim,i,val_dataloader,train_dataloader,metrics,weights,scheduler)
    # evaluate(model,val_dataloader,"Validation",metrics)

                                                      

Epoch:1, Avg Train Loss: 0.5343495649950845
Avg Training Accuracy: 0.7872848664688428, F1 Macro: 0.6714782433729558, F1 Scores: 0.8128470001071957


                                                         

Avg Validation Loss: 0.6529417634010315
Avg Validation Accuracy: 0.8561555521171799, F1 Macro: 0.46196381801881586, F1 Scores: 0.8699080836936413


                                                      

Epoch:2, Avg Train Loss: 0.5094197562762669
Avg Training Accuracy: 0.8609643916913946, F1 Macro: 0.7524099019858963, F1 Scores: 0.871231449009437


                                                         

Avg Validation Loss: 0.6575449109077454
Avg Validation Accuracy: 0.8561555521171799, F1 Macro: 0.46196381801881586, F1 Scores: 0.8699080836936413


                                                      

Epoch:3, Avg Train Loss: 0.5134858744485038
Avg Training Accuracy: 0.8611721068249258, F1 Macro: 0.7526412912130105, F1 Scores: 0.8713943507413353


                                                         

Avg Validation Loss: 0.6540600657463074
Avg Validation Accuracy: 0.8561555521171799, F1 Macro: 0.46196381801881586, F1 Scores: 0.8699080836936413


                                                      

Epoch:4, Avg Train Loss: 0.5010788270405361
Avg Training Accuracy: 0.8611721068249258, F1 Macro: 0.7526412912130105, F1 Scores: 0.8713943507413353


                                                         

Avg Validation Loss: 0.6488722562789917
Avg Validation Accuracy: 0.8561555521171799, F1 Macro: 0.46196381801881586, F1 Scores: 0.8699080836936413


                                                      

Epoch:5, Avg Train Loss: 0.4988069704600743
Avg Training Accuracy: 0.8611721068249258, F1 Macro: 0.7526412912130105, F1 Scores: 0.8713943507413353


                                                         

Avg Validation Loss: 0.6489195823669434
Avg Validation Accuracy: 0.8561555521171799, F1 Macro: 0.46196381801881586, F1 Scores: 0.8699080836936413


                                                      

Epoch:6, Avg Train Loss: 0.49879964334624155
Avg Training Accuracy: 0.8611721068249258, F1 Macro: 0.7526412912130105, F1 Scores: 0.8713943507413353


                                                         

Avg Validation Loss: 0.6488584876060486
Avg Validation Accuracy: 0.8561555521171799, F1 Macro: 0.46196381801881586, F1 Scores: 0.8699080836936413


                                                      

Epoch:7, Avg Train Loss: 0.49868859137807575
Avg Training Accuracy: 0.8611721068249258, F1 Macro: 0.7526412912130105, F1 Scores: 0.8713943507413353


                                                         

Avg Validation Loss: 0.6488105058670044
Avg Validation Accuracy: 0.8561555521171799, F1 Macro: 0.46196381801881586, F1 Scores: 0.8699080836936413


                                                      

Epoch:8, Avg Train Loss: 0.498614170721599
Avg Training Accuracy: 0.8611721068249258, F1 Macro: 0.7526412912130105, F1 Scores: 0.8713943507413353


                                                         

Avg Validation Loss: 0.6487798094749451
Avg Validation Accuracy: 0.8561555521171799, F1 Macro: 0.46196381801881586, F1 Scores: 0.8699080836936413


                                                      

Epoch:9, Avg Train Loss: 0.4985711319105966
Avg Training Accuracy: 0.8611721068249258, F1 Macro: 0.7526412912130105, F1 Scores: 0.8713943507413353


                                                         

Avg Validation Loss: 0.6487606167793274
Avg Validation Accuracy: 0.8561555521171799, F1 Macro: 0.46196381801881586, F1 Scores: 0.8699080836936413


                                                       

Epoch:10, Avg Train Loss: 0.49854560835020884
Avg Training Accuracy: 0.8611721068249258, F1 Macro: 0.7526412912130105, F1 Scores: 0.8713943507413353


                                                         

Avg Validation Loss: 0.6487456560134888
Avg Validation Accuracy: 0.8561555521171799, F1 Macro: 0.46196381801881586, F1 Scores: 0.8699080836936413


In [350]:
# gc.collect()
# torch.cuda.empty_cache()
# for epoch in range(1, EPOCHS+1):
#     train_loss = train_epoch(model, optimizer,epoch)
#     val_loss,all_labels = evaluate(model,val_dataloader=val_dataloader,name='Val')
#     if(epoch%2==0):
#         torch.save(model, f"{OUTPATH}modelM1_epoch{epoch}.pth")
#     print((f"Epoch: {epoch}, Train loss: {train_loss:.3f}, Val loss: {val_loss:.3f}"))

In [351]:
torch.save(model, f"{OUTPATH}modelM3.pth")
torch.save(tokenizer, f"{OUTPATH}tokenizerM3.pth")

In [352]:
# load model
loaded_model = torch.load(f"{OUTPATH}modelM3.pth")
loaded_tokenizer = torch.load(f"{OUTPATH}tokenizerM3.pth")
# weights=torch.tensor([0.1,0.9]).to(device)


In [353]:

# test
x_test, y_test = get_eval(load_data(PATH+"val_file.json"))



In [354]:
test_dataset = Dataset(x_test,loaded_tokenizer,y_test)
test_dataloader=DataLoader(test_dataset,batch_size=BATCH_SIZE,shuffle=False)
loss,acc,macro,f1,preds=evaluate(loaded_model,test_dataloader,"Test",metrics,weights)


                                                   

Avg Test Loss: 0.6487456560134888
Avg Test Accuracy: 0.8561555521171799, F1 Macro: 0.46196381801881586, F1 Scores: 0.8699080836936413


In [355]:
y_test_list = np.array(y_test).flatten()
preds=np.array(preds).flatten()

In [356]:
dic1={}
dic2={}
for i in range(len(y_test_list)):
    if y_test_list[i] in dic1:
        dic1[y_test_list[i]]+=1
    else:
        dic1[y_test_list[i]]=1
    if preds[i] in dic2:
        dic2[preds[i]]+=1
    else:
        dic2[preds[i]]=1

dic1,dic2

({0: 18282, 1: 1107}, {0: 17703, 1: 1686})

In [357]:
print(classification_report(y_test_list, preds))
f1_scores = f1_score(y_test_list,preds, average=None)

# Print F1 score for each label
for label, f1 in enumerate(f1_scores):
    print(f"F1 score for label {label}: {f1}")

              precision    recall  f1-score   support

           0       0.94      0.91      0.92     18282
           1       0.00      0.00      0.00      1107

    accuracy                           0.86     19389
   macro avg       0.47      0.45      0.46     19389
weighted avg       0.88      0.86      0.87     19389

F1 score for label 0: 0.9224954842295401
F1 score for label 1: 0.0014321518080916578
