In [1]:
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.optim import Adam
from transformers import BertModel
from tqdm import tqdm
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

In [2]:
# importing the dataset 
df_train = pd.read_csv('data/train.txt', header =None, sep =';', names = ['Input','Sentiment'], encoding='utf-8')
df_test = pd.read_csv('data/test.txt', header = None, sep =';', names = ['Input','Sentiment'],encoding='utf-8')
df_val=pd.read_csv('data/val.txt',header=None,sep=';',names=['Input','Sentiment'],encoding='utf-8')

In [3]:
X_train = df_train.Input.tolist()
X_test = df_test.Input.tolist()
X_val = df_val.Input.tolist()
y_train = df_train.Sentiment.tolist()
y_test = df_test.Sentiment.tolist()
y_val = df_val.Sentiment.tolist()

In [4]:
# Turning 'Sentiment' values into numeric values
label = { 'anger': 0,
    'fear': 1,
    'joy': 2,
    'love': 3,
    'sadness': 4,
    'surprise': 5
}

In [5]:
y_train = [label[key] for key in y_train]
y_test = [label[key] for key in y_test]
y_val = [label[key] for key in y_val]

In [6]:
#Encode the data with tokenizer
from transformers import BertTokenizer,BertForSequenceClassification
model_name = "bert-base-cased"
tokenizer = BertTokenizer.from_pretrained(model_name, do_lower_case=True)
train_encodings = tokenizer(X_train, truncation=True, padding=True, max_length=50)
test_encodings = tokenizer(X_test, truncation=True, padding=True, max_length=50)
val_encodings = tokenizer(X_val, truncation=True, padding=True, max_length=50)

In [7]:
train_encodings["label"] = y_train
test_encodings["label"] = y_test
val_encodings["label"] = y_val

In [8]:
# Encoded input
print("input_ids: ",train_encodings['input_ids'][100])
print("token_type_ids: ",train_encodings['token_type_ids'][100])
print("attention_mask: ",train_encodings['attention_mask'][100]) #a binary mask that identifies whether a token is a real word or just padding
print("label: ",train_encodings['label'][100])

input_ids:  [101, 178, 1281, 1204, 1519, 1143, 2027, 5354, 1122, 1149, 1272, 178, 1631, 1115, 9207, 1123, 1105, 181, 5264, 1165, 1131, 1108, 1376, 1108, 1280, 1106, 1129, 6305, 1115, 1178, 5695, 1111, 1343, 1603, 1374, 1808, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
token_type_ids:  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
attention_mask:  [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
label:  3


In [9]:
batch_size = 32

In [10]:
train_inputs = torch.tensor(train_encodings['input_ids'])
train_labels = torch.tensor(train_encodings['label'])
train_masks = torch.tensor(train_encodings['attention_mask'])
train_data = TensorDataset(train_inputs,train_masks,train_labels)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data,sampler=train_sampler,batch_size=batch_size)

In [11]:
test_inputs = torch.tensor(test_encodings['input_ids'])
test_labels = torch.tensor(test_encodings['label'])
test_masks = torch.tensor(test_encodings['attention_mask'])
test_data = TensorDataset(test_inputs,test_masks,test_labels)
test_sampler = RandomSampler(test_data)
test_dataloader = DataLoader(test_data,sampler=test_sampler,batch_size=batch_size)

In [12]:
val_inputs = torch.tensor(val_encodings['input_ids'])
val_labels = torch.tensor(val_encodings['label'])
val_masks = torch.tensor(val_encodings['attention_mask'])
val_data = TensorDataset(val_inputs,val_masks,val_labels)
val_sampler = RandomSampler(val_data)
val_dataloader = DataLoader(val_data,sampler=val_sampler,batch_size=batch_size)

In [13]:
# def Bert():
#     pooled_output = BertModel.from_pretrained('bert-base-cased')(input_ids= input_id, attention_mask=mask,return_dict=False)
#     dropout = nn.Dropout(dropout=0.5)(pooled_output)
#     linear = nn.Linear(768, 5)(dropout)
#     final_layer = nn.ReLU()(linear)
    
# #     pooled_output = bert(input_ids= input_id, attention_mask=mask,return_dict=False)
# #     dropout_output = dropout(pooled_output)
# #     linear_output = linear(dropout_output)
# #     final_layer = nn.ReLU()(linear_output)

#     return final_layer
class Bert(nn.Module):

    def __init__(self, dropout=0.5):

        super(Bert, self).__init__()

        self.bert = BertModel.from_pretrained(model_name, return_dict=False)
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(768, 6)
        self.relu = nn.ReLU()

    def forward(self, input_id, mask):

        _, pooled_output = self.bert(input_id, attention_mask=mask,return_dict=False)
        dropout_output = self.dropout(pooled_output)
        linear_output = self.linear(dropout_output)
        
        final_layer = self.relu(linear_output)

        return final_layer

In [15]:
EPOCHS = 12
model = Bert()
LR = 1e-6

criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr= LR)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [16]:
for epoch_num in range(EPOCHS):
    
    train_acc_all = 0
    train_loss_all = 0

    for train_input, train_masks, train_label in tqdm(train_dataloader):

        output = model(train_input, train_masks)
        
        batch_loss = criterion(output, train_label)
        train_loss_all += batch_loss.item()
                
        acc = (output.argmax(dim=1) == train_label).sum().item()
        train_acc_all += acc

        model.zero_grad()
        batch_loss.backward()
        optimizer.step()
            
    val_acc_all = 0
    val_loss_all = 0

    with torch.no_grad():

        for val_input, val_mask, val_label in val_dataloader:

            output = model(val_input, val_mask)

            batch_loss = criterion(output, val_label)
            val_loss_all += batch_loss.item()
                    
            acc = (output.argmax(dim=1) == val_label).sum().item()
            val_acc_all += acc
            
    print(f'Epochs: {epoch_num + 1} | Train Loss: {train_loss_all / len(train_data): .3f} | Train Accuracy: {train_acc_all / len(train_data): .3f} | Val Loss: {val_loss_all / len(val_data): .3f} | Val Accuracy: {val_acc_all / len(val_data): .3f}')
                  

100%|████████████████████████████████████████████████████████████████████████████████| 500/500 [24:06<00:00,  2.89s/it]


Epochs: 1 | Train Loss:  0.049 | Train Accuracy:  0.412 | Val Loss:  0.043 | Val Accuracy:  0.543


100%|████████████████████████████████████████████████████████████████████████████████| 500/500 [23:57<00:00,  2.88s/it]


Epochs: 2 | Train Loss:  0.041 | Train Accuracy:  0.560 | Val Loss:  0.038 | Val Accuracy:  0.587


100%|████████████████████████████████████████████████████████████████████████████████| 500/500 [23:57<00:00,  2.87s/it]


Epochs: 3 | Train Loss:  0.036 | Train Accuracy:  0.596 | Val Loss:  0.034 | Val Accuracy:  0.618


100%|████████████████████████████████████████████████████████████████████████████████| 500/500 [24:03<00:00,  2.89s/it]


Epochs: 4 | Train Loss:  0.030 | Train Accuracy:  0.669 | Val Loss:  0.026 | Val Accuracy:  0.734


100%|████████████████████████████████████████████████████████████████████████████████| 500/500 [24:02<00:00,  2.89s/it]


Epochs: 5 | Train Loss:  0.021 | Train Accuracy:  0.792 | Val Loss:  0.019 | Val Accuracy:  0.826


100%|████████████████████████████████████████████████████████████████████████████████| 500/500 [23:53<00:00,  2.87s/it]


Epochs: 6 | Train Loss:  0.014 | Train Accuracy:  0.873 | Val Loss:  0.014 | Val Accuracy:  0.866


100%|████████████████████████████████████████████████████████████████████████████████| 500/500 [23:56<00:00,  2.87s/it]


Epochs: 7 | Train Loss:  0.011 | Train Accuracy:  0.898 | Val Loss:  0.012 | Val Accuracy:  0.882


100%|████████████████████████████████████████████████████████████████████████████████| 500/500 [23:46<00:00,  2.85s/it]


Epochs: 8 | Train Loss:  0.009 | Train Accuracy:  0.911 | Val Loss:  0.011 | Val Accuracy:  0.883


100%|████████████████████████████████████████████████████████████████████████████████| 500/500 [23:51<00:00,  2.86s/it]


Epochs: 9 | Train Loss:  0.008 | Train Accuracy:  0.921 | Val Loss:  0.010 | Val Accuracy:  0.886


100%|████████████████████████████████████████████████████████████████████████████████| 500/500 [23:48<00:00,  2.86s/it]


Epochs: 10 | Train Loss:  0.006 | Train Accuracy:  0.933 | Val Loss:  0.009 | Val Accuracy:  0.906


100%|████████████████████████████████████████████████████████████████████████████████| 500/500 [23:48<00:00,  2.86s/it]


Epochs: 11 | Train Loss:  0.005 | Train Accuracy:  0.951 | Val Loss:  0.008 | Val Accuracy:  0.918


100%|████████████████████████████████████████████████████████████████████████████████| 500/500 [23:53<00:00,  2.87s/it]


Epochs: 12 | Train Loss:  0.004 | Train Accuracy:  0.959 | Val Loss:  0.007 | Val Accuracy:  0.917


In [17]:
test_loss_all = 0
test_acc_all = 0
with torch.no_grad():
    for test_input, test_mask, test_label in test_dataloader:
        output = model(test_input, test_mask)

        batch_loss = criterion(output, test_label)
        test_loss_all += batch_loss.item()
                    
        acc = (output.argmax(dim=1) == test_label).sum().item()
        test_acc_all += acc
            
print(f'Test Loss: {test_loss_all / len(test_data): .3f} | Test Accuracy: {test_acc_all / len(test_data): .3f}')

Test Loss:  0.008 | Test Accuracy:  0.912


In [23]:
#save weights
torch.save(model.state_dict(), model_name + '_weights_12epoches.pth')