In [5]:
import pickle
import torch
import numpy as np
from torch.nn.utils.rnn import pad_sequence

import torch.nn as nn

import torch.optim as optim

In [6]:
torch.manual_seed(42)

<torch._C.Generator at 0x795fbc8d34b0>

In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(f"Using device: {device}")

Using device: cuda


In [8]:

with open(r"/content/preprocessed_data.pkl","rb") as file :
     preprocessed_data = pickle.load(file)


Vocabulary = preprocessed_data["Vocabulary"]

lemm_tok_sent = preprocessed_data["lem_tokens_sent"]

labels = preprocessed_data["labels"]

In [9]:
# We need to create a look up dictionary for each word in vocabulary

word_dict = {}

for i,word in enumerate(Vocabulary):
    word_dict[word] = i + 1


In [10]:
# Processed the raw tokens of sentence to  numerical vectors
processed_sentences =  [[word_dict[word.lower()] for word in sentence] for sentence in lemm_tok_sent]


In [11]:
tensor_lst = [torch.tensor(sent) for sent in processed_sentences]

pad_sq = pad_sequence(tensor_lst,batch_first = True,padding_value = 0)

In [12]:
labels = torch.tensor(labels)

class_labels = torch.tensor([(label.item() + 1)  for label in labels],dtype = torch.long)

In [13]:

# Split train and test data
Batch_Size = 60

Dataset = torch.utils.data.TensorDataset(pad_sq,class_labels)

train_data,test_data = torch.utils.data.random_split(Dataset,[0.7,0.3])

train_data_loader = torch.utils.data.DataLoader(train_data,batch_size = Batch_Size,shuffle = True)

test_data_loader = torch.utils.data.DataLoader(test_data,batch_size= Batch_Size,shuffle = False)

In [14]:
print(len(Vocabulary))

7162


In [15]:
# Building the model

class SentimentAnalyser (nn.Module):

    def __init__(self,vocab_size,embedd_size,hidden_size,output_size) :

        super().__init__()

        self.embedding = nn.Embedding(vocab_size,embedd_size)

        self.lstm = nn.LSTM(embedd_size,hidden_size,batch_first = True)

        self.fc = nn.Linear(hidden_size,output_size)



    def forward(self,input_seq):
        # print(input_seq)
        embedding = self.embedding(input_seq)

        output, (hidden,cell_state)  = self.lstm(embedding)

        final_out = self.fc(hidden[-1,:,:])


        return final_out



In [16]:
vocab_size = len(Vocabulary) + 1

embedd_size = 50

hidden_size = 128

output_size = 3 # positve , negative, neutral

model = SentimentAnalyser(vocab_size,embedd_size,hidden_size,output_size).to(device)

In [17]:
criterion = nn.CrossEntropyLoss()

criterion.to(device)

optimizer = optim.Adam(model.parameters(),lr= 0.0071)


In [18]:
num_epochs = 20

In [19]:
# Train Loop

for epoch in range(num_epochs) :
    total_loss = 0
    count = 0
    for batch_inputs , batch_labels in train_data_loader :
         batch_inputs = batch_inputs.to(device)
         batch_labels = batch_labels.to(device)

         predict_labels = model(batch_inputs)

         loss = criterion(predict_labels,batch_labels)

         optimizer.zero_grad()

         total_loss += (loss.detach()).item()
         count+=1
         loss.backward()

         optimizer.step()
    print("Total loss incurred after Epoch {} is {}".format(epoch+1,(total_loss/count)))


Total loss incurred after Epoch 1 is 0.9193855058856126
Total loss incurred after Epoch 2 is 0.9077053855105144
Total loss incurred after Epoch 3 is 0.9081587442537633
Total loss incurred after Epoch 4 is 0.8904269861011971
Total loss incurred after Epoch 5 is 0.7925637611528722
Total loss incurred after Epoch 6 is 0.644454390537448
Total loss incurred after Epoch 7 is 0.5000706816591868
Total loss incurred after Epoch 8 is 0.410259278809152
Total loss incurred after Epoch 9 is 0.3553124698923855
Total loss incurred after Epoch 10 is 0.32486729251175395
Total loss incurred after Epoch 11 is 0.29870131466446853
Total loss incurred after Epoch 12 is 0.2850610061389644
Total loss incurred after Epoch 13 is 0.2921697613669605
Total loss incurred after Epoch 14 is 0.2947046782185392
Total loss incurred after Epoch 15 is 0.2736269457311165
Total loss incurred after Epoch 16 is 0.26069060949290673
Total loss incurred after Epoch 17 is 0.25165518246045926
Total loss incurred after Epoch 18 is 

In [20]:
# Test loop
correct_cls = 0
avg_loss = 0
total_avg_loss = 0
with torch.no_grad() :
  model.eval()

  for test_inputs,test_labels in test_data_loader :
     test_inputs = test_inputs.to(device)
     test_labels = test_labels.to(device)
    #  print(test_inputs.shape)
     pred_labels = model(test_inputs)

     _,pred_class = torch.max(pred_labels,1)

     correct_cls += ((pred_class == test_labels).sum()).item()

     loss = (criterion(pred_labels,test_labels))

     total_loss += loss.item()

  avg_loss = (total_loss)/(Batch_Size)

  print("Total loss incurred ",avg_loss)

  print("Accuracy ",(correct_cls)*100/len(test_data))

Total loss incurred  0.4610144879668951
Accuracy  73.6231884057971
