<a href="https://colab.research.google.com/github/AzitaKalantar/NLP-Projects/blob/main/Sentiment%20and%20Emotion%20Detector%20using%20ISEAR%20dataset/version2_tf_idf_NN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Building an Emotion Classifier Model

Extracting Data

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive (needs the google.colab runtime).
# NOTE(review): the data cells use the relative prefix 'drive/My Drive/...',
# which presumably resolves against a working directory of /content -- confirm.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
# Read the raw ISEAR train and test splits (in that order) from the Drive folder.
train_data, test_data = (
    pd.read_csv(f'drive/My Drive/Data/ISEAR/data_{split}.csv')
    for split in ('train', 'test')
)

In [None]:
# Give both frames the same positional column names, then preview each one
# (train first, test second).
column_names = ["Emotion", "Text"]
for frame in (train_data, test_data):
    frame.columns = column_names
    print(frame.head())

   Emotion                                               Text
0  neutral   There are tons of other paintings that I thin...
1  sadness  Yet the dog had grown old and less capable , a...
2     fear  When I get into the tube or the train without ...
3     fear  This last may be a source of considerable disq...
4    anger  She disliked the intimacy he showed towards so...
   Emotion                                               Text
0  sadness  I experienced this emotion when my grandfather...
1  neutral   when I first moved in , I walked everywhere ....
2    anger  ` Oh ! " she bleated , her voice high and rath...
3     fear  However , does the right hon. Gentleman recogn...
4  sadness  My boyfriend didn't turn up after promising th...


Data Cleaning and Preprocessing

In [None]:
from nltk.corpus import stopwords
from textblob import TextBlob
from nltk.stem import PorterStemmer

In [None]:
def preprocess_data(data):
  """Normalize the ``Text`` column of ``data`` for bag-of-words features.

  The following steps are applied, in order, to every row:
    1. lower-case every whitespace-separated token,
    2. delete every character that is neither a word character nor whitespace,
    3. drop NLTK English stop words,
    4. run TextBlob spelling correction,
    5. Porter-stem every token.

  Args:
    data: pandas DataFrame with a string column named ``Text``.

  Returns:
    The same DataFrame object; its ``Text`` column is overwritten in place.
  """
  import re  # local import keeps this cell self-contained

  # str.replace() matches its first argument literally, so a pattern such as
  # '[^\w\s]' never removes anything; a compiled regex is required here.
  punctuation = re.compile(r'[^\w\s]')
  # Set membership is O(1); the NLTK list would be scanned for every token.
  stop = set(stopwords.words('english'))
  st = PorterStemmer()

  #make the text lower case
  data['Text'] = data['Text'].apply(lambda text: " ".join(word.lower() for word in text.split()))
  #remove non-word characters (^\w) or white space characters (\s);
  #split/join afterwards so tokens that were pure punctuation leave no gaps
  data['Text'] = data['Text'].apply(lambda text: " ".join(punctuation.sub('', text).split()))
  #remove stop words
  # NOTE(review): contractions have lost their apostrophe by now ("didn't" ->
  # "didnt"), so stop-list entries containing one no longer match -- confirm
  # this ordering is what is wanted.
  data['Text'] = data['Text'].apply(lambda text: " ".join(word for word in text.split() if word not in stop))
  #correct spelling
  data['Text'] = data['Text'].apply(lambda text: str(TextBlob(text).correct()))
  #do stemming
  data['Text'] = data['Text'].apply(lambda text: " ".join([st.stem(word) for word in text.split()]))
  return data


# Clean both splits (train first, then test) and preview the results.
train_data, test_data = (
    preprocess_data(frame) for frame in (train_data, test_data)
)
for frame in (train_data, test_data):
    print(frame.head())

Unnamed: 0,Emotion,Text
0,neutral,ton paint think better .
1,sadness,"yet dog grown old less capabl , one day gilli ..."
2,fear,get tube train without pay ticket.
3,fear,last may sourc consider disquiet one might fir...
4,anger,"dislik intimaci show toward , resent memori sh..."


In [None]:
# Cache the preprocessed splits so the slow cleaning step need not be repeated.
# index=False: the row index carries no information, and writing it makes the
# files come back with a spurious "Unnamed: 0" column when they are re-read.
train_data.to_csv("drive/My Drive/Data/ISEAR/pre_processsed_data_train.csv", index=False)
test_data.to_csv("drive/My Drive/Data/ISEAR/pre_processsed_data_test.csv", index=False)

In [None]:
import pandas as pd
# Reload the cached, preprocessed splits.
# A text that preprocessing reduced to the empty string is parsed back as NaN
# by read_csv; restore "" so every entry of the Text column is a string again
# (text vectorizers reject NaN documents).
train_data = pd.read_csv('drive/My Drive/Data/ISEAR/pre_processsed_data_train.csv').fillna({'Text': ''})
test_data = pd.read_csv('drive/My Drive/Data/ISEAR/pre_processsed_data_test.csv').fillna({'Text': ''})

In [None]:
from sklearn import preprocessing,metrics,linear_model

In [None]:
# Learn the emotion-name -> integer mapping from the training labels only and
# reuse that same mapping for the test labels. Re-fitting on the test split
# could assign different integers to the same emotion whenever the two splits
# do not contain exactly the same set of labels.
# (Named `label_encoder` rather than `object` to avoid shadowing the builtin.)
label_encoder = preprocessing.LabelEncoder()
ytrain = label_encoder.fit_transform(train_data['Emotion'])
ytest = label_encoder.transform(test_data['Emotion'])

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [None]:
# Learn the TF-IDF vocabulary and weights from the training texts only, then
# project both splits into that feature space (train first, then test).
tv = TfidfVectorizer().fit(train_data["Text"])
tv_xtrain, tv_xtest = (
    tv.transform(frame["Text"]) for frame in (train_data, test_data)
)

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim

In [None]:
class Dataset(torch.utils.data.Dataset):
    """In-memory dataset pairing dense feature rows with integer class labels.

    The base class is spelled with its fully qualified name because this class
    reuses the name ``Dataset``; that way the definition does not depend on
    what the bare name ``Dataset`` currently refers to when the cell is re-run.

    Args:
        x_train: array-like of shape (n_samples, n_features); stored as float32.
        y_train: array-like of n_samples integer class ids; stored as int64,
            the dtype ``nn.CrossEntropyLoss`` expects for class-index targets.

    Raises:
        ValueError: if ``x_train`` and ``y_train`` differ in length.
    """

    def __init__(self,x_train,y_train):
        # as_tensor with an explicit dtype accepts lists as well as ndarrays and
        # makes the result independent of the platform's default NumPy int size.
        self.x = torch.as_tensor(x_train, dtype=torch.float32)
        self.y = torch.as_tensor(y_train, dtype=torch.long)
        if len(self.x) != len(self.y):
            raise ValueError(
                "x_train and y_train must have the same length, got "
                f"{len(self.x)} and {len(self.y)}"
            )

    def __len__(self):
        """Number of samples."""
        return len(self.x)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        return self.x[idx], self.y[idx]

In [None]:
class Classifier(nn.Module):
    """Feed-forward classifier with one hidden ReLU layer.

    Args:
        in_features: size of each input feature vector.
        num_classes: number of output classes (size of the logit vector).
        hidden_features: width of the hidden layer. Defaults to ``in_features``,
            which reproduces the original square first layer; pass a smaller
            value to cut the parameter count when the input is very wide.
    """

    def __init__(self,in_features,num_classes,hidden_features=None):
        super().__init__()
        if hidden_features is None:
            hidden_features = in_features
        self.fc1 = nn.Linear(in_features=in_features,out_features=hidden_features)
        self.fc2 = nn.Linear(in_features=hidden_features,out_features=num_classes)

    def forward(self, x,apply_softmax=False):
        """Compute class scores for ``x``.

        Args:
            x: tensor whose last dimension has size ``in_features``.
            apply_softmax: when True, return probabilities instead of logits.
                Leave False when the output feeds ``nn.CrossEntropyLoss``,
                which applies log-softmax itself.

        Returns:
            Tensor whose last dimension has size ``num_classes``.
        """
        y_out = F.relu(self.fc1(x))
        y_out = self.fc2(y_out)
        if apply_softmax:
            # Normalise over the class axis; -1 equals 1 for batched 2-D input
            # and additionally works for a single unbatched vector.
            y_out = F.softmax(y_out, dim=-1)
        return y_out

In [None]:
# NOTE(review): no cell in this notebook defines `model`, so this cell raised
# NameError on a fresh kernel. A logistic-regression baseline is assumed here
# because `linear_model` is imported but otherwise unused -- confirm that this
# matches the estimator that produced the recorded score.
model = linear_model.LogisticRegression(max_iter=1000)
model.fit(tv_xtrain, ytrain)
modelPred = model.predict(tv_xtrain)
# accuracy_score takes (y_true, y_pred); this is accuracy on the TRAINING split.
print(metrics.accuracy_score(ytrain, modelPred))

0.8531635996975044


In [None]:
# Turn the sparse TF-IDF matrices into dense arrays for the PyTorch tensors.
# - The hasattr guard makes the cell safe to re-run: a dense ndarray has no
#   .toarray(), so a second execution would otherwise raise AttributeError.
# - Casting to float32 before densifying halves the memory of the dense copy
#   compared with the float64 default.
if hasattr(tv_xtrain, "toarray"):
    tv_xtrain = tv_xtrain.astype(np.float32).toarray()
if hasattr(tv_xtest, "toarray"):
    tv_xtest = tv_xtest.astype(np.float32).toarray()

In [None]:
# Sanity check: view the integer-encoded training labels as a tensor.
# The result is only displayed as the cell output; it is not stored.
torch.from_numpy(ytrain)

tensor([3, 4, 1,  ..., 4, 1, 0])

In [None]:
# Wrap the dense features and encoded labels, then batch them.
BATCH_SIZE = 128
train_dataset = Dataset(tv_xtrain,ytrain)
test_dataset = Dataset(tv_xtest,ytest)
# Training: reshuffle every epoch; drop the ragged final batch so every
# optimisation step sees the same batch size.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
# Evaluation: keep every sample (drop_last=True would silently exclude up to
# BATCH_SIZE - 1 test rows from the reported metrics); order is irrelevant.
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, drop_last=False)

In [None]:
# Display the shape of the training feature matrix; the second entry is the
# feature count that the classifier's input layer has to match.
tv_xtrain.shape

(7934, 7408)

In [None]:
# Derive the layer sizes from the data instead of hard-coding them: the TF-IDF
# vocabulary size changes whenever the corpus or the preprocessing changes, and
# a stale constant would only surface as a shape error at the first forward pass.
n_features = tv_xtrain.shape[1]
n_classes = len(np.unique(ytrain))
classifier = Classifier(n_features, n_classes)
# CrossEntropyLoss consumes raw logits and integer class targets.
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(classifier.parameters(), lr=0.001)
# Halve the learning rate once the monitored loss has stopped decreasing for
# more than `patience` consecutive scheduler.step(loss) calls.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer,
                                           mode='min', factor=0.5,
                                           patience=1)

In [None]:
def compute_accuracy(y_pred, y_target):
    """Return the percentage of rows whose highest-scoring class equals the target.

    Args:
        y_pred: 2-D tensor of per-class scores, one row per sample.
        y_target: 1-D tensor of true class indices, one per sample.

    Returns:
        Accuracy as a float in the range [0, 100].
    """
    predicted_classes = y_pred.max(dim=1)[1]
    hits = (predicted_classes == y_target).sum().item()
    total = len(predicted_classes)
    return hits / total * 100

In [None]:
# Per-epoch history: mean batch loss and mean batch accuracy (percent).
train_loss= []
train_acc = []
num_epochs = 2
for epoch in range(num_epochs):
  running_loss = 0.0
  running_acc = 0.0
  classifier.train()
  for batch_index, (x,y) in enumerate(train_dataloader):
    #zero the gradients
    optimizer.zero_grad()

    #compute the output
    y_pred = classifier(x)

    #compute the loss
    loss = loss_func(y_pred, y)

    #use loss to produce gradients
    loss.backward()

    #use optimizer to take gradient step
    optimizer.step()

    #update the running mean of the loss over the batches seen this epoch
    running_loss += (loss.item() - running_loss) / (batch_index + 1)

    # compute the accuracy (acc_t keeps the value of the most recent batch)
    acc_t = compute_accuracy(y_pred, y)
    running_acc += (acc_t - running_acc) / (batch_index + 1)

  #record the epoch summary so the history lists are actually populated
  train_loss.append(running_loss)
  train_acc.append(running_acc)

  #ReduceLROnPlateau only acts when it is stepped with the monitored metric.
  #NOTE(review): the training loss is used because this notebook has no
  #validation split -- switch to a validation loss if one is added.
  scheduler.step(running_loss)
    

In [None]:
# Accuracy (percent) of the most recently processed training batch only;
# this is not an average over the epoch.
acc_t

84.375

In [None]:
# Evaluate on the test split: sample-weighted mean loss and accuracy (percent).
running_loss = 0.0
running_acc = 0.0
n_seen = 0
classifier.eval()

# no_grad: evaluation needs no autograd graph, which saves memory and time.
with torch.no_grad():
  for x,y in test_dataloader:
    # compute the output
    y_pred =  classifier(x)

    # weight every batch by its size so that a smaller final batch does not
    # count as much as a full one (identical to a plain mean when all batches
    # have the same size)
    batch_size = y.size(0)
    n_seen += batch_size

    # compute the loss
    loss = loss_func(y_pred, y)
    loss_t = loss.item()
    running_loss += (loss_t - running_loss) * batch_size / n_seen

    # compute the accuracy
    acc_t = compute_accuracy(y_pred, y)
    running_acc += (acc_t - running_acc) * batch_size / n_seen

In [None]:
# Test accuracy (percent) accumulated by the evaluation loop as a running
# mean over the test batches.
running_acc

67.96875