# ** Sentiment Analysis Using Fully Connected Feed Forward Neural Network **


## **Load Data**

In [1]:
# Import libraries Pandas and Numpy
import pandas as pd
import numpy as np

In [2]:
# Read the training data
fname = 'facebook_comments.csv'
df_train = pd.read_csv(fname, header = None, names = ['text','sentiment'],encoding = 'iso-8859-1',lineterminator = '\n')

In [3]:
# Print the first 5 records in the dataset
df_train.head()

Unnamed: 0,text,sentiment
0,Heres a single to add to Kindle. Just read t...,neutral
1,If you tire of Non-Fiction.. Check out http://...,neutral
2,Ghost of Round Island is supposedly nonfiction.,neutral
3,Why is Barnes and Nobles version of the Kindle...,negative
4,@Maria: Do you mean the Nook? Be careful bo...,positive


In [4]:
# Get texts and labels 
sent = {'positive':2,'neutral':1,'negative':0}
df_train['labels'] = df_train['sentiment'].str.strip().map(sent)
training_texts = df_train.text.values
labels = df_train.labels.values
print(type(training_texts), type(labels))
# Show the first 5 records
df_train.head()

<class 'numpy.ndarray'> <class 'numpy.ndarray'>


Unnamed: 0,text,sentiment,labels
0,Heres a single to add to Kindle. Just read t...,neutral,1
1,If you tire of Non-Fiction.. Check out http://...,neutral,1
2,Ghost of Round Island is supposedly nonfiction.,neutral,1
3,Why is Barnes and Nobles version of the Kindle...,negative,0
4,@Maria: Do you mean the Nook? Be careful bo...,positive,2


## **Preprocess Data** 


In [5]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Tokensize and create a document-feature matrix X and label vector Y
vectorizer = TfidfVectorizer(stop_words = 'english', max_features=500,ngram_range = (1,1))
instances = vectorizer.fit_transform(training_texts)
X = instances.toarray()
Y = labels

# Print out the shape of X and Y 
print(X.shape,',',Y.shape)
print(Y[:10])
print(X[0,:50])

(1999, 500) , (1999,)
[1 1 1 0 2 2 2 0 2 0]
[0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.28915636 0.         0.         0.
 0.         0.         0.2971592  0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.        ]


## **Traditional Machine Learning Models : Random Forest**

In [6]:
# Using 10-fold cross-validation
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier

kfold = KFold(n_splits = 10, shuffle = True, random_state = 2020)
rf_model = RandomForestClassifier(criterion = 'entropy', max_depth = 2, random_state = 2020)
rf_cvscores = []

# Getting the accuracy 
for train_idx,value_idx in kfold.split(X):
  rf_model.fit(X[train_idx],Y[train_idx])
  accuracy = rf_model.score(X[value_idx],Y[value_idx])
  rf_cvscores.append(accuracy)

print("Random Forest - mean: %.4f%% (std: +/- %.4f%%)" % (np.mean(rf_cvscores)*100, np.std(rf_cvscores)*100))


Random Forest - mean: 64.1332% (std: +/- 2.0919%)


## **Fully Connected Feed Forward Neural Network** 




In [7]:
# Loading the packages

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim
from torch.autograd import Variable


In [755]:
# Defining the hyperparameters

epochs = 20
lr = 1e-2
in_dim = X.shape[1]
out_dim = 3
drate = 0.55
batch_size = 12

# Creating Tensors

X_tensor = torch.from_numpy(X)
Y_tensor = torch.from_numpy(Y)

dataset = TensorDataset(X_tensor,Y_tensor)
train_size = int(0.8*len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset,[train_size,val_size])

train_loader = DataLoader(train_dataset, shuffle = True, batch_size = batch_size)
val_loader = DataLoader(val_dataset, shuffle = True, batch_size = batch_size)

In [756]:
# Creating the network

class SentimentNetwork(nn.Module):
  def __init__(self, input_dim, output_dim, dropout_rate):
    super(SentimentNetwork,self).__init__()
    self.fc1 = nn.Linear(in_dim, 1000)
    self.fc2 = nn.Linear(1000, 1000)
    self.fc3 = nn.Linear(1000, out_dim)
    self.do1 = nn.Dropout(dropout_rate)
  
  def forward(self,x):
    x = F.relu(self.fc1(x))
    x = self.do1(x)
    x = F.relu(self.fc2(x))
    x = self.fc3(x)
    x = F.log_softmax(x)

    return x

In [757]:
# Training the network

def train(model, train_loader, optimizer, criterion):
  epoch_loss, epoch_acc = 0.0, 0.0;

  model.train()

# Training the model to get accuracy and loss for a batch
  for batch_x, batch_y in train_loader:
    batch_x,batch_y = Variable(batch_x), Variable(batch_y)
    optimizer.zero_grad()
    network_output = model(batch_x.float())
    loss = criterion(network_output,batch_y)
    pred = network_output.max(1)[1]
    correct = pred.eq(batch_y).sum()
    acc = correct.float()/len(pred)*100
# Backpropagating the network 
    loss.backward()
    optimizer.step()

# Total loss and accuracy for the batch 
    epoch_loss += loss.item()
    epoch_acc += acc
  
  num_of_batches = train_size/batch_size
  avg_epoch_acc = epoch_acc/num_of_batches
  avg_epoch_loss = epoch_loss/num_of_batches

# Average epoch loss and accuracy 
  return avg_epoch_loss, avg_epoch_acc


In [758]:
# Evaluating the Network

def evaluate(model, val_loader, criterion):
  epoch_loss, epoch_acc = 0.0, 0.0;

  model.eval()

  with torch.no_grad():

# Evaluating the model to get accuracy and loss for a batch
    for batch_x, batch_y in val_loader:
      batch_x,batch_y = Variable(batch_x), Variable(batch_y)
      network_output = model(batch_x.float())
      loss = criterion(network_output,batch_y)
      pred = network_output.max(1)[1]
      correct = pred.eq(batch_y).sum()
      acc = correct.float()/len(pred)*100

  # Total loss and accuracy for the batch 
      epoch_loss += loss.item()
      epoch_acc += acc
  
  num_of_batches = val_size/batch_size
  avg_epoch_acc = epoch_acc/num_of_batches
  avg_epoch_loss = epoch_loss/num_of_batches

# Average epoch loss and accuracy 
  return avg_epoch_loss, avg_epoch_acc



In [759]:
# Defining Gradient Descent, Learning rate and the Loss Criteria 

network = SentimentNetwork(in_dim,out_dim,drate)
optimizer = torch.optim.SGD(network.parameters(),lr = lr, momentum = 0.9)
criterion = nn.CrossEntropyLoss()


In [760]:
# Run the training and evaluation process
for epoch in range(epochs):
  train_loss, train_acc = train(network, train_loader, optimizer, criterion)
  valid_loss, valid_acc = evaluate(network, val_loader, criterion)
    
  print(f'Epoch: {epoch+1:02}')
  print(f'\tTrain Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}')
  print(f'\t Val. Loss: {valid_loss:.4f} |  Val. Acc: {valid_acc:.4f}')



Epoch: 01
	Train Loss: 0.8732 | Train Acc: 64.4778
	 Val. Loss: 0.8585 |  Val. Acc: 65.0000
Epoch: 02
	Train Loss: 0.8289 | Train Acc: 64.8530
	 Val. Loss: 0.8340 |  Val. Acc: 64.0000
Epoch: 03
	Train Loss: 0.7863 | Train Acc: 64.4778
	 Val. Loss: 0.7496 |  Val. Acc: 65.2500
Epoch: 04
	Train Loss: 0.6860 | Train Acc: 68.5428
	 Val. Loss: 0.6327 |  Val. Acc: 81.5000
Epoch: 05
	Train Loss: 0.5765 | Train Acc: 78.5491
	 Val. Loss: 0.5564 |  Val. Acc: 80.0000
Epoch: 06
	Train Loss: 0.5176 | Train Acc: 80.9256
	 Val. Loss: 0.5058 |  Val. Acc: 82.0000
Epoch: 07
	Train Loss: 0.4752 | Train Acc: 82.7392
	 Val. Loss: 0.4812 |  Val. Acc: 82.5000
Epoch: 08
	Train Loss: 0.4474 | Train Acc: 84.2402
	 Val. Loss: 0.4349 |  Val. Acc: 85.7500
Epoch: 09
	Train Loss: 0.4148 | Train Acc: 84.9906
	 Val. Loss: 0.4217 |  Val. Acc: 87.2500
Epoch: 10
	Train Loss: 0.4031 | Train Acc: 85.8662
	 Val. Loss: 0.4647 |  Val. Acc: 82.7500
Epoch: 11
	Train Loss: 0.3655 | Train Acc: 86.9919
	 Val. Loss: 0.4209 |  Val. A

# Results
##### The training accuracy is 94% and the validation accuracy is 92%.