In [134]:
#important libraries
import re
import requests
from collections import defaultdict, Counter
import math
import pandas as pd
import numpy as np
import string
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import accuracy_score, precision_recall_fscore_support


##Question 3: Implementing an Artificial Neural Network for Sentiment Classification [40 points]

In [135]:
#URLs for the datasets
train_url = "https://raw.githubusercontent.com/Kushal-Chandani/NLP-Homeworks/main/Homework2/Data/Question3/sentiment_train_dataset.csv"
test_url = "https://raw.githubusercontent.com/Kushal-Chandani/NLP-Homeworks/main/Homework2/Data/Question3/sentiment_test_dataset.csv"

#Loading the datasets
train_data = pd.read_csv(train_url)
test_data = pd.read_csv(test_url)

train_data

Unnamed: 0,sentence,label
0,word_216 word_272 word_22 word_56 word_247 wor...,1
1,word_45 word_149 word_130 word_273 word_151 wo...,1
2,word_45 word_265 word_246 word_206 word_1 word...,0
3,word_7 word_260 word_81 word_80 word_121 word_...,0
4,word_148 word_24 word_265 word_137 word_190 wo...,1
...,...,...
4995,word_247 word_69 word_191 word_60 word_205,0
4996,word_292 word_236 word_60 word_249 word_249 wo...,1
4997,word_231 word_120 word_32 word_135 word_47 wor...,0
4998,word_5 word_118 word_23 word_200 word_104 word...,1


In [136]:
test_data

Unnamed: 0,sentence,label
0,word_29 word_296 word_265 word_92 word_299 wor...,1
1,word_121 word_299 word_74 word_44 word_155,0
2,word_271 word_15 word_232 word_93 word_193 wor...,1
3,word_52 word_196 word_236 word_229 word_107 wo...,0
4,word_287 word_144 word_118 word_175 word_153 w...,1
...,...,...
495,word_195 word_263 word_208 word_128 word_86 wo...,0
496,word_204 word_97 word_243 word_171 word_70 wor...,0
497,word_170 word_209 word_97 word_186 word_298,1
498,word_95 word_41 word_133 word_222 word_274 wor...,1


In [137]:
#Function for text preprocessing: cleaning, lowercasing, and tokenization
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)
    tokens = text.split()
    return tokens

#Applying the preprocessing function to both datasets
train_data['tokens'] = train_data['sentence'].apply(preprocess_text)
test_data['tokens'] = test_data['sentence'].apply(preprocess_text)

#Building the vocabulary from the training data
vocabulary = Counter([word for sentence in train_data['tokens'] for word in sentence])
vocab_to_index = {word: idx for idx, (word, _) in enumerate(vocabulary.items())}

In [138]:
print(vocabulary)

Counter({'word_149': 205, 'word_77': 202, 'word_238': 194, 'word_61': 192, 'word_97': 192, 'word_45': 191, 'word_226': 191, 'word_156': 190, 'word_273': 188, 'word_133': 188, 'word_52': 188, 'word_175': 187, 'word_29': 187, 'word_178': 187, 'word_63': 187, 'word_121': 186, 'word_32': 186, 'word_184': 186, 'word_161': 186, 'word_115': 185, 'word_30': 185, 'word_259': 185, 'word_198': 184, 'word_55': 184, 'word_73': 184, 'word_92': 184, 'word_64': 184, 'word_87': 184, 'word_48': 184, 'word_290': 183, 'word_245': 183, 'word_262': 183, 'word_58': 183, 'word_119': 183, 'word_298': 182, 'word_94': 181, 'word_93': 181, 'word_292': 181, 'word_170': 181, 'word_158': 181, 'word_251': 181, 'word_31': 180, 'word_53': 180, 'word_169': 180, 'word_283': 180, 'word_143': 180, 'word_269': 180, 'word_287': 180, 'word_17': 180, 'word_54': 180, 'word_247': 179, 'word_81': 179, 'word_217': 179, 'word_162': 179, 'word_231': 179, 'word_197': 179, 'word_74': 178, 'word_35': 178, 'word_46': 178, 'word_9': 178,

In [139]:
print(vocab_to_index)

{'word_216': 0, 'word_272': 1, 'word_22': 2, 'word_56': 3, 'word_247': 4, 'word_123': 5, 'word_75': 6, 'word_274': 7, 'word_37': 8, 'word_271': 9, 'word_244': 10, 'word_45': 11, 'word_149': 12, 'word_130': 13, 'word_273': 14, 'word_151': 15, 'word_12': 16, 'word_175': 17, 'word_104': 18, 'word_116': 19, 'word_103': 20, 'word_115': 21, 'word_31': 22, 'word_78': 23, 'word_265': 24, 'word_246': 25, 'word_206': 26, 'word_1': 27, 'word_53': 28, 'word_150': 29, 'word_3': 30, 'word_253': 31, 'word_8': 32, 'word_18': 33, 'word_27': 34, 'word_114': 35, 'word_7': 36, 'word_260': 37, 'word_81': 38, 'word_80': 39, 'word_121': 40, 'word_204': 41, 'word_252': 42, 'word_169': 43, 'word_25': 44, 'word_200': 45, 'word_67': 46, 'word_201': 47, 'word_148': 48, 'word_24': 49, 'word_137': 50, 'word_190': 51, 'word_32': 52, 'word_179': 53, 'word_79': 54, 'word_135': 55, 'word_167': 56, 'word_294': 57, 'word_43': 58, 'word_232': 59, 'word_256': 60, 'word_203': 61, 'word_84': 62, 'word_74': 63, 'word_35': 64,

In [140]:
#Function to convert a tokenized sentence into a one-hot encoded vector
def one_hot_encode(tokens, vocab_size, vocab_to_index):
    vector = np.zeros(vocab_size)
    for token in tokens:
        if token in vocab_to_index:
            index = vocab_to_index[token]
            vector[index] = 1
    return vector

#Total size of the vocabulary
vocab_size = len(vocab_to_index)

#Applying one-hot encoding to the training and test data
train_data['one_hot'] = train_data['tokens'].apply(lambda x: one_hot_encode(x, vocab_size, vocab_to_index))
test_data['one_hot'] = test_data['tokens'].apply(lambda x: one_hot_encode(x, vocab_size, vocab_to_index))

In [141]:
print(vocab_size)

300


In [142]:
import numpy as np

#Initializing weights and biases
def initialize_parameters(input_size, hidden_size, output_size=1):
    parameters = {
        "W1": np.random.randn(input_size, hidden_size),
        "b1": np.zeros((1, hidden_size)),
        "W2": np.random.randn(hidden_size, output_size),
        "b2": np.zeros((1, output_size))
    }
    return parameters

#Activation functions: Sigmoid and ReLU
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def sigmoid_derivative(z):
    return z * (1 - z)

def relu(z):
    return np.maximum(0, z)

def relu_derivative(z):
    return np.where(z > 0, 1, 0)


In [143]:
#Forward propagation
def forward_propagation(X, parameters):
    Z1 = np.dot(X, parameters["W1"]) + parameters["b1"]
    A1 = relu(Z1)
    Z2 = np.dot(A1, parameters["W2"]) + parameters["b2"]
    A2 = sigmoid(Z2)
    cache = {"Z1": Z1, "A1": A1, "Z2": Z2, "A2": A2}
    return A2, cache

#Back propagation
def backward_propagation(X, y, cache, parameters, learning_rate=0.01):
    output = cache["A2"]
    A1 = cache["A1"]

    error_output = output - y.reshape(-1, 1)
    dW2 = np.dot(A1.T, error_output * sigmoid_derivative(output))
    db2 = np.sum(error_output * sigmoid_derivative(output), axis=0, keepdims=True)

    error_hidden = np.dot(error_output * sigmoid_derivative(output), parameters["W2"].T)
    dW1 = np.dot(X.T, error_hidden * relu_derivative(A1))
    db1 = np.sum(error_hidden * relu_derivative(A1), axis=0, keepdims=True)

    # Update weights and biases
    parameters["W2"] -= learning_rate * dW2
    parameters["b2"] -= learning_rate * db2
    parameters["W1"] -= learning_rate * dW1
    parameters["b1"] -= learning_rate * db1

In [144]:
#Training the network
def train(X, y, input_size, hidden_size, output_size=1, learning_rate=0.01, epochs=20):
    parameters = initialize_parameters(input_size, hidden_size, output_size)
    for epoch in range(epochs):
        output, cache = forward_propagation(X, parameters)
        backward_propagation(X, y, cache, parameters, learning_rate)
        if (epoch + 1) % 1 == 0:
            loss = np.mean((output - y.reshape(-1, 1))**2)
            print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss:.4f}")
    return parameters

#Predicting function
def predict(X, parameters):
    output, _ = forward_propagation(X, parameters)
    return (output > 0.5).astype(int)

In [145]:
#Converting training data to numpy array
X_train = np.array(list(train_data['one_hot']))
y_train = np.array(train_data['label'])

#Initializing the ANN parameters
input_size = vocab_size
hidden_size = 128
output_size = 1
learning_rate = 0.01
epochs = 20

#Training the ANN using function-based approach
parameters = train(X_train,
                   y_train,
                   input_size=input_size,
                   hidden_size=hidden_size,
                   output_size=output_size,
                   learning_rate=learning_rate,
                   epochs=epochs)


Epoch 1/20, Loss: 0.4823
Epoch 2/20, Loss: 0.4782
Epoch 3/20, Loss: 0.4691
Epoch 4/20, Loss: 0.4848
Epoch 5/20, Loss: 0.4691
Epoch 6/20, Loss: 0.4707
Epoch 7/20, Loss: 0.4945
Epoch 8/20, Loss: 0.4915
Epoch 9/20, Loss: 0.4838
Epoch 10/20, Loss: 0.4881
Epoch 11/20, Loss: 0.4797
Epoch 12/20, Loss: 0.4815
Epoch 13/20, Loss: 0.4705
Epoch 14/20, Loss: 0.4768
Epoch 15/20, Loss: 0.4633
Epoch 16/20, Loss: 0.4574
Epoch 17/20, Loss: 0.4669
Epoch 18/20, Loss: 0.4511
Epoch 19/20, Loss: 0.4503
Epoch 20/20, Loss: 0.4375


In [146]:
#Converting test data to numpy array
X_test = np.array(list(test_data['one_hot']))
y_test = np.array(test_data['label'])

#Making predictions on the test set
y_pred = predict(X_test, parameters)

#Calculating accuracy
accuracy = accuracy_score(y_test, y_pred)

#Generating confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)

#Calculating precision, recall, and F1-score for both classes separately (positive and negative)
precision, recall, f1_score, _ = precision_recall_fscore_support(y_test, y_pred, average=None)

#printing results
print(f"Accuracy: {accuracy:.3f}")
print(f"Confusion Matrix:\n{conf_matrix}")
print(f"Precision (Negative, Positive): {precision[0]:.3f}, {precision[1]:.3f}")
print(f"Recall (Negative, Positive): {recall[0]:.3f}, {recall[1]:.3f}")
print(f"F1-Score (Negative, Positive): {f1_score[0]:.3f}, {f1_score[1]:.3f}")

Accuracy: 0.508
Confusion Matrix:
[[130 119]
 [127 124]]
Precision (Negative, Positive): 0.506, 0.510
Recall (Negative, Positive): 0.522, 0.494
F1-Score (Negative, Positive): 0.514, 0.502
