## Importing Modules

In [1]:
import numpy as np
import pandas as pd
import re
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

## Loading Dataset and Preprocessing Sentences

In [2]:
# Load data
df = pd.read_csv("/home/shakeer/Downloads/data.csv")

# Map sentiments to integers
sentiment = {'negative': 0, 'neutral': 1, 'positive': 2}
df['Sentiment'] = df['Sentiment'].map(sentiment)

# Preprocess text
def preprocess_text(text):
    text = text.lower()  # Convert to lowercase
    text = re.sub(r'\d+', '', text)  # Remove numbers
    text = re.sub(r'\W+', ' ', text)  # Remove special characters
    return text

df['Sentence'] = df['Sentence'].apply(preprocess_text)

# Create vocabulary
vocabulary = {}
for sentence in df['Sentence']:
    for word in sentence.split():
        if word not in vocabulary:
            vocabulary[word] = len(vocabulary)

# Convert sentences to vectors
def sentence_to_vector(sentence, vocab):
    vector = np.zeros(len(vocab))
    for word in sentence.split():
        if word in vocab:
            vector[vocab[word]] += 1
    return vector

X = np.array([sentence_to_vector(sentence, vocabulary) for sentence in df['Sentence']])
y = df['Sentiment'].values


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Model for Classification

In [3]:
class SimpleNN:
    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.0001):
        self.weights1 = np.random.randn(input_size, hidden_size)
        self.bias1 = np.zeros((1, hidden_size))
        self.weights2 = np.random.randn(hidden_size, output_size)
        self.bias2 = np.zeros((1, output_size))
        self.learning_rate = learning_rate

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def softmax(self, z):
        exp_scores = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

    def forward(self, X):
        self.z1 = np.dot(X, self.weights1) + self.bias1
        self.a1 = self.sigmoid(self.z1)
        self.z2 = np.dot(self.a1, self.weights2) + self.bias2
        self.a2 = self.softmax(self.z2)
        return self.a2

    def backward(self, X, y, output):
        m = X.shape[0]
        delta2 = output - y
        delta1 = np.dot(delta2, self.weights2.T) * (self.a1 * (1 - self.a1))

        self.weights2 -= self.learning_rate * np.dot(self.a1.T, delta2) / m
        self.bias2 -= self.learning_rate * np.sum(delta2, axis=0, keepdims=True) / m
        self.weights1 -= self.learning_rate * np.dot(X.T, delta1) / m
        self.bias1 -= self.learning_rate * np.sum(delta1, axis=0, keepdims=True) / m

    def train(self, X, y, epochs=1000):
        y_encoded = np.eye(np.max(y) + 1)[y]
        for epoch in range(epochs):
            output = self.forward(X)
            self.backward(X, y_encoded, output)
            if epoch % 100 == 0:
                loss = -np.mean(np.sum(y_encoded * np.log(output + 1e-9), axis=1))
                print(f'Epoch {epoch}, Loss: {loss}')

    def predict(self, X):
        output = self.forward(X)
        return np.argmax(output, axis=1)


## Model training and prediction

In [6]:
# Initialize model parameters
input_size = X_train.shape[1]
hidden_size = 20# no of hidden layers.
output_size = 3

# Create and train model
model = SimpleNN(input_size, hidden_size, output_size, learning_rate=0.0001)
model.train(X_train, y_train, epochs=2000)

# Predict and evaluate
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

Epoch 0, Loss: 1.7219829596923204
Epoch 100, Loss: 1.7188271126106538
Epoch 200, Loss: 1.7157288122452423
Epoch 300, Loss: 1.7126870457134338
Epoch 400, Loss: 1.7097008082703484
Epoch 500, Loss: 1.7067691035616206
Epoch 600, Loss: 1.7038909438707834
Epoch 700, Loss: 1.7010653503604838
Epoch 800, Loss: 1.6982913533067554
Epoch 900, Loss: 1.6955679923256812
Epoch 1000, Loss: 1.6928943165918344
Epoch 1100, Loss: 1.6902693850479806
Epoch 1200, Loss: 1.6876922666055572
Epoch 1300, Loss: 1.6851620403355485
Epoch 1400, Loss: 1.682677795649407
Epoch 1500, Loss: 1.6802386324697423
Epoch 1600, Loss: 1.6778436613905552
Epoch 1700, Loss: 1.6754920038268317
Epoch 1800, Loss: 1.6731827921533793
Epoch 1900, Loss: 1.670915169832818
Accuracy: 0.40718562874251496
