In [1]:
#import necessary libraries/dependicies 
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from torch.utils.data import DataLoader, Dataset


data = pd.read_csv('./IMDB Dataset.csv')
data.head()

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive


In [2]:
#train-test split

x_train, x_test, y_train, y_test  = train_test_split(data['review'], data['sentiment'], test_size=0.2, random_state=42)

In [4]:
#Vectorization of data

vectorizer = TfidfVectorizer()
x_train_vectorized = vectorizer.fit_transform(x_train)
x_test_vectorized = vectorizer.fit_transform(x_test)

In [11]:
#Create Dataset class

class SentimentDataset(Dataset):
    def __init__(self, x, y):
        self.x = x 
        self.y = y 
    
    def __len__(self):
        return len(self.y)
    
    def __getitem__(self, idx):
        # Convert sparse matrix to dense format
        x_tensor = torch.FloatTensor(self.x[idx].toarray()[0])  # Ensure this is a float array
        y_label = 1 if self.y.iloc[idx] == 'spam' else 0  # Convert to numerical (0, 1)
        y_tensor = torch.tensor(y_label, dtype=torch.float)
        
        return x_tensor, y_tensor

#Dataloader   
train_dataset = SentimentDataset(x_train_vectorized, y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)


In [15]:
#Create model

class SentimentClassifier(nn.Module):
    def __init__(self, input_dim):
        super(SentimentClassifier, self).__init__()
        self.linear1 = nn.Linear(input_dim, 50)
        self.linear2 = nn.Linear(50, 1)
        self.activation = nn.Sigmoid()

    def forward(self, x):
        x = torch.relu(self.linear1(x))          # Apply ReLU activation
        x = self.linear2(x)                       # Second linear layer
        x = self.activation(x)                    # Sigmoid activation
        return x
    

In [16]:
#Train the model

input_dim = x_train_vectorized.shape[1]
model = SentimentClassifier(input_dim)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(10):  # Number of epochs
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs).squeeze()
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    print(f'Epoch [{epoch+1}/10], Loss: {loss.item():.4f}')

Epoch [1/10], Loss: 0.0050
Epoch [2/10], Loss: 0.0025
Epoch [3/10], Loss: 0.0006
Epoch [4/10], Loss: 0.0010
Epoch [5/10], Loss: 0.0002
Epoch [6/10], Loss: 0.0002
Epoch [7/10], Loss: 0.0001
Epoch [8/10], Loss: 0.0002
Epoch [9/10], Loss: 0.0008
Epoch [10/10], Loss: 0.0000
