In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split

## Load the Dataset

In [40]:
url1 = "https://raw.githubusercontent.com/clairett/pytorch-sentiment-classification/master/data/SST2/train.tsv"
url2 = "https://raw.githubusercontent.com/clairett/pytorch-sentiment-classification/master/data/SST2/test.tsv"

train = pd.read_csv(url1, sep="\t")
test = pd.read_csv(url2, sep="\t")

In [41]:
train.columns = ["sentence", "label"]
test.columns = ["sentence", "label"]

In [45]:
train

Unnamed: 0,sentence,label
0,apparently reassembled from the cutting room f...,0
1,they presume their audience wo n't sit still f...,0
2,this is a visually stunning rumination on love...,1
3,jonathan parker 's bartleby should have been t...,1
4,campanella gets the tone just right funny in t...,1
...,...,...
6914,"painful , horrifying and oppressively tragic ,...",1
6915,take care is nicely performed by a quintet of ...,0
6916,"the script covers huge , heavy topics in a bla...",0
6917,a seriously bad film with seriously warped log...,0


In [48]:
X_train, X_val, y_train, y_val = train_test_split(train["sentence"], train["label"], test_size=0.2, random_state=42)

## MLP Model

In [49]:
import torch
from torch import nn

In [50]:
import torch.nn as nn

class MLP(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(MLP, self).__init__()
        
        self.model = nn.Sequential(
            nn.Linear(input_dim, 512),
            nn.ReLU(),
            nn.Dropout(0.3),

            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),

            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.3),

            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.3),

            nn.Linear(64, output_dim)
        )

    def forward(self, x):
        return self.model(x)


In [51]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MLP(10000, 2).to(device)

In [52]:
from torchinfo import summary

summary(model, input_size=(64, 10000))  # (batch_size, input_dim)

Layer (type:depth-idx)                   Output Shape              Param #
MLP                                      [64, 2]                   --
├─Sequential: 1-1                        [64, 2]                   --
│    └─Linear: 2-1                       [64, 512]                 5,120,512
│    └─ReLU: 2-2                         [64, 512]                 --
│    └─Dropout: 2-3                      [64, 512]                 --
│    └─Linear: 2-4                       [64, 256]                 131,328
│    └─ReLU: 2-5                         [64, 256]                 --
│    └─Dropout: 2-6                      [64, 256]                 --
│    └─Linear: 2-7                       [64, 128]                 32,896
│    └─ReLU: 2-8                         [64, 128]                 --
│    └─Dropout: 2-9                      [64, 128]                 --
│    └─Linear: 2-10                      [64, 64]                  8,256
│    └─ReLU: 2-11                        [64, 64]                 

In [56]:
from sklearn.feature_extraction.text import TfidfVectorizer
from torch.utils.data import TensorDataset, DataLoader

In [57]:

# Vectorize the text using TF-IDF
vectorizer = TfidfVectorizer(max_features=10000)  # 300 input_dim for MLP
X_train_vec = vectorizer.fit_transform(X_train).toarray()
X_val_vec = vectorizer.transform(X_val).toarray()

# Convert to tensors
X_train_tensor = torch.tensor(X_train_vec, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val_vec, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
y_val_tensor = torch.tensor(y_val.values, dtype=torch.long)

# Datasets and loaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64)


## Training the model

In [None]:
import torch.optim as optim

# Hyperparameters
input_dim = 10000  # update this if using a different embedding size
output_dim = 2
num_epochs = 10
batch_size = 64
learning_rate = 0.001


criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Trackers
best_val_acc = 0.0
train_losses, val_losses, val_accuracies = [], [], []

# Training loop
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    train_losses.append(train_loss / len(train_loader))

    # Validation
    model.eval()
    correct = 0
    total = 0
    val_loss = 0.0

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)

            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    avg_val_loss = val_loss / len(val_loader)
    val_acc = correct / total
    val_losses.append(avg_val_loss)
    val_accuracies.append(val_acc)

    print(f"Epoch {epoch+1}/{num_epochs} | Train Loss: {train_losses[-1]:.4f} | Val Loss: {avg_val_loss:.4f} | Val Acc: {val_acc:.4f}")

    # Save best model
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "checkpoint.pt")




Epoch 1/10 | Train Loss: 0.6249 | Val Loss: 0.4595 | Val Acc: 0.7919
Epoch 2/10 | Train Loss: 0.2649 | Val Loss: 0.5083 | Val Acc: 0.7731
Epoch 3/10 | Train Loss: 0.0772 | Val Loss: 0.7651 | Val Acc: 0.7717
Epoch 4/10 | Train Loss: 0.0216 | Val Loss: 1.0206 | Val Acc: 0.7645
Epoch 5/10 | Train Loss: 0.0077 | Val Loss: 1.3451 | Val Acc: 0.7652
Epoch 6/10 | Train Loss: 0.0034 | Val Loss: 1.6403 | Val Acc: 0.7702


In [None]:
# Plotting
import matplotlib.pyplot as plt
epochs = range(1, num_epochs + 1)

plt.figure(figsize=(10, 5))

plt.subplot(1, 2, 1)
plt.plot(epochs, val_accuracies, marker='o', label='Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Validation Accuracy')
plt.grid(True)

plt.subplot(1, 2, 2)
plt.plot(epochs, val_losses, marker='o', color='red', label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Validation Loss')
plt.grid(True)

plt.tight_layout()
plt.show()