In [1]:
# Toy Dataset
data = [
    ("I love this product", 1),
    ("This is the worst thing ever", 0),
    ("I am so happy with this purchase", 1),
    ("This is not what I expected", 0),
    ("Absolutely fantastic!", 1),
    ("Terrible experience", 0),
    ("I will never buy this again", 0),
    ("Best decision ever", 1),
    ("I regret buying this", 0),
    ("Such a great quality", 1)
]

# Create two categories: 1 for positive sentiment, 0 for negative sentiment


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import numpy as np


In [3]:
from collections import Counter

# Tokenization and Vocabulary Building
def tokenize_sentence(sentence):
    return sentence.lower().split()

# Build vocabulary
vocab = Counter()
for sentence, label in data:
    tokens = tokenize_sentence(sentence)
    vocab.update(tokens)

# Create a word to index mapping
word2idx = {word: idx for idx, (word, _) in enumerate(vocab.items(), 1)}
word2idx['<PAD>'] = 0  # Add padding token
vocab_size = len(word2idx)

# Convert sentences to sequences of indices
def encode_sentence(sentence, word2idx, max_len=10):
    tokens = tokenize_sentence(sentence)
    encoded = [word2idx.get(token, 0) for token in tokens]  # Convert words to indices
    # Padding to max length
    encoded = encoded[:max_len] + [0] * (max_len - len(encoded))
    return encoded

# Encode data
X = np.array([encode_sentence(sentence, word2idx) for sentence, _ in data])
y = np.array([label for _, label in data])

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [4]:
class SentenceDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.LongTensor(X)
        self.y = torch.LongTensor(y)

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# Create DataLoader for training and testing
train_dataset = SentenceDataset(X_train, y_train)
test_dataset = SentenceDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=2)


In [5]:
class SelfAttention(nn.Module):
    def __init__(self, embed_dim):
        super(SelfAttention, self).__init__()
        self.query = nn.Linear(embed_dim, embed_dim)
        self.key = nn.Linear(embed_dim, embed_dim)
        self.value = nn.Linear(embed_dim, embed_dim)
        self.scale = embed_dim ** 0.5

    def forward(self, x):
        Q = self.query(x)
        K = self.key(x)
        V = self.value(x)

        # Compute scaled dot-product attention
        attn_weights = torch.matmul(Q, K.transpose(-2, -1)) / self.scale
        attn_weights = F.softmax(attn_weights, dim=-1)

        # Compute the attention-weighted sum of the values
        output = torch.matmul(attn_weights, V)
        return output, attn_weights


In [6]:
class SentenceClassifier(nn.Module):
    def __init__(self, vocab_size, embed_dim, num_classes):
        super(SentenceClassifier, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.attention = SelfAttention(embed_dim)
        self.fc = nn.Linear(embed_dim, num_classes)

    def forward(self, x):
        # Embed the input tokens
        embedded = self.embedding(x)

        # Apply self-attention
        attention_output, attn_weights = self.attention(embedded)

        # Sum the attention outputs across the sequence
        pooled_output = attention_output.mean(dim=1)

        # Pass the pooled output through a fully connected layer
        output = self.fc(pooled_output)
        return output, attn_weights

# Hyperparameters
embed_dim = 16
num_classes = 2

# Initialize the model
model = SentenceClassifier(vocab_size=len(word2idx), embed_dim=embed_dim, num_classes=num_classes)


In [7]:
# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 40
for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    for batch_x, batch_y in train_loader:
        optimizer.zero_grad()

        # Forward pass
        output, _ = model(batch_x)
        loss = criterion(output, batch_y)
        total_loss += loss.item()

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

    print(f'Epoch {epoch+1}, Loss: {total_loss/len(train_loader)}')


Epoch 1, Loss: 0.8256252855062485
Epoch 2, Loss: 0.7898856401443481
Epoch 3, Loss: 0.7559326142072678
Epoch 4, Loss: 0.7259760200977325
Epoch 5, Loss: 0.7064915597438812
Epoch 6, Loss: 0.6770203560590744
Epoch 7, Loss: 0.6615673005580902
Epoch 8, Loss: 0.6490078121423721
Epoch 9, Loss: 0.6392672806978226
Epoch 10, Loss: 0.6230557560920715


In [8]:
# Evaluate the model
model.eval()
all_preds = []

with torch.no_grad():
    for batch_x, batch_y in test_loader:
        output, _ = model(batch_x)
        _, predicted = torch.max(output, dim=1)
        all_preds.extend(predicted.cpu().numpy())

# Calculate accuracy
accuracy = accuracy_score(y_test, all_preds)
print(f'Accuracy on the test set: {accuracy * 100:.2f}%')


Accuracy on the test set: 0.00%


In [9]:
# Visualizing Attention for a Single Example
def visualize_attention(sentence):
    model.eval()
    encoded_sentence = torch.LongTensor(encode_sentence(sentence, word2idx)).unsqueeze(0)

    with torch.no_grad():
        output, attn_weights = model(encoded_sentence)

    attn_weights = attn_weights.squeeze().cpu().numpy()

    print(f'Attention Weights for Sentence: "{sentence}"')
    print(attn_weights)

# Example
visualize_attention("I love this product")


Attention Weights for Sentence: "I love this product"
[[0.02559064 0.04033669 0.06305679 0.06938002 0.13360597 0.13360597
  0.13360597 0.13360597 0.13360597 0.13360597]
 [0.06341338 0.09224927 0.05729984 0.11789563 0.11152366 0.11152366
  0.11152366 0.11152366 0.11152366 0.11152366]
 [0.10491624 0.06680733 0.0967507  0.06556741 0.11099305 0.11099305
  0.11099305 0.11099305 0.11099305 0.11099305]
 [0.10332625 0.12965928 0.05728712 0.13165252 0.0963458  0.0963458
  0.0963458  0.0963458  0.0963458  0.0963458 ]
 [0.03381648 0.07661147 0.09586184 0.09351337 0.11669948 0.11669948
  0.11669948 0.11669948 0.11669948 0.11669948]
 [0.03381648 0.07661147 0.09586184 0.09351337 0.11669948 0.11669948
  0.11669948 0.11669948 0.11669948 0.11669948]
 [0.03381648 0.07661147 0.09586184 0.09351337 0.11669948 0.11669948
  0.11669948 0.11669948 0.11669948 0.11669948]
 [0.03381648 0.07661147 0.09586184 0.09351337 0.11669948 0.11669948
  0.11669948 0.11669948 0.11669948 0.11669948]
 [0.03381648 0.07661147 0.0

In [15]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
from collections import Counter

# Dataset preparation
data = [
    ("I love this product", 1),
    ("This is the worst thing ever", 0),
    ("I am so happy with this purchase", 1),
    ("This is not what I expected", 0),
    ("Absolutely fantastic!", 1),
    ("Terrible experience", 0),
    ("I will never buy this again", 0),
    ("Best decision ever", 1),
    ("I regret buying this", 0),
    ("Such a great quality", 1)
]

# Tokenization and Vocabulary
def tokenize_sentence(sentence):
    return sentence.lower().split()

vocab = Counter()
for sentence, _ in data:
    tokens = tokenize_sentence(sentence)
    vocab.update(tokens)

word2idx = {word: idx for idx, (word, _) in enumerate(vocab.items(), 1)}
word2idx['<PAD>'] = 0  # Padding token

vocab_size = len(word2idx)

# Encode sentences to indices
def encode_sentence(sentence, word2idx, max_len=10):
    tokens = tokenize_sentence(sentence)
    encoded = [word2idx.get(token, 0) for token in tokens]
    return encoded[:max_len] + [0] * (max_len - len(encoded))

# Encode data
X = np.array([encode_sentence(sentence, word2idx) for sentence, _ in data])
y = np.array([label for _, label in data])

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Dataset and DataLoader
class SentenceDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.LongTensor(X)
        self.y = torch.LongTensor(y)

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

train_dataset = SentenceDataset(X_train, y_train)
test_dataset = SentenceDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=2)

# Self-Attention Layer
class SelfAttention(nn.Module):
    def __init__(self, embed_dim):
        super(SelfAttention, self).__init__()
        self.query = nn.Linear(embed_dim, embed_dim)
        self.key = nn.Linear(embed_dim, embed_dim)
        self.value = nn.Linear(embed_dim, embed_dim)
        self.scale = embed_dim ** 0.5

    def forward(self, x):
        Q = self.query(x)
        K = self.key(x)
        V = self.value(x)

        attn_weights = torch.matmul(Q, K.transpose(-2, -1)) / self.scale
        attn_weights = F.softmax(attn_weights, dim=-1)

        output = torch.matmul(attn_weights, V)
        return output, attn_weights

# Model
class SentenceClassifier(nn.Module):
    def __init__(self, vocab_size, embed_dim, num_classes):
        super(SentenceClassifier, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.attention = SelfAttention(embed_dim)
        self.fc = nn.Linear(embed_dim, num_classes)

    def forward(self, x):
        embedded = self.embedding(x)
        attention_output, _ = self.attention(embedded)
        pooled_output = attention_output.mean(dim=1)
        output = self.fc(pooled_output)
        return output

# Hyperparameters
embed_dim = 100
num_classes = 2
model = SentenceClassifier(vocab_size=len(word2idx), embed_dim=embed_dim, num_classes=num_classes)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training Loop
num_epochs = 40  # Increase number of epochs for more robust training
for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    for batch_x, batch_y in train_loader:
        optimizer.zero_grad()
        output = model(batch_x)
        loss = criterion(output, batch_y)
        total_loss += loss.item()
        loss.backward()
        optimizer.step()

    print(f'Epoch {epoch+1}, Loss: {total_loss/len(train_loader):.4f}')

# Evaluation
model.eval()
all_preds = []

with torch.no_grad():
    for batch_x, batch_y in test_loader:
        output = model(batch_x)
        _, predicted = torch.max(output, dim=1)
        all_preds.extend(predicted.cpu().numpy())

accuracy = accuracy_score(y_test, all_preds)
print(f'Test Accuracy: {accuracy * 100:.2f}%')


Epoch 1, Loss: 0.8533
Epoch 2, Loss: 0.6413
Epoch 3, Loss: 0.5605
Epoch 4, Loss: 0.4628
Epoch 5, Loss: 0.3663
Epoch 6, Loss: 0.2807
Epoch 7, Loss: 0.1947
Epoch 8, Loss: 0.1206
Epoch 9, Loss: 0.0803
Epoch 10, Loss: 0.0477
Epoch 11, Loss: 0.0268
Epoch 12, Loss: 0.0186
Epoch 13, Loss: 0.0127
Epoch 14, Loss: 0.0090
Epoch 15, Loss: 0.0072
Epoch 16, Loss: 0.0059
Epoch 17, Loss: 0.0048
Epoch 18, Loss: 0.0043
Epoch 19, Loss: 0.0037
Epoch 20, Loss: 0.0033
Epoch 21, Loss: 0.0030
Epoch 22, Loss: 0.0027
Epoch 23, Loss: 0.0025
Epoch 24, Loss: 0.0023
Epoch 25, Loss: 0.0022
Epoch 26, Loss: 0.0020
Epoch 27, Loss: 0.0019
Epoch 28, Loss: 0.0018
Epoch 29, Loss: 0.0017
Epoch 30, Loss: 0.0016
Epoch 31, Loss: 0.0015
Epoch 32, Loss: 0.0015
Epoch 33, Loss: 0.0014
Epoch 34, Loss: 0.0013
Epoch 35, Loss: 0.0013
Epoch 36, Loss: 0.0012
Epoch 37, Loss: 0.0012
Epoch 38, Loss: 0.0011
Epoch 39, Loss: 0.0011
Epoch 40, Loss: 0.0010
Test Accuracy: 0.00%
