In [3]:
!pip install datasets



## Part 1

In [4]:
# Question 1(a): What is the size of the vocabulary formed from your training data?

from datasets import load_dataset
import nltk
from nltk.tokenize import word_tokenize
nltk.download('punkt')

# Load the dataset
dataset = load_dataset("rotten_tomatoes")
train_dataset = dataset['train']

# Initialize an empty set to store unique words
vocabulary = set()

# Tokenize each review in the training dataset and update the vocabulary set
for text in train_dataset['text']:
    tokens = word_tokenize(text.lower())
    vocabulary.update(tokens)

# Print the size of the vocabulary
print("The size of the vocabulary is:", len(vocabulary))

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/7.46k [00:00<?, ?B/s]

train.parquet:   0%|          | 0.00/699k [00:00<?, ?B/s]

validation.parquet:   0%|          | 0.00/90.0k [00:00<?, ?B/s]

test.parquet:   0%|          | 0.00/92.2k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/8530 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1066 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1066 [00:00<?, ? examples/s]

The size of the vocabulary is: 18029


In [10]:
# Question 1(b): How many OOV words exist in your training data?

import numpy as np

# Load the GloVe embeddings (make sure to download 'glove.6B.100d.txt' and place it in the working directory)
glove_vocab = set()
with open('glove.6B.100d.txt', 'r', encoding='utf8') as f:
    for line in f:
        word = line.split()[0]
        glove_vocab.add(word)

# Identify OOV words
oov_words = vocabulary - glove_vocab

# Print the number of OOV words
print("Number of OOV words in the training data:", len(oov_words))

Number of OOV words in the training data: 1865


In [11]:
# Question 1(c): Mitigating OOV Words by Initializing Random Embeddings

embedding_dim = 100  # Dimensionality of GloVe embeddings
import numpy as np

# Create mappings between words and indices
word2idx = {}
idx2word = {}
for idx, word in enumerate(vocabulary):
    word2idx[word] = idx
    idx2word[idx] = word

# Initialize the embedding matrix with random values
embedding_matrix = np.random.uniform(-0.05, 0.05, (len(vocabulary), embedding_dim))

# Load GloVe embeddings into the embedding matrix where possible
with open('glove.6B.100d.txt', 'r', encoding='utf8') as f:
    for line in f:
        values = line.split()
        glove_word = values[0]
        if glove_word in word2idx:
            vector = np.asarray(values[1:], dtype='float32')
            idx = word2idx[glove_word]
            embedding_matrix[idx] = vector

# Now, embedding_matrix contains GloVe embeddings for known words and random values for OOV words
print("Embedding matrix shape:", embedding_matrix.shape)

Embedding matrix shape: (18029, 100)


## Part 2

In [12]:
# Import necessary libraries
from datasets import load_dataset
import nltk
from nltk.tokenize import word_tokenize
nltk.download('punkt')
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
import torch.nn as nn

# Load the dataset
dataset = load_dataset("rotten_tomatoes")
train_raw = dataset['train']
validation_raw = dataset['validation']
test_raw = dataset['test']

# Build the vocabulary from training data
vocabulary = set()
for text in train_raw['text']:
    tokens = word_tokenize(text.lower())
    vocabulary.update(tokens)

# Add special tokens
vocabulary.add('<unk>')
vocabulary.add('<pad>')

# Create mappings between words and indices
word2idx = {word: idx for idx, word in enumerate(vocabulary)}
idx2word = {idx: word for word, idx in word2idx.items()}

# Initialize the embedding matrix
embedding_dim = 100  # Dimensionality of GloVe embeddings
vocab_size = len(word2idx)
embedding_matrix = np.random.uniform(-0.05, 0.05, (vocab_size, embedding_dim))

# Load GloVe embeddings (ensure 'glove.6B.100d.txt' is in your working directory)
glove_path = 'glove.6B.100d.txt'  # Update the path if necessary
with open(glove_path, 'r', encoding='utf8') as f:
    for line in f:
        values = line.strip().split()
        if len(values) == embedding_dim + 1:
            word = values[0]
            vector = np.asarray(values[1:], dtype='float32')
            if word in word2idx:
                idx = word2idx[word]
                embedding_matrix[idx] = vector

# Set the embedding for '<pad>' token to zeros
pad_idx = word2idx['<pad>']
embedding_matrix[pad_idx] = np.zeros(embedding_dim)

# Define a function to convert sentences to indices
def sentence_to_indices(sentence, word2idx):
    tokens = word_tokenize(sentence.lower())
    indices = []
    for token in tokens:
        if token in word2idx:
            indices.append(word2idx[token])
        else:
            indices.append(word2idx['<unk>'])  # Map unknown words to '<unk>'
    return indices

# Define the custom Dataset class
class SentimentDataset(Dataset):
    def __init__(self, texts, labels, word2idx):
        self.texts = texts
        self.labels = labels
        self.word2idx = word2idx

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        indices = sentence_to_indices(text, self.word2idx)
        return torch.tensor(indices, dtype=torch.long), torch.tensor(label, dtype=torch.long)

# Create datasets for training, validation, and testing
train_dataset = SentimentDataset(train_raw['text'], train_raw['label'], word2idx)
val_dataset = SentimentDataset(validation_raw['text'], validation_raw['label'], word2idx)
test_dataset = SentimentDataset(test_raw['text'], test_raw['label'], word2idx)

# Define the collate_fn function for padding within batches
def collate_fn(batch):
    sequences = [item[0] for item in batch]
    labels = torch.tensor([item[1] for item in batch], dtype=torch.float)
    sequences_padded = pad_sequence(sequences, batch_first=True, padding_value=word2idx['<pad>'])
    return sequences_padded, labels

# Create DataLoaders for training, validation, and testing
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Max Pooling

In [13]:
# Define the RNN model using pre-trained embeddings
class SentimentRNNWithMaxPool(nn.Module):
    def __init__(self, embedding_matrix):
        super(SentimentRNNWithMaxPool, self).__init__()
        vocab_size, embedding_dim = embedding_matrix.shape
        self.embedding = nn.Embedding.from_pretrained(torch.tensor(embedding_matrix, dtype=torch.float32), freeze=True)
        self.lstm = nn.LSTM(embedding_dim, hidden_size=256, batch_first=True, bidirectional=True, num_layers=2, dropout=0.5)
        self.fc1 = nn.Linear(512, 128)
        self.fc2 = nn.Linear(128, 1)
        self.dropout = nn.Dropout(0.5)
        self.relu = nn.ReLU()

    def forward(self, x):
        embeds = self.embedding(x)
        lstm_out, (h_n, c_n) = self.lstm(embeds)

        # Concatenate the final hidden states from both directions
        # out = torch.cat((h_n[-2,:,:], h_n[-1,:,:]), dim=1)

        # Max-Pooling
        out, _ = torch.max(lstm_out, dim=1)

        out = self.dropout(out)
        out = self.relu(self.fc1(out))
        out = self.dropout(out)
        out = torch.sigmoid(self.fc2(out))
        return out.squeeze()


# Instantiate the model
model = SentimentRNNWithMaxPool(embedding_matrix)

# Set device to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Define loss function and optimizer
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=2, factor=0.5, verbose=True)

# Training loop with validation and early stopping
num_epochs = 30
patience = 5  # Early stopping patience
best_val_accuracy = 0
epochs_no_improve = 0

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for sequences, labels in train_loader:
        sequences = sequences.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(sequences)
        loss = criterion(outputs, labels)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)  # Gradient clipping
        optimizer.step()
        running_loss += loss.item() * sequences.size(0)
    epoch_loss = running_loss / len(train_dataset)

    # Validation
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for sequences, labels in val_loader:
            sequences = sequences.to(device)
            labels = labels.to(device)
            outputs = model(sequences)
            predicted = (outputs >= 0.5).long()
            correct += (predicted == labels.long()).sum().item()
            total += labels.size(0)
    val_accuracy = correct / total
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}')

    # Learning rate scheduling
    scheduler.step(val_accuracy)

    # Check for improvement
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        epochs_no_improve = 0
        # Save the best model
        torch.save(model.state_dict(), 'best_model_max_pooling.pt')
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= patience:
            print('Early stopping!')
            break

Epoch [1/30], Loss: 0.6267, Validation Accuracy: 0.6698
Epoch [2/30], Loss: 0.5237, Validation Accuracy: 0.7514
Epoch [3/30], Loss: 0.4834, Validation Accuracy: 0.7617
Epoch [4/30], Loss: 0.4584, Validation Accuracy: 0.7683
Epoch [5/30], Loss: 0.4383, Validation Accuracy: 0.7664
Epoch [6/30], Loss: 0.4143, Validation Accuracy: 0.7645
Epoch [7/30], Loss: 0.3880, Validation Accuracy: 0.7683
Epoch [8/30], Loss: 0.3369, Validation Accuracy: 0.7692
Epoch [9/30], Loss: 0.3103, Validation Accuracy: 0.7749
Epoch [10/30], Loss: 0.2886, Validation Accuracy: 0.7645
Epoch [11/30], Loss: 0.2616, Validation Accuracy: 0.7730
Epoch [12/30], Loss: 0.2403, Validation Accuracy: 0.7786
Epoch [13/30], Loss: 0.2163, Validation Accuracy: 0.7674
Epoch [14/30], Loss: 0.1843, Validation Accuracy: 0.7617
Epoch [15/30], Loss: 0.1675, Validation Accuracy: 0.7580
Epoch [16/30], Loss: 0.1307, Validation Accuracy: 0.7598
Epoch [17/30], Loss: 0.1061, Validation Accuracy: 0.7561
Early stopping!


In [50]:
# Load the best model and evaluate on the test set
model = SentimentRNNWithMaxPool(embedding_matrix)
model.load_state_dict(torch.load('best_model_max_pooling.pt'))
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for sequences, labels in test_loader:
        sequences = sequences.to(device)
        labels = labels.to(device)
        outputs = model(sequences)
        predicted = (outputs >= 0.5).long()
        correct += (predicted == labels.long()).sum().item()
        total += labels.size(0)
test_accuracy = correct / total
print(f'Accuracy Score on Test dataset: {test_accuracy:.4f}')

  model.load_state_dict(torch.load('best_model_max_pooling.pt'))


Accuracy Score on Test dataset: 0.7730


Average Pooling

In [15]:
# Define the RNN model using pre-trained embeddings
class SentimentRNNWithAvgPool(nn.Module):
    def __init__(self, embedding_matrix):
        super(SentimentRNNWithAvgPool, self).__init__()
        vocab_size, embedding_dim = embedding_matrix.shape
        self.embedding = nn.Embedding.from_pretrained(torch.tensor(embedding_matrix, dtype=torch.float32), freeze=True)
        self.lstm = nn.LSTM(embedding_dim, hidden_size=256, batch_first=True, bidirectional=True, num_layers=2, dropout=0.5)
        self.fc1 = nn.Linear(512, 128)
        self.fc2 = nn.Linear(128, 1)
        self.dropout = nn.Dropout(0.5)
        self.relu = nn.ReLU()

    def forward(self, x):
        embeds = self.embedding(x)
        lstm_out, (h_n, c_n) = self.lstm(embeds)

        # Concatenate the final hidden states from both directions
        # out = torch.cat((h_n[-2,:,:], h_n[-1,:,:]), dim=1)

        # Average-Pooling
        out = torch.mean(lstm_out, dim=1)

        out = self.dropout(out)
        out = self.relu(self.fc1(out))
        out = self.dropout(out)
        out = torch.sigmoid(self.fc2(out))
        return out.squeeze()


# Instantiate the model
model = SentimentRNNWithAvgPool(embedding_matrix)

# Set device to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Define loss function and optimizer
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=2, factor=0.5, verbose=True)

# Training loop with validation and early stopping
num_epochs = 30
patience = 5  # Early stopping patience
best_val_accuracy = 0
epochs_no_improve = 0

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for sequences, labels in train_loader:
        sequences = sequences.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(sequences)
        loss = criterion(outputs, labels)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)  # Gradient clipping
        optimizer.step()
        running_loss += loss.item() * sequences.size(0)
    epoch_loss = running_loss / len(train_dataset)

    # Validation
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for sequences, labels in val_loader:
            sequences = sequences.to(device)
            labels = labels.to(device)
            outputs = model(sequences)
            predicted = (outputs >= 0.5).long()
            correct += (predicted == labels.long()).sum().item()
            total += labels.size(0)
    val_accuracy = correct / total
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}')

    # Learning rate scheduling
    scheduler.step(val_accuracy)

    # Check for improvement
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        epochs_no_improve = 0
        # Save the best model
        torch.save(model.state_dict(), 'best_model_avg_pooling.pt')
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= patience:
            print('Early stopping!')
            break


Epoch [1/30], Loss: 0.6542, Validation Accuracy: 0.6717
Epoch [2/30], Loss: 0.5560, Validation Accuracy: 0.7420
Epoch [3/30], Loss: 0.5167, Validation Accuracy: 0.7439
Epoch [4/30], Loss: 0.4937, Validation Accuracy: 0.7617
Epoch [5/30], Loss: 0.4653, Validation Accuracy: 0.7514
Epoch [6/30], Loss: 0.4531, Validation Accuracy: 0.7692
Epoch [7/30], Loss: 0.4226, Validation Accuracy: 0.7617
Epoch [8/30], Loss: 0.4003, Validation Accuracy: 0.7777
Epoch [9/30], Loss: 0.3777, Validation Accuracy: 0.7617
Epoch [10/30], Loss: 0.3536, Validation Accuracy: 0.7749
Epoch [11/30], Loss: 0.3266, Validation Accuracy: 0.7880
Epoch [12/30], Loss: 0.2864, Validation Accuracy: 0.7533
Epoch [13/30], Loss: 0.2532, Validation Accuracy: 0.7542
Epoch [14/30], Loss: 0.2202, Validation Accuracy: 0.7767
Epoch [15/30], Loss: 0.1489, Validation Accuracy: 0.7758
Epoch [16/30], Loss: 0.1168, Validation Accuracy: 0.7795
Early stopping!


In [51]:
# Load the best model and evaluate on the test set
model = SentimentRNNWithAvgPool(embedding_matrix)
model.load_state_dict(torch.load('best_model_avg_pooling.pt'))
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for sequences, labels in test_loader:
        sequences = sequences.to(device)
        labels = labels.to(device)
        outputs = model(sequences)
        predicted = (outputs >= 0.5).long()
        correct += (predicted == labels.long()).sum().item()
        total += labels.size(0)
test_accuracy = correct / total
print(f'Accuracy Score on Test dataset: {test_accuracy:.4f}')

  model.load_state_dict(torch.load('best_model_avg_pooling.pt'))


Accuracy Score on Test dataset: 0.7927


Simple Attention Mechanism

In [25]:
class SimpleAttention(nn.Module):
  def __init__(self, hidden_size):
      super(SimpleAttention, self).__init__()
      self.weight = nn.Parameter(torch.Tensor(hidden_size, hidden_size))
      self.bias = nn.Parameter(torch.Tensor(hidden_size))

  def forward(self, hidden_states):
      # hidden_states: [batch_size, seq_len, hidden_size]
      batch_size, seq_len, hidden_size = hidden_states.size()

      # Calculate attention weights
      weights = torch.bmm(hidden_states, self.weight.unsqueeze(0).repeat(batch_size, 1, 1))
      weights = torch.tanh(weights + self.bias.unsqueeze(0).unsqueeze(1).repeat(batch_size, seq_len, 1))
      weights = torch.softmax(weights, dim=1)  # Normalize weights

      # Weight the hidden states
      weighted_hidden_states = hidden_states * weights

      # Aggregate the weighted hidden states
      context_vector = torch.sum(weighted_hidden_states, dim=1)  # Sum across time steps

      return context_vector


# Define the RNN model using pre-trained embeddings
class SentimentRNNWithSimpleAttention(nn.Module):
    def __init__(self, embedding_matrix):
        super(SentimentRNNWithSimpleAttention, self).__init__()
        vocab_size, embedding_dim = embedding_matrix.shape
        self.embedding = nn.Embedding.from_pretrained(torch.tensor(embedding_matrix, dtype=torch.float32), freeze=True)
        self.lstm = nn.LSTM(embedding_dim, hidden_size=256, batch_first=True, bidirectional=True, num_layers=2, dropout=0.5)
        self.fc1 = nn.Linear(512, 128)
        self.fc2 = nn.Linear(128, 1)
        self.dropout = nn.Dropout(0.5)
        self.relu = nn.ReLU()
        self.attention = SimpleAttention(hidden_size=512)

    def forward(self, x):
        embeds = self.embedding(x)
        lstm_out, (h_n, c_n) = self.lstm(embeds)

        # Apply Attention Mechanism
        context_vector = self.attention(lstm_out)

        context_vector = self.dropout(context_vector)
        context_vector = self.relu(self.fc1(context_vector))
        context_vector = self.dropout(context_vector)
        context_vector = torch.sigmoid(self.fc2(context_vector))
        return context_vector.squeeze()


# Instantiate the model
model = SentimentRNNWithSimpleAttention(embedding_matrix)

# Set device to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Define loss function and optimizer
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=2, factor=0.5, verbose=True)

# Training loop with validation and early stopping
num_epochs = 30
patience = 5  # Early stopping patience
best_val_accuracy = 0
epochs_no_improve = 0

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for sequences, labels in train_loader:
        sequences = sequences.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(sequences)
        loss = criterion(outputs, labels)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)  # Gradient clipping
        optimizer.step()
        running_loss += loss.item() * sequences.size(0)
    epoch_loss = running_loss / len(train_dataset)

    # Validation
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for sequences, labels in val_loader:
            sequences = sequences.to(device)
            labels = labels.to(device)
            outputs = model(sequences)
            predicted = (outputs >= 0.5).long()
            correct += (predicted == labels.long()).sum().item()
            total += labels.size(0)
    val_accuracy = correct / total
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}')

    # Learning rate scheduling
    scheduler.step(val_accuracy)

    # Check for improvement
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        epochs_no_improve = 0
        # Save the best model
        torch.save(model.state_dict(), 'best_model_simple_attention.pt')
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= patience:
            print('Early stopping!')
            break

Epoch [1/30], Loss: 0.6519, Validation Accuracy: 0.6332
Epoch [2/30], Loss: 0.5405, Validation Accuracy: 0.7495
Epoch [3/30], Loss: 0.4987, Validation Accuracy: 0.7514
Epoch [4/30], Loss: 0.4682, Validation Accuracy: 0.7627
Epoch [5/30], Loss: 0.4439, Validation Accuracy: 0.7627
Epoch [6/30], Loss: 0.4263, Validation Accuracy: 0.7514
Epoch [7/30], Loss: 0.4061, Validation Accuracy: 0.7580
Epoch [8/30], Loss: 0.3572, Validation Accuracy: 0.7852
Epoch [9/30], Loss: 0.3296, Validation Accuracy: 0.7598
Epoch [10/30], Loss: 0.2967, Validation Accuracy: 0.7692
Epoch [11/30], Loss: 0.2785, Validation Accuracy: 0.7533
Epoch [12/30], Loss: 0.2279, Validation Accuracy: 0.7711
Epoch [13/30], Loss: 0.2074, Validation Accuracy: 0.7655
Early stopping!


In [26]:
# Load the best model and evaluate on the test set
model = SentimentRNNWithSimpleAttention(embedding_matrix)
model.load_state_dict(torch.load('best_model_simple_attention.pt'))
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for sequences, labels in test_loader:
        sequences = sequences.to(device)
        labels = labels.to(device)
        outputs = model(sequences)
        predicted = (outputs >= 0.5).long()
        correct += (predicted == labels.long()).sum().item()
        total += labels.size(0)
test_accuracy = correct / total
print(f'Accuracy Score on Test dataset: {test_accuracy:.4f}')

  model.load_state_dict(torch.load('best_model_simple_attention.pt'))


Accuracy Score on Test dataset: 0.7899


Self-Attention Mechanism

In [13]:
class SelfAttention(nn.Module):
  def __init__(self, hidden_size):
      super(SelfAttention, self).__init__()
      self.query = nn.Linear(hidden_size, hidden_size)  # Linear layer for query
      self.key = nn.Linear(hidden_size, hidden_size)    # Linear layer for key
      self.value = nn.Linear(hidden_size, hidden_size)  # Linear layer for value

  def forward(self, hidden_states):
      # hidden_states: [batch_size, seq_len, hidden_size]
      batch_size, seq_len, hidden_size = hidden_states.size()

      # Calculate vectors
      Q = self.query(hidden_states)
      K = self.key(hidden_states)
      V = self.value(hidden_states)

      # Calculate attention weights
      attention_weights = torch.bmm(Q, K.transpose(1, 2)) / (hidden_size ** 0.5)
      attention_weights = torch.softmax(attention_weights, dim=-1)

      # Weight the value vectors by attention weights
      weighted_values = torch.bmm(attention_weights, V)

      return weighted_values

class SimpleAttention(nn.Module):
  def __init__(self, hidden_size):
      super(SimpleAttention, self).__init__()
      self.weight = nn.Parameter(torch.Tensor(hidden_size, hidden_size))
      self.bias = nn.Parameter(torch.Tensor(hidden_size))

  def forward(self, hidden_states):
      # hidden_states: [batch_size, seq_len, hidden_size]
      batch_size, seq_len, hidden_size = hidden_states.size()

      # Calculate attention weights
      weights = torch.bmm(hidden_states, self.weight.unsqueeze(0).repeat(batch_size, 1, 1))
      weights = torch.tanh(weights + self.bias.unsqueeze(0).unsqueeze(1).repeat(batch_size, seq_len, 1))
      weights = torch.softmax(weights, dim=1)  # Normalize weights

      # Weight the hidden states
      weighted_hidden_states = hidden_states * weights

      # Aggregate the weighted hidden states
      context_vector = torch.sum(weighted_hidden_states, dim=1)  # Sum across time steps

      return context_vector


# Define the RNN model using pre-trained embeddings
class SentimentRNNWithSelfAttention(nn.Module):
    def __init__(self, embedding_matrix):
        super(SentimentRNNWithSelfAttention, self).__init__()
        vocab_size, embedding_dim = embedding_matrix.shape
        self.embedding = nn.Embedding.from_pretrained(torch.tensor(embedding_matrix, dtype=torch.float32), freeze=True)
        self.lstm = nn.LSTM(embedding_dim, hidden_size=256, batch_first=True, bidirectional=True, num_layers=2, dropout=0.5)
        self.fc1 = nn.Linear(512, 128)
        self.fc2 = nn.Linear(128, 1)
        self.dropout = nn.Dropout(0.5)
        self.relu = nn.ReLU()
        self.self_attention = SelfAttention(hidden_size=512)
        self.simple_attention = SimpleAttention(hidden_size=512)

    def forward(self, x):
        embeds = self.embedding(x)
        lstm_out, (h_n, c_n) = self.lstm(embeds)

        # Apply self-attention
        self_attention_out = self.self_attention(lstm_out)

        # Apply Simple attention
        context_vector = self.simple_attention(self_attention_out)

        context_vector = self.dropout(context_vector)
        context_vector = self.relu(self.fc1(context_vector))
        context_vector = self.dropout(context_vector)
        context_vector = torch.sigmoid(self.fc2(context_vector))
        return context_vector.squeeze()


# Instantiate the model
model = SentimentRNNWithSelfAttention(embedding_matrix)

# Set device to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Define loss function and optimizer
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=2, factor=0.5, verbose=True)

# Training loop with validation and early stopping
num_epochs = 30
patience = 5  # Early stopping patience
best_val_accuracy = 0
epochs_no_improve = 0

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for sequences, labels in train_loader:
        sequences = sequences.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(sequences)
        loss = criterion(outputs, labels)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)  # Gradient clipping
        optimizer.step()
        running_loss += loss.item() * sequences.size(0)
    epoch_loss = running_loss / len(train_dataset)

    # Validation
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for sequences, labels in val_loader:
            sequences = sequences.to(device)
            labels = labels.to(device)
            outputs = model(sequences)
            predicted = (outputs >= 0.5).long()
            correct += (predicted == labels.long()).sum().item()
            total += labels.size(0)
    val_accuracy = correct / total
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}')

    # Learning rate scheduling
    scheduler.step(val_accuracy)

    # Check for improvement
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        epochs_no_improve = 0
        # Save the best model
        torch.save(model.state_dict(), 'best_model_self_attention.pt')
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= patience:
            print('Early stopping!')
            break



Epoch [1/30], Loss: 0.6259, Validation Accuracy: 0.7223
Epoch [2/30], Loss: 0.5348, Validation Accuracy: 0.7373
Epoch [3/30], Loss: 0.4940, Validation Accuracy: 0.7533
Epoch [4/30], Loss: 0.4661, Validation Accuracy: 0.7570
Epoch [5/30], Loss: 0.4539, Validation Accuracy: 0.7767
Epoch [6/30], Loss: 0.4307, Validation Accuracy: 0.7486
Epoch [7/30], Loss: 0.4088, Validation Accuracy: 0.7655
Epoch [8/30], Loss: 0.3816, Validation Accuracy: 0.7711
Epoch [9/30], Loss: 0.3324, Validation Accuracy: 0.7720
Epoch [10/30], Loss: 0.3011, Validation Accuracy: 0.7767
Early stopping!


In [15]:
# Load the best model and evaluate on the test set
model = SentimentRNNWithSelfAttention(embedding_matrix)
model.load_state_dict(torch.load('best_model_self_attention.pt'))
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for sequences, labels in test_loader:
        sequences = sequences.to(device)
        labels = labels.to(device)
        outputs = model(sequences)
        predicted = (outputs >= 0.5).long()
        correct += (predicted == labels.long()).sum().item()
        total += labels.size(0)
test_accuracy = correct / total
print(f'Accuracy Score on Test dataset: {test_accuracy:.4f}')

  model.load_state_dict(torch.load('best_model_self_attention.pt'))


Accuracy Score on Test dataset: 0.7645


In [None]:
#3.1

In [None]:
class SentimentRNN_UpdateEmbeddings(nn.Module):
    def __init__(self, embedding_matrix):
        super(SentimentRNN_UpdateEmbeddings, self).__init__()
        vocab_size, embedding_dim = embedding_matrix.shape
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.embedding.weight.data.copy_(torch.tensor(embedding_matrix, dtype=torch.float32))
        self.embedding.weight.requires_grad = True  # Update the embeddings during training
        self.lstm = nn.LSTM(embedding_dim, hidden_size=256, batch_first=True, bidirectional=True, num_layers=2, dropout=0.5)
        self.fc1 = nn.Linear(512, 128)
        self.fc2 = nn.Linear(128, 1)
        self.dropout = nn.Dropout(0.5)
        self.relu = nn.ReLU()

    def forward(self, x):
        embeds = self.embedding(x)
        lstm_out, (h_n, c_n) = self.lstm(embeds)

        # Concatenate the final hidden states from both directions
        out = torch.cat((h_n[-2,:,:], h_n[-1,:,:]), dim=1)

        out = self.dropout(out)
        out = self.relu(self.fc1(out))
        out = self.dropout(out)
        out = torch.sigmoid(self.fc2(out))
        return out.squeeze()

In [None]:
# Instantiate the model
model = SentimentRNN_UpdateEmbeddings(embedding_matrix)

# Set device to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Define loss function and optimizer
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=2, factor=0.5, verbose=True)

# Training loop with validation and early stopping
num_epochs = 30
patience = 5  # Early stopping patience
best_val_accuracy = 0
epochs_no_improve = 0

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for sequences, labels in train_loader:
        sequences = sequences.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(sequences)
        loss = criterion(outputs, labels)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)  # Gradient clipping
        optimizer.step()
        running_loss += loss.item() * sequences.size(0)
    epoch_loss = running_loss / len(train_dataset)

    # Validation
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for sequences, labels in val_loader:
            sequences = sequences.to(device)
            labels = labels.to(device)
            outputs = model(sequences)
            predicted = (outputs >= 0.5).long()
            correct += (predicted == labels.long()).sum().item()
            total += labels.size(0)
    val_accuracy = correct / total
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}')

    # Learning rate scheduling
    scheduler.step(val_accuracy)

    # Check for improvement
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        epochs_no_improve = 0
        # Save the best model
        torch.save(model.state_dict(), 'best_model_update_embeddings.pt')
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= patience:
            print('Early stopping!')
            break

# Load the best model and evaluate on the test set
model.load_state_dict(torch.load('best_model_update_embeddings.pt'))
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for sequences, labels in test_loader:
        sequences = sequences.to(device)
        labels = labels.to(device)
        outputs = model(sequences)
        predicted = (outputs >= 0.5).long()
        correct += (predicted == labels.long()).sum().item()
        total += labels.size(0)
test_accuracy = correct / total
print(f'Test Accuracy: {test_accuracy:.4f}')

KeyboardInterrupt: 

In [None]:
#3.2

In [None]:
# Load the GloVe embeddings
glove_vocab = set()
with open('glove.6B.100d.txt', 'r', encoding='utf8') as f:
    for line in f:
        word = line.split()[0]
        glove_vocab.add(word)

# Initialize the embedding matrix with random values
embedding_dim = 100
vocab_size = len(vocabulary)
embedding_matrix = np.random.uniform(-0.05, 0.05, (vocab_size, embedding_dim))

# Create a dictionary to store the GloVe embeddings
glove_embeddings = {}
glove_file = 'glove.6B.100d.txt'
with open(glove_file, 'r', encoding='utf8') as f:
    for line in f:
        values = line.split()
        glove_word = values[0]
        vector = np.asarray(values[1:], dtype='float32')
        # Normalize the loaded vectors to have unit norm
        vector /= np.linalg.norm(vector)
        glove_embeddings[glove_word] = vector

# Load GloVe embeddings into the embedding matrix where possible
for idx, word in enumerate(vocabulary):
    if word in glove_embeddings:
        embedding_matrix[idx] = glove_embeddings[word]

In [None]:
class SentimentRNN_OOV(nn.Module):
    def __init__(self, embedding_matrix):
        super(SentimentRNN_OOV, self).__init__()
        vocab_size, embedding_dim = embedding_matrix.shape
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.embedding.weight.data.copy_(torch.tensor(embedding_matrix, dtype=torch.float32))
        self.embedding.weight.requires_grad = True
        self.lstm = nn.LSTM(embedding_dim, hidden_size=256, batch_first=True, bidirectional=True, num_layers=2, dropout=0.5)
        self.fc1 = nn.Linear(512, 128)
        self.fc2 = nn.Linear(128, 1)
        self.dropout = nn.Dropout(0.5)
        self.relu = nn.ReLU()

    def forward(self, x):
        embeds = self.embedding(x)
        lstm_out, (h_n, c_n) = self.lstm(embeds)

        # Concatenate the final hidden states from both directions
        out = torch.cat((h_n[-2,:,:], h_n[-1,:,:]), dim=1)

        out = self.dropout(out)
        out = self.relu(self.fc1(out))
        out = self.dropout(out)
        out = torch.sigmoid(self.fc2(out))
        return out.squeeze()

In [None]:

# Instantiate the model
model = SentimentRNN_OOV(embedding_matrix)

# Set device to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Define loss function and optimizer
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=2, factor=0.5, verbose=True)

# Training loop with validation and early stopping
num_epochs = 30
patience = 5  # Early stopping patience
best_val_accuracy = 0
epochs_no_improve = 0

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for sequences, labels in train_loader:
        sequences = sequences.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(sequences)
        loss = criterion(outputs, labels)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)  # Gradient clipping
        optimizer.step()
        running_loss += loss.item() * sequences.size(0)
    epoch_loss = running_loss / len(train_dataset)

    # Validation
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for sequences, labels in val_loader:
            sequences = sequences.to(device)
            labels = labels.to(device)
            outputs = model(sequences)
            predicted = (outputs >= 0.5).long()
            correct += (predicted == labels.long()).sum().item()
            total += labels.size(0)
    val_accuracy = correct / total
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}')

    # Learning rate scheduling
    scheduler.step(val_accuracy)

    # Check for improvement
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        epochs_no_improve = 0
        # Save the best model
        torch.save(model.state_dict(), 'best_model_oov.pt')
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= patience:
            print('Early stopping!')
            break


Epoch [1/30], Loss: 0.6444, Validation Accuracy: 0.7514
Epoch [2/30], Loss: 0.4196, Validation Accuracy: 0.7552
Epoch [3/30], Loss: 0.2322, Validation Accuracy: 0.7495
Epoch [4/30], Loss: 0.1236, Validation Accuracy: 0.7542
Epoch [5/30], Loss: 0.0663, Validation Accuracy: 0.7392
Epoch [6/30], Loss: 0.0291, Validation Accuracy: 0.7458
Epoch [7/30], Loss: 0.0119, Validation Accuracy: 0.7477
Early stopping!


In [None]:
# Load the best model and evaluate on the test set
model.load_state_dict(torch.load('best_model_oov.pt'))
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for sequences, labels in test_loader:
        sequences = sequences.to(device)
        labels = labels.to(device)
        outputs = model(sequences)
        predicted = (outputs >= 0.5).long()
        correct += (predicted == labels.long()).sum().item()
        total += labels.size(0)
test_accuracy = correct / total
print(f'Test Accuracy: {test_accuracy:.4f}')

Test Accuracy: 0.7720


In [None]:
# biLSTM Model
class SentimentBiLSTM(nn.Module):
    def __init__(self, embedding_matrix):
        super(SentimentBiLSTM, self).__init__()
        vocab_size, embedding_dim = embedding_matrix.shape
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.embedding.weight.data.copy_(torch.tensor(embedding_matrix, dtype=torch.float32))
        self.embedding.weight.requires_grad = True
        self.lstm = nn.LSTM(embedding_dim, hidden_size=256, batch_first=True, bidirectional=True, num_layers=2, dropout=0.5)
        self.fc1 = nn.Linear(512, 128)
        self.fc2 = nn.Linear(128, 1)
        self.dropout = nn.Dropout(0.5)
        self.relu = nn.ReLU()

    def forward(self, x):
        embeds = self.embedding(x)
        lstm_out, (h_n, c_n) = self.lstm(embeds)

        # Concatenate the final hidden states from both directions
        out = torch.cat((h_n[-2,:,:], h_n[-1,:,:]), dim=1)

        out = self.dropout(out)
        out = self.relu(self.fc1(out))
        out = self.dropout(out)
        out = torch.sigmoid(self.fc2(out))
        return out.squeeze()

In [None]:
# biLSTM Model
class SentimentBiLSTM(nn.Module):
    def __init__(self, embedding_matrix):
        super(SentimentBiLSTM, self).__init__()
        vocab_size, embedding_dim = embedding_matrix.shape
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.embedding.weight.data.copy_(torch.tensor(embedding_matrix, dtype=torch.float32))
        self.embedding.weight.requires_grad = True
        self.lstm = nn.LSTM(embedding_dim, hidden_size=256, batch_first=True, bidirectional=True, num_layers=2, dropout=0.5)
        self.fc1 = nn.Linear(512, 128)
        self.fc2 = nn.Linear(128, 1)
        self.dropout = nn.Dropout(0.5)
        self.relu = nn.ReLU()

    def forward(self, x):
        embeds = self.embedding(x)
        lstm_out, (h_n, c_n) = self.lstm(embeds)

        # Concatenate the final hidden states from both directions
        out = torch.cat((h_n[-2,:,:], h_n[-1,:,:]), dim=1)

        out = self.dropout(out)
        out = self.relu(self.fc1(out))
        out = self.dropout(out)
        out = torch.sigmoid(self.fc2(out))
        return out.squeeze()


# biGRU Model
class SentimentBiGRU(nn.Module):
    def __init__(self, embedding_matrix):
        super(SentimentBiGRU, self).__init__()
        vocab_size, embedding_dim = embedding_matrix.shape
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.embedding.weight.data.copy_(torch.tensor(embedding_matrix, dtype=torch.float32))
        self.embedding.weight.requires_grad = True
        self.gru = nn.GRU(embedding_dim, hidden_size=256, batch_first=True, bidirectional=True, num_layers=2, dropout=0.5)
        self.fc1 = nn.Linear(512, 128)
        self.fc2 = nn.Linear(128, 1)
        self.dropout = nn.Dropout(0.5)
        self.relu = nn.ReLU()

    def forward(self, x):
        embeds = self.embedding(x)
        gru_out, h_n = self.gru(embeds)

        # Concatenate the final hidden states from both directions
        out = torch.cat((h_n[-2,:,:], h_n[-1,:,:]), dim=1)

        out = self.dropout(out)
        out = self.relu(self.fc1(out))
        out = self.dropout(out)
        out = torch.sigmoid(self.fc2(out))
        return out.squeeze()




In [None]:
# Instantiate the models
bilstm_model = SentimentBiLSTM(embedding_matrix)
bigru_model = SentimentBiGRU(embedding_matrix)

# Set device to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
bilstm_model.to(device)
bigru_model.to(device)

# Define loss function and optimizer
criterion = nn.BCELoss()
bilstm_optimizer = torch.optim.Adam(bilstm_model.parameters(), lr=0.001, weight_decay=1e-5)
bigru_optimizer = torch.optim.Adam(bigru_model.parameters(), lr=0.001, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(bilstm_optimizer, 'max', patience=2, factor=0.5, verbose=True)

In [None]:
# Training loop with validation and early stopping for biLSTM
num_epochs = 30
patience = 5  # Early stopping patience
best_val_accuracy = 0
epochs_no_improve = 0

for epoch in range(num_epochs):
    bilstm_model.train()
    running_loss = 0.0
    for sequences, labels in train_loader:
        sequences = sequences.to(device)
        labels = labels.to(device)
        bilstm_optimizer.zero_grad()
        outputs = bilstm_model(sequences)
        loss = criterion(outputs, labels)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(bilstm_model.parameters(), max_norm=1)  # Gradient clipping
        bilstm_optimizer.step()
        running_loss += loss.item() * sequences.size(0)
    epoch_loss = running_loss / len(train_dataset)

    # Validation
    bilstm_model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for sequences, labels in val_loader:
            sequences = sequences.to(device)
            labels = labels.to(device)
            outputs = bilstm_model(sequences)
            predicted = (outputs >= 0.5).long()
            correct += (predicted == labels.long()).sum().item()
            total += labels.size(0)
    val_accuracy = correct / total
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}')

    # Learning rate scheduling
    scheduler.step(val_accuracy)

    # Check for improvement
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        epochs_no_improve = 0
        # Save the best model
        torch.save(bilstm_model.state_dict(), 'best_bilstm_model.pt')
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= patience:
            print('Early stopping!')
            break

# Load the best model and evaluate on the test set for biLSTM
bilstm_model.load_state_dict(torch.load('best_bilstm_model.pt'))
bilstm_model.eval()
correct = 0
total = 0
with torch.no_grad():
    for sequences, labels in test_loader:
        sequences = sequences.to(device)
        labels = labels.to(device)
        outputs = bilstm_model(sequences)
        predicted = (outputs >= 0.5).long()
        correct += (predicted == labels.long()).sum().item()
        total += labels.size(0)
bilstm_test_accuracy = correct / total
print(f'biLSTM Test Accuracy: {bilstm_test_accuracy:.4f}')

Epoch [1/30], Loss: 0.6494, Validation Accuracy: 0.7345
Epoch [2/30], Loss: 0.4284, Validation Accuracy: 0.7523
Epoch [3/30], Loss: 0.2194, Validation Accuracy: 0.7411
Epoch [4/30], Loss: 0.1183, Validation Accuracy: 0.7570
Epoch [5/30], Loss: 0.0712, Validation Accuracy: 0.7683
Epoch [6/30], Loss: 0.0434, Validation Accuracy: 0.7439
Epoch [7/30], Loss: 0.0275, Validation Accuracy: 0.7580
Epoch [8/30], Loss: 0.0184, Validation Accuracy: 0.7561
Epoch [9/30], Loss: 0.0102, Validation Accuracy: 0.7561
Epoch [10/30], Loss: 0.0059, Validation Accuracy: 0.7552
Early stopping!
biLSTM Test Accuracy: 0.7795


In [None]:
# Training loop with validation and early stopping for biGRU
num_epochs = 30
patience = 5  # Early stopping patience
best_val_accuracy = 0
epochs_no_improve = 0

for epoch in range(num_epochs):
    bigru_model.train()
    running_loss = 0.0
    for sequences, labels in train_loader:
        sequences = sequences.to(device)
        labels = labels.to(device)
        bigru_optimizer.zero_grad()
        outputs = bigru_model(sequences)
        loss = criterion(outputs, labels)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(bigru_model.parameters(), max_norm=1)  # Gradient clipping
        bigru_optimizer.step()
        running_loss += loss.item() * sequences.size(0)
    epoch_loss = running_loss / len(train_dataset)

    # Validation
    bigru_model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for sequences, labels in val_loader:
            sequences = sequences.to(device)
            labels = labels.to(device)
            outputs = bigru_model(sequences)
            predicted = (outputs >= 0.5).long()
            correct += (predicted == labels.long()).sum().item()
            total += labels.size(0)
    val_accuracy = correct / total
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}')

    # Learning rate scheduling
    scheduler.step(val_accuracy)

    # Check for improvement
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        epochs_no_improve = 0
        # Save the best model
        torch.save(bigru_model.state_dict(), 'best_bigru_model.pt')
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= patience:
            print('Early stopping!')
            break

# Load the best model and evaluate on the test set for biGRU
bigru_model.load_state_dict(torch.load('best_bigru_model.pt'))
bigru_model.eval()
correct = 0
total = 0
with torch.no_grad():
    for sequences, labels in test_loader:
        sequences = sequences.to(device)
        labels = labels.to(device)
        outputs = bigru_model(sequences)
        predicted = (outputs >= 0.5).long()
        correct += (predicted == labels.long()).sum().item()
        total += labels.size(0)
bigru_test_accuracy = correct / total
print(f'biGRU Test Accuracy: {bigru_test_accuracy:.4f}')

Epoch [1/30], Loss: 0.6209, Validation Accuracy: 0.7176
Epoch [2/30], Loss: 0.3693, Validation Accuracy: 0.7523
Epoch [3/30], Loss: 0.1926, Validation Accuracy: 0.7580
Epoch [4/30], Loss: 0.0981, Validation Accuracy: 0.7486
Epoch [5/30], Loss: 0.0543, Validation Accuracy: 0.7439
Epoch [6/30], Loss: 0.0264, Validation Accuracy: 0.7289
Epoch [7/30], Loss: 0.0156, Validation Accuracy: 0.7355
Epoch [8/30], Loss: 0.0113, Validation Accuracy: 0.7383
Early stopping!
biGRU Test Accuracy: 0.7795


In [None]:
# CNN Model
class SentimentCNN(nn.Module):
    def __init__(self, embedding_matrix):
        super(SentimentCNN, self).__init__()
        vocab_size, embedding_dim = embedding_matrix.shape
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.embedding.weight.data.copy_(torch.tensor(embedding_matrix, dtype=torch.float32))
        self.embedding.weight.requires_grad = True
        self.conv1 = nn.Conv1d(embedding_dim, 128, kernel_size=3, padding=1)
        self.dropout = nn.Dropout(0.5)
        self.fc1 = nn.Linear(128, 64)
        self.fc2 = nn.Linear(64, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        embeds = self.embedding(x).permute(0, 2, 1)
        conv_out = self.conv1(embeds)
        pool_out = torch.max(conv_out, dim=-1)[0]
        out = self.dropout(pool_out)
        out = self.relu(self.fc1(out))
        out = self.dropout(out)
        out = torch.sigmoid(self.fc2(out))
        return out.squeeze()

In [None]:
# Instantiate the model
cnn_model = SentimentCNN(embedding_matrix)

# Set device to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
cnn_model.to(device)

# Define loss function and optimizer
criterion = nn.BCELoss()
cnn_optimizer = torch.optim.Adam(cnn_model.parameters(), lr=0.001, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(cnn_optimizer, 'max', patience=2, factor=0.5, verbose=True)

# Training loop with validation and early stopping
num_epochs = 30
patience = 5  # Early stopping patience
best_val_accuracy = 0
epochs_no_improve = 0

for epoch in range(num_epochs):
    cnn_model.train()
    running_loss = 0.0
    for sequences, labels in train_loader:
        sequences = sequences.to(device)
        labels = labels.to(device)
        cnn_optimizer.zero_grad()
        outputs = cnn_model(sequences)
        loss = criterion(outputs, labels)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(cnn_model.parameters(), max_norm=1)  # Gradient clipping
        cnn_optimizer.step()
        running_loss += loss.item() * sequences.size(0)
    epoch_loss = running_loss / len(train_dataset)

    # Validation
    cnn_model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for sequences, labels in val_loader:
            sequences = sequences.to(device)
            labels = labels.to(device)
            outputs = cnn_model(sequences)
            predicted = (outputs >= 0.5).long()
            correct += (predicted == labels.long()).sum().item()
            total += labels.size(0)
    val_accuracy = correct / total
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}')

    # Learning rate scheduling
    scheduler.step(val_accuracy)

    # Check for improvement
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        epochs_no_improve = 0
        # Save the best model
        torch.save(cnn_model.state_dict(), 'best_cnn_model.pt')
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= patience:
            print('Early stopping!')
            break

# Load the best model and evaluate on the test set
cnn_model.load_state_dict(torch.load('best_cnn_model.pt'))
cnn_model.eval()
correct = 0
total = 0
with torch.no_grad():
    for sequences, labels in test_loader:
        sequences = sequences.to(device)
        labels = labels.to(device)
        outputs = cnn_model(sequences)
        predicted = (outputs >= 0.5).long()
        correct += (predicted == labels.long()).sum().item()
        total += labels.size(0)
cnn_test_accuracy = correct / total
print(f'CNN Test Accuracy: {cnn_test_accuracy:.4f}')

  return F.conv1d(input, weight, bias, self.stride,


Epoch [1/30], Loss: 0.6628, Validation Accuracy: 0.7270
Epoch [2/30], Loss: 0.4245, Validation Accuracy: 0.7645
Epoch [3/30], Loss: 0.1890, Validation Accuracy: 0.7711
Epoch [4/30], Loss: 0.0689, Validation Accuracy: 0.7608
Epoch [5/30], Loss: 0.0259, Validation Accuracy: 0.7467
Epoch [6/30], Loss: 0.0147, Validation Accuracy: 0.7448
Epoch [7/30], Loss: 0.0059, Validation Accuracy: 0.7477


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


Epoch [8/30], Loss: 0.0047, Validation Accuracy: 0.7523
Early stopping!
CNN Test Accuracy: 0.7805
