In [13]:
import numpy as np
import re
import nltk
from datasets import load_dataset
from gensim.models import Word2Vec
from nltk.tokenize import word_tokenize

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim
import torch.nn.functional as F

from sklearn.metrics import accuracy_score
from sklearn.model_selection import ParameterGrid

import matplotlib.pyplot as plt


import pytorch_lightning as pl
from torchmetrics import Accuracy

In [3]:
nltk.download('all')

[nltk_data] Downloading collection 'all'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to /Users/jz/nltk_data...
[nltk_data]    |   Unzipping corpora/abc.zip.
[nltk_data]    | Downloading package alpino to /Users/jz/nltk_data...
[nltk_data]    |   Unzipping corpora/alpino.zip.
[nltk_data]    | Downloading package averaged_perceptron_tagger to
[nltk_data]    |     /Users/jz/nltk_data...
[nltk_data]    |   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data]    | Downloading package averaged_perceptron_tagger_eng to
[nltk_data]    |     /Users/jz/nltk_data...
[nltk_data]    |   Unzipping
[nltk_data]    |       taggers/averaged_perceptron_tagger_eng.zip.
[nltk_data]    | Downloading package averaged_perceptron_tagger_ru to
[nltk_data]    |     /Users/jz/nltk_data...
[nltk_data]    |   Unzipping
[nltk_data]    |       taggers/averaged_perceptron_tagger_ru.zip.
[nltk_data]    | Downloading package averaged_perceptron_tagger_rus to
[nltk_data]    |     /Users/jz/nltk_da

True

# Part 0. Dataset Preparation

In [4]:
# loading the dataset from the library
dataset = load_dataset("rotten_tomatoes")
train_dataset = dataset ['train']
validation_dataset = dataset ['validation']
test_dataset = dataset ['test']

Generating train split: 100%|██████████| 8530/8530 [00:00<00:00, 581661.43 examples/s]
Generating validation split: 100%|██████████| 1066/1066 [00:00<00:00, 313148.06 examples/s]
Generating test split: 100%|██████████| 1066/1066 [00:00<00:00, 408919.71 examples/s]


In [4]:
# check the sizes of each dataset
train_size = len(train_dataset)
validation_size = len(validation_dataset)
test_size = len(test_dataset)

print(f"Training dataset size: {train_size}")
print(f"Validation dataset size: {validation_size}")
print(f"Test dataset size: {test_size}")

Training dataset size: 8530
Validation dataset size: 1066
Test dataset size: 1066


In [5]:
# view an example from each dataset
print("Train Dataset")
print(train_dataset.features)
print(train_dataset[0]) 

print("Test Dataset")
print(test_dataset.features)
print(test_dataset[0]) 

print("Validation Dataset")
print(validation_dataset.features)
print(validation_dataset[0])

Train Dataset
{'text': Value(dtype='string', id=None), 'label': ClassLabel(names=['neg', 'pos'], id=None)}
{'text': 'the rock is destined to be the 21st century\'s new " conan " and that he\'s going to make a splash even greater than arnold schwarzenegger , jean-claud van damme or steven segal .', 'label': 1}
Test Dataset
{'text': Value(dtype='string', id=None), 'label': ClassLabel(names=['neg', 'pos'], id=None)}
{'text': 'lovingly photographed in the manner of a golden book sprung to life , stuart little 2 manages sweetness largely without stickiness .', 'label': 1}
Validation Dataset
{'text': Value(dtype='string', id=None), 'label': ClassLabel(names=['neg', 'pos'], id=None)}
{'text': 'compassionately explores the seemingly irreconcilable situation between conservative christian parents and their estranged gay and lesbian children .', 'label': 1}


# Part 1. Preparing Word Embeddings

### Preprocessing

In [5]:
def preprocessing(text):

    # remove any other special characters but keep the general ones for potential sentiment usage
    text = re.sub(r'[^a-zA-Z0-9\'\!\?\.]', ' ', text)
    
    # replace multiple spaces with one space only
    text = re.sub(r'\s+', ' ', text)

    # remove leading and trailing whitespace to avoid unnecessary inconsistency 
    text = text.strip()

    return text

# apply the preprocessing function to the 'text' column of each dataset
train_dataset = train_dataset.map(lambda x: {'text': preprocessing(x['text'])})
validation_dataset = validation_dataset.map(lambda x: {'text': preprocessing(x['text'])})
test_dataset = test_dataset.map(lambda x: {'text': preprocessing(x['text'])})

# an example of the processed text
print("Train Dataset Example:")
print(train_dataset[0])

Map: 100%|██████████| 8530/8530 [00:00<00:00, 53024.47 examples/s]
Map: 100%|██████████| 1066/1066 [00:00<00:00, 55890.50 examples/s]
Map: 100%|██████████| 1066/1066 [00:00<00:00, 54442.96 examples/s]

Train Dataset Example:
{'text': "the rock is destined to be the 21st century's new conan and that he's going to make a splash even greater than arnold schwarzenegger jean claud van damme or steven segal .", 'label': 1}





In [6]:
# tokenization
# empty list to store the resulting sentences
tokenized_sentences = []

for text in train_dataset['text']:
    # Tokenize the text and append the tokenized sentence to the list
    tokenized_sentences.append(word_tokenize(text))

### (a) Size of vocabulary in training data

In [7]:
# empty set for storing unique words
original_vocab = set()

for sentence in tokenized_sentences:
    for word in sentence:
        # add each word in the sentence to the words set
        original_vocab.add(word)

print(f"(a) The size of vocabulary formed in the training data is {len(original_vocab)}")

(a) The size of vocabulary formed in the training data is 16683


### (b) Number of OOV in the training data

In [8]:
# adjust the parameters for word2vec
vector_size = 100 # Dimensionality of the word vectors
window = 3 # Maximum distance between the current and predicted word within a sentence
min_count = 2 # Ignores all words with total frequency lower than this
workers = 4 # CPU cores
sg = 1 # 1 for skip-gram, 0 for CBOW
epochs = 5 

# train the word2vec model
word2vec_model = Word2Vec(
    sentences = tokenized_sentences, 
    vector_size = vector_size, 
    window = window, 
    min_count = min_count, 
    workers = workers,
    epochs = epochs)

# variable to store model's vocab list 
word2vec_vocab = set(word2vec_model.wv.key_to_index)

# Calculate OOV words by comparing the original vocab and Word2Vec vocab
oov_words = original_vocab - word2vec_vocab

print(f"(b) Number of OOV words in the training data is {len(oov_words)} when the minimum threshold for each word is {min_count}")

(b) Number of OOV words in the training data is 7866 when the minimum threshold for each word is 2


### (c) Mitigating OOV

In [9]:
# Replace all OOV words with <UNK>

# define the UNK and PAD token
UNK_TOKEN = '<UNK>'
PAD_TOKEN = '<PAD>'

# process each sentence in the tokenized_sentences list
for i, sentence in enumerate(tokenized_sentences):
    # empty list to store the current processed sentence
    processed_sentence = []
    for word in sentence:
        if word in word2vec_vocab:
            # if the current word is in the model's vocab, keep it as it is
            processed_sentence.append(word)  
        else:
            # otherwise, replace the word with UNK
            processed_sentence.append(UNK_TOKEN) 

    # update the sentence in the original tokenized_sentences list
    tokenized_sentences[i] = processed_sentence

### Embedding matrix

In [10]:
# empty set for storing unique words
final_vocab = set()

for sentence in tokenized_sentences:
    for word in sentence:
        # add each word in the sentence to the final_vocab set
        final_vocab.add(word)

# add 'UNK' and '<PAD>' to the vocabulary
final_vocab.add(UNK_TOKEN)
final_vocab.add(PAD_TOKEN)

# create the dictionary that maps each word in final_vocab to a unique index
word_to_index = {word: i for i, word in enumerate(final_vocab)}

embedding_dim = word2vec_model.vector_size 

# initialize embedding matrix with number of vocab and embedding dimension
embedding_matrix = np.zeros((len(word_to_index), embedding_dim))

# fill the embedding matrix with the corresponding word vectors
for word, i in word_to_index.items():
    if word in word2vec_model.wv:
        embedding_matrix[i] = word2vec_model.wv[word]
    elif word == PAD_TOKEN:
        # give padding token a zero vector to have no impact on the word semantics
        embedding_matrix[i] = np.zeros(embedding_dim)
    else:
        # use average vector for unknown words 
        embedding_matrix[i] = np.mean(word2vec_model.wv.vectors, axis=0)

print(f"Shape of embedding matrix: {embedding_matrix.shape}")

Shape of embedding matrix: (8819, 100)


In [11]:
# convert word to indices 
def words_to_indices(sentence, word_to_index):
    return [word_to_index.get(word, word_to_index[UNK_TOKEN]) for word in sentence.split()]

train_X = [words_to_indices(sentence, word_to_index) for sentence in train_dataset['text']]
train_y = train_dataset['label']
val_X = [words_to_indices(sentence, word_to_index) for sentence in validation_dataset['text']]
val_y = validation_dataset['label']
test_X = [words_to_indices(sentence, word_to_index) for sentence in test_dataset['text']]
test_y = test_dataset['label']

def create_dataloader(X, y, batch_size=16, shuffle=True):
    X_tensor = [torch.tensor(seq, dtype=torch.long) for seq in X]
    X_padded = pad_sequence(X_tensor, batch_first=True, padding_value=word_to_index[PAD_TOKEN])
    y_tensor = torch.tensor(y, dtype=torch.long)
    dataset = TensorDataset(X_padded, y_tensor)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

train_dataloader = create_dataloader(train_X, train_y, shuffle=True)
val_dataloader = create_dataloader(val_X, val_y, shuffle=False)
test_dataloader = create_dataloader(test_X, test_y, shuffle=False)

# convert embedding_matrix to tensor
embedding_matrix = torch.FloatTensor(embedding_matrix)

# Part 3.3 Bi-LSTM and Bi-GRU

In [21]:
# Since 3.1-3.2 have not been done yet. Continue use Part 1 for the model

class SentimentBiLSTM(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, output_dim, pad_idx, embedding_matrix, freeze_embeddings=True, dropout_rate=0.5, num_layers=1):
        super(SentimentBiLSTM, self).__init__()
        
        embedding_tensor = torch.FloatTensor(embedding_matrix)
        
        self.embedding = nn.Embedding.from_pretrained(embedding_tensor, padding_idx=pad_idx, freeze=freeze_embeddings)
        
        self.lstm = nn.LSTM(input_size=embedding_dim,
                            hidden_size=hidden_dim,
                            num_layers=num_layers,
                            bidirectional=True, 
                            batch_first=True)
        
        self.fc = nn.Linear(hidden_dim * 2, output_dim)
        self.dropout = nn.Dropout(dropout_rate)
        

    def forward(self, text):
      
        embedded = self.dropout(self.embedding(text))
        
        lstm_output, (hidden, cell) = self.lstm(embedded)
        
        hidden_cat = torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)
        hidden_cat = self.dropout(hidden_cat)
        
        output = self.fc(hidden_cat)
        
        return output
    

class SentimentBiGRU(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, output_dim, pad_idx, embedding_matrix, freeze_embeddings=True, dropout_rate=0.5, num_layers=1):
        super(SentimentBiGRU, self).__init__()
        
        embedding_tensor = torch.FloatTensor(embedding_matrix)
        
        self.embedding = nn.Embedding.from_pretrained(embedding_tensor, padding_idx=pad_idx, freeze=freeze_embeddings)
        
        self.gru = nn.GRU(input_size=embedding_dim,
                            hidden_size=hidden_dim,
                            num_layers=num_layers,
                            bidirectional=True, 
                            batch_first=True)
        
        self.fc = nn.Linear(hidden_dim * 2, output_dim)
        self.dropout = nn.Dropout(dropout_rate)
        

    def forward(self, text):
       
        embedded = self.dropout(self.embedding(text))
        
        gru_output, hidden = self.gru(embedded)

        hidden_cat = torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)
        hidden_cat = self.dropout(hidden_cat)

        output = self.fc(hidden_cat)
        
        return output


In [35]:
class SentimentClassifier(pl.LightningModule):
    def __init__(self, model, learning_rate=1e-3):
        super(SentimentClassifier, self).__init__()
        self.model = model
        self.criterion = torch.nn.CrossEntropyLoss()
        self.learning_rate = learning_rate
        
        # Initialize accuracy metrics
        self.train_acc = Accuracy(task="multiclass", num_classes=2)
        self.val_acc = Accuracy(task="multiclass", num_classes=2)
        self.test_acc = Accuracy(task="multiclass", num_classes=2)
        
        # For storing epoch loss
        self.train_losses = []
        self.val_losses = []
        self.test_losses = []

    def forward(self, text):
        return self.model(text)

    def training_step(self, batch, batch_idx):
        text, labels = batch
        outputs = self(text)
        loss = self.criterion(outputs, labels)
        acc = self.train_acc(outputs, labels)
        
        # Store loss for logging at epoch end
        self.train_losses.append(loss.item())
        
        self.log('train_loss', loss, prog_bar=True)
        self.log('train_acc', acc, prog_bar=True)
        
        return loss

    def on_train_epoch_end(self):
        avg_train_loss = sum(self.train_losses) / len(self.train_losses)
        avg_train_acc = self.train_acc.compute()
        
        print(f"Train Loss: {avg_train_loss:.4f}, Train Acc: {avg_train_acc:.4f}")
        
        # Clear accumulated values
        self.train_losses.clear()
        self.train_acc.reset()

    def validation_step(self, batch, batch_idx):
        text, labels = batch
        outputs = self(text)
        loss = self.criterion(outputs, labels)
        acc = self.val_acc(outputs, labels)
        
        # Store loss for logging at epoch end
        self.val_losses.append(loss.item())
        
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc', acc, prog_bar=True)
        
        return loss

    def on_validation_epoch_end(self):
        avg_val_loss = sum(self.val_losses) / len(self.val_losses)
        avg_val_acc = self.val_acc.compute()
        
        print(f"Val Loss: {avg_val_loss:.4f}, Val Acc: {avg_val_acc:.4f}")
        
        # Clear accumulated values
        self.val_losses.clear()
        self.val_acc.reset()

    # New test_step method for testing phase
    def test_step(self, batch, batch_idx):
        text, labels = batch
        outputs = self(text)
        loss = self.criterion(outputs, labels)
        acc = self.test_acc(outputs, labels)
        
        # Store loss for logging at epoch end
        self.test_losses.append(loss.item())
        
        self.log('test_loss', loss, prog_bar=True)
        self.log('test_acc', acc, prog_bar=True)
        
        return loss

    def on_test_epoch_end(self):
        avg_test_loss = sum(self.test_losses) / len(self.test_losses)
        avg_test_acc = self.test_acc.compute()
        
        print(f"Test Loss: {avg_test_loss:.4f}, Test Acc: {avg_test_acc:.4f}")
        
        # Clear accumulated values
        self.test_losses.clear()
        self.test_acc.reset()

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.learning_rate)

In [39]:

vocab_size = len(word_to_index)
embedding_dim = embedding_matrix.shape[1]  # match word2vec vector size
hidden_dim = 256
output_dim = 2  
dropout_rate = 0.2
num_layers = 2


# Create the model instance (biLSTM and biGRU)
bilstm_model = SentimentBiLSTM(vocab_size=vocab_size,
                                embedding_dim=embedding_dim,
                                hidden_dim=hidden_dim,
                                output_dim=output_dim,
                                pad_idx=word_to_index[PAD_TOKEN], 
                                embedding_matrix=embedding_matrix,
                                freeze_embeddings=False, 
                                dropout_rate=dropout_rate,
                                num_layers=num_layers
                               )

bigr_model = SentimentBiGRU(vocab_size=vocab_size,
                                embedding_dim=embedding_dim,
                                hidden_dim=hidden_dim,
                                output_dim=output_dim,
                                pad_idx=word_to_index[PAD_TOKEN], 
                                embedding_matrix=embedding_matrix,
                                freeze_embeddings=False,
                                dropout_rate=dropout_rate,
                                num_layers=num_layers)

# Print the model architecture
print(bilstm_model)
print(bigr_model)


SentimentBiLSTM(
  (embedding): Embedding(8819, 100, padding_idx=2449)
  (lstm): LSTM(100, 256, num_layers=2, batch_first=True, bidirectional=True)
  (fc): Linear(in_features=512, out_features=2, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)
SentimentBiGRU(
  (embedding): Embedding(8819, 100, padding_idx=2449)
  (gru): GRU(100, 256, num_layers=2, batch_first=True, bidirectional=True)
  (fc): Linear(in_features=512, out_features=2, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)


In [40]:

# BiLSTM Training and Evaluation

bilstm_classifier = SentimentClassifier(model=bilstm_model)

early_stopping = pl.callbacks.EarlyStopping(monitor='val_loss', patience=5, mode='min')
model_checkpoint = pl.callbacks.ModelCheckpoint(monitor='val_loss', mode='min', save_top_k=1)

trainer = pl.Trainer(max_epochs=30, callbacks=[early_stopping, model_checkpoint])

# Training
trainer.fit(bilstm_classifier, train_dataloader, val_dataloader)

# Evaluation
trainer.test(bilstm_classifier, test_dataloader)

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name      | Type               | Params | Mode 
---------------------------------------------------------
0 | model     | SentimentBiLSTM    | 3.2 M  | train
1 | criterion | CrossEntropyLoss   | 0      | train
2 | train_acc | MulticlassAccuracy | 0      | train
3 | val_acc   | MulticlassAccuracy | 0      | train
4 | test_acc  | MulticlassAccuracy | 0      | train
---------------------------------------------------------
3.2 M     Trainable params
0         Non-trainable params
3.2 M     Total params
12.772    Total estimated model params size (MB)
9         Modules in train mode
0         Modules in eval mode


Sanity Checking DataLoader 0: 100%|██████████| 2/2 [00:00<00:00, 27.37it/s]Val Loss: 0.7054, Val Acc: 0.0000
                                                                           

/opt/anaconda3/envs/sc4002/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/opt/anaconda3/envs/sc4002/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Epoch 0: 100%|██████████| 534/534 [00:15<00:00, 33.50it/s, v_num=3, train_loss=1.000, train_acc=0.500]Val Loss: 0.6936, Val Acc: 0.5000
Epoch 0: 100%|██████████| 534/534 [00:16<00:00, 31.55it/s, v_num=3, train_loss=1.000, train_acc=0.500, val_loss=0.693, val_acc=0.500]Train Loss: 0.6970, Train Acc: 0.5076
Epoch 1: 100%|██████████| 534/534 [00:15<00:00, 34.22it/s, v_num=3, train_loss=1.000, train_acc=1.000, val_loss=0.693, val_acc=0.500]Val Loss: 0.5785, Val Acc: 0.6998
Epoch 1: 100%|██████████| 534/534 [00:16<00:00, 32.42it/s, v_num=3, train_loss=1.000, train_acc=1.000, val_loss=0.578, val_acc=0.700]Train Loss: 0.6597, Train Acc: 0.5968
Epoch 2: 100%|██████████| 534/534 [00:14<00:00, 35.69it/s, v_num=3, train_loss=1.000, train_acc=0.500, val_loss=0.578, val_acc=0.700]Val Loss: 0.5471, Val Acc: 0.7364
Epoch 2: 100%|██████████| 534/534 [00:15<00:00, 33.77it/s, v_num=3, train_loss=1.000, train_acc=0.500, val_loss=0.547, val_acc=0.736]Train Loss: 0.3840, Train Acc: 0.8279
Epoch 3: 100%|███

/opt/anaconda3/envs/sc4002/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 67/67 [00:01<00:00, 66.23it/s]Test Loss: 1.2717, Test Acc: 0.7326
Testing DataLoader 0: 100%|██████████| 67/67 [00:01<00:00, 66.00it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_acc            0.7326453924179077
        test_loss           1.2749722003936768
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_loss': 1.2749722003936768, 'test_acc': 0.7326453924179077}]

In [41]:
# BiGRU Training and Evaluation

bigr_classifier = SentimentClassifier(model=bigr_model)

early_stopping = pl.callbacks.EarlyStopping(monitor='val_loss', patience=5, mode='min')
model_checkpoint = pl.callbacks.ModelCheckpoint(monitor='val_loss', mode='min', save_top_k=1)

trainer = pl.Trainer(max_epochs=30, callbacks=[early_stopping, model_checkpoint])

# Training
trainer.fit(bigr_classifier, train_dataloader, val_dataloader)

# Evaluation
trainer.test(bigr_classifier, test_dataloader)

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name      | Type               | Params | Mode 
---------------------------------------------------------
0 | model     | SentimentBiGRU     | 2.6 M  | train
1 | criterion | CrossEntropyLoss   | 0      | train
2 | train_acc | MulticlassAccuracy | 0      | train
3 | val_acc   | MulticlassAccuracy | 0      | train
4 | test_acc  | MulticlassAccuracy | 0      | train
---------------------------------------------------------
2.6 M     Trainable params
0         Non-trainable params
2.6 M     Total params
10.462    Total estimated model params size (MB)
9         Modules in train mode
0         Modules in eval mode


Sanity Checking DataLoader 0: 100%|██████████| 2/2 [00:00<00:00, 14.49it/s]Val Loss: 0.7156, Val Acc: 0.0625
Epoch 0: 100%|██████████| 534/534 [01:52<00:00,  4.74it/s, v_num=4, train_loss=1.000, train_acc=1.000]Val Loss: 0.6044, Val Acc: 0.6717
Epoch 0: 100%|██████████| 534/534 [01:56<00:00,  4.57it/s, v_num=4, train_loss=1.000, train_acc=1.000, val_loss=0.604, val_acc=0.672]Train Loss: 0.6844, Train Acc: 0.5501
Epoch 1: 100%|██████████| 534/534 [01:58<00:00,  4.51it/s, v_num=4, train_loss=1.000, train_acc=1.000, val_loss=0.604, val_acc=0.672]Val Loss: 0.5117, Val Acc: 0.7514
Epoch 1: 100%|██████████| 534/534 [02:03<00:00,  4.34it/s, v_num=4, train_loss=1.000, train_acc=1.000, val_loss=0.511, val_acc=0.751]Train Loss: 0.3846, Train Acc: 0.8274
Epoch 2: 100%|██████████| 534/534 [01:55<00:00,  4.60it/s, v_num=4, train_loss=1.000, train_acc=1.000, val_loss=0.511, val_acc=0.751]Val Loss: 0.6806, Val Acc: 0.7458
Epoch 2: 100%|██████████| 534/534 [02:01<00:00,  4.38it/s, v_num=4, train_loss=

[{'test_loss': 1.4354287385940552, 'test_acc': 0.7392120361328125}]