# Lab 070-01: Word2Vec Embeddings with Deep Learning Models
## Sentiment Analysis với MLP, RNN, LSTM, GRU

**Task:** Binary Sentiment Classification (Positive/Negative)  
**Dataset:** A small hand-written set of IMDB-style movie reviews (the real IMDB corpus is not loaded in this notebook)  
**Embeddings:** Word2Vec (Skip-gram & CBOW)  
**Models:** MLP, RNN, LSTM, GRU  

## Part 0: Setup và Import Libraries

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter, defaultdict
import re
import time
from tqdm import tqdm
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split

# For Word2Vec
from gensim.models import Word2Vec
import gensim.downloader as api

# Reproducibility: one seed shared by NumPy and PyTorch
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Prefer the GPU when one is visible, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")
print(f"PyTorch version: {torch.__version__}")

## Part 1: Data Preparation

In [None]:
# Hand-written positive reviews (labelled 1 when the dataset is built below):
# 20 short, lowercase, punctuation-free sentences.
positive_reviews = [
    "this movie is absolutely amazing and wonderful",
    "i loved every minute of this film it was fantastic",
    "brilliant performance by all actors highly recommend",
    "one of the best movies i have ever seen",
    "outstanding cinematography and great story",
    "superb acting and excellent direction",
    "thoroughly enjoyed this masterpiece",
    "fantastic movie with incredible visuals",
    "amazing storyline and perfect execution",
    "loved it absolutely brilliant work",
    "excellent movie highly entertaining",
    "wonderful film with great performances",
    "best movie of the year must watch",
    "incredible acting and beautiful scenes",
    "perfect movie loved every second",
    "amazing experience highly recommended",
    "brilliant film with excellent cast",
    "fantastic story and great acting",
    "wonderful movie enjoyed it thoroughly",
    "outstanding film one of the best",
]

# Hand-written negative reviews (labelled 0 when the dataset is built below):
# 20 short, lowercase, punctuation-free sentences.
negative_reviews = [
    "this movie is terrible waste of time",
    "horrible acting and boring plot",
    "worst film i have ever watched",
    "completely disappointed terrible movie",
    "awful storyline and bad direction",
    "boring movie fell asleep halfway",
    "terrible acting worst experience",
    "horrible film total disaster",
    "awful movie do not watch",
    "worst performance terrible script",
    "boring and poorly executed film",
    "terrible movie complete waste",
    "awful acting and bad story",
    "horrible film very disappointing",
    "worst movie terrible experience",
    "boring plot and bad acting",
    "awful film not recommended",
    "terrible storyline horrible movie",
    "worst acting disappointing film",
    "boring waste of time awful",
]

# Create dataset
#
# The split is made on the un-replicated reviews *before* any oversampling.
# Replicating the lists first and splitting afterwards would put copies of the
# same sentence into train, validation and test, so the held-out scores would
# only measure memorisation of training sentences.
from sklearn.utils import shuffle

N_REPEATS = 10  # oversampling factor, applied to the training split only

texts = positive_reviews + negative_reviews
labels = [1] * len(positive_reviews) + [0] * len(negative_reviews)

# Split: 70% train / 15% val / 15% test, stratified so that every split keeps
# both classes in the same proportion.
X_train, X_temp, y_train, y_temp = train_test_split(
    texts, labels, test_size=0.3, random_state=SEED, stratify=labels
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=SEED, stratify=y_temp
)

# Oversample and shuffle the training split only; validation and test keep
# one copy of each held-out review.
X_train, y_train = shuffle(
    X_train * N_REPEATS, y_train * N_REPEATS, random_state=SEED
)

print(f"Train samples: {len(X_train)}")
print(f"Val samples: {len(X_val)}")
print(f"Test samples: {len(X_test)}")
print(f"\nSample reviews:")
for i in range(3):
    label = "Positive" if y_train[i] == 1 else "Negative"
    print(f"  [{label}] {X_train[i]}")

In [None]:
# Text preprocessing
def preprocess_text(text):
    """Turn raw text into a list of lowercase word tokens.

    The text is lowercased, every character that is not ``a``-``z`` or
    whitespace is removed (not replaced by a space), and the result is split
    on runs of whitespace.

    Args:
        text: Input string.

    Returns:
        List of token strings; empty when nothing alphabetic remains.
    """
    cleaned = re.sub(r'[^a-z\s]', '', text.lower())
    return cleaned.split()

# Tokenize every split with the same preprocessing function
train_tokens = list(map(preprocess_text, X_train))
val_tokens = list(map(preprocess_text, X_val))
test_tokens = list(map(preprocess_text, X_test))

# Show the first two tokenized training reviews as a sanity check
print("Tokenized examples:")
for i in (0, 1):
    print(f"  {train_tokens[i]}")

## Part 2: Training Word2Vec Embeddings

We'll train our own Word2Vec models (Skip-gram and CBOW) on the tokenized reviews; only the Skip-gram vectors are used by the classifiers below.

In [None]:
print("Training Word2Vec model...")

# Combine all tokens for training
all_tokens = train_tokens + val_tokens + test_tokens

# Hyperparameters shared by the Skip-gram and CBOW models, so the two differ
# only in the `sg` switch.
W2V_PARAMS = dict(
    sentences=all_tokens,
    vector_size=100,      # Embedding dimension
    window=5,             # Context window
    min_count=1,          # Minimum word frequency
    # A single worker is required for `seed` to give reproducible vectors:
    # with several worker threads the update order depends on OS scheduling.
    workers=1,
    epochs=20,
    seed=SEED,
)

# Skip-gram model (sg=1)
w2v_sg = Word2Vec(sg=1, **W2V_PARAMS)

# CBOW model (sg=0), trained for comparison
w2v_cbow = Word2Vec(sg=0, **W2V_PARAMS)

print(f"Skip-gram vocabulary size: {len(w2v_sg.wv)}")
print(f"CBOW vocabulary size: {len(w2v_cbow.wv)}")
print(f"Embedding dimension: {w2v_sg.vector_size}")

# Test word similarities
print("\nWord Similarities (Skip-gram):")
test_words = ['great', 'excellent', 'terrible', 'awful']
for word in test_words:
    # Skip probe words that never occurred in the corpus
    if word in w2v_sg.wv:
        similar = w2v_sg.wv.most_similar(word, topn=3)
        print(f"  {word}: {[w for w, _ in similar]}")

In [None]:
# Build vocabulary and create embedding matrix
class Vocabulary:
    """Token/index mapping plus an embedding matrix seeded from Word2Vec.

    Index 0 is reserved for ``"<PAD>"`` (its matrix row stays all zeros) and
    index 1 for ``"<UNK>"`` (small random row). The words known to the model
    follow from index 2 onwards, in the order of ``wv.index_to_key``.

    Attributes set by the constructor: ``word2idx``, ``idx2word``,
    ``vocab_size``, ``embedding_dim`` and ``embedding_matrix`` (NumPy array of
    shape ``(vocab_size, embedding_dim)``).
    """

    def __init__(self, w2v_model):
        keyed_vectors = w2v_model.wv
        ordered_tokens = ["<PAD>", "<UNK>"] + list(keyed_vectors.index_to_key)

        # Both directions of the mapping are derived from one ordered list
        self.word2idx = {token: pos for pos, token in enumerate(ordered_tokens)}
        self.idx2word = {pos: token for pos, token in enumerate(ordered_tokens)}

        self.vocab_size = len(self.word2idx)
        self.embedding_dim = w2v_model.vector_size

        # Rows default to zero, which is what the padding row keeps
        matrix = np.zeros((self.vocab_size, self.embedding_dim))
        for token, row in self.word2idx.items():
            if token in keyed_vectors:
                matrix[row] = keyed_vectors[token]
                continue
            if token == "<UNK>":
                # Small random vector for out-of-vocabulary words
                matrix[row] = np.random.randn(self.embedding_dim) * 0.01
        self.embedding_matrix = matrix

        print(f"Vocabulary size: {self.vocab_size}")
        print(f"Embedding dimension: {self.embedding_dim}")

    def encode(self, tokens):
        """Map tokens to indices, using the ``"<UNK>"`` index for unknown ones."""
        unknown_index = self.word2idx["<UNK>"]
        lookup = self.word2idx.get
        return [lookup(token, unknown_index) for token in tokens]

# Vocabulary and embedding matrix are built from the Skip-gram vectors
vocab = Vocabulary(w2v_model=w2v_sg)
print(f"\nEmbedding matrix shape: {vocab.embedding_matrix.shape}")

In [None]:
# Dataset class
class SentimentDataset(Dataset):
    """Map-style dataset yielding ``(token_indices, label)`` tensor pairs.

    Args:
        tokens_list: One list of token strings per review.
        labels: One numeric label per review, aligned with ``tokens_list``.
        vocab: Object exposing ``encode(tokens) -> list[int]``.
    """

    def __init__(self, tokens_list, labels, vocab):
        self.tokens_list = tokens_list
        self.labels = labels
        self.vocab = vocab

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(indices, label)``: a long tensor and a 0-d float tensor."""
        tokens = self.tokens_list[idx]
        label = self.labels[idx]

        # Encode tokens to indices. A review with no tokens would give a
        # length-0 sequence, which pack_padded_sequence rejects and which makes
        # mean pooling divide by zero, so it is represented by one
        # unknown-token index instead.
        indices = self.vocab.encode(tokens) or self.vocab.encode(["<UNK>"])

        return torch.tensor(indices, dtype=torch.long), torch.tensor(label, dtype=torch.float)

def collate_fn(batch):
    """Batch variable-length samples for a DataLoader.

    Args:
        batch: Iterable of ``(sequence_tensor, label_tensor)`` pairs.

    Returns:
        ``(padded, labels, lengths)``: sequences right-padded with 0 to the
        longest one (batch first), the stacked labels, and the original
        sequence lengths.
    """
    token_ids, targets = zip(*batch)

    # Index 0 is the padding index used by the embedding layers
    padded = pad_sequence(token_ids, batch_first=True, padding_value=0)
    lengths = torch.tensor(list(map(len, token_ids)))

    return padded, torch.stack(targets), lengths

# Create datasets
BATCH_SIZE = 16

train_dataset = SentimentDataset(train_tokens, y_train, vocab)
val_dataset = SentimentDataset(val_tokens, y_val, vocab)
test_dataset = SentimentDataset(test_tokens, y_test, vocab)

# Options shared by all loaders; only the training loader reshuffles
loader_options = dict(batch_size=BATCH_SIZE, collate_fn=collate_fn)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

print(f"Train batches: {len(train_loader)}")
print(f"Val batches: {len(val_loader)}")
print(f"Test batches: {len(test_loader)}")

## Part 3: Model Implementations

### 3.1. Multi-Layer Perceptron (MLP)

In [None]:
class MLP_Classifier(nn.Module):
    """
    Multi-Layer Perceptron for Sentiment Classification

    Architecture:
    Word2Vec Embedding → Average Pooling → MLP → Sigmoid

    Args:
        embedding_matrix: NumPy array of shape (vocab_size, embedding_dim)
            used to initialise the embedding layer; row 0 is the padding row.
        hidden_dim: Width of the first hidden layer (the second is half of it).
        dropout: Dropout probability applied after each hidden layer.
        freeze_embeddings: When True the embedding weights are not trained.
    """

    def __init__(self, embedding_matrix, hidden_dim=128, dropout=0.5, freeze_embeddings=True):
        super(MLP_Classifier, self).__init__()

        vocab_size, embedding_dim = embedding_matrix.shape

        # Initialize embedding layer with Word2Vec weights
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.embedding.weight.data.copy_(torch.from_numpy(embedding_matrix))

        # Freeze or fine-tune embeddings
        self.embedding.weight.requires_grad = not freeze_embeddings

        # MLP layers
        self.fc1 = nn.Linear(embedding_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim // 2)
        self.fc3 = nn.Linear(hidden_dim // 2, 1)

        self.dropout = nn.Dropout(dropout)
        self.relu = nn.ReLU()

    def forward(self, x, lengths):
        """
        Args:
            x: [batch_size, seq_len] token indices, 0 = padding
            lengths: [batch_size] number of real tokens per sequence

        Returns:
            output: [batch_size] sigmoid outputs in (0, 1)
        """
        # Embedding
        embedded = self.embedding(x)  # [batch, seq_len, embed_dim]

        # Average pooling (ignore padding)
        mask = (x != 0).to(embedded.dtype).unsqueeze(-1)  # [batch, seq_len, 1]
        summed = (embedded * mask).sum(dim=1)  # [batch, embed_dim]

        # Clamp the divisor so an empty sequence pools to a zero vector
        # instead of 0/0 = NaN, and align device/dtype with the embeddings so
        # callers may pass CPU or integer lengths.
        divisor = lengths.to(device=embedded.device, dtype=embedded.dtype)
        divisor = divisor.clamp(min=1).unsqueeze(1)  # [batch, 1]
        pooled = summed / divisor  # [batch, embed_dim]

        # MLP
        out = self.relu(self.fc1(pooled))
        out = self.dropout(out)
        out = self.relu(self.fc2(out))
        out = self.dropout(out)
        out = self.fc3(out)
        out = torch.sigmoid(out).squeeze(1)

        return out

### 3.2. Recurrent Neural Network (RNN)

In [None]:
class RNN_Classifier(nn.Module):
    """
    Vanilla RNN sentiment classifier.

    Pipeline: pretrained embedding → stacked nn.RNN → hidden state of the top
    layer → dropout → linear → sigmoid.

    Args:
        embedding_matrix: NumPy array (vocab_size, embedding_dim) copied into
            the embedding layer; row 0 is the padding row.
        hidden_dim: Size of the recurrent hidden state.
        num_layers: Number of stacked recurrent layers.
        dropout: Dropout probability (between recurrent layers when there is
            more than one, and before the output layer).
        freeze_embeddings: When True the embedding weights are not trained.
    """

    def __init__(self, embedding_matrix, hidden_dim=128, num_layers=2,
                 dropout=0.5, freeze_embeddings=True):
        super().__init__()

        n_tokens, embed_dim = embedding_matrix.shape
        # Inter-layer dropout is only meaningful for a stacked network
        inter_layer_dropout = dropout if num_layers > 1 else 0

        # Pretrained embedding table
        self.embedding = nn.Embedding(n_tokens, embed_dim, padding_idx=0)
        pretrained = torch.from_numpy(embedding_matrix)
        self.embedding.weight.data.copy_(pretrained)
        self.embedding.weight.requires_grad = not freeze_embeddings

        # Recurrent encoder
        self.rnn = nn.RNN(
            embed_dim,
            hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=inter_layer_dropout
        )

        # Classification head
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x, lengths):
        """
        Args:
            x: [batch_size, seq_len] token indices
            lengths: [batch_size] true sequence lengths

        Returns:
            [batch_size] sigmoid outputs
        """
        # Packing lets the RNN stop at each sequence's true end, so the
        # returned hidden state is not polluted by padding steps.
        packed = pack_padded_sequence(
            self.embedding(x), lengths.cpu(), batch_first=True, enforce_sorted=False
        )
        _, hidden = self.rnn(packed)

        top_layer_state = hidden[-1]  # [batch, hidden_dim]
        scores = self.fc(self.dropout(top_layer_state))
        return torch.sigmoid(scores).squeeze(1)

### 3.3. LSTM Classifier

In [None]:
class LSTM_Classifier(nn.Module):
    """
    LSTM sentiment classifier.

    Pipeline: pretrained embedding → stacked nn.LSTM → hidden state of the top
    layer → dropout → linear → sigmoid.

    Args:
        embedding_matrix: NumPy array (vocab_size, embedding_dim) copied into
            the embedding layer; row 0 is the padding row.
        hidden_dim: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability (between LSTM layers when there is more
            than one, and before the output layer).
        freeze_embeddings: When True the embedding weights are not trained.
    """

    def __init__(self, embedding_matrix, hidden_dim=128, num_layers=2,
                 dropout=0.5, freeze_embeddings=True):
        super().__init__()

        n_tokens, embed_dim = embedding_matrix.shape
        # Inter-layer dropout is only meaningful for a stacked network
        inter_layer_dropout = dropout if num_layers > 1 else 0

        # Pretrained embedding table
        self.embedding = nn.Embedding(n_tokens, embed_dim, padding_idx=0)
        pretrained = torch.from_numpy(embedding_matrix)
        self.embedding.weight.data.copy_(pretrained)
        self.embedding.weight.requires_grad = not freeze_embeddings

        # Recurrent encoder
        self.lstm = nn.LSTM(
            embed_dim,
            hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=inter_layer_dropout
        )

        # Classification head
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x, lengths):
        """
        Args:
            x: [batch_size, seq_len] token indices
            lengths: [batch_size] true sequence lengths

        Returns:
            [batch_size] sigmoid outputs
        """
        # Packing lets the LSTM stop at each sequence's true end, so the
        # returned hidden state is not polluted by padding steps.
        packed = pack_padded_sequence(
            self.embedding(x), lengths.cpu(), batch_first=True, enforce_sorted=False
        )
        # Only the hidden state is used; the cell state is discarded
        _, (hidden, _cell) = self.lstm(packed)

        top_layer_state = hidden[-1]  # [batch, hidden_dim]
        scores = self.fc(self.dropout(top_layer_state))
        return torch.sigmoid(scores).squeeze(1)

### 3.4. GRU Classifier

In [None]:
class GRU_Classifier(nn.Module):
    """
    GRU sentiment classifier.

    Pipeline: pretrained embedding → stacked nn.GRU → hidden state of the top
    layer → dropout → linear → sigmoid.

    Args:
        embedding_matrix: NumPy array (vocab_size, embedding_dim) copied into
            the embedding layer; row 0 is the padding row.
        hidden_dim: Size of the GRU hidden state.
        num_layers: Number of stacked GRU layers.
        dropout: Dropout probability (between GRU layers when there is more
            than one, and before the output layer).
        freeze_embeddings: When True the embedding weights are not trained.
    """

    def __init__(self, embedding_matrix, hidden_dim=128, num_layers=2,
                 dropout=0.5, freeze_embeddings=True):
        super().__init__()

        n_tokens, embed_dim = embedding_matrix.shape
        # Inter-layer dropout is only meaningful for a stacked network
        inter_layer_dropout = dropout if num_layers > 1 else 0

        # Pretrained embedding table
        self.embedding = nn.Embedding(n_tokens, embed_dim, padding_idx=0)
        pretrained = torch.from_numpy(embedding_matrix)
        self.embedding.weight.data.copy_(pretrained)
        self.embedding.weight.requires_grad = not freeze_embeddings

        # Recurrent encoder
        self.gru = nn.GRU(
            embed_dim,
            hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=inter_layer_dropout
        )

        # Classification head
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x, lengths):
        """
        Args:
            x: [batch_size, seq_len] token indices
            lengths: [batch_size] true sequence lengths

        Returns:
            [batch_size] sigmoid outputs
        """
        # Packing lets the GRU stop at each sequence's true end, so the
        # returned hidden state is not polluted by padding steps.
        packed = pack_padded_sequence(
            self.embedding(x), lengths.cpu(), batch_first=True, enforce_sorted=False
        )
        _, hidden = self.gru(packed)

        top_layer_state = hidden[-1]  # [batch, hidden_dim]
        scores = self.fc(self.dropout(top_layer_state))
        return torch.sigmoid(scores).squeeze(1)

## Part 4: Training và Evaluation Functions

In [None]:
def train_epoch(model, dataloader, criterion, optimizer, device):
    """Train for one epoch.

    Args:
        model: Module called as ``model(sequences, lengths)`` that returns one
            probability per sample.
        dataloader: Yields ``(sequences, labels, lengths)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``. The epoch
            average assumes it returns the batch MEAN (the default reduction
            of ``nn.BCELoss``).
        optimizer: Optimizer stepping the model parameters.
        device: Device the batches are moved to.

    Returns:
        ``(avg_loss, accuracy)``: per-sample mean loss and the fraction of
        correct predictions at a 0.5 threshold, both over the whole epoch.
    """
    model.train()
    total_loss = 0.0
    correct = 0
    total = 0

    for sequences, labels, lengths in tqdm(dataloader, desc="Training", leave=False):
        sequences = sequences.to(device)
        labels = labels.to(device)
        lengths = lengths.to(device)

        optimizer.zero_grad()

        # Forward pass
        outputs = model(sequences, lengths)
        loss = criterion(outputs, labels)

        # Backward pass; gradients are clipped to tame exploding RNN gradients
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0)
        optimizer.step()

        # Weight the batch mean by the batch size: averaging the batch means
        # directly would over-weight the (smaller) final batch.
        batch_size = labels.size(0)
        total_loss += loss.item() * batch_size

        # Calculate accuracy
        predictions = (outputs >= 0.5).float()
        correct += (predictions == labels).sum().item()
        total += batch_size

    avg_loss = total_loss / total
    accuracy = correct / total

    return avg_loss, accuracy

def evaluate(model, dataloader, criterion, device):
    """Evaluate the model without updating it.

    Args:
        model: Module called as ``model(sequences, lengths)`` that returns one
            probability per sample.
        dataloader: Yields ``(sequences, labels, lengths)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``. The average
            assumes it returns the batch MEAN (the default reduction of
            ``nn.BCELoss``).
        device: Device the batches are moved to.

    Returns:
        ``(avg_loss, accuracy, all_predictions, all_labels)``: per-sample mean
        loss, accuracy at a 0.5 threshold, and the flat lists of predicted and
        true labels in dataloader order.
    """
    model.eval()
    total_loss = 0.0
    all_predictions = []
    all_labels = []

    with torch.no_grad():
        for sequences, labels, lengths in tqdm(dataloader, desc="Evaluating", leave=False):
            sequences = sequences.to(device)
            labels = labels.to(device)
            lengths = lengths.to(device)

            # Forward pass
            outputs = model(sequences, lengths)
            loss = criterion(outputs, labels)

            # Weight the batch mean by the batch size: averaging the batch
            # means directly would over-weight the (smaller) final batch.
            total_loss += loss.item() * labels.size(0)

            # Store predictions
            predictions = (outputs >= 0.5).float()
            all_predictions.extend(predictions.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    avg_loss = total_loss / len(all_labels)
    accuracy = accuracy_score(all_labels, all_predictions)

    return avg_loss, accuracy, all_predictions, all_labels

## Part 5: Training All Models

In [None]:
# Training hyperparameters
N_EPOCHS = 20
LEARNING_RATE = 0.001
HIDDEN_DIM = 128
NUM_LAYERS = 2
DROPOUT = 0.3

# Binary cross-entropy on the sigmoid outputs of the models
criterion = nn.BCELoss()

# All models start from the same pretrained (frozen) embedding matrix
embedding_matrix = vocab.embedding_matrix

# Built in the order MLP, RNN, LSTM, GRU
models_to_train = {
    'MLP': MLP_Classifier(embedding_matrix, HIDDEN_DIM, DROPOUT, freeze_embeddings=True),
}
for tag, recurrent_cls in (('RNN', RNN_Classifier),
                           ('LSTM', LSTM_Classifier),
                           ('GRU', GRU_Classifier)):
    models_to_train[tag] = recurrent_cls(
        embedding_matrix, HIDDEN_DIM, NUM_LAYERS, DROPOUT, freeze_embeddings=True
    )

print("Model Comparison (with Word2Vec Embeddings)")
for name, model in models_to_train.items():
    # Frozen embedding weights are excluded from the count
    trainable_sizes = [p.numel() for p in model.parameters() if p.requires_grad]
    n_params = sum(trainable_sizes)
    print(f"{name:<10} - Trainable Parameters: {n_params:,}")

In [None]:
# Train each model
results = {}

for model_name, model in models_to_train.items():
    print(f"Training {model_name} Model")

    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    checkpoint_path = f'{model_name.lower()}_w2v_best.pt'

    history = {
        'train_loss': [],
        'train_acc': [],
        'val_loss': [],
        'val_acc': []
    }

    # Start below any reachable accuracy so the first epoch always writes a
    # checkpoint. Starting at 0 with a strict ">" would write nothing when
    # validation accuracy stays at 0, and the load below would then fail or
    # silently pick up a stale file from an earlier run.
    best_val_acc = float('-inf')
    # Accumulated wall-clock time per epoch; it includes the validation pass
    training_time = 0

    for epoch in range(N_EPOCHS):
        print(f"Epoch {epoch+1}/{N_EPOCHS}")

        start_time = time.time()

        # Train
        train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)

        # Validate
        val_loss, val_acc, _, _ = evaluate(model, val_loader, criterion, device)

        epoch_time = time.time() - start_time
        training_time += epoch_time

        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)

        print(f"  Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
        print(f"  Val Loss:   {val_loss:.4f} | Val Acc:   {val_acc:.4f}")
        print(f"  Time: {epoch_time:.2f}s")

        # Save best model (strict ">" keeps the earliest epoch on ties)
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), checkpoint_path)
            print(f"  Saved best model (Val Acc: {best_val_acc:.4f})")

    # Test on best model; map_location keeps the load valid on any device
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    test_loss, test_acc, test_preds, test_labels = evaluate(model, test_loader, criterion, device)

    # Store results
    results[model_name] = {
        'history': history,
        'best_val_acc': best_val_acc,
        'test_acc': test_acc,
        'test_loss': test_loss,
        'training_time': training_time,
        'predictions': test_preds,
        'labels': test_labels,
        'n_params': sum(p.numel() for p in model.parameters() if p.requires_grad)
    }

    print(f"{model_name} Training Complete!")
    print(f"  Best Val Acc: {best_val_acc:.4f}")
    print(f"  Test Acc: {test_acc:.4f}")
    print(f"  Total Training Time: {training_time:.2f}s")

## Part 7: Visualization và Comparison

In [None]:
# Plot training history: one panel per recorded metric, one curve per model
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

epochs = range(1, N_EPOCHS + 1)
model_names = ['MLP', 'RNN', 'LSTM', 'GRU']

# (grid position, history key, y-axis label, panel title)
panels = [
    ((0, 0), 'train_loss', 'Loss', 'Training Loss Comparison'),
    ((0, 1), 'val_loss', 'Loss', 'Validation Loss Comparison'),
    ((1, 0), 'train_acc', 'Accuracy', 'Training Accuracy Comparison'),
    ((1, 1), 'val_acc', 'Accuracy', 'Validation Accuracy Comparison'),
]

for (row, col), metric, y_label, panel_title in panels:
    ax = axes[row, col]
    for name in model_names:
        curve = results[name]['history'][metric]
        ax.plot(epochs, curve, label=name, linewidth=2, marker='o', markersize=3)
    ax.set_xlabel('Epoch', fontsize=12)
    ax.set_ylabel(y_label, fontsize=12)
    ax.set_title(panel_title, fontsize=14, fontweight='bold')
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
# plt.savefig('w2v_models_comparison.png', dpi=150)
plt.show()

In [None]:
# Performance comparison table
print("MODEL COMPARISON SUMMARY (Word2Vec Embeddings)")
print(f"{'Model':<10} {'Params':<15} {'Train Time':<15} {'Best Val Acc':<15} {'Test Acc':<12}")

for name in model_names:
    res = results[name]
    params_col = f"{res['n_params']:>13,}"
    time_col = f"{res['training_time']:>12.2f}s"
    val_col = f"{res['best_val_acc']:>12.4f}"
    test_col = f"{res['test_acc']:>10.4f}"
    print(f"{name:<10} {params_col}  {time_col}  {val_col}  {test_col}")


# Bar plots for metrics
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

colors = ['#3498db', '#e74c3c', '#2ecc71', '#f39c12', '#9b59b6']


def _annotated_bars(ax, values, y_label, title, label_offset, label_text,
                    label_size, y_limits=None):
    """Draw one bar per model on *ax* and write each value above its bar."""
    bars = ax.bar(model_names, values, color=colors, alpha=0.7, edgecolor='black')
    ax.set_ylabel(y_label, fontsize=12)
    ax.set_title(title, fontsize=14, fontweight='bold')
    if y_limits is not None:
        ax.set_ylim(y_limits)
    for bar, value in zip(bars, values):
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2., height + label_offset,
                label_text(value), ha='center', va='bottom',
                fontweight='bold', fontsize=label_size)
    ax.grid(True, alpha=0.3, axis='y')


# Test Accuracy
test_accs = [results[name]['test_acc'] for name in model_names]
_annotated_bars(axes[0], test_accs, 'Accuracy', 'Test Accuracy Comparison',
                0.01, lambda acc: f'{acc:.4f}', 9, y_limits=[0, 1.0])

# Training Time
train_times = [results[name]['training_time'] for name in model_names]
_annotated_bars(axes[1], train_times, 'Time (seconds)', 'Training Time Comparison',
                0.5, lambda time_val: f'{time_val:.1f}s', 9)

# Parameters
n_params = [results[name]['n_params'] for name in model_names]
_annotated_bars(axes[2], n_params, 'Number of Parameters', 'Model Size Comparison',
                200, lambda params: f'{params:,}', 8)

plt.tight_layout()
# plt.savefig('w2v_metrics_comparison.png', dpi=150)
plt.show()

## Part 8: Detailed Analysis

In [None]:
# Classification reports and confusion matrices, one per trained model
class_names = ['Negative', 'Positive']

for name in model_names:
    print(f"Classification Report - {name}")

    run = results[name]
    preds = np.array(run['predictions'])
    labels = np.array(run['labels'])

    report = classification_report(labels, preds, target_names=class_names)
    print(report)

    # Confusion matrix (rows: true class, columns: predicted class)
    cm = confusion_matrix(labels, preds)

    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names,
                yticklabels=class_names)
    plt.title(f'Confusion Matrix - {name}', fontsize=14, fontweight='bold')
    plt.ylabel('True Label', fontsize=12)
    plt.xlabel('Predicted Label', fontsize=12)
    plt.tight_layout()
    # plt.savefig(f'confusion_matrix_w2v_{name.lower()}.png', dpi=150)
    plt.show()

## Part 9: Sample Predictions

In [None]:
def predict_sentiment(model, text, vocab, device):
    """Predict the sentiment of a single raw text.

    Args:
        model: Classifier called as ``model(sequence, length)`` that returns
            one probability per sample.
        text: Raw review text; it is tokenized with ``preprocess_text``.
        vocab: Object exposing ``encode(tokens) -> list[int]``.
        device: Device the input tensors are moved to.

    Returns:
        ``(sentiment, confidence)``: ``"Positive"`` when the model output is
        >= 0.5, else ``"Negative"``, together with the raw model output. The
        second value is the predicted probability of the positive class, so a
        confident negative prediction has a value close to 0.
    """
    model.eval()

    # Preprocess
    tokens = preprocess_text(text)
    # Text without any alphabetic token (e.g. "", "!!!", "10/10") would give
    # an empty sequence that the models cannot process; represent it by one
    # unknown-token index instead.
    indices = vocab.encode(tokens) or vocab.encode(["<UNK>"])

    # Convert to tensor (explicit long dtype: embedding lookups need integers)
    sequence = torch.tensor([indices], dtype=torch.long).to(device)
    length = torch.tensor([len(indices)]).to(device)

    # Predict
    with torch.no_grad():
        output = model(sequence, length)
        prediction = (output >= 0.5).float().item()
        confidence = output.item()

    sentiment = "Positive" if prediction == 1 else "Negative"
    return sentiment, confidence

# Load best models
best_models = {}
model_classes = {
    'MLP': MLP_Classifier,
    'RNN': RNN_Classifier,
    'LSTM': LSTM_Classifier,
    'GRU': GRU_Classifier
}

for name, model_class in model_classes.items():
    # The MLP has no recurrent stack, so it takes no num_layers argument
    extra_kwargs = {} if name == 'MLP' else {'num_layers': NUM_LAYERS}
    model = model_class(embedding_matrix, hidden_dim=HIDDEN_DIM,
                        dropout=DROPOUT, **extra_kwargs)

    # map_location lets checkpoints written on a GPU be restored on a
    # CPU-only machine (and the other way round).
    state = torch.load(f'{name.lower()}_w2v_best.pt', map_location=device)
    model.load_state_dict(state)
    model = model.to(device)
    best_models[name] = model

# Sentences scored by every model in the comparison below
test_sentences = [
    "this movie is absolutely amazing and wonderful",
    "terrible film complete waste of time",
    "loved every minute of this fantastic movie",
    "horrible acting and boring storyline",
    "excellent performance highly recommended",
    "worst experience ever very disappointing"
]

print("SAMPLE PREDICTIONS COMPARISON")

for sent in test_sentences:
    print(f"\n Review: {sent}")
    print(f"{'Model':<10} {'Prediction':<15} {'Confidence':<15}")

    for name, model in best_models.items():
        sentiment, confidence = predict_sentiment(model, sent, vocab, device)
        # Short tag printed in front of the full sentiment label
        if sentiment == "Positive":
            indicator = "Pos"
        else:
            indicator = "Neg"
        print(f"{name:<10} {indicator} {sentiment:<12} {confidence:>8.4f}")

## Part 10: Word2Vec Embedding Analysis

In [None]:
# Word similarity analysis on the Skip-gram vectors
print("Word2Vec Embedding Analysis")

# Cosine similarity for hand-picked word pairs
print("\nSemantic Similarities:")
word_pairs = [
    ('excellent', 'fantastic'),
    ('terrible', 'awful'),
    ('love', 'enjoy'),
    ('hate', 'dislike'),
    ('amazing', 'wonderful'),
    ('boring', 'dull')
]

for word1, word2 in word_pairs:
    # Pairs with a word missing from the trained vocabulary are skipped silently
    if not (word1 in w2v_sg.wv and word2 in w2v_sg.wv):
        continue
    similarity = w2v_sg.wv.similarity(word1, word2)
    print(f"  {word1:15} <-> {word2:15}: {similarity:.4f}")

# Nearest neighbours of a few sentiment words
print("\nMost Similar Words:")
sentiment_words = ['excellent', 'terrible', 'great', 'awful', 'fantastic', 'boring']

for word in sentiment_words:
    if word not in w2v_sg.wv:
        continue
    similar = w2v_sg.wv.most_similar(word, topn=5)
    print(f"\n  {word}:")
    for sim_word, score in similar:
        print(f"    - {sim_word:15} ({score:.4f})")

In [None]:
# Visualize embeddings with t-SNE
from sklearn.manifold import TSNE
from matplotlib.lines import Line2D

print("\nVisualizing Word Embeddings with t-SNE...")

# Select important sentiment words, grouped by the polarity used for colouring
positive_words = ['excellent', 'fantastic', 'amazing', 'wonderful', 'great', 'brilliant']
negative_words = ['terrible', 'awful', 'horrible', 'boring', 'worst', 'bad']
neutral_words = ['movie', 'film', 'acting', 'story', 'performance']
sentiment_vocab = positive_words + negative_words + neutral_words

# Keep only the words the Skip-gram model knows. A dedicated name is used so
# that variables called `labels` in other cells are not overwritten.
plot_words = [word for word in sentiment_vocab if word in w2v_sg.wv]
vectors = np.array([w2v_sg.wv[word] for word in plot_words])

# Apply t-SNE (perplexity must stay below the number of points)
tsne = TSNE(n_components=2, random_state=SEED, perplexity=min(5, len(vectors)-1))
vectors_2d = tsne.fit_transform(vectors)

# One colour/marker per group, shared by the scatter points and the legend
GROUP_STYLE = {
    'Positive': {'color': 'green', 'marker': 'o'},
    'Negative': {'color': 'red', 'marker': 'x'},
    'Neutral': {'color': 'blue', 'marker': 's'},
}
UNFILLED_MARKERS = {'x'}

# Plot
plt.figure(figsize=(12, 10))

for word, (x, y) in zip(plot_words, vectors_2d):
    if word in positive_words:
        group = 'Positive'
    elif word in negative_words:
        group = 'Negative'
    else:
        group = 'Neutral'
    style = GROUP_STYLE[group]

    # An unfilled marker such as 'x' has no face: matplotlib ignores
    # `edgecolors` for it and emits a warning, so the black outline is only
    # requested for filled markers.
    outline = {} if style['marker'] in UNFILLED_MARKERS else {'edgecolors': 'black'}
    plt.scatter(x, y, c=style['color'], marker=style['marker'], s=200, alpha=0.6, **outline)
    plt.annotate(word, (x, y), xytext=(5, 5), textcoords='offset points',
                fontsize=12, fontweight='bold')

plt.title('Word2Vec Embeddings Visualization (t-SNE)', fontsize=14, fontweight='bold')
plt.xlabel('Dimension 1', fontsize=12)
plt.ylabel('Dimension 2', fontsize=12)
plt.grid(True, alpha=0.3)

# Legend. Unfilled markers are drawn with the marker EDGE colour, so the edge
# colour is set explicitly; with only markerfacecolor and a white line colour
# the 'x' entry would be drawn white on white.
legend_elements = [
    Line2D([0], [0], linestyle='None', marker=style['marker'],
           markerfacecolor=style['color'],
           markeredgecolor=(style['color'] if style['marker'] in UNFILLED_MARKERS else 'black'),
           markersize=10, label=group_name)
    for group_name, style in GROUP_STYLE.items()
]
plt.legend(handles=legend_elements, loc='best', fontsize=10)

plt.tight_layout()
# plt.savefig('w2v_embeddings_tsne.png', dpi=150)
plt.show()