# Bangla Sentiment Analysis - Complete Solution

This notebook implements a comprehensive sentiment analysis system for Bangla text, addressing underfitting issues with improved architecture and training strategies.

## 1. Setup and Imports

In [None]:
# Install required packages
!pip install --upgrade pip setuptools wheel -q
!pip install torch==2.0.1 --index-url https://download.pytorch.org/whl/cu118 -q
!pip install transformers==4.30.2 tokenizers==0.13.3 -q
!pip install scikit-learn pandas numpy matplotlib seaborn tqdm -q
!pip install nltk -q

import os
os.environ['TOKENIZERS_PARALLELISM'] = 'false'

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.auto import tqdm
import warnings
warnings.filterwarnings('ignore')

# ML libraries
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight

# Deep Learning
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam, AdamW
from torch.nn.utils.rnn import pad_sequence
from torch.optim.lr_scheduler import ReduceLROnPlateau

# NLP Processing
import re
import string
from collections import Counter, defaultdict
import nltk

# Download NLTK data
nltk.download('punkt', quiet=True)
nltk.download('stopwords', quiet=True)
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

# System utilities
import gc
import json
import random
from datetime import datetime

# Set seeds for reproducibility
def set_seed(seed=42):
    """Seed every random number generator the notebook relies on.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices), puts cuDNN into deterministic mode with benchmarking disabled,
    and stores the seed in the ``PYTHONHASHSEED`` environment variable.

    Args:
        seed: Integer seed handed to every generator (default 42).
    """
    # The environment variable is independent of the generator calls below.
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Same seed for each library-level generator.
    for seeder in (random.seed, np.random.seed,
                   torch.manual_seed, torch.cuda.manual_seed_all):
        seeder(seed)

    # Trade cuDNN autotuning speed for repeatable kernels.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


set_seed(42)

# Pick the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# Report which GPU was found and how much memory it has
if device.type == 'cuda':
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

# Output folders used by the notebook; existing folders are left untouched
for dir_name in ('plots', 'models', 'results', 'embeddings'):
    os.makedirs(dir_name, exist_ok=True)

print("Setup completed successfully!")

## 2. Load and Analyze Dataset

In [None]:
# Load dataset
df = pd.read_csv('/kaggle/input/final-dataset/final-dataset.csv')
print(f"Original dataset shape: {df.shape}")

# Check unique polarities to determine number of classes
unique_polarities = df['Polarity'].unique()
print(f"\nUnique polarities found: {unique_polarities}")

# Compare labels case-insensitively and without surrounding whitespace, so
# spellings such as "Positive" or "negative " are recognised instead of being
# mapped to NaN by Series.map below.
normalized_polarity = df['Polarity'].astype(str).str.strip().str.lower()

# Map polarities based on what's in the dataset
if (normalized_polarity == 'neutral').any():
    # 3-class case
    num_classes = 3
    polarity_to_id = {'positive': 1, 'negative': 0, 'neutral': 2}
    class_names = {0: 'Negative', 1: 'Positive', 2: 'Neutral'}
    print("\nDetected 3-class sentiment analysis")
else:
    # 2-class case
    num_classes = 2
    polarity_to_id = {'positive': 1, 'negative': 0}
    class_names = {0: 'Negative', 1: 'Positive'}
    print("\nDetected 2-class sentiment analysis")

df['Polarity'] = normalized_polarity.map(polarity_to_id)

# Series.map yields NaN for any label missing from the mapping. Such rows
# cannot be used as integer class targets, so they are reported and removed
# rather than being passed on to the split and tensor conversion.
unmapped_rows = df['Polarity'].isna()
if unmapped_rows.any():
    print(f"Dropping {int(unmapped_rows.sum())} rows with missing or unrecognized polarity labels")
    df = df.loc[~unmapped_rows].reset_index(drop=True)
df['Polarity'] = df['Polarity'].astype(int)

print(f"Number of classes: {num_classes}")
print("\nClass distribution:")
print(df['Polarity'].value_counts().sort_index())

# Add text statistics
df['text_length'] = df['Text'].str.len()
df['word_count'] = df['Text'].str.split().str.len()

print(f"\nAverage text length: {df['text_length'].mean():.1f} characters")
print(f"Average word count: {df['word_count'].mean():.1f} words")

## 3. Text Preprocessing

In [None]:
class BanglaTextPreprocessor:
    """Cleaner, whitespace tokenizer and word-level vocabulary for Bangla text.

    The vocabulary always contains four special tokens at fixed indices:
    ``<PAD>`` (0), ``<UNK>`` (1), ``<SOS>`` (2) and ``<EOS>`` (3). Further
    words are added by :meth:`build_vocab` when they occur at least
    ``min_word_freq`` times.

    Attributes:
        min_word_freq: Minimum count a word needs to enter the vocabulary.
        word2idx: Mapping from token string to integer index.
        idx2word: Inverse mapping from index to token string.
        word_freq: ``Counter`` of token frequencies seen by ``build_vocab``.
        vocab_size: Number of entries in the vocabulary (special tokens included).
        bangla_punct: String of common Bangla punctuation characters.
    """

    # Indices of the special tokens; they match the initial word2idx entries.
    PAD_IDX = 0
    UNK_IDX = 1
    SOS_IDX = 2
    EOS_IDX = 3

    # Zero-width non-joiner / joiner. They occur *inside* Bangla words, so
    # they are deleted; turning them into a space would split the word.
    _ZERO_WIDTH = {0x200C: None, 0x200D: None}

    # Everything except Bangla code points, whitespace, ASCII digits and a few
    # punctuation marks is replaced by a space.
    _DISALLOWED = re.compile(r'[^\u0980-\u09FF\s0-9।,!?.-]')
    _WHITESPACE = re.compile(r'\s+')

    def __init__(self, min_word_freq=2):
        self.min_word_freq = min_word_freq
        self.word2idx = {'<PAD>': 0, '<UNK>': 1, '<SOS>': 2, '<EOS>': 3}
        self.idx2word = {0: '<PAD>', 1: '<UNK>', 2: '<SOS>', 3: '<EOS>'}
        self.word_freq = Counter()
        self.vocab_size = 4

        # Common Bangla punctuation
        self.bangla_punct = '।॥,;:!?\'"-.…'

    def clean_text(self, text):
        """Return *text* reduced to Bangla characters, digits and basic punctuation.

        Args:
            text: Any object; it is converted with ``str()`` first.

        Returns:
            The cleaned string with single spaces between words and no
            leading or trailing whitespace.
        """
        text = str(text)

        # Collapse all runs of whitespace to single spaces
        text = ' '.join(text.split())

        # Delete zero-width joiners before filtering so that words containing
        # them stay in one piece.
        text = text.translate(self._ZERO_WIDTH)

        # Replace every disallowed character by a space
        text = self._DISALLOWED.sub(' ', text)

        # The replacement above can create new runs of spaces
        text = self._WHITESPACE.sub(' ', text)

        return text.strip()

    def tokenize(self, text):
        """Clean *text* and split it on whitespace.

        Returns:
            List of token strings; punctuation stays attached to its word.
        """
        return self.clean_text(text).split()

    def build_vocab(self, texts):
        """Count tokens in *texts* and add the frequent ones to the vocabulary.

        Counts accumulate in ``word_freq``; words whose count reaches
        ``min_word_freq`` and that are not yet known get the next free index.

        Args:
            texts: Iterable of raw texts.
        """
        # Count word frequencies
        for text in tqdm(texts, desc="Building vocabulary"):
            self.word_freq.update(self.tokenize(text))

        # Add words that meet minimum frequency
        for word, freq in self.word_freq.items():
            if freq >= self.min_word_freq and word not in self.word2idx:
                self.word2idx[word] = self.vocab_size
                self.idx2word[self.vocab_size] = word
                self.vocab_size += 1

        print(f"Vocabulary size: {self.vocab_size}")
        print(f"Number of unique words: {len(self.word_freq)}")
        print(f"Words meeting min frequency: {self.vocab_size - 4}")  # Excluding special tokens

    def text_to_indices(self, text, max_length=None):
        """Encode *text* as ``<SOS>`` + word indices + ``<EOS>``.

        Args:
            text: Raw text to encode.
            max_length: When truthy, the result has exactly this many
                entries: longer sequences are cut and end with ``<EOS>``,
                shorter ones are right-padded with ``<PAD>``.

        Returns:
            List of integer indices; unknown words map to ``<UNK>``.
        """
        tokens = self.tokenize(text)

        indices = [self.SOS_IDX]
        indices.extend(self.word2idx.get(token, self.UNK_IDX) for token in tokens)
        indices.append(self.EOS_IDX)

        # Truncate or pad if max_length specified
        if max_length:
            if len(indices) > max_length:
                # Keep EOS as the final element after truncation
                indices = indices[:max_length - 1] + [self.EOS_IDX]
            else:
                indices = indices + [self.PAD_IDX] * (max_length - len(indices))

        return indices

    def texts_to_sequences(self, texts, max_length=None):
        """Encode every text in *texts* with :meth:`text_to_indices`.

        Returns:
            List with one index list per input text.
        """
        sequences = []
        for text in tqdm(texts, desc="Converting texts to sequences"):
            sequences.append(self.text_to_indices(text, max_length))
        return sequences

# Initialize preprocessor
# With min_word_freq=2, words seen only once are left out of the vocabulary
# and are encoded as <UNK> when texts are converted to indices.
preprocessor = BanglaTextPreprocessor(min_word_freq=2)

# Build vocabulary
# NOTE(review): the vocabulary is built from the whole dataframe, i.e. before
# the train/validation/test split that happens later in the notebook.
print("Building vocabulary...")
preprocessor.build_vocab(df['Text'].values)

# Convert texts to sequences
# Every sequence is truncated or right-padded to exactly max_seq_length
# entries (including the <SOS>/<EOS> markers).
max_seq_length = 100  # Maximum sequence length
sequences = preprocessor.texts_to_sequences(df['Text'].values, max_length=max_seq_length)

print(f"\nSequence shape: {len(sequences)} x {max_seq_length}")
print(f"Sample sequence: {sequences[0][:20]}...")

## 4. Create Embedding Matrix (Random Initialization)


In [None]:
def create_embedding_matrix(vocab_size, embedding_dim=100, method='random'):
    """Create an initial ``(vocab_size, embedding_dim)`` embedding matrix.

    Args:
        vocab_size: Number of rows (one per vocabulary entry).
        embedding_dim: Number of columns (embedding width).
        method: ``'random'`` draws every entry from N(0, 0.1) and zeroes
            row 0 (the PAD token). ``'one_hot'`` puts ones on the main
            diagonal and zeros elsewhere. Any other value behaves like
            ``'random'``.

    Returns:
        ``numpy.ndarray`` of dtype float64 with shape
        ``(vocab_size, embedding_dim)``.
    """
    print(f"Creating {method} embedding matrix...")

    if method == 'one_hot':
        # np.eye(N, M) builds the rectangular matrix directly. Slicing a
        # square np.eye(vocab_size) would allocate vocab_size**2 entries and
        # return too few columns whenever vocab_size < embedding_dim.
        embedding_matrix = np.eye(vocab_size, embedding_dim)
    else:
        # 'random' and every unknown method share one code path, so the PAD
        # row is zeroed consistently.
        embedding_matrix = np.random.normal(0, 0.1, (vocab_size, embedding_dim))
        if vocab_size > 0:
            embedding_matrix[0] = 0.0  # PAD token

    print(f"Embedding matrix shape: {embedding_matrix.shape}")
    return embedding_matrix

# Build the initial embedding matrix (random normal values, PAD row zeroed)
embedding_dim = 100
embedding_matrix = create_embedding_matrix(preprocessor.vocab_size, embedding_dim, method='random')

# Optional: rescale each word vector according to how often the word occurs
print("\nApplying frequency-based adjustments...")
for word, idx in preprocessor.word2idx.items():
    if idx >= 4:  # rows 0-3 belong to the special tokens and stay untouched
        freq = preprocessor.word_freq.get(word, 1)
        if freq > 10:
            # Frequent words start with slightly larger vectors
            embedding_matrix[idx] *= 1.1
        elif freq < 3:
            # Rare words start with slightly smaller vectors
            embedding_matrix[idx] *= 0.9

print("Embedding matrix created successfully!")

## 5. Create Dataset Class


In [None]:
class BanglaDataset(Dataset):
    """Map-style dataset pairing encoded token sequences with class labels.

    Indexing returns a dict with the keys ``'sequence'`` and ``'label'``,
    both converted to ``torch.long`` tensors.
    """

    def __init__(self, sequences, labels):
        # Stored as given; conversion to tensors happens per item.
        self.sequences, self.labels = sequences, labels

    def __len__(self):
        """Number of samples, taken from the sequence container."""
        return len(self.sequences)

    def __getitem__(self, idx):
        """Return sample *idx* as ``{'sequence': ..., 'label': ...}``."""
        token_ids = torch.tensor(self.sequences[idx], dtype=torch.long)
        target = torch.tensor(self.labels[idx], dtype=torch.long)
        return {'sequence': token_ids, 'label': target}

def create_data_loaders(sequences, labels, batch_size=32, val_split=0.2, test_split=0.1,
                        random_state=42):
    """Split the data with stratification and wrap each part in a DataLoader.

    Args:
        sequences: Encoded texts, one equally long index list per sample.
        labels: Integer class label per sample.
        batch_size: Batch size used by all three loaders.
        val_split: Validation share, as a fraction of the full dataset.
        test_split: Test share, as a fraction of the full dataset.
        random_state: Seed passed to both ``train_test_split`` calls.

    Returns:
        ``(train_loader, val_loader, test_loader, splits)`` where ``splits``
        is ``(X_train, X_val, X_test, y_train, y_val, y_test)``.
    """
    # Convert to numpy arrays
    sequences = np.array(sequences)
    labels = np.array(labels)

    # First split: separate test set
    X_temp, X_test, y_temp, y_test = train_test_split(
        sequences, labels, test_size=test_split, stratify=labels, random_state=random_state
    )

    # Second split: separate train and validation.
    # val_split is relative to the full data, so rescale it to the remainder.
    val_size = val_split / (1 - test_split)
    X_train, X_val, y_train, y_val = train_test_split(
        X_temp, y_temp, test_size=val_size, stratify=y_temp, random_state=random_state
    )

    print(f"Train: {len(X_train)} samples")
    print(f"Val: {len(X_val)} samples")
    print(f"Test: {len(X_test)} samples")

    # Create datasets
    train_dataset = BanglaDataset(X_train, y_train)
    val_dataset = BanglaDataset(X_val, y_val)
    test_dataset = BanglaDataset(X_test, y_test)

    # The classifiers in this notebook contain BatchNorm1d layers, which raise
    # in training mode when a batch holds a single sample. Drop the trailing
    # batch only in that exact case so no other data is discarded.
    n_train = len(train_dataset)
    drop_single_tail = n_train > batch_size and n_train % batch_size == 1

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                              drop_last=drop_single_tail)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader, test_loader, (X_train, X_val, X_test, y_train, y_val, y_test)

# Create data loaders
# val_split and test_split are fractions of the full dataset, so the
# remaining 70% of the samples is used for training.
# data_splits holds the raw arrays:
# (X_train, X_val, X_test, y_train, y_val, y_test).
train_loader, val_loader, test_loader, data_splits = create_data_loaders(
    sequences, 
    df['Polarity'].values,
    batch_size=64,
    val_split=0.2,
    test_split=0.1
)

## 6. CNN Model

In [None]:
class CNNSentimentClassifier(nn.Module):
    """Multi-kernel 1-D CNN over word embeddings for sentiment classification.

    Each kernel size gets its own Conv1d -> BatchNorm1d -> ReLU branch whose
    output is max-pooled over time. The pooled branches are concatenated and
    passed through a two-layer classifier head.

    Args:
        vocab_size: Number of rows in the embedding table.
        embedding_dim: Width of each embedding vector.
        num_classes: Number of output logits.
        embedding_matrix: Optional ``(vocab_size, embedding_dim)`` NumPy array
            copied into the embedding weights.
        num_filters: Output channels per convolution branch.
        filter_sizes: Iterable of kernel sizes, one branch per entry.
        dropout: Dropout probability used before and inside the head.
        freeze_embeddings: When True and ``embedding_matrix`` is given, the
            embedding weights are excluded from training.
    """

    def __init__(self, vocab_size, embedding_dim, num_classes,
                 embedding_matrix=None, num_filters=100, filter_sizes=(3, 4, 5),
                 dropout=0.5, freeze_embeddings=False):
        super().__init__()

        # An immutable default avoids sharing one list object between calls;
        # lists passed by callers are still accepted.
        filter_sizes = tuple(filter_sizes)
        # Shortest input length every convolution can process.
        self.max_filter_size = max(filter_sizes, default=1)

        # Embedding layer
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)

        # Initialize with pre-trained embeddings if provided
        if embedding_matrix is not None:
            self.embedding.weight.data.copy_(torch.from_numpy(embedding_matrix).float())
            # Optionally freeze embeddings
            if freeze_embeddings:
                self.embedding.weight.requires_grad = False

        # Convolutional layers
        self.convs = nn.ModuleList([
            nn.Conv1d(embedding_dim, num_filters, kernel_size=k)
            for k in filter_sizes
        ])

        # Batch normalization, one per convolution branch
        self.batch_norms = nn.ModuleList([
            nn.BatchNorm1d(num_filters)
            for _ in filter_sizes
        ])

        # Dropout
        self.dropout = nn.Dropout(dropout)

        # Fully connected layers
        fc_input_dim = len(filter_sizes) * num_filters
        self.fc1 = nn.Linear(fc_input_dim, 128)
        self.batch_norm_fc = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, num_classes)

        # Activation
        self.relu = nn.ReLU()

    def forward(self, x):
        """Compute class logits.

        Args:
            x: Long tensor of token indices, shape ``(batch_size, seq_len)``.

        Returns:
            Float tensor of shape ``(batch_size, num_classes)``.
        """
        # A sequence shorter than the largest kernel would make Conv1d raise;
        # right-pad such inputs with the PAD index (0) instead.
        deficit = self.max_filter_size - x.size(1)
        if deficit > 0:
            x = torch.cat([x, x.new_zeros(x.size(0), deficit)], dim=1)

        # Embedding
        x = self.embedding(x)  # (batch_size, seq_len, embedding_dim)
        x = x.permute(0, 2, 1)  # (batch_size, embedding_dim, seq_len)

        # Apply convolutions
        conv_outputs = []
        for conv, bn in zip(self.convs, self.batch_norms):
            conv_out = self.relu(bn(conv(x)))  # (batch_size, num_filters, new_seq_len)
            # Max pooling over the whole time axis
            pooled = F.max_pool1d(conv_out, conv_out.size(2)).squeeze(2)  # (batch_size, num_filters)
            conv_outputs.append(pooled)

        # Concatenate all conv outputs
        x = torch.cat(conv_outputs, dim=1)  # (batch_size, len(filter_sizes) * num_filters)

        # Apply dropout
        x = self.dropout(x)

        # Fully connected layers
        x = self.fc1(x)
        x = self.batch_norm_fc(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)

        return x

# Initialize CNN model
cnn_model = CNNSentimentClassifier(
    vocab_size=preprocessor.vocab_size,
    # Reuse the width the embedding matrix was built with; a hard-coded value
    # that drifts from it would make the weight copy in the constructor fail.
    embedding_dim=embedding_dim,
    num_classes=num_classes,
    embedding_matrix=embedding_matrix,
    num_filters=100,
    filter_sizes=[3, 4, 5],
    dropout=0.5,
    freeze_embeddings=False
).to(device)

print(f"CNN Model initialized")
print(f"Total parameters: {sum(p.numel() for p in cnn_model.parameters()):,}")
print(f"Trainable parameters: {sum(p.numel() for p in cnn_model.parameters() if p.requires_grad):,}")

## 7. BiLSTM Model


In [None]:
class BiLSTMSentimentClassifier(nn.Module):
    """Bidirectional LSTM with additive attention pooling for sentiment classification.

    Token indices are embedded, run through a (possibly multi-layer)
    bidirectional LSTM on packed sequences, pooled with attention over the
    real (non-padding) time steps and classified by a two-layer head.

    Args:
        vocab_size: Number of rows in the embedding table.
        embedding_dim: Width of each embedding vector.
        hidden_dim: LSTM hidden size per direction.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of output logits.
        embedding_matrix: Optional ``(vocab_size, embedding_dim)`` NumPy array
            copied into the embedding weights.
        dropout: Dropout probability (between LSTM layers and in the head).
        freeze_embeddings: When True and ``embedding_matrix`` is given, the
            embedding weights are excluded from training.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_layers, 
                 num_classes, embedding_matrix=None, dropout=0.5, freeze_embeddings=False):
        super().__init__()

        # Embedding layer
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)

        # Initialize with pre-trained embeddings if provided
        if embedding_matrix is not None:
            self.embedding.weight.data.copy_(torch.from_numpy(embedding_matrix).float())
            if freeze_embeddings:
                self.embedding.weight.requires_grad = False

        # BiLSTM layers; inter-layer dropout only applies with 2+ layers
        self.lstm = nn.LSTM(
            embedding_dim,
            hidden_dim,
            num_layers,
            batch_first=True,
            bidirectional=True,
            dropout=dropout if num_layers > 1 else 0
        )

        # Attention mechanism: one score per time step
        self.attention = nn.Linear(hidden_dim * 2, 1)

        # Dropout
        self.dropout = nn.Dropout(dropout)

        # Fully connected layers
        # *2 for bidirectional
        self.fc1 = nn.Linear(hidden_dim * 2, 128)
        self.batch_norm = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, num_classes)

        # Activation
        self.relu = nn.ReLU()
        self.tanh = nn.Tanh()

    def attention_weights(self, lstm_output, mask=None):
        """Return softmax attention weights over the time axis.

        Args:
            lstm_output: Tensor of shape ``(batch_size, seq_len, hidden_dim * 2)``.
            mask: Optional bool tensor ``(batch_size, seq_len)``; positions
                that are False receive a weight of exactly zero. Every row
                needs at least one True entry.

        Returns:
            Tensor of shape ``(batch_size, seq_len)`` whose rows sum to one.
        """
        attention_scores = self.attention(lstm_output).squeeze(-1)  # (batch_size, seq_len)
        if mask is not None:
            # -inf scores become zero probability after the softmax
            attention_scores = attention_scores.masked_fill(~mask, float('-inf'))
        return F.softmax(attention_scores, dim=1)

    def forward(self, x):
        """Compute class logits.

        Args:
            x: Long tensor of token indices, shape ``(batch_size, seq_len)``,
                right-padded with index 0.

        Returns:
            Float tensor of shape ``(batch_size, num_classes)``.
        """
        # Number of non-padding tokens per row. pack_padded_sequence rejects
        # zero lengths, so an all-padding row is treated as length one.
        lengths = (x != 0).sum(dim=1).clamp(min=1).cpu()

        # Embedding
        x = self.embedding(x)  # (batch_size, seq_len, embedding_dim)

        # Pack padded sequence so the LSTM skips the padding steps
        packed = nn.utils.rnn.pack_padded_sequence(
            x, lengths, batch_first=True, enforce_sorted=False
        )

        # LSTM
        packed_output, _ = self.lstm(packed)

        # Unpack sequence; steps beyond each row's length are zero-filled
        output, _ = nn.utils.rnn.pad_packed_sequence(
            packed_output, batch_first=True
        )
        # output shape: (batch_size, max_len_in_batch, hidden_dim * 2)

        # Mask the zero-filled steps. Without it they would still receive
        # attention mass, so a sample's result would depend on how much
        # longer the other sequences in its batch are.
        positions = torch.arange(output.size(1), device=output.device)
        valid = positions.unsqueeze(0) < lengths.to(output.device).unsqueeze(1)

        # Apply attention
        attention_weights = self.attention_weights(output, valid)
        # attention_weights shape: (batch_size, max_len_in_batch)

        # Weighted sum of the LSTM outputs
        attended = torch.bmm(
            attention_weights.unsqueeze(1), 
            output
        ).squeeze(1)
        # attended shape: (batch_size, hidden_dim * 2)

        # Apply dropout
        attended = self.dropout(attended)

        # Fully connected layers
        x = self.fc1(attended)
        x = self.batch_norm(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)

        return x

# Initialize BiLSTM model
bilstm_model = BiLSTMSentimentClassifier(
    vocab_size=preprocessor.vocab_size,
    # Reuse the width the embedding matrix was built with; a hard-coded value
    # that drifts from it would make the weight copy in the constructor fail.
    embedding_dim=embedding_dim,
    hidden_dim=128,
    num_layers=2,
    num_classes=num_classes,
    embedding_matrix=embedding_matrix,
    dropout=0.5,
    freeze_embeddings=False
).to(device)

print(f"BiLSTM Model initialized with Attention")
print(f"Total parameters: {sum(p.numel() for p in bilstm_model.parameters()):,}")
print(f"Trainable parameters: {sum(p.numel() for p in bilstm_model.parameters() if p.requires_grad):,}")

## 7a. GRU Model


In [None]:
class GRUSentimentClassifier(nn.Module):
    """Bidirectional GRU with additive attention pooling for sentiment classification.

    Token indices are embedded, run through a (possibly multi-layer)
    bidirectional GRU on packed sequences, pooled with attention over the
    real (non-padding) time steps and classified by a two-layer head.

    Args:
        vocab_size: Number of rows in the embedding table.
        embedding_dim: Width of each embedding vector.
        hidden_dim: GRU hidden size per direction.
        num_layers: Number of stacked GRU layers.
        num_classes: Number of output logits.
        embedding_matrix: Optional ``(vocab_size, embedding_dim)`` NumPy array
            copied into the embedding weights.
        dropout: Dropout probability (between GRU layers and in the head).
        freeze_embeddings: When True and ``embedding_matrix`` is given, the
            embedding weights are excluded from training.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_layers, 
                 num_classes, embedding_matrix=None, dropout=0.5, freeze_embeddings=False):
        super().__init__()

        # Embedding layer
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)

        # Initialize with pre-trained embeddings if provided
        if embedding_matrix is not None:
            self.embedding.weight.data.copy_(torch.from_numpy(embedding_matrix).float())
            if freeze_embeddings:
                self.embedding.weight.requires_grad = False

        # GRU layers; inter-layer dropout only applies with 2+ layers
        self.gru = nn.GRU(
            embedding_dim,
            hidden_dim,
            num_layers,
            batch_first=True,
            bidirectional=True,
            dropout=dropout if num_layers > 1 else 0
        )

        # Attention mechanism: one score per time step
        self.attention = nn.Linear(hidden_dim * 2, 1)

        # Dropout
        self.dropout = nn.Dropout(dropout)

        # Fully connected layers
        # *2 for bidirectional
        self.fc1 = nn.Linear(hidden_dim * 2, 128)
        self.batch_norm = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, num_classes)

        # Activation
        self.relu = nn.ReLU()

    def attention_weights(self, gru_output, mask=None):
        """Return softmax attention weights over the time axis.

        Args:
            gru_output: Tensor of shape ``(batch_size, seq_len, hidden_dim * 2)``.
            mask: Optional bool tensor ``(batch_size, seq_len)``; positions
                that are False receive a weight of exactly zero. Every row
                needs at least one True entry.

        Returns:
            Tensor of shape ``(batch_size, seq_len)`` whose rows sum to one.
        """
        attention_scores = self.attention(gru_output).squeeze(-1)  # (batch_size, seq_len)
        if mask is not None:
            # -inf scores become zero probability after the softmax
            attention_scores = attention_scores.masked_fill(~mask, float('-inf'))
        return F.softmax(attention_scores, dim=1)

    def forward(self, x):
        """Compute class logits.

        Args:
            x: Long tensor of token indices, shape ``(batch_size, seq_len)``,
                right-padded with index 0.

        Returns:
            Float tensor of shape ``(batch_size, num_classes)``.
        """
        # Number of non-padding tokens per row. pack_padded_sequence rejects
        # zero lengths, so an all-padding row is treated as length one.
        lengths = (x != 0).sum(dim=1).clamp(min=1).cpu()

        # Embedding
        x = self.embedding(x)  # (batch_size, seq_len, embedding_dim)

        # Pack padded sequence so the GRU skips the padding steps
        packed = nn.utils.rnn.pack_padded_sequence(
            x, lengths, batch_first=True, enforce_sorted=False
        )

        # GRU
        packed_output, _ = self.gru(packed)

        # Unpack sequence; steps beyond each row's length are zero-filled
        output, _ = nn.utils.rnn.pad_packed_sequence(
            packed_output, batch_first=True
        )
        # output shape: (batch_size, max_len_in_batch, hidden_dim * 2)

        # Mask the zero-filled steps. Without it they would still receive
        # attention mass, so a sample's result would depend on how much
        # longer the other sequences in its batch are.
        positions = torch.arange(output.size(1), device=output.device)
        valid = positions.unsqueeze(0) < lengths.to(output.device).unsqueeze(1)

        # Apply attention
        attention_weights = self.attention_weights(output, valid)

        # Weighted sum of the GRU outputs
        attended = torch.bmm(
            attention_weights.unsqueeze(1), 
            output
        ).squeeze(1)
        # attended shape: (batch_size, hidden_dim * 2)

        # Apply dropout
        attended = self.dropout(attended)

        # Fully connected layers
        x = self.fc1(attended)
        x = self.batch_norm(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)

        return x

# Initialize GRU model
gru_model = GRUSentimentClassifier(
    vocab_size=preprocessor.vocab_size,
    # Reuse the width the embedding matrix was built with; a hard-coded value
    # that drifts from it would make the weight copy in the constructor fail.
    embedding_dim=embedding_dim,
    hidden_dim=128,
    num_layers=2,
    num_classes=num_classes,
    embedding_matrix=embedding_matrix,
    dropout=0.5,
    freeze_embeddings=False
).to(device)

print(f"GRU Model initialized with Attention")
print(f"Total parameters: {sum(p.numel() for p in gru_model.parameters()):,}")
print(f"Trainable parameters: {sum(p.numel() for p in gru_model.parameters() if p.requires_grad):,}")

## 7b. Hybrid CNN-LSTM Model


In [None]:
class CNNLSTMSentimentClassifier(nn.Module):
    """Hybrid CNN -> BiLSTM -> attention model for sentiment classification.

    Parallel Conv1d branches extract n-gram features from the embeddings; the
    branch outputs are aligned to a common length, concatenated per time step
    and fed to a bidirectional LSTM whose outputs are attention-pooled and
    classified by a two-layer head.

    Args:
        vocab_size: Number of rows in the embedding table.
        embedding_dim: Width of each embedding vector.
        num_filters: Output channels per convolution branch.
        filter_sizes: Iterable of kernel sizes, one branch per entry.
        lstm_hidden_dim: LSTM hidden size per direction.
        num_lstm_layers: Number of stacked LSTM layers.
        num_classes: Number of output logits.
        embedding_matrix: Optional ``(vocab_size, embedding_dim)`` NumPy array
            copied into the embedding weights.
        dropout: Dropout probability (between LSTM layers and in the head).
        freeze_embeddings: When True and ``embedding_matrix`` is given, the
            embedding weights are excluded from training.
    """

    def __init__(self, vocab_size, embedding_dim, num_filters, filter_sizes,
                 lstm_hidden_dim, num_lstm_layers, num_classes, 
                 embedding_matrix=None, dropout=0.5, freeze_embeddings=False):
        super().__init__()

        filter_sizes = tuple(filter_sizes)
        # Shortest input length every convolution can process.
        self.max_filter_size = max(filter_sizes, default=1)

        # Embedding layer
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)

        # Initialize with pre-trained embeddings if provided
        if embedding_matrix is not None:
            self.embedding.weight.data.copy_(torch.from_numpy(embedding_matrix).float())
            if freeze_embeddings:
                self.embedding.weight.requires_grad = False

        # CNN layers
        self.convs = nn.ModuleList([
            nn.Conv1d(embedding_dim, num_filters, kernel_size=k)
            for k in filter_sizes
        ])

        # Batch normalization for CNN
        self.batch_norms_cnn = nn.ModuleList([
            nn.BatchNorm1d(num_filters)
            for _ in filter_sizes
        ])

        # LSTM layer
        # Input size is total number of filters from all conv layers
        lstm_input_size = len(filter_sizes) * num_filters
        self.lstm = nn.LSTM(
            lstm_input_size,
            lstm_hidden_dim,
            num_lstm_layers,
            batch_first=True,
            bidirectional=True,
            dropout=dropout if num_lstm_layers > 1 else 0
        )

        # Attention for LSTM output
        self.attention = nn.Linear(lstm_hidden_dim * 2, 1)

        # Dropout
        self.dropout = nn.Dropout(dropout)

        # Fully connected layers
        self.fc1 = nn.Linear(lstm_hidden_dim * 2, 128)
        self.batch_norm_fc = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, num_classes)

        # Activation
        self.relu = nn.ReLU()

    def forward(self, x):
        """Compute class logits.

        Args:
            x: Long tensor of token indices, shape ``(batch_size, seq_len)``.

        Returns:
            Float tensor of shape ``(batch_size, num_classes)``.
        """
        # A sequence shorter than the largest kernel would make Conv1d raise;
        # right-pad such inputs with the PAD index (0) instead.
        deficit = self.max_filter_size - x.size(1)
        if deficit > 0:
            x = torch.cat([x, x.new_zeros(x.size(0), deficit)], dim=1)

        # Embedding
        x_embed = self.embedding(x)  # (batch_size, seq_len, embedding_dim)
        x_cnn = x_embed.permute(0, 2, 1)  # (batch_size, embedding_dim, seq_len)

        # Apply CNNs
        conv_outputs = [
            self.relu(bn(conv(x_cnn)))  # (batch_size, num_filters, new_seq_len)
            for conv, bn in zip(self.convs, self.batch_norms_cnn)
        ]

        # Larger kernels yield shorter outputs; trim every branch to the
        # shortest one so they can be concatenated per time step.
        min_seq_len = min(conv_out.size(2) for conv_out in conv_outputs)
        cnn_features = [
            conv_out[:, :, :min_seq_len].permute(0, 2, 1)  # (batch_size, min_seq_len, num_filters)
            for conv_out in conv_outputs
        ]

        # Concatenate all CNN features
        cnn_combined = torch.cat(cnn_features, dim=2)  # (batch_size, min_seq_len, total_filters)

        # Pass through LSTM
        lstm_out, _ = self.lstm(cnn_combined)
        # lstm_out shape: (batch_size, min_seq_len, hidden_dim * 2)

        # Apply attention
        attention_scores = self.attention(lstm_out).squeeze(-1)  # (batch_size, min_seq_len)
        attention_weights = F.softmax(attention_scores, dim=1)  # (batch_size, min_seq_len)

        # Weighted sum of the LSTM outputs
        attended = torch.bmm(
            attention_weights.unsqueeze(1), 
            lstm_out
        ).squeeze(1)
        # attended shape: (batch_size, hidden_dim * 2)

        # Apply dropout
        attended = self.dropout(attended)

        # Fully connected layers
        x = self.fc1(attended)
        x = self.batch_norm_fc(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)

        return x

# Initialize CNN-LSTM model
cnn_lstm_model = CNNLSTMSentimentClassifier(
    vocab_size=preprocessor.vocab_size,
    # Reuse the width the embedding matrix was built with; a hard-coded value
    # that drifts from it would make the weight copy in the constructor fail.
    embedding_dim=embedding_dim,
    num_filters=100,
    filter_sizes=[3, 4, 5],
    lstm_hidden_dim=128,
    num_lstm_layers=2,
    num_classes=num_classes,
    embedding_matrix=embedding_matrix,
    dropout=0.5,
    freeze_embeddings=False
).to(device)

print(f"CNN-LSTM Hybrid Model initialized")
print(f"Total parameters: {sum(p.numel() for p in cnn_lstm_model.parameters()):,}")
print(f"Trainable parameters: {sum(p.numel() for p in cnn_lstm_model.parameters() if p.requires_grad):,}")

## 8. Training Functions


In [None]:
def train_epoch(model, train_loader, criterion, optimizer, device):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Network to train; switched to training mode here.
        train_loader: Iterable of batches, each a dict with the keys
            ``'sequence'`` and ``'label'``.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer updating ``model``'s parameters.
        device: Device the batch tensors are moved to.

    Returns:
        ``(avg_loss, accuracy)``: the mean of the per-batch losses and the
        fraction of correctly classified samples.
    """
    model.train()
    running_loss, n_correct, n_seen = 0, 0, 0

    for batch in tqdm(train_loader, desc="Training"):
        inputs = batch['sequence'].to(device)
        targets = batch['label'].to(device)

        # Fresh gradients, forward pass, backward pass
        optimizer.zero_grad()
        logits = model(inputs)
        batch_loss = criterion(logits, targets)
        batch_loss.backward()

        # Cap the global gradient norm before the update
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()

        # Running statistics for the epoch summary
        running_loss += batch_loss.item()
        predicted = torch.max(logits.data, 1)[1]
        n_seen += targets.size(0)
        n_correct += (predicted == targets).sum().item()

    return running_loss / len(train_loader), n_correct / n_seen

def evaluate(model, data_loader, criterion, device):
    """Score ``model`` on ``data_loader`` without updating it.

    Args:
        model: Network to evaluate; switched to eval mode here.
        data_loader: Iterable of batches, each a dict with the keys
            ``'sequence'`` and ``'label'``.
        criterion: Loss callable taking ``(outputs, labels)``.
        device: Device the batch tensors are moved to.

    Returns:
        ``(avg_loss, accuracy, f1, predictions, labels, probabilities)``
        where ``avg_loss`` is the mean of the per-batch losses, ``f1`` is the
        weighted F1 score and the last three items are per-sample lists.
    """
    model.eval()
    running_loss = 0
    n_correct = 0
    n_seen = 0
    predictions, targets_seen, probabilities = [], [], []

    # No gradients are needed for scoring
    with torch.no_grad():
        for batch in tqdm(data_loader, desc="Evaluating"):
            inputs = batch['sequence'].to(device)
            targets = batch['label'].to(device)

            logits = model(inputs)
            running_loss += criterion(logits, targets).item()

            # Class probabilities and hard predictions
            probs = F.softmax(logits, dim=1)
            predicted = torch.max(logits.data, 1)[1]

            n_seen += targets.size(0)
            n_correct += (predicted == targets).sum().item()

            # Collect per-sample results on the CPU
            predictions.extend(predicted.cpu().numpy())
            targets_seen.extend(targets.cpu().numpy())
            probabilities.extend(probs.cpu().numpy())

    avg_loss = running_loss / len(data_loader)
    accuracy = n_correct / n_seen
    f1 = f1_score(targets_seen, predictions, average='weighted')

    return avg_loss, accuracy, f1, predictions, targets_seen, probabilities

def train_model(model, train_loader, val_loader, num_epochs=20, learning_rate=0.001, model_name="model"):
    """Train ``model`` with early stopping and restore its best checkpoint.

    Each epoch runs ``train_epoch`` on ``train_loader`` and ``evaluate`` on
    ``val_loader``. The learning rate is halved when validation loss has not
    improved for 3 epochs, and training stops once validation F1 has not
    improved for 5 consecutive epochs.

    Args:
        model: Network to train; it is updated in place.
        train_loader: Loader passed to ``train_epoch``.
        val_loader: Loader passed to ``evaluate``.
        num_epochs: Maximum number of epochs.
        learning_rate: Initial learning rate for Adam.
        model_name: Label used in progress messages only.

    Returns:
        Tuple ``(model, history)``. ``model`` carries the weights from the
        epoch with the highest validation F1 (if any epoch scored above 0),
        and ``history`` maps ``'train_loss'``, ``'train_acc'``, ``'val_loss'``,
        ``'val_acc'`` and ``'val_f1'`` to per-epoch lists.
    """

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', patience=3, factor=0.5, verbose=True)

    # Training history
    history = {
        'train_loss': [], 'train_acc': [],
        'val_loss': [], 'val_acc': [], 'val_f1': []
    }

    best_val_f1 = 0
    best_model_state = None
    patience_counter = 0
    early_stopping_patience = 5

    print(f"\nTraining {model_name}...")
    print("="*70)

    for epoch in range(num_epochs):
        # Train
        train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)

        # Validate
        val_loss, val_acc, val_f1, _, _, _ = evaluate(model, val_loader, criterion, device)

        # The plateau scheduler watches validation loss (mode='min')
        scheduler.step(val_loss)

        # Save history
        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)
        history['val_f1'].append(val_f1)

        # Save best model
        is_best = val_f1 > best_val_f1
        if is_best:
            best_val_f1 = val_f1
            # state_dict() returns references to the live parameter tensors, and
            # dict.copy() is shallow, so later optimizer steps would overwrite the
            # "saved" weights. Clone every tensor to take a real snapshot.
            best_model_state = {
                name: tensor.detach().cpu().clone()
                for name, tensor in model.state_dict().items()
            }
            patience_counter = 0
        else:
            patience_counter += 1

        # Print progress
        print(f"Epoch {epoch+1}/{num_epochs}")
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

        if is_best:
            print("✓ New best model!")

        print("-"*50)

        # Early stopping
        if patience_counter >= early_stopping_patience:
            print(f"Early stopping triggered after {epoch+1} epochs")
            break

    # Restore the snapshot taken at the best-F1 epoch
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
        print(f"\nLoaded best model with Val F1: {best_val_f1:.4f}")

    return model, history

## Cell 9 - Train CNN Model


In [None]:
# Fit the CNN classifier; train_model returns the model together with its per-epoch history
cnn_model, cnn_history = train_model(
    cnn_model, train_loader, val_loader,
    num_epochs=25, learning_rate=0.001, model_name="CNN",
)

# Score the fitted CNN on the test loader
print("\nEvaluating CNN on test set...")
criterion = nn.CrossEntropyLoss()
(test_loss, test_acc, test_f1,
 test_preds, test_labels, test_probs) = evaluate(cnn_model, test_loader, criterion, device)

print(
    "\nCNN Test Results:",
    f"Test Loss: {test_loss:.4f}",
    f"Test Accuracy: {test_acc:.4f}",
    f"Test F1 Score: {test_f1:.4f}",
    sep="\n",
)

# Bundle everything later cells need for the CNN (history, metrics, raw outputs)
cnn_results = dict(
    history=cnn_history,
    test_metrics=dict(loss=test_loss, accuracy=test_acc, f1=test_f1),
    predictions=test_preds,
    labels=test_labels,
    probabilities=test_probs,
)

## Cell 10 - Train BiLSTM Model

In [None]:
# Train BiLSTM model
bilstm_model, bilstm_history = train_model(
    bilstm_model,
    train_loader,
    val_loader,
    num_epochs=25,
    learning_rate=0.001,
    model_name="BiLSTM"
)

# Evaluate on test set
print("\nEvaluating BiLSTM on test set...")
# Define the loss here, as the CNN and GRU cells do, so this cell does not
# depend on another cell having been run first.
criterion = nn.CrossEntropyLoss()
test_loss, test_acc, test_f1, test_preds, test_labels, test_probs = evaluate(
    bilstm_model, test_loader, criterion, device
)

print(f"\nBiLSTM Test Results:")
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_acc:.4f}")
print(f"Test F1 Score: {test_f1:.4f}")

# Save BiLSTM results (history, test metrics and raw per-sample outputs)
bilstm_results = {
    'history': bilstm_history,
    'test_metrics': {
        'loss': test_loss,
        'accuracy': test_acc,
        'f1': test_f1
    },
    'predictions': test_preds,
    'labels': test_labels,
    'probabilities': test_probs
}

## Cell 10a - Train GRU Model


In [None]:
# Fit the GRU classifier with the same settings as the other models
gru_model, gru_history = train_model(
    gru_model, train_loader, val_loader,
    num_epochs=25, learning_rate=0.001, model_name="GRU",
)

# Measure the fitted GRU on the test loader
print("\nEvaluating GRU on test set...")
criterion = nn.CrossEntropyLoss()
(test_loss, test_acc, test_f1,
 test_preds, test_labels, test_probs) = evaluate(gru_model, test_loader, criterion, device)

print(
    "\nGRU Test Results:",
    f"Test Loss: {test_loss:.4f}",
    f"Test Accuracy: {test_acc:.4f}",
    f"Test F1 Score: {test_f1:.4f}",
    sep="\n",
)

# Keep history, metrics and raw outputs together for the comparison cells
gru_results = dict(
    history=gru_history,
    test_metrics=dict(loss=test_loss, accuracy=test_acc, f1=test_f1),
    predictions=test_preds,
    labels=test_labels,
    probabilities=test_probs,
)

## Cell 10b - Train CNN-LSTM Model


In [None]:
# Train CNN-LSTM model
cnn_lstm_model, cnn_lstm_history = train_model(
    cnn_lstm_model,
    train_loader,
    val_loader,
    num_epochs=25,
    learning_rate=0.001,
    model_name="CNN-LSTM"
)

# Evaluate on test set
print("\nEvaluating CNN-LSTM on test set...")
# Define the loss here, as the CNN and GRU cells do, so this cell does not
# depend on another cell having been run first.
criterion = nn.CrossEntropyLoss()
test_loss, test_acc, test_f1, test_preds, test_labels, test_probs = evaluate(
    cnn_lstm_model, test_loader, criterion, device
)

print(f"\nCNN-LSTM Test Results:")
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_acc:.4f}")
print(f"Test F1 Score: {test_f1:.4f}")

# Save CNN-LSTM results (history, test metrics and raw per-sample outputs)
cnn_lstm_results = {
    'history': cnn_lstm_history,
    'test_metrics': {
        'loss': test_loss,
        'accuracy': test_acc,
        'f1': test_f1
    },
    'predictions': test_preds,
    'labels': test_labels,
    'probabilities': test_probs
}

## Cell 11 - Visualize Training History


In [None]:
def plot_training_history(histories, model_names):
    """Plot loss, accuracy and F1 curves for several models on one figure.

    The figure is a 2x2 grid: train/val loss, train/val accuracy, validation
    F1 per epoch, and a bar chart of the last recorded validation accuracy and
    F1 of each model. It is saved to
    ``plots/dl_models_training_history_all.png`` and then shown.

    Args:
        histories: Sequence of history dicts with the keys ``'train_loss'``,
            ``'train_acc'``, ``'val_loss'``, ``'val_acc'`` and ``'val_f1'``,
            each mapping to a per-epoch list.
        model_names: Display names, in the same order as ``histories``.
    """
    # savefig does not create missing directories
    os.makedirs('plots', exist_ok=True)

    fig, axes = plt.subplots(2, 2, figsize=(15, 10))

    # Cycle through the palette so more than four models do not raise IndexError
    palette = ['blue', 'red', 'green', 'orange']
    runs = [
        (history, name, palette[i % len(palette)])
        for i, (history, name) in enumerate(zip(histories, model_names))
    ]

    # Loss and accuracy panels share the same layout: solid = train, dashed = val
    curve_panels = [
        (axes[0, 0], 'train_loss', 'val_loss', 'Loss',
         'Training and Validation Loss', 'upper right'),
        (axes[0, 1], 'train_acc', 'val_acc', 'Accuracy',
         'Training and Validation Accuracy', 'lower right'),
    ]
    for ax, train_key, val_key, ylabel, title, legend_loc in curve_panels:
        for history, name, color in runs:
            epochs = range(1, len(history[train_key]) + 1)
            ax.plot(epochs, history[train_key],
                    color=color, linestyle='-', label=f'{name} Train', alpha=0.8)
            ax.plot(epochs, history[val_key],
                    color=color, linestyle='--', label=f'{name} Val', alpha=0.8)
        ax.set_xlabel('Epochs')
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        ax.legend(loc=legend_loc, fontsize='small')
        ax.grid(True, alpha=0.3)

    # Plot validation F1 score
    for history, name, color in runs:
        epochs = range(1, len(history['val_f1']) + 1)
        axes[1, 0].plot(epochs, history['val_f1'],
                        color=color, marker='o', label=name, markersize=4, alpha=0.8)

    axes[1, 0].set_xlabel('Epochs')
    axes[1, 0].set_ylabel('F1 Score')
    axes[1, 0].set_title('Validation F1 Score')
    axes[1, 0].legend(loc='lower right')
    axes[1, 0].grid(True, alpha=0.3)

    # Plot final metrics comparison
    model_names_list = list(model_names)
    x = np.arange(len(model_names_list))
    width = 0.35

    # Metrics of the last recorded epoch of each run (not necessarily the best epoch)
    val_accs = [hist['val_acc'][-1] for hist in histories]
    val_f1s = [hist['val_f1'][-1] for hist in histories]

    axes[1, 1].bar(x - width/2, val_accs, width, label='Accuracy', color='skyblue')
    axes[1, 1].bar(x + width/2, val_f1s, width, label='F1 Score', color='lightcoral')

    axes[1, 1].set_xlabel('Model')
    axes[1, 1].set_ylabel('Score')
    axes[1, 1].set_title('Final Validation Metrics Comparison')
    axes[1, 1].set_xticks(x)
    axes[1, 1].set_xticklabels(model_names_list, rotation=45, ha='right')
    axes[1, 1].legend()
    axes[1, 1].grid(True, alpha=0.3, axis='y')

    # Add value labels on bars
    for i, (acc, f1) in enumerate(zip(val_accs, val_f1s)):
        axes[1, 1].text(i - width/2, acc + 0.01, f'{acc:.3f}',
                        ha='center', va='bottom', fontsize=8)
        axes[1, 1].text(i + width/2, f1 + 0.01, f'{f1:.3f}',
                        ha='center', va='bottom', fontsize=8)

    plt.tight_layout()
    plt.savefig('plots/dl_models_training_history_all.png', dpi=300, bbox_inches='tight')
    plt.show()

# Plot training histories for all models
plot_training_history(
    [cnn_history, bilstm_history, gru_history, cnn_lstm_history],
    ['CNN', 'BiLSTM', 'GRU', 'CNN-LSTM']
)

## Cell 12 - Compare Model Performance


In [None]:
def compare_model_performance(results_dict, model_names, num_classes, class_names):
    """Compare test-set performance of several models visually and in text.

    Draws a 2x3 figure: grouped bars of test loss/accuracy/F1, one confusion
    matrix per model, and grouped per-class F1 bars. The figure is saved to
    ``plots/all_models_comparison.png`` and shown, then summary tables are
    printed.

    Args:
        results_dict: Maps model name to a dict with ``'test_metrics'``
            (keys ``'loss'``, ``'accuracy'``, ``'f1'``), ``'labels'`` and
            ``'predictions'``.
        model_names: Names (keys of ``results_dict``) in display order. The
            fixed grid has room for four confusion matrices; models beyond
            the fourth still appear in the bar charts and tables.
        num_classes: Number of classes; class ids are taken to be
            ``0 .. num_classes - 1``.
        class_names: Indexable by class id, giving the display name.
    """
    model_list = list(model_names)
    class_ids = list(range(num_classes))
    class_labels = [class_names[i] for i in class_ids]

    # savefig does not create missing directories
    os.makedirs('plots', exist_ok=True)

    # Create figure with appropriate size for 4 models
    fig = plt.figure(figsize=(20, 12))

    # 1. Test Metrics Comparison (Top left)
    ax1 = plt.subplot(2, 3, 1)
    metric_keys = [('Loss', 'loss'), ('Accuracy', 'accuracy'), ('F1 Score', 'f1')]
    x = np.arange(len(model_list))
    width = 0.25

    for i, (metric_label, key) in enumerate(metric_keys):
        values = [results_dict[name]['test_metrics'][key] for name in model_list]

        offset = (i - 1) * width
        ax1.bar(x + offset, values, width, label=metric_label)

        # Add value labels
        for j, v in enumerate(values):
            ax1.text(j + offset, v + 0.01, f'{v:.3f}',
                     ha='center', va='bottom', fontsize=8)

    ax1.set_xlabel('Model')
    ax1.set_ylabel('Score')
    ax1.set_title('Test Metrics Comparison')
    ax1.set_xticks(x)
    ax1.set_xticklabels(model_list, rotation=45, ha='right')
    ax1.legend()
    ax1.grid(True, alpha=0.3, axis='y')

    # 2-5. Confusion matrices occupy subplot slots 2..5 of the 2x3 grid
    for model_name, slot in zip(model_list, (2, 3, 4, 5)):
        ax = plt.subplot(2, 3, slot)
        # Explicit labels keep the matrix num_classes x num_classes even when a
        # class never occurs in the test labels or predictions.
        cm = confusion_matrix(results_dict[model_name]['labels'],
                              results_dict[model_name]['predictions'],
                              labels=class_ids)
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax,
                    xticklabels=class_labels, yticklabels=class_labels)
        ax.set_title(f'{model_name} Confusion Matrix')
        ax.set_ylabel('True Label')
        ax.set_xlabel('Predicted Label')

    # Per-class F1 is computed once per model and reused for the plot and the table.
    # Explicit labels guarantee one score per class (0 when a class is absent).
    per_class_f1 = {
        model_name: f1_score(results_dict[model_name]['labels'],
                             results_dict[model_name]['predictions'],
                             labels=class_ids, average=None, zero_division=0)
        for model_name in model_list
    }

    # 6. Per-class F1 Scores Comparison (Bottom right)
    ax6 = plt.subplot(2, 3, 6)
    x_classes = np.arange(num_classes)
    n_models = len(model_list)
    # Bars of one class share 0.8 of the slot width, centred on the tick
    width = 0.8 / max(n_models, 1)

    for i, model_name in enumerate(model_list):
        f1_per_class = per_class_f1[model_name]
        offset = (i - (n_models - 1) / 2) * width
        ax6.bar(x_classes + offset, f1_per_class, width, label=model_name)

        # Add value labels
        for j, v in enumerate(f1_per_class):
            ax6.text(j + offset, v + 0.01, f'{v:.2f}',
                     ha='center', va='bottom', fontsize=7, rotation=90)

    ax6.set_xlabel('Class')
    ax6.set_ylabel('F1 Score')
    ax6.set_title('Per-class F1 Scores Comparison')
    ax6.set_xticks(x_classes)
    ax6.set_xticklabels(class_labels)
    ax6.legend()
    ax6.grid(True, alpha=0.3, axis='y')

    plt.tight_layout()
    plt.savefig('plots/all_models_comparison.png', dpi=300, bbox_inches='tight')
    plt.show()

    # Print comprehensive summary
    print("\nCOMPREHENSIVE MODEL COMPARISON SUMMARY")
    print("="*80)
    # Only columns that are actually filled in are listed in the header
    print(f"{'Model':<12} {'Accuracy':<10} {'F1 Score':<10} {'Loss':<10}")
    print("-"*80)

    for model_name in model_list:
        test_metrics = results_dict[model_name]['test_metrics']
        print(f"{model_name:<12} {test_metrics['accuracy']:<10.4f} {test_metrics['f1']:<10.4f} {test_metrics['loss']:<10.4f}")

    print("\nPer-class F1 Scores:")
    print("-"*80)
    print(f"{'Model':<12}", end='')
    for label in class_labels:
        print(f"{label:<15}", end='')
    print()
    print("-"*80)

    for model_name in model_list:
        print(f"{model_name:<12}", end='')
        for f1 in per_class_f1[model_name]:
            print(f"{f1:<15.4f}", end='')
        print()

# Compare all model performance
results_dict = {
    'CNN': cnn_results,
    'BiLSTM': bilstm_results,
    'GRU': gru_results,
    'CNN-LSTM': cnn_lstm_results
}

compare_model_performance(results_dict, ['CNN', 'BiLSTM', 'GRU', 'CNN-LSTM'], 
                         num_classes, class_names)

## Cell 13 - Classification Reports


In [None]:
# Print detailed classification reports for all models
print("="*80)
print("DETAILED CLASSIFICATION REPORTS - ALL MODELS")
print("="*80)

models_to_report = [
    ('CNN', cnn_results),
    ('BiLSTM', bilstm_results),
    ('GRU', gru_results),
    ('CNN-LSTM', cnn_lstm_results)
]

for model_name, results in models_to_report:
    print(f"\n{model_name} Model:")
    print("-"*60)
    # Passing labels explicitly keeps target_names aligned with class ids and
    # avoids a ValueError when a class is missing from both labels and predictions.
    print(classification_report(
        results['labels'], 
        results['predictions'],
        labels=list(range(num_classes)),
        target_names=[class_names[i] for i in range(num_classes)],
        digits=4,
        zero_division=0
    ))

## Cell 14 - Error Analysis


In [None]:
def analyze_errors(model, test_loader, model_name, num_samples=10):
    """Print the most confidently misclassified test samples of ``model``.

    Every batch of ``test_loader`` is run through the model in eval mode with
    gradients disabled. For each wrong prediction the token ids are mapped
    back to words through ``preprocessor.idx2word`` and stored together with
    the true label, predicted label and class probabilities. The collected
    errors are printed in order of decreasing confidence.

    Args:
        model: Network invoked as ``model(sequences)``.
        test_loader: Iterable of batches with ``'sequence'`` and ``'label'``.
        model_name: Label used in the printed header.
        num_samples: Maximum number of errors to print.
    """
    # Token ids left out of the reconstructed text (special tokens)
    special_ids = [0, 2, 3]

    model.eval()
    mistakes = []

    with torch.no_grad():
        for batch in test_loader:
            inputs = batch['sequence'].to(device)
            targets = batch['label'].to(device)

            logits = model(inputs)
            class_probs = F.softmax(logits, dim=1)
            guesses = torch.max(logits.data, 1)[1]

            # Row positions inside this batch where the prediction is wrong
            wrong_rows = (guesses != targets).nonzero(as_tuple=True)[0]

            for row in wrong_rows.tolist():
                # Convert sequence back to text
                token_ids = inputs[row].cpu().numpy()
                words = []
                for token_id in token_ids:
                    if token_id in special_ids:
                        continue
                    words.append(preprocessor.idx2word.get(token_id, '<UNK>'))

                row_probs = class_probs[row]
                mistakes.append({
                    'text': ' '.join(words),
                    'true_label': class_names[targets[row].item()],
                    'predicted_label': class_names[guesses[row].item()],
                    'confidence': row_probs.max().item(),
                    'probs': row_probs.cpu().numpy()
                })

    print(f"\n{model_name} - Error Analysis")
    print("="*70)
    print(f"Total errors: {len(mistakes)}")
    print(f"\nSample misclassified texts:")
    print("-"*70)

    # High-confidence errors come first
    ranked = sorted(mistakes, key=lambda item: item['confidence'], reverse=True)

    for rank, mistake in enumerate(ranked[:num_samples], start=1):
        print(f"\nError {rank}:")
        print(f"Text: {mistake['text'][:100]}...")
        print(f"True label: {mistake['true_label']}")
        print(f"Predicted: {mistake['predicted_label']}")
        print(f"Confidence: {mistake['confidence']:.4f}")

        # Show probabilities for all classes
        per_class = []
        for j in range(num_classes):
            per_class.append(f"{class_names[j]}: {mistake['probs'][j]:.3f}")
        print(f"All probabilities: {', '.join(per_class)}")

# Analyze errors for the CNN and BiLSTM models
analyze_errors(cnn_model, test_loader, "CNN", num_samples=5)
analyze_errors(bilstm_model, test_loader, "BiLSTM", num_samples=5)

## Cell 15 - Save Models and Results


In [None]:
# Save all models and results
def save_all_dl_models(models_dict, results_dict, preprocessor):
    """Persist every model, the preprocessor, the embeddings and a summary.

    Writes one ``models/<name>_sentiment_model.pth`` checkpoint per model,
    ``models/preprocessor.pkl``, ``embeddings/embedding_matrix.npy`` and
    ``results/all_dl_models_summary.json``. The output directories are created
    if they do not exist. Besides its arguments the function reads the
    notebook globals ``num_classes``, ``embedding_matrix``, ``df`` and
    ``max_seq_length``.

    Args:
        models_dict: Maps model name to the trained model.
        results_dict: Maps model name to a dict with ``'test_metrics'`` and
            ``'history'``.
        preprocessor: Object exposing ``vocab_size``; it is pickled as-is.

    Returns:
        The summary dict that was written to the JSON file.
    """
    # torch.save / open / np.save do not create missing directories
    for directory in ('models', 'embeddings', 'results'):
        os.makedirs(directory, exist_ok=True)

    # Save models
    for model_name, model in models_dict.items():
        model_path = f'models/{model_name.lower()}_sentiment_model.pth'
        torch.save({
            'model_state_dict': model.state_dict(),
            'model_config': {
                'vocab_size': preprocessor.vocab_size,
                # NOTE(review): hard-coded; confirm it matches the embedding size the models were built with
                'embedding_dim': 100,
                'num_classes': num_classes,
                'model_type': model_name
            },
            'test_metrics': results_dict[model_name]['test_metrics']
        }, model_path)
        print(f"Saved {model_name} model to {model_path}")

    # Save preprocessor
    # NOTE: pickle files must only be loaded from trusted sources
    import pickle
    with open('models/preprocessor.pkl', 'wb') as f:
        pickle.dump(preprocessor, f)
    print("Saved preprocessor")

    # Save embedding matrix
    np.save('embeddings/embedding_matrix.npy', embedding_matrix)
    print("Saved embedding matrix")

    # Save comprehensive results summary
    summary = {
        'models': list(models_dict.keys()),
        'results': {
            name: {
                'test_accuracy': results['test_metrics']['accuracy'],
                'test_f1': results['test_metrics']['f1'],
                'test_loss': results['test_metrics']['loss'],
                # Number of epochs actually run (early stopping may end training sooner)
                'training_epochs': len(results['history']['train_loss'])
            }
            for name, results in results_dict.items()
        },
        'dataset_info': {
            'num_samples': len(df),
            'num_classes': num_classes,
            'vocab_size': preprocessor.vocab_size,
            'max_seq_length': max_seq_length,
            'embedding_dim': 100
        },
        'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    }

    with open('results/all_dl_models_summary.json', 'w', encoding='utf-8') as f:
        json.dump(summary, f, indent=4)
    print("\nSaved results summary to results/all_dl_models_summary.json")

    return summary

# Save all models
models_dict = {
    'CNN': cnn_model,
    'BiLSTM': bilstm_model,
    'GRU': gru_model,
    'CNN-LSTM': cnn_lstm_model
}

results_dict = {
    'CNN': cnn_results,
    'BiLSTM': bilstm_results,
    'GRU': gru_results,
    'CNN-LSTM': cnn_lstm_results
}

summary = save_all_dl_models(models_dict, results_dict, preprocessor)

# Report the dataset facts recorded in the saved summary
print("\n" + "=" * 80, "FINAL SUMMARY - ALL DEEP LEARNING MODELS", "=" * 80, sep="\n")
print("Dataset: {num_samples} samples, {num_classes} classes".format(**summary['dataset_info']))
print("Vocabulary size: {vocab_size}".format(**summary['dataset_info']))
print("Max sequence length: {max_seq_length}".format(**summary['dataset_info']))
print("Embedding dimension: {embedding_dim}".format(**summary['dataset_info']))

print("\nModel Performance Summary:")
print("-" * 60)
print("{:<12} {:<12} {:<12} {:<12}".format('Model', 'Accuracy', 'F1 Score', 'Loss'))
print("-" * 60)

# Rank the models from highest to lowest test accuracy
sorted_models = sorted(
    summary['results'].items(),
    key=lambda entry: entry[1]['test_accuracy'],
    reverse=True,
)

for model, metrics in sorted_models:
    print("{:<12} {:<12.4f} {:<12.4f} {:<12.4f}".format(
        model, metrics['test_accuracy'], metrics['test_f1'], metrics['test_loss']
    ))

## Cell 16 - Inference Example


In [None]:
def predict_sentiment(text, model, preprocessor, model_name):
    """Classify a single text and report the class probabilities.

    Args:
        text: Raw input text.
        model: Network invoked on a batch of one encoded sequence.
        preprocessor: Object whose ``text_to_indices(text, max_length=...)``
            encodes the text.
        model_name: Accepted for the caller's convenience; not used here.

    Returns:
        Dict with ``'text'``, the predicted ``'sentiment'`` name, its
        ``'confidence'`` (softmax probability of the predicted class) and
        ``'probabilities'`` mapping every class name to its probability.
    """
    # Encode the text and wrap it as a batch of size one
    token_ids = preprocessor.text_to_indices(text, max_length=max_seq_length)
    batch = torch.tensor([token_ids], dtype=torch.long).to(device)

    model.eval()
    with torch.no_grad():
        logits = model(batch)
        class_probs = F.softmax(logits, dim=1)
        best_idx = torch.max(logits, 1)[1].item()

    per_class = {}
    for i in range(num_classes):
        per_class[class_names[i]] = class_probs[0][i].item()

    return {
        'text': text,
        'sentiment': class_names[best_idx],
        'confidence': class_probs[0][best_idx].item(),
        'probabilities': per_class
    }

# Sample sentences used to demonstrate inference
test_texts = [
    "এই পণ্যটি খুবই ভালো, আমি খুব সন্তুষ্ট।",
    "সার্ভিস একদম বাজে, কখনো কিনবেন না।",
    "মোটামুটি ঠিক আছে, দাম অনুযায়ী ভালো।",
    "অসাধারণ! আমার খুব পছন্দ হয়েছে।",
    "খুবই হতাশাজনক অভিজ্ঞতা।"
]

print("SENTIMENT PREDICTIONS")
print("=" * 70)

for model_name, model in models_dict.items():
    print(f"\n{model_name} Predictions:")
    print("-" * 50)

    # Only the first three sample sentences are shown per model
    for text in test_texts[:3]:
        result = predict_sentiment(text, model, preprocessor, model_name)
        prob_parts = [f'{k}: {v:.4f}' for k, v in result['probabilities'].items()]
        print(f"\nText: {result['text']}")
        print(f"Sentiment: {result['sentiment']} (Confidence: {result['confidence']:.4f})")
        print(f"Probabilities: {', '.join(prob_parts)}")

# Tabulate test accuracy, F1 and parameter count for every model
print("\n\nPERFORMANCE COMPARISON")
print("=" * 70)
print("{:<10} {:<10} {:<10} {:<15}".format('Model', 'Accuracy', 'F1 Score', 'Parameters'))
print("-" * 50)
for model_name, model in models_dict.items():
    # Total number of elements across all parameter tensors
    params = sum(weight.numel() for weight in model.parameters())
    acc = results_dict[model_name]['test_metrics']['accuracy']
    f1 = results_dict[model_name]['test_metrics']['f1']
    print("{:<10} {:<10.4f} {:<10.4f} {:<15,}".format(model_name, acc, f1, params))