In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install scikit-learn pandas imbalanced-learn nltk sentencepiece transformers torch gensim -q


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.9/27.9 MB[0m [31m33.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
#import libraries
import pandas as pd
import json
import re
import string
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score
from imblearn.over_sampling import SMOTE
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from gensim.models import Word2Vec
from transformers import (
    AutoTokenizer,
    BertTokenizer,
    BertModel,
    BertForSequenceClassification,
    get_linear_schedule_with_warmup
)
from torch.optim import AdamW   # AdamW Optimizer
from tqdm import tqdm
import random
import warnings
warnings.filterwarnings('ignore')



In [None]:
#for reproducibility

def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

set_seed(42)
print(" Random seed set to 42 for reproducibility")

# Download NLTK resources
try:
    stop_words = set(stopwords.words('english'))
except LookupError:
    nltk.download('stopwords')
    nltk.download('punkt')
    nltk.download('punkt_tab')
    stop_words = set(stopwords.words('english'))


 Random seed set to 42 for reproducibility


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


In [None]:
#PART 1: Data Loading and Preprocessing

# Load training data
print("\nLoading dataset...")
with open('/content/drive/MyDrive/Colab Notebooks/AIDM /train.json', 'r', encoding='utf-8') as f:
    data = json.load(f)
df = pd.DataFrame(data)

print(f" Dataset: {len(df)} samples")
print(f" Negative: {(df['sentiments']==0).sum()}, Positive: {(df['sentiments']==1).sum()}")

# Cleaning functions
def clean_text_traditional(text):
    """Aggressive cleaning for TF-IDF/Word2Vec to removes stopwords, punctuation, numbers"""
    text = text.lower()
    text = re.sub(f'[{re.escape(string.punctuation)}]', '', text)
    text = re.sub(r'\d+', '', text)
    words = text.split()
    filtered_words = [word for word in words if word not in stop_words]
    return ' '.join(filtered_words)

def clean_text_minimal(text):
    """Minimal cleaning for BERT to preserves punctuation and context"""
    return str(text).strip()

print("\nApplying text cleaning...")
df['clean_reviews'] = df['reviews'].apply(clean_text_traditional)
df['clean_minimal'] = df['reviews'].apply(clean_text_minimal)
print(" Created 2 versions: traditional (aggressive) & minimal (for BERT)")

# Train/validation split
print("\nSplitting data...")
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['sentiments'])
train_df = train_df.reset_index(drop=True)
val_df = val_df.reset_index(drop=True)
print(f" Training: {len(train_df)}, Validation: {len(val_df)}")


Loading dataset...
 Dataset: 7401 samples
 Negative: 1082, Positive: 6319

Applying text cleaning...
 Created 2 versions: traditional (aggressive) & minimal (for BERT)

Splitting data...
 Training: 5920, Validation: 1481


In [None]:
#Part2: Feature Engineering and Exploration


# 2.1: TF-IDF + SMOTE (Baseline)

print("\n[2.1] TF-IDF + SMOTE")


vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = vectorizer.fit_transform(train_df['clean_reviews'])
y_train = train_df['sentiments']

print(f"Before SMOTE: {X_train_tfidf.shape}")

# Apply SMOTE for class balance
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train_tfidf, y_train)

print(f"After SMOTE: {X_train_smote.shape}")
print(f"Balanced classes: Neg={(y_train_smote==0).sum()}, Pos={(y_train_smote==1).sum()}")

# Transform validation data
X_val_tfidf = vectorizer.transform(val_df['clean_reviews'])
y_val = val_df['sentiments']

# Train baseline model
print("\nTraining Logistic Regression on TF-IDF...")
lr_tfidf = LogisticRegression(max_iter=1000, random_state=42)
lr_tfidf.fit(X_train_smote, y_train_smote)

val_pred_tfidf = lr_tfidf.predict(X_val_tfidf)
val_acc_tfidf = accuracy_score(y_val, val_pred_tfidf)
val_f1_tfidf = f1_score(y_val, val_pred_tfidf)

print(f" TF-IDF + LR: Accuracy={val_acc_tfidf:.4f}, F1={val_f1_tfidf:.4f}")


[2.1] TF-IDF + SMOTE
Before SMOTE: (5920, 5000)
After SMOTE: (10110, 5000)
Balanced classes: Neg=5055, Pos=5055

Training Logistic Regression on TF-IDF...
 TF-IDF + LR: Accuracy=0.8947, F1=0.9368


In [None]:

# 2.2: Word2Vec Embeddings

print("\n[2.2] Word2Vec Embeddings")


print("Tokenizing with NLTK...")
tokenized_items = [word_tokenize(train_df.loc[idx, 'clean_reviews']) for idx in range(len(train_df))]
print(f" Tokenized {len(tokenized_items)} samples")

print("Training Word2Vec (500 epochs)...")
w2v = Word2Vec(tokenized_items, vector_size=100, window=5, min_count=1, workers=4)
w2v.train(tokenized_items, total_examples=w2v.corpus_count, epochs=500)

print(" Word2Vec complete - Semantic similarities:")
print(f"  good ↔ bad: {w2v.wv.similarity('good', 'bad'):.3f}")
try:
    print(f"  excellent ↔ terrible: {w2v.wv.similarity('excellent', 'terrible'):.3f}")
    print(f"  love ↔ hate: {w2v.wv.similarity('love', 'hate'):.3f}")
except KeyError:
    print("  (some words not in vocabulary)")


[2.2] Word2Vec Embeddings
Tokenizing with NLTK...
 Tokenized 5920 samples
Training Word2Vec (500 epochs)...




 Word2Vec complete - Semantic similarities:
  good ↔ bad: 0.291
  excellent ↔ terrible: 0.018
  love ↔ hate: 0.070


In [None]:

# 2.3: XLNet Tokenization

print("\n[2.3] XLNet Tokenization (Exploration)")

xlnet_tokenizer = AutoTokenizer.from_pretrained("xlnet-base-cased")
sample_xlnet = xlnet_tokenizer(train_df.iloc[0]['clean_reviews'])
print(f" Sample XLNet tokens: {sample_xlnet['input_ids'][:20]}...")



[2.3] XLNet Tokenization (Exploration)


config.json:   0%|          | 0.00/760 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/798k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.38M [00:00<?, ?B/s]

 Sample XLNet tokens: [2712, 9888, 3251, 14616, 115, 359, 1081, 20848, 18649, 9355, 1859, 9888, 9888, 15974, 23, 63, 88, 13468, 117, 3617]...


In [None]:

# 2.4: Frozen BERT Features

print("\n[2.4] Frozen BERT Feature Extraction")


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")

# Load BERT for feature extraction (frozen weights)
bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_frozen = BertModel.from_pretrained('bert-base-uncased').to(device)
bert_frozen.eval()

class BERTDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length=128):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        encoding = self.tokenizer.encode_plus(
            str(self.texts[idx]),
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )

        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(self.labels[idx], dtype=torch.long)
        }

def extract_bert_features(dataloader, model, device):
    """Extract [CLS] embeddings from frozen BERT"""
    features, labels = [], []
    model.eval()
    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Extracting features"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            cls_embeddings = outputs.last_hidden_state[:, 0, :].cpu().numpy()

            features.append(cls_embeddings)
            labels.append(batch['labels'].numpy())

    return np.vstack(features), np.concatenate(labels)

# Create datasets
bert_train_dataset = BERTDataset(train_df['clean_minimal'].values, train_df['sentiments'].values, bert_tokenizer)
bert_val_dataset = BERTDataset(val_df['clean_minimal'].values, val_df['sentiments'].values, bert_tokenizer)

bert_train_loader = DataLoader(bert_train_dataset, batch_size=32, shuffle=False)
bert_val_loader = DataLoader(bert_val_dataset, batch_size=32, shuffle=False)

# Extract features
print("Extracting frozen BERT features...")
X_train_bert_frozen, y_train_bert = extract_bert_features(bert_train_loader, bert_frozen, device)
X_val_bert_frozen, y_val_bert = extract_bert_features(bert_val_loader, bert_frozen, device)
print(f" Features shape: {X_train_bert_frozen.shape}")

# Train classifier on frozen features
print("Training Logistic Regression on frozen BERT features...")
lr_bert_frozen = LogisticRegression(max_iter=1000, random_state=42)
lr_bert_frozen.fit(X_train_bert_frozen, y_train_bert)

val_pred_frozen = lr_bert_frozen.predict(X_val_bert_frozen)
val_acc_frozen = accuracy_score(y_val_bert, val_pred_frozen)
val_f1_frozen = f1_score(y_val_bert, val_pred_frozen)

print(f" Frozen BERT + LR: Accuracy={val_acc_frozen:.4f}, F1={val_f1_frozen:.4f}")


[2.4] Frozen BERT Feature Extraction
Device: cuda


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Extracting frozen BERT features...


Extracting features: 100%|██████████| 185/185 [00:41<00:00,  4.41it/s]
Extracting features: 100%|██████████| 47/47 [00:10<00:00,  4.46it/s]


 Features shape: (5920, 768)
Training Logistic Regression on frozen BERT features...
 Frozen BERT + LR: Accuracy=0.9196, F1=0.9533


In [None]:

# PART 3: Fine Tuned Bert Neural Network


# Hyperparameters
BATCH_SIZE = 16
MAX_LENGTH = 128
EPOCHS = 4
LEARNING_RATE = 2e-5
WEIGHT_DECAY = 0.01

print(f"\nConfiguration:")
print(f"  Batch Size: {BATCH_SIZE}")
print(f"  Max Length: {MAX_LENGTH}")
print(f"  Epochs: {EPOCHS}")
print(f"  Learning Rate: {LEARNING_RATE}")
print(f"  Optimizer: AdamW")
print(f"  Loss: CrossEntropyLoss (built-in)")

# Load BERT for fine-tuning (all layers trainable)
print("\nLoading BERT for fine-tuning...")
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
model.to(device)


# Create dataloaders (with shuffling for training)
train_dataset_finetune = BERTDataset(train_df['clean_minimal'].values, train_df['sentiments'].values, bert_tokenizer, MAX_LENGTH)
val_dataset_finetune = BERTDataset(val_df['clean_minimal'].values, val_df['sentiments'].values, bert_tokenizer, MAX_LENGTH)

train_loader_finetune = DataLoader(train_dataset_finetune, batch_size=BATCH_SIZE, shuffle=True)
val_loader_finetune = DataLoader(val_dataset_finetune, batch_size=BATCH_SIZE, shuffle=False)

# Optimizer and scheduler setup
optimizer = AdamW(model.parameters(), lr=LEARNING_RATE, eps=1e-8, weight_decay=WEIGHT_DECAY)
total_steps = len(train_loader_finetune) * EPOCHS
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

# Training functions
def train_epoch(model, data_loader, optimizer, scheduler, device):
    model.train()
    losses = []
    correct_predictions = 0

    for batch in tqdm(data_loader, desc="Training"):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss

        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()

        losses.append(loss.item())
        _, preds = torch.max(outputs.logits, dim=1)
        correct_predictions += torch.sum(preds == labels)

    accuracy = correct_predictions.double() / len(data_loader.dataset)
    return accuracy, np.mean(losses)

def eval_model(model, data_loader, device):
    model.eval()
    losses = []
    predictions_list = []
    labels_list = []

    with torch.no_grad():
        for batch in tqdm(data_loader, desc="Evaluating"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)

            losses.append(outputs.loss.item())
            _, preds = torch.max(outputs.logits, dim=1)

            predictions_list.extend(preds.cpu().numpy())
            labels_list.extend(labels.cpu().numpy())

    accuracy = accuracy_score(labels_list, predictions_list)
    return accuracy, np.mean(losses), predictions_list, labels_list

# Training
print("\nStarting training...")
print("-" * 80)

best_accuracy = 0
history = {'train_acc': [], 'train_loss': [], 'val_acc': [], 'val_loss': []}

for epoch in range(EPOCHS):
    print(f'\nEpoch {epoch + 1}/{EPOCHS}')

    train_acc, train_loss = train_epoch(model, train_loader_finetune, optimizer, scheduler, device)
    print(f'Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}')

    val_acc, val_loss, val_preds, val_labels = eval_model(model, val_loader_finetune, device)
    print(f'Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}')

    history['train_acc'].append(train_acc.item() if torch.is_tensor(train_acc) else train_acc)
    history['train_loss'].append(train_loss)
    history['val_acc'].append(val_acc)
    history['val_loss'].append(val_loss)

    if val_acc > best_accuracy:
        torch.save(model.state_dict(), 'best_finetuned_bert.pt')
        best_accuracy = val_acc
        print(f' Best model saved!')


print("Training complete!")


Configuration:
  Batch Size: 16
  Max Length: 128
  Epochs: 4
  Learning Rate: 2e-05
  Optimizer: AdamW
  Loss: CrossEntropyLoss (built-in)

Loading BERT for fine-tuning...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Starting training...
--------------------------------------------------------------------------------

Epoch 1/4


Training: 100%|██████████| 370/370 [02:20<00:00,  2.64it/s]


Train Loss: 0.2275 | Train Acc: 0.9155


Evaluating: 100%|██████████| 93/93 [00:11<00:00,  8.00it/s]


Val Loss: 0.1529 | Val Acc: 0.9548
 Best model saved!

Epoch 2/4


Training: 100%|██████████| 370/370 [02:17<00:00,  2.69it/s]


Train Loss: 0.0930 | Train Acc: 0.9709


Evaluating: 100%|██████████| 93/93 [00:11<00:00,  8.08it/s]


Val Loss: 0.1715 | Val Acc: 0.9541

Epoch 3/4


Training: 100%|██████████| 370/370 [02:17<00:00,  2.69it/s]


Train Loss: 0.0367 | Train Acc: 0.9909


Evaluating: 100%|██████████| 93/93 [00:11<00:00,  7.97it/s]


Val Loss: 0.2399 | Val Acc: 0.9534

Epoch 4/4


Training: 100%|██████████| 370/370 [02:17<00:00,  2.69it/s]


Train Loss: 0.0132 | Train Acc: 0.9970


Evaluating: 100%|██████████| 93/93 [00:11<00:00,  8.07it/s]

Val Loss: 0.2652 | Val Acc: 0.9514
Training complete!





In [None]:
#Part 4: Comparitive Analysis

# Load best model and evaluate
model.load_state_dict(torch.load('best_finetuned_bert.pt'))
final_acc, final_loss, final_preds, final_labels = eval_model(model, val_loader_finetune, device)
final_f1 = f1_score(final_labels, final_preds)

# Comparison table
results = {
    'TF-IDF + SMOTE + LR': {'accuracy': val_acc_tfidf, 'f1_score': val_f1_tfidf, 'type': 'Baseline'},
    'Frozen BERT + LR': {'accuracy': val_acc_frozen, 'f1_score': val_f1_frozen, 'type': 'Intermediate'},
    'Fine-tuned BERT (NN)': {'accuracy': final_acc, 'f1_score': final_f1, 'type': 'Final Model'}
}

print("\n" + "-"*80)
print("MODEL COMPARISON")
print("-"*80)
print(f"{'Model':<30} {'Type':<15} {'Accuracy':<12} {'F1 Score':<12}")
print("-"*80)
for model_name, metrics in results.items():
    print(f"{model_name:<30} {metrics['type']:<15} {metrics['accuracy']:<12.4f} {metrics['f1_score']:<12.4f}")
print("-"*80)

print(f"\n Best Model: Fine-tuned BERT")
print(f"  Accuracy: {final_acc:.4f}")
print(f"  F1 Score: {final_f1:.4f}")

# Detailed evaluation
print("\n" + "-"*80)
print("CLASSIFICATION REPORT (Fine-tuned BERT)")
print("-"*80)
print(classification_report(final_labels, final_preds, target_names=['Negative', 'Positive']))

print("\nConfusion Matrix:")
cm = confusion_matrix(final_labels, final_preds)
print(cm)
print(f"\n[[TN={cm[0,0]}, FP={cm[0,1]}],")
print(f" [FN={cm[1,0]}, TP={cm[1,1]}]]")


Evaluating: 100%|██████████| 93/93 [00:11<00:00,  8.15it/s]


--------------------------------------------------------------------------------
MODEL COMPARISON
--------------------------------------------------------------------------------
Model                          Type            Accuracy     F1 Score    
--------------------------------------------------------------------------------
TF-IDF + SMOTE + LR            Baseline        0.8947       0.9368      
Frozen BERT + LR               Intermediate    0.9196       0.9533      
Fine-tuned BERT (NN)           Final Model     0.9548       0.9736      
--------------------------------------------------------------------------------

 Best Model: Fine-tuned BERT
  Accuracy: 0.9548
  F1 Score: 0.9736

--------------------------------------------------------------------------------
CLASSIFICATION REPORT (Fine-tuned BERT)
--------------------------------------------------------------------------------
              precision    recall  f1-score   support

    Negative       0.86      0.83      0




In [14]:
# Part 5: Test Set Predictions


try:
    print("\nLoading test data...")
    with open('/content/drive/MyDrive/Colab Notebooks/AIDM /test.json', 'r', encoding='utf-8') as f:
        test_data = json.load(f)
    test_df = pd.DataFrame(test_data)
    print(f" Loaded {len(test_df)} test samples")

    test_df['clean_minimal'] = test_df['reviews'].apply(clean_text_minimal)

    test_dataset = BERTDataset(test_df['clean_minimal'].values, np.zeros(len(test_df)), bert_tokenizer, MAX_LENGTH)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

    print("Making predictions...")
    model.eval()
    test_predictions = []

    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Predicting"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            _, preds = torch.max(outputs.logits, dim=1)
            test_predictions.extend(preds.cpu().numpy())

    # submission.csv
    submission_df = pd.DataFrame({'review': test_df['reviews'].values, 'sentiment': test_predictions})
    submission_df.to_csv('submission.csv', index=False)

    print(f"\n Saved to 'submission.csv'")
    print(f"  Total: {len(test_predictions)}")
    print(f"  Positive: {(np.array(test_predictions)==1).sum()}")
    print(f"  Negative: {(np.array(test_predictions)==0).sum()}")

    print("\nSample Predictions:")
    print("-" * 80)
    for i in range(min(5, len(submission_df))):
        review = submission_df.iloc[i]['review'][:100] + "..."
        sentiment = "Positive" if submission_df.iloc[i]['sentiment'] == 1 else "Negative"
        print(f"{i+1}. {sentiment}: {review}")

except FileNotFoundError:
    print("\n! test.json not found - skipping test predictions")



Loading test data...
 Loaded 1851 test samples
Making predictions...


Predicting: 100%|██████████| 116/116 [00:17<00:00,  6.53it/s]


 Saved to 'submission.csv'
  Total: 1851
  Positive: 1595
  Negative: 256

Sample Predictions:
--------------------------------------------------------------------------------
1. Negative: I bought 2 sleepers.  sleeper had holes in the arm pit area and the other sleeper had a whole where ...
2. Positive: I dare say these are just about the sexiest things I've ever worn. Oh I've had and have G-strings, h...
3. Positive: everything about the transaction (price, delivery time, quality of item) was great.  I wouldn't hesi...
4. Positive: Not bad for just a shirt.  Very durable, and matched my teams colors perfectly.  Its just a shirt, b...
5. Positive: These are truly wrinkle free and longer than the average womans botton down, which I love!!   Overal...





In [None]:
# Part 6: Sample Analysis (Incorrect and Correct)


val_df_analysis = val_df.copy().reset_index(drop=True)
val_df_analysis['predictions'] = final_preds
val_df_analysis['correct'] = val_df_analysis['sentiments'] == val_df_analysis['predictions']

# Correctly classified
correct_samples = val_df_analysis[val_df_analysis['correct']]
print("\n--- Correctly Classified Examples ---")
for i in range(min(3, len(correct_samples))):
    sample = correct_samples.iloc[i]
    label = "Positive" if sample['sentiments'] == 1 else "Negative"
    print(f"\nExample {i+1} ({label}):")
    print(f"Review: {sample['reviews'][:200]}...")

# Incorrectly classified
incorrect_samples = val_df_analysis[~val_df_analysis['correct']]
if len(incorrect_samples) > 0:
    print("\n--- Incorrectly Classified Examples ---")
    for i in range(min(3, len(incorrect_samples))):
        sample = incorrect_samples.iloc[i]
        true_label = "Positive" if sample['sentiments'] == 1 else "Negative"
        pred_label = "Positive" if sample['predictions'] == 1 else "Negative"
        print(f"\nExample {i+1} (True: {true_label}, Predicted: {pred_label}):")
        print(f"Review: {sample['reviews'][:200]}...")
else:
    print("\n Perfect classification - no errors!")


--- Correctly Classified Examples ---

Example 1 (Positive):
Review: I love this bag!  The quality of the canvas and stitching is wonderful.  I would have expected to pay a lot more for this bag. The orange is a great color...

Example 2 (Positive):
Review: Great product at a great price!!  Fast shipping too!...

Example 3 (Positive):
Review: I've had two of these and they are great. Truly no headache! I'm here to buy a third...

--- Incorrectly Classified Examples ---

Example 1 (True: Negative, Predicted: Positive):
Review: I found the band underneath the cups rolled under and wasn't very elastic...very uncomfortable.  It fit great when I tried it in the store
(even had it fit to me by a Medela rep) but after a few weari...

Example 2 (True: Negative, Predicted: Positive):
Review: This is the only Haynes T-shirt that does not bunch at the back of the neck. It is light and drapes well. But it is too light for printing....

Example 3 (True: Negative, Predicted: Positive):
Review: This