In [1]:
# ================================================
# ✅ MULTIMODAL SENTIMENT ANALYSIS
# MuRIL (Text) + Swin Transformer (Vision) Fusion
# ================================================

import os
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from transformers import AutoTokenizer, AutoModel, AutoImageProcessor, SwinForImageClassification
from torch.optim import AdamW
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix, classification_report
import re
import string
import json

# ================================================
# ✅ 1️⃣ SETUP & PATHS
# ================================================
# Locations of the image folder and of the CSV that describes each sample.
image_dir = "/kaggle/input/basem/images"
input_csv = "/kaggle/input/basem/dataset.csv"

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# ================================================
# ✅ 2️⃣ LOAD & PREPROCESS DATA
# ================================================
# Raw table; later code reads its 'image_path', 'extracted_text' and
# 'label 2' columns.
df = pd.read_csv(input_csv)

def clean_text(text):
    """Normalise a raw text snippet for tokenisation.

    Missing values (anything `pd.isna` reports as NA) become the empty
    string. Otherwise URLs (``http(s)://...`` and ``www....``), ``<...>``
    tags and all ASCII punctuation are removed, and every run of whitespace
    is collapsed to a single space with leading/trailing whitespace dropped.

    Args:
        text: The raw string, or a missing value such as ``None``/NaN.

    Returns:
        The cleaned string (possibly empty).
    """
    if pd.isna(text):
        return ""

    without_urls = re.sub(r'https?://\S+|www\.\S+', '', text)
    without_tags = re.sub(r'<.*?>', '', without_urls)

    # One translation pass deletes every character in string.punctuation.
    punctuation_table = str.maketrans('', '', string.punctuation)
    without_punctuation = without_tags.translate(punctuation_table)

    # split() with no argument also discards leading/trailing whitespace.
    return " ".join(without_punctuation.split())

# Create multimodal dataset
# A row is kept only when it has a label, text that is still non-empty after
# cleaning, and an image file that actually exists under `image_dir`.
multimodal_data = []
for _, row in df.iterrows():
    raw_text = row['extracted_text']
    raw_label = row['label 2']
    image_filename = row['image_path']

    # A missing field means there is no complete (image, text, label) sample.
    if pd.isna(raw_text) or pd.isna(raw_label) or pd.isna(image_filename):
        continue

    # Judge emptiness on the *cleaned* text: a cell holding only punctuation,
    # URLs or tags is non-empty before cleaning but empty afterwards.
    # str() guards against non-string cells (e.g. values pandas parsed as numbers).
    text = clean_text(str(raw_text))
    if not text:
        continue

    full_image_path = os.path.join(image_dir, str(image_filename))
    # isfile rather than exists: an empty filename resolves to the image
    # directory itself, which exists but cannot be opened as an image.
    if not os.path.isfile(full_image_path):
        continue

    multimodal_data.append({
        'Image_path': full_image_path,
        'text': text,
        'label': int(raw_label) - 1,  # Convert to 0-indexed
    })

# Explicit columns keep the schema stable even when no row survives the filter.
processed_df = pd.DataFrame(multimodal_data, columns=['Image_path', 'text', 'label'])
print(f"Total multimodal samples: {len(processed_df)}")

# ================================================
# ✅ 3️⃣ DATA SPLITS
# ================================================
# Stage 1: hold out 30% of the samples, stratified on the label.
train_df, temp_df = train_test_split(
    processed_df,
    test_size=0.3,
    stratify=processed_df['label'],
    random_state=42,
)

# Stage 2: divide the held-out 30% again. train_test_split returns the larger
# remainder first, so two thirds of it (~20% overall) becomes the test set and
# the final third (~10% overall) becomes the validation set.
test_df, val_df = train_test_split(
    temp_df,
    test_size=1/3,
    stratify=temp_df['label'],
    random_state=42,
)

print(f"Train: {len(train_df)}, Val: {len(val_df)}, Test: {len(test_df)}")

# ================================================
# ✅ 4️⃣ LOAD MODELS & PROCESSORS
# ================================================
# Hub checkpoint ids, named once so tokenizer/model and processor/model pairs
# cannot drift apart.
text_checkpoint = "google/muril-base-cased"
vision_checkpoint = "microsoft/swin-base-patch4-window7-224"

# Text Model (MuRIL): tokenizer plus the bare encoder.
text_tokenizer = AutoTokenizer.from_pretrained(text_checkpoint)
muril_model = AutoModel.from_pretrained(text_checkpoint)

# Vision Model (Swin Transformer): image preprocessor plus classifier wrapper.
# The checkpoint's classification head has a different number of outputs, so
# ignore_mismatched_sizes lets a fresh 3-label head be created instead of
# failing. The fusion model only uses the `.swin` backbone of this object.
vision_processor = AutoImageProcessor.from_pretrained(vision_checkpoint)
swin_model = SwinForImageClassification.from_pretrained(
    vision_checkpoint,
    num_labels=3,
    ignore_mismatched_sizes=True,
)

# ================================================
# ✅ 5️⃣ MULTIMODAL DATASET
# ================================================
class MultiModalDataset(Dataset):
    """Dataset pairing each sample's text with its image and label.

    Rows are read positionally from a DataFrame that must provide the columns
    'text', 'Image_path' and 'label'.

    Args:
        df: DataFrame holding one sample per row.
        text_tokenizer: Callable tokenizer invoked with ``truncation``,
            ``padding``, ``max_length`` and ``return_tensors='pt'``; its
            result must expose 'input_ids' and 'attention_mask'.
        vision_processor: Callable image processor invoked with
            ``return_tensors="pt"``; its result must expose 'pixel_values'.
        max_length: Length every text is padded/truncated to.
    """

    def __init__(self, df, text_tokenizer, vision_processor, max_length=128):
        self.df = df
        self.text_tokenizer = text_tokenizer
        self.vision_processor = vision_processor
        self.max_length = max_length

    def __len__(self):
        """Return the number of samples (rows of the DataFrame)."""
        return len(self.df)

    def __getitem__(self, idx):
        """Build the model inputs for the sample at position `idx`.

        Returns:
            A dict with 'input_ids', 'attention_mask' (both flattened to 1-D),
            'pixel_values' (leading dimension squeezed away) and 'label'
            (a scalar long tensor).
        """
        # iloc: positional lookup, so the DataFrame's index labels are irrelevant.
        row = self.df.iloc[idx]

        # Process text
        text_encoded = self.text_tokenizer(
            row['text'],
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt'
        )

        # Process image. The context manager closes the underlying file
        # deterministically; convert() returns an independent, fully loaded
        # image, so it stays usable after the file is closed.
        with Image.open(row['Image_path']) as raw_image:
            image = raw_image.convert('RGB')
        image_inputs = self.vision_processor(image, return_tensors="pt")

        # Label
        label = row['label']

        return {
            'input_ids': text_encoded['input_ids'].flatten(),
            'attention_mask': text_encoded['attention_mask'].flatten(),
            'pixel_values': image_inputs['pixel_values'].squeeze(0),
            'label': torch.tensor(label, dtype=torch.long)
        }

# ================================================
# ✅ 6️⃣ MULTIMODAL FUSION MODEL
# ================================================
class MultiModalSentimentModel(nn.Module):
    """Sentiment classifier that fuses a text encoder with a Swin backbone.

    Each modality is encoded, projected to `fusion_dim`, and the two resulting
    vectors are treated as a two-token sequence for self-attention. The
    attended tokens are concatenated, passed through a fusion layer and then
    through a small MLP that produces the class logits.

    Args:
        muril_model: Text encoder exposing ``config.hidden_size`` and
            returning an object with ``last_hidden_state``.
        swin_model: Vision model exposing ``config.hidden_size`` and a
            ``.swin`` backbone that returns ``last_hidden_state``.
        num_classes: Number of output logits.
        dropout: Dropout probability used before fusion and in the head.
        fusion_dim: Width of the shared space (must be divisible by the 8
            attention heads).
    """

    def __init__(self, muril_model, swin_model, num_classes=3, dropout=0.3, fusion_dim=512):
        super().__init__()

        text_width = muril_model.config.hidden_size
        vision_width = swin_model.config.hidden_size
        head_width = fusion_dim // 2

        # Text branch (MuRIL)
        self.text_encoder = muril_model
        self.text_projection = nn.Linear(text_width, fusion_dim)

        # Vision branch (Swin): only the backbone is kept, not its classifier head.
        self.vision_encoder = swin_model.swin
        self.vision_projection = nn.Linear(vision_width, fusion_dim)

        # Fusion layers: the two attended tokens are concatenated (2 * fusion_dim).
        self.fusion_dropout = nn.Dropout(dropout)
        self.fusion_layer = nn.Linear(2 * fusion_dim, fusion_dim)
        self.fusion_activation = nn.ReLU()

        # Classification head
        self.classifier = nn.Sequential(
            nn.Linear(fusion_dim, head_width),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(head_width, num_classes),
        )

        # Attention mechanism for fusion
        self.attention = nn.MultiheadAttention(fusion_dim, num_heads=8, batch_first=True)

    def _embed_text(self, input_ids, attention_mask):
        """Encode the text and project its first-position ([CLS]) state."""
        encoded = self.text_encoder(
            input_ids=input_ids,
            attention_mask=attention_mask,
        )
        cls_state = encoded.last_hidden_state[:, 0, :]
        return self.text_projection(cls_state)

    def _embed_image(self, pixel_values):
        """Encode the image and project the mean of its hidden states."""
        encoded = self.vision_encoder(pixel_values)
        pooled = encoded.last_hidden_state.mean(dim=1)  # Global average pooling
        return self.vision_projection(pooled)

    def forward(self, input_ids, attention_mask, pixel_values):
        """Return the class logits for a batch of (text, image) pairs."""
        text_token = self._embed_text(input_ids, attention_mask)
        image_token = self._embed_image(pixel_values)

        # Cross-modal attention over the two-token sequence [text, image].
        tokens = torch.stack((text_token, image_token), dim=1)
        attended, _ = self.attention(tokens, tokens, tokens)

        # Fusion: concatenate the attended text and image tokens side by side.
        attended_text, attended_image = attended.unbind(dim=1)
        fused = torch.cat((attended_text, attended_image), dim=1)
        fused = self.fusion_dropout(fused)
        fused = self.fusion_activation(self.fusion_layer(fused))

        # Classification
        return self.classifier(fused)

# ================================================
# ✅ 7️⃣ DATALOADERS
# ================================================
batch_size = 8

# One dataset per split, all sharing the same tokenizer and image processor.
train_dataset, val_dataset, test_dataset = (
    MultiModalDataset(split_df, text_tokenizer, vision_processor)
    for split_df in (train_df, val_df, test_df)
)

# Only the training loader shuffles; evaluation keeps the DataFrame order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader, test_loader = (
    DataLoader(eval_dataset, batch_size=batch_size, shuffle=False)
    for eval_dataset in (val_dataset, test_dataset)
)

# ================================================
# ✅ 8️⃣ MODEL INITIALIZATION
# ================================================
model = MultiModalSentimentModel(muril_model, swin_model, num_classes=3)
model = model.to(device)

# ================================================
# ✅ 9️⃣ LOSS & OPTIMIZER
# ================================================
# Calculate class weights
# `class_weights` holds the per-class sample counts of the training split
# (ordered by label); `weights` is the inverse frequency total / count, so
# rarer classes contribute more to the loss.
label_counts = train_df['label'].value_counts().sort_index()
class_weights = label_counts.tolist()
total = sum(class_weights)
weights = [total / count for count in class_weights]
print(f"Class distribution: {class_weights}")
print(f"Class weights: {weights}")

loss_weights = torch.tensor(weights, dtype=torch.float32).to(device)
criterion = nn.CrossEntropyLoss(weight=loss_weights)

# Different learning rates for different components: the encoders get a small
# step, the projection/fusion/attention/classifier layers a larger one.
encoder_lr = 1e-5
head_lr = 2e-4
lr_by_module = (
    (model.text_encoder, encoder_lr),
    (model.vision_encoder, encoder_lr),
    (model.text_projection, head_lr),
    (model.vision_projection, head_lr),
    (model.fusion_layer, head_lr),
    (model.attention, head_lr),
    (model.classifier, head_lr),
)
optimizer = AdamW(
    [{'params': module.parameters(), 'lr': lr} for module, lr in lr_by_module],
    weight_decay=0.01,
)

# ================================================
# ✅ 🔟 TRAINING LOOP
# ================================================
num_epochs = 15
patience = 5  # epochs without a validation-F1 gain tolerated before stopping
patience_counter = 0
best_val_f1 = 0.0


def _batch_to_device(batch):
    """Return (input_ids, attention_mask, pixel_values, labels) moved to `device`."""
    return tuple(
        batch[key].to(device)
        for key in ('input_ids', 'attention_mask', 'pixel_values', 'label')
    )


print("🚀 Starting multimodal training...")

for epoch in range(num_epochs):
    # ------------------------------------------------------------
    # Training pass: one optimizer step per batch.
    # ------------------------------------------------------------
    model.train()
    total_train_loss = 0
    train_predictions, train_labels = [], []

    for batch in tqdm(train_loader, desc=f"Train Epoch {epoch+1}"):
        input_ids, attention_mask, pixel_values, labels = _batch_to_device(batch)

        optimizer.zero_grad()

        logits = model(input_ids, attention_mask, pixel_values)
        loss = criterion(logits, labels)

        loss.backward()
        # Gradient clipping: cap the global gradient norm at 1.0 before stepping.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()

        total_train_loss += loss.item()

        predictions = torch.argmax(logits, dim=1)
        train_predictions.extend(predictions.cpu().numpy())
        train_labels.extend(labels.cpu().numpy())

    avg_train_loss = total_train_loss / len(train_loader)
    train_accuracy = accuracy_score(train_labels, train_predictions)
    _, _, train_f1, _ = precision_recall_fscore_support(
        train_labels, train_predictions, average='weighted'
    )

    # ------------------------------------------------------------
    # Validation pass: no gradients, no parameter updates.
    # ------------------------------------------------------------
    model.eval()
    total_val_loss = 0
    val_predictions, val_labels = [], []

    with torch.no_grad():
        for batch in tqdm(val_loader, desc=f"Validation Epoch {epoch+1}"):
            input_ids, attention_mask, pixel_values, labels = _batch_to_device(batch)

            logits = model(input_ids, attention_mask, pixel_values)
            loss = criterion(logits, labels)

            total_val_loss += loss.item()

            predictions = torch.argmax(logits, dim=1)
            val_predictions.extend(predictions.cpu().numpy())
            val_labels.extend(labels.cpu().numpy())

    avg_val_loss = total_val_loss / len(val_loader)
    val_accuracy = accuracy_score(val_labels, val_predictions)
    val_precision, val_recall, val_f1, _ = precision_recall_fscore_support(
        val_labels, val_predictions, average='weighted'
    )

    print(f"\nEpoch [{epoch+1}/{num_epochs}]")
    print(f"Train - Loss: {avg_train_loss:.4f}, Acc: {train_accuracy:.4f}, F1: {train_f1:.4f}")
    print(f"Val   - Loss: {avg_val_loss:.4f}, Acc: {val_accuracy:.4f}, F1: {val_f1:.4f}")

    # ------------------------------------------------------------
    # Early stopping on weighted validation F1: checkpoint on every strict
    # improvement, stop after `patience` consecutive epochs without one.
    # ------------------------------------------------------------
    improved = val_f1 > best_val_f1
    if not improved:
        patience_counter += 1
        print(f"⏰ No improvement — patience {patience_counter}/{patience}")

        if patience_counter >= patience:
            print(f"🛑 Early stopping triggered at epoch {epoch+1}")
            break
    else:
        best_val_f1 = val_f1
        patience_counter = 0
        torch.save(model.state_dict(), "best_multimodal_model.pt")
        print("✅ Validation F1 improved — model saved.")

    print("-" * 60)

# ================================================
# ✅ 1️⃣1️⃣ FINAL TEST EVALUATION
# ================================================
print("\n🔍 Loading best model for final evaluation...")
# Restore the checkpoint written by the training loop. map_location remaps the
# stored tensors onto the current `device`, so a checkpoint saved on a GPU can
# still be loaded when this code runs on a CPU-only machine.
best_state = torch.load("best_multimodal_model.pt", map_location=device)
model.load_state_dict(best_state)
model.eval()

# Collected over the whole test set for the metrics computed afterwards.
test_predictions = []
test_labels = []
total_test_loss = 0

with torch.no_grad():  # evaluation only: no gradients needed
    for batch in tqdm(test_loader, desc="Final Test Evaluation"):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        pixel_values = batch['pixel_values'].to(device)
        labels = batch['label'].to(device)

        logits = model(input_ids, attention_mask, pixel_values)
        loss = criterion(logits, labels)

        total_test_loss += loss.item()
        # Predicted class = index of the largest logit.
        predictions = torch.argmax(logits, dim=1)
        test_predictions.extend(predictions.cpu().numpy())
        test_labels.extend(labels.cpu().numpy())

# ================================================
# ✅ 1️⃣2️⃣ COMPREHENSIVE RESULTS
# ================================================
# Display names for the 0-indexed labels.
# NOTE(review): the mapping 0/1/2 -> Negative/Neutral/Positive is assumed from
# this list's order; confirm against the dataset's label definition.
class_names = ['Negative', 'Neutral', 'Positive']
# Passing the label ids explicitly keeps every per-class array and the
# confusion matrix at len(class_names) entries even if a class never occurs
# in the test labels or predictions (otherwise the per-class loop would
# raise IndexError and classification_report would reject target_names).
class_ids = list(range(len(class_names)))

test_accuracy = accuracy_score(test_labels, test_predictions)
# zero_division=0 yields the same 0.0 scikit-learn would report for an
# undefined metric, without emitting a warning.
precision, recall, f1, _ = precision_recall_fscore_support(
    test_labels, test_predictions, labels=class_ids, average='weighted', zero_division=0
)
cm = confusion_matrix(test_labels, test_predictions, labels=class_ids)

print("\n" + "="*80)
print("🎯 FINAL MULTIMODAL SENTIMENT ANALYSIS RESULTS")
print("   MuRIL (Text) + Swin Transformer (Vision) Fusion")
print("="*80)
print(f"Test Accuracy:  {test_accuracy:.4f}")
print(f"Test Precision: {precision:.4f}")
print(f"Test Recall:    {recall:.4f}")
print(f"Test F1-Score:  {f1:.4f}")
print(f"Test Loss:      {total_test_loss/len(test_loader):.4f}")
print(f"\nConfusion Matrix:\n{cm}")

# Per-class metrics (average=None returns one value per entry of class_ids)
precision_per_class, recall_per_class, f1_per_class, support = precision_recall_fscore_support(
    test_labels, test_predictions, labels=class_ids, average=None, zero_division=0
)

print("\n📊 PER-CLASS DETAILED METRICS:")
print("-" * 50)
for i, class_name in enumerate(class_names):
    print(f"{class_name:>8}: Precision={precision_per_class[i]:.4f}, Recall={recall_per_class[i]:.4f}, F1={f1_per_class[i]:.4f}, Support={support[i]}")

print(f"\n📈 Classification Report:")
print(classification_report(
    test_labels, test_predictions, labels=class_ids, target_names=class_names, zero_division=0
))

# ================================================
# ✅ 1️⃣3️⃣ SAVE RESULTS
# ================================================
# Mean test loss per batch, as also shown in the printed summary.
avg_test_loss = total_test_loss/len(test_loader)

# NumPy arrays are converted to plain lists so the json module can encode them.
per_class_metrics = {
    'precision': precision_per_class.tolist(),
    'recall': recall_per_class.tolist(),
    'f1': f1_per_class.tolist(),
    'support': support.tolist(),
}

dataset_info = {
    'train_samples': len(train_df),
    'val_samples': len(val_df),
    'test_samples': len(test_df),
    'total_samples': len(processed_df),
}

results = {
    'model_type': 'multimodal_muril_swin',
    'test_accuracy': float(test_accuracy),
    'test_precision': float(precision),
    'test_recall': float(recall),
    'test_f1': float(f1),
    'test_loss': float(avg_test_loss),
    'confusion_matrix': cm.tolist(),
    'per_class_metrics': per_class_metrics,
    'class_names': class_names,
    'dataset_info': dataset_info,
}

with open('/kaggle/working/multimodal_results.json', 'w') as results_file:
    results_file.write(json.dumps(results, indent=2))

print(f"\n✅ Results saved to 'multimodal_results.json'")
print(f"🎉 Multimodal sentiment analysis complete!")
# `f1` here is the weighted F1 measured on the test set.
print(f"🏆 Best F1-Score achieved: {f1:.4f}")

2025-07-07 10:16:52.250143: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1751883412.463166      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1751883412.524248      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Using device: cuda
Total multimodal samples: 4509
Train: 3156, Val: 451, Test: 902


tokenizer_config.json:   0%|          | 0.00/206 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/113 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/953M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/953M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/255 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/352M [00:00<?, ?B/s]

Some weights of SwinForImageClassification were not initialized from the model checkpoint at microsoft/swin-base-patch4-window7-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([3]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 1024]) in the checkpoint and torch.Size([3, 1024]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Class distribution: [1404, 1237, 515]
Class weights: [2.247863247863248, 2.551333872271625, 6.128155339805825]
🚀 Starting multimodal training...



Train Epoch 1:   0%|          | 0/395 [00:00<?, ?it/s][A
Train Epoch 1:   0%|          | 1/395 [00:04<31:29,  4.79s/it][A
Train Epoch 1:   1%|          | 2/395 [00:06<18:12,  2.78s/it][A
Train Epoch 1:   1%|          | 3/395 [00:07<14:14,  2.18s/it][A
Train Epoch 1:   1%|          | 4/395 [00:09<13:12,  2.03s/it][A
Train Epoch 1:   1%|▏         | 5/395 [00:10<10:48,  1.66s/it][A
Train Epoch 1:   2%|▏         | 6/395 [00:11<09:45,  1.51s/it][A
Train Epoch 1:   2%|▏         | 7/395 [00:12<08:33,  1.32s/it][A
Train Epoch 1:   2%|▏         | 8/395 [00:13<08:05,  1.26s/it][A
Train Epoch 1:   2%|▏         | 9/395 [00:15<09:52,  1.54s/it][A
Train Epoch 1:   3%|▎         | 10/395 [00:17<09:40,  1.51s/it][A
Train Epoch 1:   3%|▎         | 11/395 [00:18<09:50,  1.54s/it][A
Train Epoch 1:   3%|▎         | 12/395 [00:20<09:10,  1.44s/it][A
Train Epoch 1:   3%|▎         | 13/395 [00:21<08:35,  1.35s/it][A
Train Epoch 1:   4%|▎         | 14/395 [00:22<08:02,  1.27s/it][A
Train Epoch 


Epoch [1/15]
Train - Loss: 0.9232, Acc: 0.6077, F1: 0.6102
Val   - Loss: 0.8163, Acc: 0.6541, F1: 0.6721
✅ Validation F1 improved — model saved.
------------------------------------------------------------


Train Epoch 2: 100%|██████████| 395/395 [03:59<00:00,  1.65it/s]
Validation Epoch 2: 100%|██████████| 57/57 [00:21<00:00,  2.63it/s]



Epoch [2/15]
Train - Loss: 0.6524, Acc: 0.7769, F1: 0.7791
Val   - Loss: 0.7273, Acc: 0.7361, F1: 0.7354
✅ Validation F1 improved — model saved.
------------------------------------------------------------


Train Epoch 3: 100%|██████████| 395/395 [04:00<00:00,  1.65it/s]
Validation Epoch 3: 100%|██████████| 57/57 [00:21<00:00,  2.65it/s]



Epoch [3/15]
Train - Loss: 0.4786, Acc: 0.8685, F1: 0.8681
Val   - Loss: 1.4125, Acc: 0.7472, F1: 0.7456
✅ Validation F1 improved — model saved.
------------------------------------------------------------


Train Epoch 4: 100%|██████████| 395/395 [04:00<00:00,  1.64it/s]
Validation Epoch 4: 100%|██████████| 57/57 [00:21<00:00,  2.61it/s]



Epoch [4/15]
Train - Loss: 0.3371, Acc: 0.9265, F1: 0.9264
Val   - Loss: 1.9627, Acc: 0.7494, F1: 0.7531
✅ Validation F1 improved — model saved.
------------------------------------------------------------


Train Epoch 5: 100%|██████████| 395/395 [04:00<00:00,  1.64it/s]
Validation Epoch 5: 100%|██████████| 57/57 [00:21<00:00,  2.65it/s]



Epoch [5/15]
Train - Loss: 0.2753, Acc: 0.9534, F1: 0.9532
Val   - Loss: 2.0989, Acc: 0.7472, F1: 0.7458
⏰ No improvement — patience 1/5
------------------------------------------------------------


Train Epoch 6: 100%|██████████| 395/395 [03:59<00:00,  1.65it/s]
Validation Epoch 6: 100%|██████████| 57/57 [00:21<00:00,  2.63it/s]



Epoch [6/15]
Train - Loss: 0.1870, Acc: 0.9658, F1: 0.9658
Val   - Loss: 2.3513, Acc: 0.7361, F1: 0.7397
⏰ No improvement — patience 2/5
------------------------------------------------------------


Train Epoch 7: 100%|██████████| 395/395 [03:59<00:00,  1.65it/s]
Validation Epoch 7: 100%|██████████| 57/57 [00:21<00:00,  2.65it/s]



Epoch [7/15]
Train - Loss: 0.1524, Acc: 0.9785, F1: 0.9784
Val   - Loss: 3.4326, Acc: 0.7517, F1: 0.7467
⏰ No improvement — patience 3/5
------------------------------------------------------------


Train Epoch 8: 100%|██████████| 395/395 [04:01<00:00,  1.63it/s]
Validation Epoch 8: 100%|██████████| 57/57 [00:21<00:00,  2.60it/s]



Epoch [8/15]
Train - Loss: 0.1294, Acc: 0.9794, F1: 0.9794
Val   - Loss: 3.1050, Acc: 0.7605, F1: 0.7562
✅ Validation F1 improved — model saved.
------------------------------------------------------------


Train Epoch 9: 100%|██████████| 395/395 [04:01<00:00,  1.63it/s]
Validation Epoch 9: 100%|██████████| 57/57 [00:21<00:00,  2.60it/s]



Epoch [9/15]
Train - Loss: 0.1077, Acc: 0.9845, F1: 0.9845
Val   - Loss: 3.2978, Acc: 0.7472, F1: 0.7481
⏰ No improvement — patience 1/5
------------------------------------------------------------


Train Epoch 10: 100%|██████████| 395/395 [04:03<00:00,  1.62it/s]
Validation Epoch 10: 100%|██████████| 57/57 [00:21<00:00,  2.61it/s]



Epoch [10/15]
Train - Loss: 0.1069, Acc: 0.9816, F1: 0.9816
Val   - Loss: 3.2908, Acc: 0.7428, F1: 0.7437
⏰ No improvement — patience 2/5
------------------------------------------------------------


Train Epoch 11: 100%|██████████| 395/395 [04:02<00:00,  1.63it/s]
Validation Epoch 11: 100%|██████████| 57/57 [00:21<00:00,  2.60it/s]



Epoch [11/15]
Train - Loss: 0.1310, Acc: 0.9823, F1: 0.9822
Val   - Loss: 3.9457, Acc: 0.7450, F1: 0.7462
⏰ No improvement — patience 3/5
------------------------------------------------------------


Train Epoch 12: 100%|██████████| 395/395 [04:02<00:00,  1.63it/s]
Validation Epoch 12: 100%|██████████| 57/57 [00:22<00:00,  2.58it/s]



Epoch [12/15]
Train - Loss: 0.1004, Acc: 0.9883, F1: 0.9883
Val   - Loss: 5.1145, Acc: 0.7428, F1: 0.7417
⏰ No improvement — patience 4/5
------------------------------------------------------------


Train Epoch 13: 100%|██████████| 395/395 [04:01<00:00,  1.64it/s]
Validation Epoch 13: 100%|██████████| 57/57 [00:21<00:00,  2.63it/s]



Epoch [13/15]
Train - Loss: 0.0620, Acc: 0.9902, F1: 0.9902
Val   - Loss: 4.4622, Acc: 0.7517, F1: 0.7467
⏰ No improvement — patience 5/5
🛑 Early stopping triggered at epoch 13

🔍 Loading best model for final evaluation...


Final Test Evaluation: 100%|██████████| 113/113 [00:49<00:00,  2.27it/s]


🎯 FINAL MULTIMODAL SENTIMENT ANALYSIS RESULTS
   MuRIL (Text) + Swin Transformer (Vision) Fusion
Test Accuracy:  0.7849
Test Precision: 0.7843
Test Recall:    0.7849
Test F1-Score:  0.7792
Test Loss:      2.4547

Confusion Matrix:
[[357  36   9]
 [ 62 275  16]
 [ 42  29  76]]

📊 PER-CLASS DETAILED METRICS:
--------------------------------------------------
Negative: Precision=0.7744, Recall=0.8881, F1=0.8273, Support=402
 Neutral: Precision=0.8088, Recall=0.7790, F1=0.7937, Support=353
Positive: Precision=0.7525, Recall=0.5170, F1=0.6129, Support=147

📈 Classification Report:
              precision    recall  f1-score   support

    Negative       0.77      0.89      0.83       402
     Neutral       0.81      0.78      0.79       353
    Positive       0.75      0.52      0.61       147

    accuracy                           0.78       902
   macro avg       0.78      0.73      0.74       902
weighted avg       0.78      0.78      0.78       902


✅ Results saved to 'multimodal_res


