In [1]:
# ================================================
# ✅ 1️⃣ LIBRARIES & SETUP
# ================================================
import os
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from transformers import AutoImageProcessor, SwinForImageClassification
from torch.optim import AdamW
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
import torch.nn as nn

# ================================================
# ✅ 2️⃣ PATHS
# ================================================
image_dir = "/kaggle/input/basem/images"
input_csv = "/kaggle/input/basem/dataset.csv"

# ================================================
# ✅ 3️⃣ LOAD & PREPROCESS CSV
# ================================================
df = pd.read_csv(input_csv)

existing_data = []
for _, row in df.iterrows():
    image_filename = row['image_path']
    full_image_path = os.path.join(image_dir, image_filename)
    if os.path.exists(full_image_path):
        label_converted = row['label 2'] - 1
        existing_data.append({
            'Image_path': full_image_path,
            'Label_Sentiment': label_converted
        })

processed_df = pd.DataFrame(existing_data)

# ================================================
# ✅ 4️⃣ DATA SPLITS
# ================================================
train_df, temp_df = train_test_split(processed_df, test_size=0.3, stratify=processed_df['Label_Sentiment'], random_state=42)
test_df, val_df = train_test_split(temp_df, test_size=1/3, stratify=temp_df['Label_Sentiment'], random_state=42)

# Add label column for consistency
for df_name, df_ in [('train', train_df), ('test', test_df), ('val', val_df)]:
    df_['label'] = df_['Label_Sentiment']
    df_.to_csv(f'/kaggle/working/{df_name}_vision_only.csv', index=False)

print(f"Train samples: {len(train_df)}, Val samples: {len(val_df)}, Test samples: {len(test_df)}")

# ================================================
# ✅ 5️⃣ LOAD SWIN TRANSFORMER
# ================================================
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Load Swin Transformer for image classification
model_name = "microsoft/swin-base-patch4-window7-224"
processor = AutoImageProcessor.from_pretrained(model_name)

# Load model with ignore_mismatched_sizes to handle classifier mismatch
swin_model = SwinForImageClassification.from_pretrained(
    model_name,
    num_labels=3,  # Set number of classes to 3
    ignore_mismatched_sizes=True  # Ignore size mismatch for classifier
)
swin_model = swin_model.to(device)

# ================================================
# ✅ 6️⃣ VISION DATASET
# ================================================
class VisionDataset(Dataset):
    def __init__(self, df, processor):
        self.df = df
        self.processor = processor

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        image = Image.open(row['Image_path']).convert('RGB')
        label = row['label']
        
        # Process image with Swin processor
        inputs = self.processor(image, return_tensors="pt")
        pixel_values = inputs['pixel_values'].squeeze(0)  # Remove batch dimension
        
        return pixel_values, label

def collate_fn(batch):
    pixel_values, labels = zip(*batch)
    pixel_values = torch.stack(pixel_values)
    labels = torch.tensor(labels)
    return pixel_values, labels

# ================================================
# ✅ 7️⃣ DATALOADERS
# ================================================
batch_size = 8

train_loader = DataLoader(VisionDataset(train_df, processor), batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(VisionDataset(val_df, processor), batch_size=batch_size, collate_fn=collate_fn)
test_loader = DataLoader(VisionDataset(test_df, processor), batch_size=batch_size, collate_fn=collate_fn)

# ================================================
# ✅ 8️⃣ LOSS & OPTIMIZER
# ================================================
# Calculate class weights for balanced training
class_weights = train_df['label'].value_counts().sort_index().tolist()
total = sum(class_weights)
weights = [total / c for c in class_weights]
print(f"Class distribution: {class_weights}")
print(f"Class weights: {weights}")

criterion = nn.CrossEntropyLoss(weight=torch.FloatTensor(weights).to(device))
optimizer = AdamW(swin_model.parameters(), lr=1e-5)  # Lower learning rate for pre-trained model

# ================================================
# ✅ 9️⃣ TRAINING LOOP
# ================================================
num_epochs = 20
patience = 3
patience_counter = 0
best_val_loss = float('inf')

print("🚀 Starting training...")

for epoch in range(num_epochs):
    # ============================================================
    # TRAINING PHASE
    # ============================================================
    swin_model.train()
    total_train_loss = 0
    train_predictions = []
    train_labels = []

    for pixel_values, labels in tqdm(train_loader, desc=f"Train Epoch {epoch+1}"):
        pixel_values = pixel_values.to(device)
        labels = labels.to(device)
        
        outputs = swin_model(pixel_values=pixel_values, labels=labels)
        loss = outputs.loss
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_train_loss += loss.item()
        
        # Store predictions for accuracy calculation
        predictions = torch.argmax(outputs.logits, dim=1)
        train_predictions.extend(predictions.cpu().numpy())
        train_labels.extend(labels.cpu().numpy())

    avg_train_loss = total_train_loss / len(train_loader)
    train_accuracy = accuracy_score(train_labels, train_predictions)

    # ============================================================
    # VALIDATION PHASE
    # ============================================================
    swin_model.eval()
    total_val_loss = 0
    val_predictions = []
    val_labels = []

    with torch.no_grad():
        for pixel_values, labels in tqdm(val_loader, desc=f"Validation Epoch {epoch+1}"):
            pixel_values = pixel_values.to(device)
            labels = labels.to(device)
            
            outputs = swin_model(pixel_values=pixel_values, labels=labels)
            loss = outputs.loss
            
            total_val_loss += loss.item()
            
            # Store predictions for metrics
            predictions = torch.argmax(outputs.logits, dim=1)
            val_predictions.extend(predictions.cpu().numpy())
            val_labels.extend(labels.cpu().numpy())

    avg_val_loss = total_val_loss / len(val_loader)
    val_accuracy = accuracy_score(val_labels, val_predictions)
    
    print(f"Epoch [{epoch+1}/{num_epochs}]")
    print(f"  Train Loss: {avg_train_loss:.4f} | Train Acc: {train_accuracy:.4f}")
    print(f"  Val Loss: {avg_val_loss:.4f} | Val Acc: {val_accuracy:.4f}")

    # ============================================================
    # EARLY STOPPING CHECK
    # ============================================================
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(swin_model.state_dict(), "best_swin_model.pt")
        print("✅ Validation loss improved — model saved.")
    else:
        patience_counter += 1
        print(f"⏰ No improvement — patience {patience_counter}/{patience}")

        if patience_counter >= patience:
            print(f"🛑 Early stopping triggered at epoch {epoch+1}")
            break
    print("-" * 50)

# ================================================
# ✅ 🔟 FINAL TEST EVALUATION
# ================================================
print("\n🔍 Loading best model for final evaluation...")
swin_model.load_state_dict(torch.load("best_swin_model.pt"))
swin_model.eval()

test_predictions = []
test_labels = []
total_test_loss = 0

with torch.no_grad():
    for pixel_values, labels in tqdm(test_loader, desc="Final Test Evaluation"):
        pixel_values = pixel_values.to(device)
        labels = labels.to(device)
        
        outputs = swin_model(pixel_values=pixel_values, labels=labels)
        loss = outputs.loss
        
        total_test_loss += loss.item()
        
        predictions = torch.argmax(outputs.logits, dim=1)
        test_predictions.extend(predictions.cpu().numpy())
        test_labels.extend(labels.cpu().numpy())

# Calculate final metrics
test_accuracy = accuracy_score(test_labels, test_predictions)
precision, recall, f1, _ = precision_recall_fscore_support(test_labels, test_predictions, average='weighted')
cm = confusion_matrix(test_labels, test_predictions)

print("\n" + "="*60)
print("📊 FINAL TEST RESULTS - VISION ONLY (SWIN TRANSFORMER)")
print("="*60)
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test Precision: {precision:.4f}")
print(f"Test Recall: {recall:.4f}")
print(f"Test F1-Score: {f1:.4f}")
print(f"Test Loss: {total_test_loss/len(test_loader):.4f}")
print(f"\nConfusion Matrix:\n{cm}")

# ================================================
# ✅ 1️⃣1️⃣ DETAILED CLASSIFICATION REPORT
# ================================================
from sklearn.metrics import classification_report

print("\n📋 Detailed Classification Report:")
print(classification_report(test_labels, test_predictions, 
                          target_names=['Negative', 'Neutral', 'Positive']))

# ================================================
# ✅ 1️⃣2️⃣ SAVE RESULTS
# ================================================
results = {
    'test_accuracy': test_accuracy,
    'test_precision': precision,
    'test_recall': recall,
    'test_f1': f1,
    'test_loss': total_test_loss/len(test_loader),
    'confusion_matrix': cm.tolist()
}

import json
with open('/kaggle/working/swin_vision_results.json', 'w') as f:
    json.dump(results, f, indent=2)

print("\n✅ Results saved to 'swin_vision_results.json'")
print("🎯 Vision-only model evaluation complete!")

2025-07-07 08:24:40.486085: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1751876680.682515      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1751876680.735618      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Train samples: 3156, Val samples: 451, Test samples: 902
Using device: cuda


preprocessor_config.json:   0%|          | 0.00/255 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/352M [00:00<?, ?B/s]

Some weights of SwinForImageClassification were not initialized from the model checkpoint at microsoft/swin-base-patch4-window7-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([3]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 1024]) in the checkpoint and torch.Size([3, 1024]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Class distribution: [1404, 1237, 515]
Class weights: [2.247863247863248, 2.551333872271625, 6.128155339805825]
🚀 Starting training...


Train Epoch 1: 100%|██████████| 395/395 [03:33<00:00,  1.85it/s]
Validation Epoch 1: 100%|██████████| 57/57 [00:23<00:00,  2.43it/s]


Epoch [1/20]
  Train Loss: 0.8610 | Train Acc: 0.6001
  Val Loss: 0.7841 | Val Acc: 0.6452
✅ Validation loss improved — model saved.
--------------------------------------------------


Train Epoch 2: 100%|██████████| 395/395 [03:08<00:00,  2.09it/s]
Validation Epoch 2: 100%|██████████| 57/57 [00:21<00:00,  2.71it/s]


Epoch [2/20]
  Train Loss: 0.6577 | Train Acc: 0.7177
  Val Loss: 0.7746 | Val Acc: 0.6696
✅ Validation loss improved — model saved.
--------------------------------------------------


Train Epoch 3: 100%|██████████| 395/395 [03:09<00:00,  2.08it/s]
Validation Epoch 3: 100%|██████████| 57/57 [00:20<00:00,  2.72it/s]


Epoch [3/20]
  Train Loss: 0.4867 | Train Acc: 0.8013
  Val Loss: 0.8658 | Val Acc: 0.6519
⏰ No improvement — patience 1/3
--------------------------------------------------


Train Epoch 4: 100%|██████████| 395/395 [03:09<00:00,  2.09it/s]
Validation Epoch 4: 100%|██████████| 57/57 [00:21<00:00,  2.71it/s]


Epoch [4/20]
  Train Loss: 0.3452 | Train Acc: 0.8739
  Val Loss: 0.9082 | Val Acc: 0.6585
⏰ No improvement — patience 2/3
--------------------------------------------------


Train Epoch 5: 100%|██████████| 395/395 [03:09<00:00,  2.09it/s]
Validation Epoch 5: 100%|██████████| 57/57 [00:21<00:00,  2.69it/s]


Epoch [5/20]
  Train Loss: 0.2771 | Train Acc: 0.8929
  Val Loss: 0.9997 | Val Acc: 0.6386
⏰ No improvement — patience 3/3
🛑 Early stopping triggered at epoch 5

🔍 Loading best model for final evaluation...


Final Test Evaluation: 100%|██████████| 113/113 [00:45<00:00,  2.48it/s]


📊 FINAL TEST RESULTS - VISION ONLY (SWIN TRANSFORMER)
Test Accuracy: 0.6818
Test Precision: 0.6935
Test Recall: 0.6818
Test F1-Score: 0.6787
Test Loss: 0.7119

Confusion Matrix:
[[283 108  11]
 [ 75 266  12]
 [ 21  60  66]]

📋 Detailed Classification Report:
              precision    recall  f1-score   support

    Negative       0.75      0.70      0.72       402
     Neutral       0.61      0.75      0.68       353
    Positive       0.74      0.45      0.56       147

    accuracy                           0.68       902
   macro avg       0.70      0.64      0.65       902
weighted avg       0.69      0.68      0.68       902


✅ Results saved to 'swin_vision_results.json'
🎯 Vision-only model evaluation complete!



