In [1]:
# ================================================
# ✅ 1️⃣ LIBRARIES & SETUP
# ================================================
import os
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from transformers import ViTImageProcessor, ViTForImageClassification
from torch.optim import AdamW
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
import torch.nn as nn

# ================================================
# ✅ 2️⃣ PATHS
# ================================================
# Both inputs live directly under the same dataset folder, so derive them
# from one shared root instead of repeating the prefix.
_dataset_root = "/kaggle/input/basem"
image_dir = f"{_dataset_root}/images"
input_csv = f"{_dataset_root}/dataset.csv"

# ================================================
# ✅ 3️⃣ LOAD & PREPROCESS CSV
# ================================================
# Read the annotation CSV. The code below relies on two of its columns:
# 'image_path' (file name relative to image_dir) and 'label 2'.
df = pd.read_csv(input_csv)

# Keep only the rows whose image file is present on disk and shift the label
# down by one (presumably turning 1-based labels into 0-based class indices;
# confirm against the CSV). Iterating the two columns directly avoids the
# per-row Series construction that DataFrame.iterrows() performs.
existing_data = [
    {
        'Image_path': full_image_path,
        'Label_Sentiment': raw_label - 1,
    }
    for full_image_path, raw_label in zip(
        (os.path.join(image_dir, image_filename) for image_filename in df['image_path']),
        df['label 2'],
    )
    if os.path.exists(full_image_path)
]

# Naming the columns explicitly keeps the expected schema even when the list
# is empty (a bare DataFrame([]) would have no columns at all).
processed_df = pd.DataFrame(existing_data, columns=['Image_path', 'Label_Sentiment'])

# Fail early with an actionable message instead of an obscure error further
# down when none of the listed images could be located.
if processed_df.empty:
    raise FileNotFoundError(
        f"None of the images listed in {input_csv} were found under {image_dir}"
    )

# ================================================
# ✅ 4️⃣ DATA SPLITTING
# ================================================
# First split: 70% training, 30% held out; stratified on the class label so
# every subset keeps the overall class proportions.
train_df, temp_df = train_test_split(
    processed_df,
    test_size=0.3,
    stratify=processed_df['Label_Sentiment'],
    random_state=42,
)

# Second split of the held-out part. NOTE: train_test_split returns the
# larger "train" portion first, so with test_size=1/3 the *test* set receives
# 2/3 of temp_df (about 20% of all data) and the *validation* set receives the
# remaining 1/3 (about 10%).
test_df, val_df = train_test_split(
    temp_df,
    test_size=1/3,
    stratify=temp_df['Label_Sentiment'],
    random_state=42,
)

# The frames returned by train_test_split are selections of processed_df;
# take explicit copies so adding a column below operates on independent
# objects and does not trigger pandas' SettingWithCopyWarning.
train_df = train_df.copy()
test_df = test_df.copy()
val_df = val_df.copy()

for df_name, df_ in [('train', train_df), ('test', test_df), ('val', val_df)]:
    # Expose the class index under the column name the dataset class reads.
    df_['label'] = df_['Label_Sentiment']
    df_.to_csv(f'/kaggle/working/{df_name}_cleaned.csv', index=False)

# ================================================
# ✅ 5️⃣ LOAD ViT MODEL
# ================================================
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Image preprocessor loaded from the same checkpoint name as the model.
vit_processor = ViTImageProcessor.from_pretrained(
    "google/vit-base-patch16-224"
)

# ================================================
# ✅ 6️⃣ DATASET CLASS
# ================================================
class VisionOnlyDataset(Dataset):
    """Image-only dataset backed by a DataFrame of file paths and labels.

    Images are read from disk lazily, one per ``__getitem__`` call, and run
    through the supplied image processor.

    Args:
        df: DataFrame with an 'Image_path' column (passed to ``Image.open``)
            and a 'label' column (class index).
        processor: Callable invoked as ``processor(image, return_tensors="pt")``
            whose result exposes a 'pixel_values' tensor with a leading batch
            dimension.
    """

    def __init__(self, df, processor):
        self.df = df
        self.processor = processor

    def __len__(self):
        """Return the number of rows (samples) in the DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load and preprocess the sample at positional index ``idx``.

        Returns:
            tuple: ``(pixel_values, label)`` where ``pixel_values`` is the
            processor output with its batch dimension removed and ``label``
            is a plain Python ``int``.
        """
        row = self.df.iloc[idx]

        # Image.open keeps the file handle open until the image is closed or
        # garbage-collected; convert() returns an independent in-memory copy,
        # so the handle can be released immediately.
        with Image.open(row['Image_path']) as img:
            image = img.convert('RGB')

        # Process image with ViT processor
        inputs = self.processor(image, return_tensors="pt")
        pixel_values = inputs['pixel_values'].squeeze(0)  # Remove batch dimension

        # Hand back a built-in int so batching never depends on the pandas /
        # NumPy scalar type stored in the frame.
        label = int(row['label'])

        return pixel_values, label

def collate_fn(batch):
    """Merge a list of ``(pixel_values, label)`` samples into batch tensors.

    Args:
        batch: Sequence of ``(pixel_values, label)`` pairs where every
            ``pixel_values`` tensor has the same shape.

    Returns:
        tuple: ``(pixel_values, labels)`` where ``pixel_values`` is the samples
        stacked along a new leading dimension and ``labels`` is a 1-D
        ``torch.long`` tensor of class indices.
    """
    pixel_values, labels = zip(*batch)
    pixel_values = torch.stack(pixel_values)
    # Pin the dtype: CrossEntropyLoss expects int64 class indices, and letting
    # torch infer it would yield a float tensor if the labels arrive as floats.
    labels = torch.tensor(labels, dtype=torch.long)
    return pixel_values, labels

# ================================================
# ✅ 7️⃣ DATALOADERS
# ================================================
batch_size = 8

# Settings common to all three loaders; only the training loader additionally
# asks for shuffling.
_loader_options = dict(batch_size=batch_size, collate_fn=collate_fn)

train_loader = DataLoader(
    VisionOnlyDataset(train_df, vit_processor),
    shuffle=True,
    **_loader_options,
)
val_loader = DataLoader(
    VisionOnlyDataset(val_df, vit_processor),
    **_loader_options,
)
test_loader = DataLoader(
    VisionOnlyDataset(test_df, vit_processor),
    **_loader_options,
)

# ================================================
# ✅ 8️⃣ CUSTOM ViT MODEL FOR 3-CLASS CLASSIFICATION
# ================================================
class ViTClassifier(nn.Module):
    """Pre-trained ViT with a classification head sized for ``num_classes``.

    Args:
        num_classes: Number of output classes (size of the last logits
            dimension).
        model_name: Hugging Face checkpoint identifier to load the weights
            from. Defaults to the checkpoint this script was written for.
    """

    def __init__(self, num_classes=3, model_name="google/vit-base-patch16-224"):
        super().__init__()
        # The checkpoint comes with its own classification head. Requesting
        # num_labels=num_classes builds a head of the right size, and
        # ignore_mismatched_sizes=True lets the loader skip the checkpoint's
        # head weights when their shape differs, leaving that layer newly
        # initialised. Unlike swapping `classifier` after loading, this also
        # keeps `config.num_labels` consistent with the actual head.
        # (If num_classes happens to equal the checkpoint's label count, the
        # pre-trained head is reused.)
        self.vit = ViTForImageClassification.from_pretrained(
            model_name,
            num_labels=num_classes,
            ignore_mismatched_sizes=True,
        )

    def forward(self, pixel_values):
        """Return the raw (unnormalised) class logits for ``pixel_values``."""
        outputs = self.vit(pixel_values=pixel_values)
        return outputs.logits

# ================================================
# ✅ 9️⃣ MODEL INITIALIZATION
# ================================================
# Build the 3-class classifier first, then move its parameters to `device`.
model = ViTClassifier(num_classes=3)
model = model.to(device)

# ================================================
# ✅ 🔟 LOSS & OPTIMIZER
# ================================================
# Per-class sample counts of the training split, ordered by class index.
# (Despite its name, `class_weights` holds counts; the name is kept so any
# code that reads it keeps working.)
class_weights = train_df['label'].value_counts().sort_index().tolist()
total = sum(class_weights)

# Inverse-frequency weights: the fewer samples a class has, the larger its
# contribution to the loss.
weights = [total / c for c in class_weights]

# Create the weight tensor directly on the target device with an explicit
# dtype rather than going through the legacy torch.FloatTensor constructor.
criterion = torch.nn.CrossEntropyLoss(
    weight=torch.tensor(weights, dtype=torch.float32, device=device)
)
optimizer = AdamW(model.parameters(), lr=1e-4)

# ================================================
# ✅ 1️⃣1️⃣ TRAINING LOOP
# ================================================
num_epochs = 20
patience = 3  # consecutive non-improving epochs tolerated before stopping
patience_counter = 0
best_val_loss = float('inf')

for epoch in range(num_epochs):
    epoch_no = epoch + 1  # 1-based epoch number used in every message

    # ------------------------------------------------------------
    # Training pass: one optimisation step per batch
    # ------------------------------------------------------------
    model.train()
    total_train_loss = 0

    for images, targets in tqdm(train_loader, desc=f"Train Epoch {epoch_no}"):
        images, targets = images.to(device), targets.to(device)

        # Clear stale gradients, then forward / backward / update.
        optimizer.zero_grad()
        loss = criterion(model(images), targets)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()

    # Mean of the per-batch losses.
    avg_train_loss = total_train_loss / len(train_loader)

    # ------------------------------------------------------------
    # Validation pass: no gradients, collect loss and predictions
    # ------------------------------------------------------------
    model.eval()
    total_val_loss = 0
    val_predictions, val_labels = [], []

    with torch.no_grad():
        for images, targets in tqdm(val_loader, desc=f"Validation Epoch {epoch_no}"):
            images, targets = images.to(device), targets.to(device)

            logits = model(images)
            total_val_loss += criterion(logits, targets).item()

            # The highest-scoring class is the prediction.
            val_predictions.extend(logits.argmax(dim=1).cpu().numpy())
            val_labels.extend(targets.cpu().numpy())

    avg_val_loss = total_val_loss / len(val_loader)
    val_accuracy = accuracy_score(val_labels, val_predictions)

    print(f"Epoch [{epoch_no}/{num_epochs}] Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | Val Acc: {val_accuracy:.4f}")

    # ------------------------------------------------------------
    # Checkpointing and early stopping, driven by validation loss
    # ------------------------------------------------------------
    improved = avg_val_loss < best_val_loss
    if improved:
        # New best: remember it, reset the patience budget, save the weights.
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), "best_vit_model.pt")
        print("✅ Validation loss improved — model saved.")
        continue

    patience_counter += 1
    print(f"⏰ No improvement — patience {patience_counter}/{patience}")

    if patience_counter >= patience:
        print(f"🛑 Early stopping triggered at epoch {epoch_no}")
        break

# ================================================
# ✅ 1️⃣2️⃣ FINAL TEST EVALUATION
# ================================================
print("\n🔍 Loading best model for final evaluation...")
# map_location places the checkpoint tensors on the current device, so a
# checkpoint written during a GPU run can still be loaded in a CPU-only
# session (and vice versa).
model.load_state_dict(torch.load("best_vit_model.pt", map_location=device))
model.eval()

test_predictions = []
test_labels = []
total_test_loss = 0

# Inference only: gradient tracking is disabled for the whole pass.
with torch.no_grad():
    for pixel_values, labels in tqdm(test_loader, desc="Final Test Evaluation"):
        pixel_values = pixel_values.to(device)
        labels = labels.to(device)

        # Forward pass
        logits = model(pixel_values)
        loss = criterion(logits, labels)

        total_test_loss += loss.item()
        # The highest-scoring class is the prediction.
        predictions = torch.argmax(logits, dim=1)
        test_predictions.extend(predictions.cpu().numpy())
        test_labels.extend(labels.cpu().numpy())

# Calculate final metrics. Precision, recall and F1 are averaged over classes
# weighted by each class's support (average='weighted').
test_accuracy = accuracy_score(test_labels, test_predictions)
precision, recall, f1, _ = precision_recall_fscore_support(test_labels, test_predictions, average='weighted')
cm = confusion_matrix(test_labels, test_predictions)

print("\n📊 FINAL TEST RESULTS (Vision-Only ViT):")
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test Precision: {precision:.4f}")
print(f"Test Recall: {recall:.4f}")
print(f"Test F1-Score: {f1:.4f}")
# Reported loss is the mean of the per-batch losses.
print(f"Test Loss: {total_test_loss/len(test_loader):.4f}")
print(f"\nConfusion Matrix:\n{cm}")

# ================================================
# ✅ 1️⃣3️⃣ CLASS-WISE METRICS
# ================================================
# Request the metrics for an explicit, ordered list of class ids. Without
# `labels`, the returned arrays only cover classes that occur in the data, so
# indexing them with a fixed range could raise IndexError or attribute a row
# to the wrong class when a class is absent.
class_ids = [0, 1, 2]
precision_per_class, recall_per_class, f1_per_class, _ = precision_recall_fscore_support(
    test_labels,
    test_predictions,
    labels=class_ids,
    average=None,
)

print("\n📈 CLASS-WISE METRICS:")
for i in class_ids:
    print(f"Class {i}: Precision={precision_per_class[i]:.4f}, Recall={recall_per_class[i]:.4f}, F1={f1_per_class[i]:.4f}")

2025-07-07 09:09:28.951849: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1751879369.132234      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1751879369.185347      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


preprocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

Train Epoch 1: 100%|██████████| 395/395 [02:52<00:00,  2.29it/s]
Validation Epoch 1: 100%|██████████| 57/57 [00:19<00:00,  2.86it/s]


Epoch [1/20] Train Loss: 0.9540 | Val Loss: 0.8768 | Val Acc: 0.6120
✅ Validation loss improved — model saved.


Train Epoch 2: 100%|██████████| 395/395 [02:36<00:00,  2.52it/s]
Validation Epoch 2: 100%|██████████| 57/57 [00:17<00:00,  3.28it/s]


Epoch [2/20] Train Loss: 0.7594 | Val Loss: 0.9314 | Val Acc: 0.5698
⏰ No improvement — patience 1/3


Train Epoch 3: 100%|██████████| 395/395 [02:37<00:00,  2.51it/s]
Validation Epoch 3: 100%|██████████| 57/57 [00:17<00:00,  3.21it/s]


Epoch [3/20] Train Loss: 0.5873 | Val Loss: 0.9348 | Val Acc: 0.6120
⏰ No improvement — patience 2/3


Train Epoch 4: 100%|██████████| 395/395 [02:38<00:00,  2.49it/s]
Validation Epoch 4: 100%|██████████| 57/57 [00:17<00:00,  3.30it/s]


Epoch [4/20] Train Loss: 0.4306 | Val Loss: 1.2082 | Val Acc: 0.6053
⏰ No improvement — patience 3/3
🛑 Early stopping triggered at epoch 4

🔍 Loading best model for final evaluation...


Final Test Evaluation: 100%|██████████| 113/113 [00:39<00:00,  2.87it/s]


📊 FINAL TEST RESULTS (Vision-Only ViT):
Test Accuracy: 0.6397
Test Precision: 0.6578
Test Recall: 0.6397
Test F1-Score: 0.6425
Test Loss: 0.8164

Confusion Matrix:
[[244 118  40]
 [ 63 247  43]
 [ 11  50  86]]

📈 CLASS-WISE METRICS:
Class 0: Precision=0.7673, Recall=0.6070, F1=0.6778
Class 1: Precision=0.5952, Recall=0.6997, F1=0.6432
Class 2: Precision=0.5089, Recall=0.5850, F1=0.5443



