In [None]:
# Some of the Kaggle data sources used by this notebook are private, so run
# this cell before importing them.
# The next cell uses the `kagglehub` module imported here to fetch the dataset.
import kagglehub
kagglehub.login()


In [None]:
# Run this cell to import the Kaggle data source used by the notebook; it can
# be deleted once the data is available.
# NOTE: this notebook environment differs from Kaggle's Python environment, so
# libraries used by the notebook may be missing.
# NOTE(review): the value stored below is presumably the dataset's local path
# (going by the variable name), but the cells visible here read from hard-coded
# '/kaggle/input/less-data/...' paths instead -- confirm both locations agree
# when running outside Kaggle.

khadiza13_less_data_path = kagglehub.dataset_download('khadiza13/less-data')

print('Data source import complete.')


In [None]:
import numpy as np
import pandas as pd

In [None]:
# Quick look at the annotation table before any modelling.
annotations_csv = '/kaggle/input/less-data/changed_ds/new_ds_small.csv'
df = pd.read_csv(annotations_csv)
df.head()

Unnamed: 0,image,text,label
0,2001.jpg,আচ্ছা ভাই।\n মেয়েদের থেকে দূরে থাকবা মেয়ের পাল...,stereotype & objectification
1,2002.jpg,কিসের foodpanda যেখানে আমার সাদিয়া রে অর্ডার ক...,stereotype & objectification
2,2003.jpg,উপস্থিত sir\n Yes sir\n Present Teacher\n [লাব...,stereotype & objectification
3,2004.png,আমি হিজাব চাইনি\n তারপর আমার স্বামী আমাকে বোঝালেন,violence or abuse
4,2005.jpg,"বাঙ্গু (beta, omega, theta, delta) male\nসুন্দ...",non-misogynistic


In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from transformers import AutoModelForImageClassification, AutoImageProcessor
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, f1_score
from sklearn.preprocessing import LabelEncoder
from PIL import Image
import os
import random
from tqdm import tqdm

# Seed every random number generator the notebook relies on.
def set_seed(seed):
    """Seed Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices) with ``seed``."""
    seeders = (
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
        random.seed,
    )
    for seeder in seeders:
        seeder(seed)


set_seed(42)

# Load dataset and sanity-check it before doing anything else
df = pd.read_csv("/kaggle/input/less-data/changed_ds/new_ds_small.csv")
assert not df.empty, "Dataset is empty"
assert len(df) == 4001, f"Expected 4001 samples, got {len(df)}"
for required_col in ('image', 'label'):
    # Every row needs both an image file name and a class label.
    assert df[required_col].notnull().all(), f"Missing values in '{required_col}' column"
print("✅ Dataset loaded. Sample:")
print(df.head())

# Encode labels: map each class name to an integer id and keep both lookup directions
label_encoder = LabelEncoder()
df['label_id'] = label_encoder.fit_transform(df['label'])
id2label = dict(enumerate(label_encoder.classes_))
label2id = {name: idx for idx, name in id2label.items()}
print(f"✅ Unique labels: {label2id}")
print(f"Label distribution: {df['label'].value_counts().to_dict()}")

# Split data into train (70%), validation (15%), test (15%).
# Both splits are stratified on the label id and use the same fixed seed.
train_df, temp_df = train_test_split(
    df, stratify=df['label_id'], test_size=0.3, random_state=42
)
val_df, test_df = train_test_split(
    temp_df, stratify=temp_df['label_id'], test_size=0.5, random_state=42
)
n_train, n_val, n_test = len(train_df), len(val_df), len(test_df)
print(f"✅ Dataset split: Train: {n_train}, Val: {n_val}, Test: {n_test}")

# Image dataset class
class ImageDataset(Dataset):
    """Dataset yielding one image and its integer label per row of ``df``.

    ``df`` must provide an ``image`` column (file names relative to ``img_dir``)
    and a ``label_id`` column. If ``processor`` is given it is applied with
    ``return_tensors="pt"`` and the sample is keyed ``'pixel_values'``;
    otherwise the optional ``transform`` is applied and the sample is keyed
    ``'image'``.
    """

    def __init__(self, df, img_dir, processor=None, transform=None):
        self.df, self.img_dir = df, img_dir
        self.processor, self.transform = processor, transform
        # Cache the two columns as arrays so lookups are positional.
        self.image_paths = df['image'].values
        self.labels = df['label_id'].values

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.image_paths[idx])
        try:
            image = Image.open(img_path).convert('RGB')
        except Exception as e:
            # Best effort: report the failure and substitute a blank image.
            print(f"Error loading image {img_path}: {e}")
            image = Image.new('RGB', (224, 224))  # Fallback: blank image

        sample_key = 'image'
        if self.processor:
            sample_key = 'pixel_values'
            encoded = self.processor(image, return_tensors="pt")
            image = encoded['pixel_values'].squeeze(0)  # drop the batch dimension
        elif self.transform:
            image = self.transform(image)

        return {
            sample_key: image,
            'labels': torch.tensor(self.labels[idx], dtype=torch.long),
        }

# Preprocessing for EfficientNet
_EFFICIENTNET_MEAN = [0.485, 0.456, 0.406]
_EFFICIENTNET_STD = [0.229, 0.224, 0.225]


def _efficientnet_pipeline(*augmentations):
    """Resize to 224x224, apply ``augmentations``, then tensorize and normalize."""
    return transforms.Compose([
        transforms.Resize((224, 224)),
        *augmentations,
        transforms.ToTensor(),
        transforms.Normalize(mean=_EFFICIENTNET_MEAN, std=_EFFICIENTNET_STD),
    ])


# Training pipeline adds random flip/rotation; validation and test stay deterministic.
efficientnet_transform = _efficientnet_pipeline(
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
)
efficientnet_val_test_transform = _efficientnet_pipeline()

# Model configurations
def _build_efficientnet(num_classes):
    """Return a torchvision EfficientNet-B0 loaded with pretrained weights.

    ``num_classes`` is accepted so the factory matches how ``model_fn`` is
    called, but it is not used here: the classification head is replaced
    afterwards by ``modify_fn``.
    """
    # `pretrained=True` is deprecated in torchvision; IMAGENET1K_V1 is the
    # weight set that legacy flag selected.
    return models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1)


def _replace_efficientnet_head(model, num_classes):
    """Replace ``model.classifier`` in place with a Linear layer of ``num_classes`` outputs."""
    # NOTE(review): the whole `classifier` attribute is replaced, not only
    # `classifier[1]`, so anything else in the original head is dropped --
    # confirm this is intended.
    in_features = model.classifier[1].in_features
    model.classifier = nn.Linear(in_features, num_classes)


# Per-model settings consumed by the main loop:
#   model_fn / modify_fn          - build the torchvision model and swap its head
#   transform / val_test_transform - torchvision preprocessing pipelines
#   checkpoint                    - Hugging Face checkpoint id
#   use_processor                 - True when preprocessing comes from AutoImageProcessor
model_configs = {
    "EfficientNet": {
        "model_fn": _build_efficientnet,
        "modify_fn": _replace_efficientnet_head,
        "transform": efficientnet_transform,
        "val_test_transform": efficientnet_val_test_transform,
        "use_processor": False
    },
    "ViT": {
        "checkpoint": "google/vit-base-patch16-224",
        "use_processor": True
    },
    "SwinTransformer": {
        "checkpoint": "microsoft/swin-base-patch4-window7-224",
        "use_processor": True
    }
}

# Training and evaluation function
def _forward_batch(model, batch, device):
    """Move one batch to ``device``, run ``model`` on it and return ``(logits, labels)``.

    The pixel tensor is read from ``batch['pixel_values']`` when that key is
    present and from ``batch['image']`` otherwise. Outputs exposing a
    ``.logits`` attribute are unwrapped; any other output is used as the
    logits directly.
    """
    pixel_key = 'pixel_values' if 'pixel_values' in batch else 'image'
    images = batch[pixel_key].to(device)
    labels = batch['labels'].to(device)
    outputs = model(images)
    logits = outputs.logits if hasattr(outputs, 'logits') else outputs
    return logits, labels


def _predict(model, loader, device, desc, criterion=None):
    """Run ``model`` over ``loader`` in eval mode and return ``(preds, labels)`` lists.

    When ``criterion`` is given, the mean loss over the batches seen so far is
    shown on the progress bar as ``val_loss``.
    """
    model.eval()
    preds, targets = [], []
    loss_sum = 0.0
    bar = tqdm(loader, desc=desc)
    with torch.no_grad():
        for step, batch in enumerate(bar, start=1):
            logits, labels = _forward_batch(model, batch, device)
            if criterion is not None:
                loss_sum += criterion(logits, labels).item()
                bar.set_postfix({"val_loss": loss_sum / step})
            preds.extend(torch.argmax(logits, dim=1).cpu().numpy())
            targets.extend(labels.cpu().numpy())
    return preds, targets


def train_and_evaluate(model_name, model, train_loader, val_loader, test_loader, num_classes, device):
    """Fine-tune ``model`` with early stopping, then report validation and test metrics.

    Args:
        model_name: Name used in log messages, as the checkpoint sub-directory
            (``./models/<model_name>/best_model.pt``) and as the key in the
            module-level ``results`` dict.
        model: Model to train; moved to ``device`` for training and left on the
            CPU when the function returns.
        train_loader, val_loader, test_loader: Loaders yielding dict batches
            with a ``'labels'`` entry and a ``'pixel_values'`` or ``'image'`` entry.
        num_classes: Unused; kept so existing callers keep working.
        device: Device used for training and evaluation.

    Returns:
        The dict stored in ``results[model_name]``, holding ``"val_report"`` and
        ``"test_report"`` (``classification_report(..., output_dict=True)``).

    Side effects:
        Writes the best checkpoint to disk, prints progress and reports, and
        reads the module-level ``label_encoder`` for class names.
    """
    print(f"\n🔄 Training {model_name}...")
    model.to(device)
    optimizer = optim.AdamW(model.parameters(), lr=1e-4)
    criterion = nn.CrossEntropyLoss()

    # Early stopping parameters
    patience = 3
    min_delta = 0.01
    best_f1 = 0
    patience_counter = 0

    checkpoint_dir = f"./models/{model_name}"
    checkpoint_path = f"{checkpoint_dir}/best_model.pt"
    # Tracks whether THIS run has written a checkpoint, so a stale file from a
    # previous run is never loaded by mistake.
    checkpoint_saved = False

    num_epochs = 10
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader, desc=f"Epoch {epoch+1} Training")
        for step, batch in enumerate(train_bar, start=1):
            optimizer.zero_grad()
            logits, labels = _forward_batch(model, batch, device)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            # Average over the batches processed so far, not the epoch length.
            train_bar.set_postfix({"loss": running_loss / step})

        # Validation
        val_preds, val_labels = _predict(
            model, val_loader, device, f"Epoch {epoch+1} Validation", criterion=criterion
        )
        val_f1 = f1_score(val_labels, val_preds, average='weighted')
        print(f"Epoch {epoch+1}: Val F1 = {val_f1:.4f}")

        # Early stopping. The first epoch is always checkpointed so there is
        # something valid to reload even if F1 never exceeds `min_delta`.
        if not checkpoint_saved or val_f1 > best_f1 + min_delta:
            best_f1 = val_f1
            patience_counter = 0
            os.makedirs(checkpoint_dir, exist_ok=True)
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_saved = True
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping at epoch {epoch+1}")
                break

    # Load best model
    if checkpoint_saved:
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))

    class_names = label_encoder.classes_

    # Validation evaluation
    val_preds, val_labels = _predict(model, val_loader, device, "Validation Evaluation")
    print(f"\n📊 Validation Classification Report for {model_name}")
    print(classification_report(val_labels, val_preds, target_names=class_names))

    # Test evaluation
    test_preds, test_labels = _predict(model, test_loader, device, "Test Evaluation")
    print(f"\n📊 Test Classification Report for {model_name}")
    print(classification_report(test_labels, test_preds, target_names=class_names))

    # Save results
    model_results = {
        "val_report": classification_report(val_labels, val_preds, target_names=class_names, output_dict=True),
        "test_report": classification_report(test_labels, test_preds, target_names=class_names, output_dict=True)
    }
    results[model_name] = model_results

    # Clear memory. The caller still references `model`, so `del model` alone
    # frees nothing: drop gradients and optimizer state, then move the weights
    # off the GPU before emptying the cache.
    optimizer.zero_grad(set_to_none=True)
    del optimizer
    model.to("cpu")
    torch.cuda.empty_cache()
    print(f"🧹 Cleared memory after {model_name}")
    return model_results

# Main execution
results = {}
model_names = ["EfficientNet", "ViT", "SwinTransformer"]
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Create datasets and loaders
img_dir = "/kaggle/input/less-data/changed_ds/img"
for model_name in model_names:
    config = model_configs[model_name]
    # Everything model-specific lives inside the try block so that a failure
    # while loading a processor or checkpoint skips that model instead of
    # aborting the whole comparison.
    try:
        if config["use_processor"]:
            processor = AutoImageProcessor.from_pretrained(config["checkpoint"])
            train_dataset = ImageDataset(train_df, img_dir, processor=processor)
            val_dataset = ImageDataset(val_df, img_dir, processor=processor)
            test_dataset = ImageDataset(test_df, img_dir, processor=processor)
        else:
            train_dataset = ImageDataset(train_df, img_dir, transform=config["transform"])
            val_dataset = ImageDataset(val_df, img_dir, transform=config["val_test_transform"])
            test_dataset = ImageDataset(test_df, img_dir, transform=config["val_test_transform"])

        train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)
        test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

        # Initialize model
        if model_name == "EfficientNet":
            model = config["model_fn"](len(label2id))
            config["modify_fn"](model, len(label2id))
        else:
            model = AutoModelForImageClassification.from_pretrained(
                config["checkpoint"],
                num_labels=len(label2id),
                id2label=id2label,
                label2id=label2id,
                # The checkpoints ship a 1000-class head; without this flag
                # loading fails with a classifier size-mismatch RuntimeError.
                ignore_mismatched_sizes=True
            )

        train_and_evaluate(model_name, model, train_loader, val_loader, test_loader, len(label2id), device)
    except Exception as e:
        print(f"❌ Skipping {model_name} due to error: {e}")

# Compare results (only models that finished training appear in `results`)
print("\n📊 Model Comparison (Test Metrics)")
for model_name, result in results.items():
    test_acc = result["test_report"]["accuracy"]
    test_f1 = result["test_report"]["weighted avg"]["f1-score"]
    print(f"{model_name}: Accuracy = {test_acc:.4f}, F1 = {test_f1:.4f}")

2025-06-24 09:07:39.717975: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1750756059.913615      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1750756059.974633      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth


✅ Dataset loaded. Sample:
      image                                               text  \
0  2001.jpg  আচ্ছা ভাই।\n মেয়েদের থেকে দূরে থাকবা মেয়ের পাল...   
1  2002.jpg  কিসের foodpanda যেখানে আমার সাদিয়া রে অর্ডার ক...   
2  2003.jpg  উপস্থিত sir\n Yes sir\n Present Teacher\n [লাব...   
3  2004.png  আমি হিজাব চাইনি\n তারপর আমার স্বামী আমাকে বোঝালেন   
4  2005.jpg  বাঙ্গু (beta, omega, theta, delta) male\nসুন্দ...   

                          label  
0  stereotype & objectification  
1  stereotype & objectification  
2  stereotype & objectification  
3             violence or abuse  
4              non-misogynistic  
✅ Unique labels: {'non-misogynistic': 0, 'stereotype & objectification': 1, 'violence or abuse': 2}
Label distribution: {'stereotype & objectification': 1591, 'non-misogynistic': 1380, 'violence or abuse': 1030}
✅ Dataset split: Train: 2800, Val: 600, Test: 601
Using device: cuda


100%|██████████| 20.5M/20.5M [00:00<00:00, 179MB/s]



🔄 Training EfficientNet...


Epoch 1 Training: 100%|██████████| 175/175 [01:12<00:00,  2.41it/s, loss=0.998]
Epoch 1 Validation: 100%|██████████| 38/38 [00:12<00:00,  2.94it/s, val_loss=0.907]


Epoch 1: Val F1 = 0.5746


Epoch 2 Training: 100%|██████████| 175/175 [00:54<00:00,  3.23it/s, loss=0.834]
Epoch 2 Validation: 100%|██████████| 38/38 [00:09<00:00,  4.16it/s, val_loss=0.908]


Epoch 2: Val F1 = 0.5821


Epoch 3 Training: 100%|██████████| 175/175 [00:54<00:00,  3.22it/s, loss=0.714]
Epoch 3 Validation: 100%|██████████| 38/38 [00:09<00:00,  4.07it/s, val_loss=0.902]


Epoch 3: Val F1 = 0.5808


Epoch 4 Training: 100%|██████████| 175/175 [00:54<00:00,  3.21it/s, loss=0.587]
Epoch 4 Validation: 100%|██████████| 38/38 [00:09<00:00,  4.19it/s, val_loss=0.938]


Epoch 4: Val F1 = 0.6126


Epoch 5 Training: 100%|██████████| 175/175 [00:54<00:00,  3.21it/s, loss=0.477]
Epoch 5 Validation: 100%|██████████| 38/38 [00:09<00:00,  4.16it/s, val_loss=0.982]


Epoch 5: Val F1 = 0.6043


Epoch 6 Training: 100%|██████████| 175/175 [00:54<00:00,  3.24it/s, loss=0.399]
Epoch 6 Validation: 100%|██████████| 38/38 [00:09<00:00,  4.19it/s, val_loss=1.05] 


Epoch 6: Val F1 = 0.6042


Epoch 7 Training: 100%|██████████| 175/175 [00:54<00:00,  3.22it/s, loss=0.288]
Epoch 7 Validation: 100%|██████████| 38/38 [00:09<00:00,  4.16it/s, val_loss=1.13] 


Epoch 7: Val F1 = 0.6116
Early stopping at epoch 7


Validation Evaluation: 100%|██████████| 38/38 [00:08<00:00,  4.25it/s]



📊 Validation Classification Report for EfficientNet
                              precision    recall  f1-score   support

            non-misogynistic       0.72      0.68      0.70       207
stereotype & objectification       0.62      0.65      0.64       239
           violence or abuse       0.46      0.47      0.46       154

                    accuracy                           0.61       600
                   macro avg       0.60      0.60      0.60       600
                weighted avg       0.61      0.61      0.61       600



Test Evaluation: 100%|██████████| 38/38 [00:12<00:00,  3.02it/s]



📊 Test Classification Report for EfficientNet
                              precision    recall  f1-score   support

            non-misogynistic       0.67      0.71      0.69       207
stereotype & objectification       0.65      0.66      0.66       239
           violence or abuse       0.54      0.48      0.51       155

                    accuracy                           0.63       601
                   macro avg       0.62      0.62      0.62       601
                weighted avg       0.63      0.63      0.63       601

🧹 Cleared memory after EfficientNet


preprocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/69.7k [00:00<?, ?B/s]

Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

RuntimeError: Error(s) in loading state_dict for Linear:
	size mismatch for bias: copying a param with shape torch.Size([1000]) from checkpoint, the shape in current model is torch.Size([3]).

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from transformers import AutoModelForImageClassification, AutoImageProcessor
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, f1_score
from sklearn.preprocessing import LabelEncoder
from PIL import Image
import os
import random
from tqdm import tqdm

# Seed every random number generator the notebook relies on.
def set_seed(seed):
    """Seed Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices) with ``seed``."""
    seeders = (
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
        random.seed,
    )
    for seeder in seeders:
        seeder(seed)


set_seed(42)

# Load dataset and sanity-check it before doing anything else
df = pd.read_csv("/kaggle/input/less-data/changed_ds/new_ds_small.csv")
assert not df.empty, "Dataset is empty"
assert len(df) == 4001, f"Expected 4001 samples, got {len(df)}"
for required_col in ('image', 'label'):
    # Every row needs both an image file name and a class label.
    assert df[required_col].notnull().all(), f"Missing values in '{required_col}' column"
print("✅ Dataset loaded. Sample:")
print(df.head())

# Encode labels: map each class name to an integer id and keep both lookup directions
label_encoder = LabelEncoder()
df['label_id'] = label_encoder.fit_transform(df['label'])
id2label = dict(enumerate(label_encoder.classes_))
label2id = {name: idx for idx, name in id2label.items()}
print(f"✅ Unique labels: {label2id}")
print(f"Label distribution: {df['label'].value_counts().to_dict()}")

# Verify number of classes
num_classes = len(label2id)
assert num_classes == 3, f"Expected 3 classes, got {num_classes}"

# Split data into train (70%), validation (15%), test (15%).
# Both splits are stratified on the label id and use the same fixed seed.
train_df, temp_df = train_test_split(
    df, stratify=df['label_id'], test_size=0.3, random_state=42
)
val_df, test_df = train_test_split(
    temp_df, stratify=temp_df['label_id'], test_size=0.5, random_state=42
)
n_train, n_val, n_test = len(train_df), len(val_df), len(test_df)
print(f"✅ Dataset split: Train: {n_train}, Val: {n_val}, Test: {n_test}")

# Image dataset class
class ImageDataset(Dataset):
    """Dataset yielding ``{'pixel_values', 'labels'}`` for each row of ``df``.

    ``df`` must provide an ``image`` column (file names relative to ``img_dir``)
    and a ``label_id`` column. When ``processor`` is given it is applied with
    ``return_tensors="pt"``; otherwise the loaded image is returned as is.
    """

    def __init__(self, df, img_dir, processor=None):
        self.df, self.img_dir, self.processor = df, img_dir, processor
        # Cache the two columns as arrays so lookups are positional.
        self.image_paths = df['image'].values
        self.labels = df['label_id'].values

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.image_paths[idx])
        try:
            image = Image.open(img_path).convert('RGB')
        except Exception as e:
            # Best effort: report the failure and substitute a blank image.
            print(f"Error loading image {img_path}: {e}")
            image = Image.new('RGB', (224, 224))  # Fallback: blank image

        if self.processor:
            encoded = self.processor(image, return_tensors="pt")
            image = encoded['pixel_values'].squeeze(0)  # drop the batch dimension

        return {
            'pixel_values': image,
            'labels': torch.tensor(self.labels[idx], dtype=torch.long),
        }

# Model configurations: each entry names a Hugging Face checkpoint and marks
# that preprocessing is done by the checkpoint's image processor.
_HF_CHECKPOINTS = (
    ("ViT", "google/vit-base-patch16-224"),
    ("SwinTransformer", "microsoft/swin-base-patch4-window7-224"),
)
model_configs = {
    name: {"checkpoint": checkpoint, "use_processor": True}
    for name, checkpoint in _HF_CHECKPOINTS
}

# Training and evaluation function
def _run_batch(model, batch, device):
    """Move one batch to ``device`` and return ``(logits, labels)`` from ``model``."""
    images = batch['pixel_values'].to(device)
    labels = batch['labels'].to(device)
    return model(images).logits, labels


def _evaluate_split(model, loader, device, desc, criterion=None):
    """Run ``model`` over ``loader`` in eval mode and return ``(preds, labels)`` lists.

    When ``criterion`` is given, the mean loss over the batches seen so far is
    shown on the progress bar as ``val_loss``.
    """
    model.eval()
    preds, targets = [], []
    loss_sum = 0.0
    bar = tqdm(loader, desc=desc)
    with torch.no_grad():
        for step, batch in enumerate(bar, start=1):
            logits, labels = _run_batch(model, batch, device)
            if criterion is not None:
                loss_sum += criterion(logits, labels).item()
                bar.set_postfix({"val_loss": loss_sum / step})
            preds.extend(torch.argmax(logits, dim=1).cpu().numpy())
            targets.extend(labels.cpu().numpy())
    return preds, targets


def train_and_evaluate(model_name, model, train_loader, val_loader, test_loader, num_classes, device):
    """Fine-tune ``model`` with early stopping, then report validation and test metrics.

    Args:
        model_name: Name used in log messages, as the checkpoint sub-directory
            (``./models/<model_name>/best_model.pt``) and as the key in the
            module-level ``results`` dict.
        model: Model whose forward output exposes ``.logits``; moved to
            ``device`` for training and left on the CPU on return.
        train_loader, val_loader, test_loader: Loaders yielding dict batches
            with ``'pixel_values'`` and ``'labels'`` entries.
        num_classes: Unused; kept so existing callers keep working.
        device: Device used for training and evaluation.

    Returns:
        The dict stored in ``results[model_name]``, holding ``"val_report"`` and
        ``"test_report"`` (``classification_report(..., output_dict=True)``).

    Side effects:
        Writes the best checkpoint to disk, prints progress and reports, and
        reads the module-level ``label_encoder`` for class names.
    """
    print(f"\n🔄 Training {model_name}...")
    model.to(device)
    optimizer = optim.AdamW(model.parameters(), lr=1e-4)
    criterion = nn.CrossEntropyLoss()

    # Early stopping parameters
    patience = 3
    min_delta = 0.01
    best_f1 = 0
    patience_counter = 0

    checkpoint_dir = f"./models/{model_name}"
    checkpoint_path = f"{checkpoint_dir}/best_model.pt"
    # Tracks whether THIS run has written a checkpoint, so a stale file from a
    # previous run is never loaded by mistake.
    checkpoint_saved = False

    num_epochs = 10
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader, desc=f"Epoch {epoch+1} Training")
        for step, batch in enumerate(train_bar, start=1):
            optimizer.zero_grad()
            logits, labels = _run_batch(model, batch, device)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            # Average over the batches processed so far, not the epoch length.
            train_bar.set_postfix({"loss": running_loss / step})

        # Validation
        val_preds, val_labels = _evaluate_split(
            model, val_loader, device, f"Epoch {epoch+1} Validation", criterion=criterion
        )
        val_f1 = f1_score(val_labels, val_preds, average='weighted')
        print(f"Epoch {epoch+1}: Val F1 = {val_f1:.4f}")

        # Early stopping. The first epoch is always checkpointed so there is
        # something valid to reload even if F1 never exceeds `min_delta`.
        if not checkpoint_saved or val_f1 > best_f1 + min_delta:
            best_f1 = val_f1
            patience_counter = 0
            os.makedirs(checkpoint_dir, exist_ok=True)
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_saved = True
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping at epoch {epoch+1}")
                break

    # Load best model
    if checkpoint_saved:
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))

    class_names = label_encoder.classes_

    # Validation evaluation
    val_preds, val_labels = _evaluate_split(model, val_loader, device, "Validation Evaluation")
    print(f"\n📊 Validation Classification Report for {model_name}")
    print(classification_report(val_labels, val_preds, target_names=class_names))

    # Test evaluation
    test_preds, test_labels = _evaluate_split(model, test_loader, device, "Test Evaluation")
    print(f"\n📊 Test Classification Report for {model_name}")
    print(classification_report(test_labels, test_preds, target_names=class_names))

    # Save results
    model_results = {
        "val_report": classification_report(val_labels, val_preds, target_names=class_names, output_dict=True),
        "test_report": classification_report(test_labels, test_preds, target_names=class_names, output_dict=True)
    }
    results[model_name] = model_results

    # Clear memory. The caller still references `model`, so `del model` alone
    # frees nothing: drop gradients and optimizer state, then move the weights
    # off the GPU before emptying the cache.
    optimizer.zero_grad(set_to_none=True)
    del optimizer
    model.to("cpu")
    torch.cuda.empty_cache()
    print(f"🧹 Cleared memory after {model_name}")
    return model_results

# Main execution
results = {}
model_names = ["ViT", "SwinTransformer"]
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Create datasets and loaders
img_dir = "/kaggle/input/less-data/changed_ds/img"
for model_name in model_names:
    config = model_configs[model_name]
    # Processor and checkpoint loading are inside the try block so that a
    # failure there skips this model instead of aborting the whole comparison.
    try:
        processor = AutoImageProcessor.from_pretrained(config["checkpoint"])
        train_dataset = ImageDataset(train_df, img_dir, processor=processor)
        val_dataset = ImageDataset(val_df, img_dir, processor=processor)
        test_dataset = ImageDataset(test_df, img_dir, processor=processor)

        train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)
        test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

        # Initialize model with ignore_mismatched_sizes=True: the checkpoint's
        # 1000-class head is replaced by a freshly initialized `num_classes` head.
        model = AutoModelForImageClassification.from_pretrained(
            config["checkpoint"],
            num_labels=num_classes,
            id2label=id2label,
            label2id=label2id,
            ignore_mismatched_sizes=True  # Fix for size mismatch
        )

        train_and_evaluate(model_name, model, train_loader, val_loader, test_loader, num_classes, device)
    except Exception as e:
        print(f"❌ Skipping {model_name} due to error: {e}")

# Compare results (only models that finished training appear in `results`)
print("\n📊 Model Comparison (Test Metrics)")
for model_name, result in results.items():
    test_acc = result["test_report"]["accuracy"]
    test_f1 = result["test_report"]["weighted avg"]["f1-score"]
    print(f"{model_name}: Accuracy = {test_acc:.4f}, F1 = {test_f1:.4f}")


✅ Dataset loaded. Sample:
      image                                               text  \
0  2001.jpg  আচ্ছা ভাই।\n মেয়েদের থেকে দূরে থাকবা মেয়ের পাল...   
1  2002.jpg  কিসের foodpanda যেখানে আমার সাদিয়া রে অর্ডার ক...   
2  2003.jpg  উপস্থিত sir\n Yes sir\n Present Teacher\n [লাব...   
3  2004.png  আমি হিজাব চাইনি\n তারপর আমার স্বামী আমাকে বোঝালেন   
4  2005.jpg  বাঙ্গু (beta, omega, theta, delta) male\nসুন্দ...   

                          label  
0  stereotype & objectification  
1  stereotype & objectification  
2  stereotype & objectification  
3             violence or abuse  
4              non-misogynistic  
✅ Unique labels: {'non-misogynistic': 0, 'stereotype & objectification': 1, 'violence or abuse': 2}
Label distribution: {'stereotype & objectification': 1591, 'non-misogynistic': 1380, 'violence or abuse': 1030}
✅ Dataset split: Train: 2800, Val: 600, Test: 601
Using device: cuda


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([3]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([3, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



🔄 Training ViT...


Epoch 1 Training: 100%|██████████| 175/175 [01:51<00:00,  1.57it/s, loss=0.951]
Epoch 1 Validation: 100%|██████████| 38/38 [00:15<00:00,  2.50it/s, val_loss=0.921]


Epoch 1: Val F1 = 0.5474


Epoch 2 Training: 100%|██████████| 175/175 [01:48<00:00,  1.61it/s, loss=0.557]
Epoch 2 Validation: 100%|██████████| 38/38 [00:14<00:00,  2.64it/s, val_loss=0.897]


Epoch 2: Val F1 = 0.6249


Epoch 3 Training: 100%|██████████| 175/175 [01:48<00:00,  1.61it/s, loss=0.191]
Epoch 3 Validation: 100%|██████████| 38/38 [00:14<00:00,  2.59it/s, val_loss=1.04] 


Epoch 3: Val F1 = 0.6337


Epoch 4 Training: 100%|██████████| 175/175 [01:49<00:00,  1.61it/s, loss=0.0637]
Epoch 4 Validation: 100%|██████████| 38/38 [00:14<00:00,  2.62it/s, val_loss=1.36] 


Epoch 4: Val F1 = 0.6026


Epoch 5 Training: 100%|██████████| 175/175 [01:48<00:00,  1.61it/s, loss=0.0467]
Epoch 5 Validation: 100%|██████████| 38/38 [00:14<00:00,  2.61it/s, val_loss=1.26] 


Epoch 5: Val F1 = 0.6281
Early stopping at epoch 5


Validation Evaluation: 100%|██████████| 38/38 [00:14<00:00,  2.59it/s]



📊 Validation Classification Report for ViT
                              precision    recall  f1-score   support

            non-misogynistic       0.63      0.78      0.70       207
stereotype & objectification       0.70      0.54      0.61       239
           violence or abuse       0.54      0.56      0.55       154

                    accuracy                           0.63       600
                   macro avg       0.62      0.63      0.62       600
                weighted avg       0.63      0.63      0.62       600



Test Evaluation: 100%|██████████| 38/38 [00:15<00:00,  2.43it/s]



📊 Test Classification Report for ViT
                              precision    recall  f1-score   support

            non-misogynistic       0.60      0.79      0.68       207
stereotype & objectification       0.70      0.54      0.61       239
           violence or abuse       0.50      0.46      0.48       155

                    accuracy                           0.61       601
                   macro avg       0.60      0.60      0.59       601
                weighted avg       0.61      0.61      0.60       601

🧹 Cleared memory after ViT


preprocessor_config.json:   0%|          | 0.00/255 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


config.json:   0%|          | 0.00/71.8k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/352M [00:00<?, ?B/s]

Some weights of SwinForImageClassification were not initialized from the model checkpoint at microsoft/swin-base-patch4-window7-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([3]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 1024]) in the checkpoint and torch.Size([3, 1024]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



🔄 Training SwinTransformer...


Epoch 1 Training: 100%|██████████| 175/175 [01:58<00:00,  1.47it/s, loss=0.955]
Epoch 1 Validation: 100%|██████████| 38/38 [00:16<00:00,  2.30it/s, val_loss=0.881]


Epoch 1: Val F1 = 0.5639


Epoch 2 Training: 100%|██████████| 175/175 [01:58<00:00,  1.48it/s, loss=0.755]
Epoch 2 Validation: 100%|██████████| 38/38 [00:16<00:00,  2.30it/s, val_loss=0.985]


Epoch 2: Val F1 = 0.4906


Epoch 3 Training: 100%|██████████| 175/175 [01:58<00:00,  1.48it/s, loss=0.507]
Epoch 3 Validation: 100%|██████████| 38/38 [00:16<00:00,  2.31it/s, val_loss=1.01] 


Epoch 3: Val F1 = 0.5952


Epoch 4 Training: 100%|██████████| 175/175 [01:58<00:00,  1.47it/s, loss=0.322]
Epoch 4 Validation: 100%|██████████| 38/38 [00:16<00:00,  2.29it/s, val_loss=1.02] 


Epoch 4: Val F1 = 0.6276


Epoch 5 Training: 100%|██████████| 175/175 [01:58<00:00,  1.48it/s, loss=0.181]
Epoch 5 Validation: 100%|██████████| 38/38 [00:16<00:00,  2.30it/s, val_loss=1.41] 


Epoch 5: Val F1 = 0.6461


Epoch 6 Training: 100%|██████████| 175/175 [01:58<00:00,  1.48it/s, loss=0.146] 
Epoch 6 Validation: 100%|██████████| 38/38 [00:16<00:00,  2.31it/s, val_loss=1.46] 


Epoch 6: Val F1 = 0.6187


Epoch 7 Training: 100%|██████████| 175/175 [01:58<00:00,  1.47it/s, loss=0.0915]
Epoch 7 Validation: 100%|██████████| 38/38 [00:16<00:00,  2.29it/s, val_loss=1.3]  


Epoch 7: Val F1 = 0.6336


Epoch 8 Training: 100%|██████████| 175/175 [01:58<00:00,  1.47it/s, loss=0.0891]
Epoch 8 Validation: 100%|██████████| 38/38 [00:16<00:00,  2.25it/s, val_loss=1.44] 


Epoch 8: Val F1 = 0.6180
Early stopping at epoch 8


Validation Evaluation: 100%|██████████| 38/38 [00:16<00:00,  2.30it/s]



📊 Validation Classification Report for SwinTransformer
                              precision    recall  f1-score   support

            non-misogynistic       0.80      0.64      0.71       207
stereotype & objectification       0.67      0.64      0.66       239
           violence or abuse       0.48      0.64      0.55       154

                    accuracy                           0.64       600
                   macro avg       0.65      0.64      0.64       600
                weighted avg       0.66      0.64      0.65       600



Test Evaluation: 100%|██████████| 38/38 [00:17<00:00,  2.15it/s]


📊 Test Classification Report for SwinTransformer
                              precision    recall  f1-score   support

            non-misogynistic       0.71      0.65      0.68       207
stereotype & objectification       0.67      0.62      0.64       239
           violence or abuse       0.45      0.55      0.49       155

                    accuracy                           0.61       601
                   macro avg       0.61      0.61      0.61       601
                weighted avg       0.63      0.61      0.62       601

🧹 Cleared memory after SwinTransformer

📊 Model Comparison (Test Metrics)
ViT: Accuracy = 0.6057, F1 = 0.6007
SwinTransformer: Accuracy = 0.6123, F1 = 0.6172



