In [None]:
import pandas as pd
import numpy as np

In [None]:
# !pip install --upgrade transformers accelerate

In [None]:
# Read the competition's training table from the Kaggle input directory.
train_df = pd.read_csv('/kaggle/input/visual-taxonomy/train.csv')

In [None]:
# Show the loaded training table as this cell's output.
train_df

In [None]:
# Count how many rows belong to each value of the 'Category' column.
train_df['Category'].value_counts()

In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from transformers import AutoFeatureExtractor, AutoModelForImageClassification
from tqdm import tqdm
from collections import Counter

# Set device: use CUDA when PyTorch reports it as available, otherwise the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the training data
train_df = pd.read_csv('/kaggle/input/visual-taxonomy/train.csv')

# Keep only the rows of the 'Men Tshirts' category and renumber the index from 0
train_df = train_df[train_df['Category'] == 'Men Tshirts'].reset_index(drop=True)

# List of attribute columns; the training cell fits one model per entry
attribute_cols = ['attr_1', 'attr_2', 'attr_3', 'attr_4', 'attr_5']

# Image directory; the dataset classes look up '<id zero-padded to 6 digits>.jpg' inside it
image_dir = '/kaggle/input/visual-taxonomy/train_images'


In [None]:
class TopsAttributeDataset(Dataset):
    """Dataset yielding ``(pixel_values, label_index)`` pairs for one attribute column.

    Args:
        dataframe: Table containing an ``id`` column and the ``attribute`` column.
            It is copied with a fresh 0..n-1 index so positional lookups are valid.
        image_dir: Directory holding the images, named ``<id zero-padded to 6>.jpg``.
        feature_extractor: Callable invoked as
            ``feature_extractor(images=image, return_tensors="pt")``; its result is
            indexed with ``'pixel_values'``.
        attribute: Name of the column whose values are used as class labels.

    Attributes:
        labels: Sorted unique values of the attribute column.
        label_to_idx / idx_to_label: Mappings between label values and class indices.
        num_classes: Number of distinct labels.
    """

    def __init__(self, dataframe, image_dir, feature_extractor, attribute):
        self.data = dataframe.reset_index(drop=True)
        self.image_dir = image_dir
        self.feature_extractor = feature_extractor
        self.attribute = attribute

        # Class indices follow the sorted order of the distinct label values.
        self.labels = sorted(self.data[attribute].unique())
        self.label_to_idx = {label: idx for idx, label in enumerate(self.labels)}
        self.idx_to_label = dict(enumerate(self.labels))
        self.num_classes = len(self.labels)

    def __len__(self):
        """Return the number of rows (samples) in the dataset."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load, preprocess and label the sample at row ``idx``.

        Returns:
            A tuple ``(pixel_values, label_idx)`` where ``pixel_values`` is the
            extractor output with its first dimension squeezed out and
            ``label_idx`` is a ``torch.long`` scalar tensor.
        """
        img_id = self.data.loc[idx, 'id']
        img_path = os.path.join(self.image_dir, f"{str(img_id).zfill(6)}.jpg")

        # Close the file handle deterministically instead of relying on garbage
        # collection; convert() returns a fully loaded copy that outlives the handle.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")

        inputs = self.feature_extractor(images=image, return_tensors="pt")

        label = self.data.loc[idx, self.attribute]
        label_idx = torch.tensor(self.label_to_idx[label], dtype=torch.long)

        return inputs['pixel_values'].squeeze(0), label_idx


In [None]:
class TestTopsDataset(Dataset):
    """Label-free dataset yielding ``(pixel_values, img_id)`` pairs.

    Args:
        dataframe: Table containing an ``id`` column. It is copied with a fresh
            0..n-1 index so positional lookups are valid.
        image_dir: Directory holding the images, named ``<id zero-padded to 6>.jpg``.
        feature_extractor: Callable invoked as
            ``feature_extractor(images=image, return_tensors="pt")``; its result is
            indexed with ``'pixel_values'``.
    """

    def __init__(self, dataframe, image_dir, feature_extractor):
        self.data = dataframe.reset_index(drop=True)
        self.image_dir = image_dir
        self.feature_extractor = feature_extractor

    def __len__(self):
        """Return the number of rows (samples) in the dataset."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load and preprocess the image at row ``idx``.

        Returns:
            A tuple ``(pixel_values, img_id)`` where ``pixel_values`` is the
            extractor output with its first dimension squeezed out and ``img_id``
            is the value of the ``id`` column for that row.
        """
        img_id = self.data.loc[idx, 'id']
        img_path = os.path.join(self.image_dir, f"{str(img_id).zfill(6)}.jpg")

        # Close the file handle deterministically instead of relying on garbage
        # collection; convert() returns a fully loaded copy that outlives the handle.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")

        inputs = self.feature_extractor(images=image, return_tensors="pt")

        return inputs['pixel_values'].squeeze(0), img_id


In [None]:
from transformers import AutoProcessor

# Relative path handed to `from_pretrained`, both here and when the
# classification model is instantiated in the training cell
# (presumably a local checkpoint directory; verify it exists before running).
model_name = './pvt_v2_b2'

# Preprocessor used by the dataset classes to turn PIL images into pixel tensors.
feature_extractor = AutoProcessor.from_pretrained(model_name)

In [None]:
# Report how many CUDA devices PyTorch can see; the training cell wraps the
# model in nn.DataParallel only when this count is greater than one.
print(torch.cuda.device_count())

In [None]:
# Training loop for each attribute: one independent classifier is fine-tuned per
# attribute column, and the checkpoint with the best validation macro-F1 is kept.
for attribute in attribute_cols:
    print(f"\nTraining model for {attribute}")

    # Drop NaNs only for the current attribute so rows missing other
    # attributes still contribute to this model.
    df_attr = train_df[train_df[attribute].notna()].reset_index(drop=True)

    if df_attr.empty:
        print(f"No data available for {attribute}, skipping.")
        continue

    # Create the dataset (it also builds the label <-> index mapping)
    dataset = TopsAttributeDataset(df_attr, image_dir, feature_extractor, attribute)

    # Compute 'balanced' class weights over the encoded labels; they are passed
    # to the loss so that rare classes are not drowned out by frequent ones.
    labels_list = [dataset.label_to_idx[label] for label in dataset.data[attribute]]
    class_weights = compute_class_weight(
        class_weight='balanced',
        classes=np.arange(len(dataset.labels)),
        y=labels_list
    )
    class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)

    # Split into training and validation sets (80/20, stratified by label,
    # fixed seed so the split is reproducible)
    train_indices, val_indices = train_test_split(
        np.arange(len(dataset)),
        test_size=0.2,
        stratify=labels_list,
        random_state=42
    )

    train_dataset = torch.utils.data.Subset(dataset, train_indices)
    val_dataset = torch.utils.data.Subset(dataset, val_indices)

    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=4, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False, num_workers=4, pin_memory=True)

    # Instantiate the model with the correct number of labels; the mismatched
    # classification head of the checkpoint is re-initialised instead of failing.
    num_classes = len(dataset.labels)
    model = AutoModelForImageClassification.from_pretrained(
        model_name,
        num_labels=num_classes,
        ignore_mismatched_sizes=True
    )

    # Use all available GPUs
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)

    model = model.to(device)

    # Unfreeze all layers for fine-tuning
    for param in model.parameters():
        param.requires_grad = True

    # Define optimizer and loss function
    optimizer = optim.AdamW(model.parameters(), lr=1e-4)
    criterion = nn.CrossEntropyLoss(weight=class_weights)

    # Training loop
    num_epochs = 10
    best_f1 = 0.0

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        with tqdm(train_loader, desc=f"Epoch [{epoch+1}/{num_epochs}]",
                  leave=False, dynamic_ncols=True, mininterval=0.3) as pbar:
            for pixel_values, labels_batch in pbar:
                pixel_values = pixel_values.to(device)
                labels_batch = labels_batch.to(device)

                optimizer.zero_grad()
                outputs = model(pixel_values=pixel_values)
                logits = outputs.logits
                loss = criterion(logits, labels_batch)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                pbar.set_postfix({'Loss': f'{loss.item():.4f}'})

        # Mean of the per-batch losses for this epoch
        epoch_loss = running_loss / len(train_loader)
        print(f"Epoch [{epoch+1}/{num_epochs}], Training Loss: {epoch_loss:.4f}")

        # Validation
        model.eval()
        val_loss = 0.0
        all_preds = []
        all_labels = []

        with torch.no_grad():
            for pixel_values, labels_batch in val_loader:
                pixel_values = pixel_values.to(device)
                labels_batch = labels_batch.to(device)
                outputs = model(pixel_values=pixel_values)
                logits = outputs.logits
                loss = criterion(logits, labels_batch)
                val_loss += loss.item()

                # Predicted class = index of the largest logit
                _, predicted = torch.max(logits, 1)
                all_preds.extend(predicted.cpu().numpy())
                all_labels.extend(labels_batch.cpu().numpy())

        val_loss /= len(val_loader)
        f1 = f1_score(all_labels, all_preds, average='macro')
        print(f"Validation Loss: {val_loss:.4f}, F1-Score: {f1:.4f}")

        # Diagnostics: reveal collapsed predictions (e.g. a model that only
        # ever predicts one class) that a single F1 number can hide.
        unique_preds = np.unique(all_preds)
        unique_labels = np.unique(all_labels)
        print(f"Unique Predictions: {unique_preds}")
        print(f"Unique True Labels: {unique_labels}")

        pred_counts = Counter(all_preds)
        label_counts = Counter(all_labels)
        print(f"Prediction Counts: {pred_counts}")
        print(f"True Label Counts: {label_counts}")

        # Save the best model
        if f1 > best_f1:
            best_f1 = f1
            model_save_path = f'pvt_classifier_men_tshirts_{attribute}_best.pth'
            # The model is wrapped in nn.DataParallel only when several GPUs are
            # present, so `.module` exists only in that case. Unwrap conditionally
            # so saving also works on CPU / single-GPU machines, and so the saved
            # keys never carry the 'module.' prefix.
            model_to_save = model.module if isinstance(model, nn.DataParallel) else model
            torch.save(model_to_save.state_dict(), model_save_path)
            print(f"Model saved for {attribute} with Validation F1-Score: {best_f1:.4f}")