In [1]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from transformers import AutoImageProcessor, AutoConfig, ViTModel, SwinModel, SwinConfig
from torchvision import transforms
from tqdm import tqdm
from collections import Counter

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ================================
# Specify Model Names Here
# ================================
model_names = {
    'vit': 'google/vit-base-patch16-224',
    'swin': 'microsoft/swin-tiny-patch4-window7-224',
}
# ================================

# Load the training data
train_df = pd.read_csv('/kaggle/input/visual-taxonomy/train.csv')

# Filter for 'Sarees' category
train_df = train_df[train_df['Category'] == 'Sarees'].reset_index(drop=True)

# List of attribute columns
attribute_cols = ['attr_7', 'attr_8']

# Image directory
image_dir = '/kaggle/input/visual-taxonomy/train_images'

class TopsAttributeDataset(Dataset):
    def __init__(self, dataframe, image_dir, image_processors, attribute):
        self.data = dataframe.reset_index(drop=True)
        self.image_dir = image_dir
        self.image_processors = image_processors  # Dictionary of image processors
        self.attribute = attribute

        # Create label mapping
        self.labels = sorted(self.data[attribute].unique())
        self.label_to_idx = {label: idx for idx, label in enumerate(self.labels)}
        self.idx_to_label = {idx: label for idx, label in enumerate(self.labels)}
        self.num_classes = len(self.labels)

        # Define transforms
        self.transforms = transforms.Compose([
            transforms.Resize((224, 224)),  # Resize to 256x128 (height x width)
            transforms.RandomCrop((224, 224)),  # RandomCrop
            transforms.RandomHorizontalFlip(),  # RandomHorizontalFlip
        ])

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        # Load Image
        img_id = self.data.loc[idx, 'id']
        img_path = os.path.join(self.image_dir, f"{str(img_id).zfill(6)}.jpg")
        image = Image.open(img_path).convert("RGB")

        # Apply transforms
        image = self.transforms(image)

        # Preprocess image for each model
        pixel_values = {}
        for key, processor in self.image_processors.items():
            inputs = processor(images=image, return_tensors="pt", do_resize=False)
            pixel_values[key] = inputs['pixel_values'].squeeze(0)

        # Load Label
        label = self.data.loc[idx, self.attribute]
        label_idx = self.label_to_idx[label]
        label_idx = torch.tensor(label_idx, dtype=torch.long)

        return pixel_values, label_idx

class TestTopsDataset(Dataset):
    def __init__(self, dataframe, image_dir, image_processors):
        self.data = dataframe.reset_index(drop=True)
        self.image_dir = image_dir
        self.image_processors = image_processors

        # Define transforms (no augmentation for test data)
        self.transforms = transforms.Compose([
            transforms.Resize((224,224)),  # Resize to 256x128 (height x width)
        ])

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_id = self.data.loc[idx, 'id']
        img_path = os.path.join(self.image_dir, f"{str(img_id).zfill(6)}.jpg")
        image = Image.open(img_path).convert("RGB")

        # Apply transforms
        image = self.transforms(image)

        # Preprocess image for each model
        pixel_values = {}
        for key, processor in self.image_processors.items():
            inputs = processor(images=image, return_tensors="pt", do_resize=False)
            pixel_values[key] = inputs['pixel_values'].squeeze(0)

        return pixel_values, img_id

# Initialize image processors for each model
image_processors = {
    key: AutoImageProcessor.from_pretrained(model_name)
    for key, model_name in model_names.items()
}

# Modify the processors to not resize images
for processor in image_processors.values():
    processor.do_resize = False

# Define Channel-Aware Attention Module
class ChannelAwareAttention(nn.Module):
    def __init__(self, dim):
        super(ChannelAwareAttention, self).__init__()
        self.conv = nn.Conv2d(dim, dim, kernel_size=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        attention = self.conv(x)
        attention = self.sigmoid(attention)
        return x * attention

# Define the custom model that combines ViT and Swin with cross-fusion
class CustomModel(nn.Module):
    def __init__(self, model_names, num_labels):
        super(CustomModel, self).__init__()
        # Load ViT model
        self.vit = ViTModel.from_pretrained(model_names['vit'])
        # Load Swin model
        self.swin = SwinModel.from_pretrained(model_names['swin'])

        # Get hidden sizes
        self.dim_vit = self.vit.config.hidden_size
        self.dim_swin = self.swin.config.hidden_size

        # Channel-Aware Attention applied to Swin features
        self.channel_attention = ChannelAwareAttention(dim=self.dim_swin)

        # Cross-Fusion Module
        self.cross_attn = nn.MultiheadAttention(embed_dim=self.dim_vit, num_heads=8)

        # Classifier
        self.classifier = nn.Sequential(
            nn.Linear(self.dim_vit, num_labels)
        )

    def forward(self, pixel_values_dict):
        # ViT
        vit_outputs = self.vit(pixel_values=pixel_values_dict['vit'])
        vit_last_hidden_state = vit_outputs.last_hidden_state  # B x N_vit x C_vit

        # Swin
        swin_outputs = self.swin(pixel_values=pixel_values_dict['swin'])
        swin_last_hidden_state = swin_outputs.last_hidden_state  # B x N_swin x C_swin

        # Apply Channel-Aware Attention to Swin features
        B, L_swin, C_swin = swin_last_hidden_state.size()
        H = W = int(L_swin ** 0.5)  # Assuming square feature maps
        swin_features = swin_last_hidden_state.transpose(1, 2).view(B, C_swin, H, W)
        swin_features = self.channel_attention(swin_features)
        swin_features = swin_features.view(B, C_swin, -1).transpose(1, 2)  # B x L_swin x C_swin

        # Cross-Attention: Swin features as key and value, ViT features as query
        vit_features, _ = self.cross_attn(
            query=vit_last_hidden_state.permute(1, 0, 2),  # N_vit x B x C_vit
            key=swin_features.permute(1, 0, 2),            # N_swin x B x C_swin
            value=swin_features.permute(1, 0, 2)          # N_swin x B x C_swin
        )
        vit_features = vit_features.permute(1, 0, 2)  # B x N_vit x C_vit

        # Pooling (mean over sequence length)
        vit_pooled = vit_features.mean(dim=1)  # B x C_vit

        # Classification
        logits = self.classifier(vit_pooled)

        return logits

# Training loop for each attribute
for attribute in attribute_cols:
    print(f"\nTraining model for {attribute}")

    # Drop NaNs only for the current attribute
    df_attr = train_df[train_df[attribute].notna()].reset_index(drop=True)

    if df_attr.empty:
        print(f"No data available for {attribute}, skipping.")
        continue

    # Create the dataset
    dataset = TopsAttributeDataset(df_attr, image_dir, image_processors, attribute)

    # Compute class weights
    labels_list = [dataset.label_to_idx[label] for label in dataset.data[attribute]]
    class_weights = compute_class_weight(
        class_weight='balanced',
        classes=np.arange(len(dataset.labels)),
        y=labels_list
    )
    class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)

    # Split into training and validation sets
    train_indices, val_indices = train_test_split(
        np.arange(len(dataset)),
        test_size=0.2,
        stratify=labels_list,
        random_state=42
    )

    train_dataset = torch.utils.data.Subset(dataset, train_indices)
    val_dataset = torch.utils.data.Subset(dataset, val_indices)

    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=4, pin_memory=True)

    # Instantiate the custom model with the correct number of labels
    num_classes = len(dataset.labels)
    model = CustomModel(model_names=model_names, num_labels=num_classes)

    # Use all available GPUs
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)

    model = model.to(device)

    # Unfreeze all layers for fine-tuning
    for param in model.parameters():
        param.requires_grad = True

    # Define optimizer and loss function
    optimizer = optim.AdamW(model.parameters(), lr=1e-4)
    criterion = nn.CrossEntropyLoss(weight=class_weights)

    # Learning rate scheduler
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2, verbose=True)

    # Training loop
    num_epochs = 10  # Adjust based on your computational resources
    best_f1 = 0.0

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        with tqdm(train_loader, desc=f"Epoch [{epoch+1}/{num_epochs}]",
                  leave=False, dynamic_ncols=True, mininterval=0.3) as pbar:
            for pixel_values_dict, labels_batch in pbar:
                # Move data to device
                pixel_values_dict = {k: v.to(device) for k, v in pixel_values_dict.items()}
                labels_batch = labels_batch.to(device)

                optimizer.zero_grad()
                logits = model(pixel_values_dict)
                loss = criterion(logits, labels_batch)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                pbar.set_postfix({'Loss': f'{loss.item():.4f}'})

        epoch_loss = running_loss / len(train_loader)
        print(f"Epoch [{epoch+1}/{num_epochs}], Training Loss: {epoch_loss:.4f}")

        # Validation
        model.eval()
        val_loss = 0.0
        all_preds = []
        all_labels = []

        with torch.no_grad():
            for pixel_values_dict, labels_batch in val_loader:
                pixel_values_dict = {k: v.to(device) for k, v in pixel_values_dict.items()}
                labels_batch = labels_batch.to(device)
                logits = model(pixel_values_dict)
                loss = criterion(logits, labels_batch)
                val_loss += loss.item()

                _, predicted = torch.max(logits, dim=1)
                all_preds.extend(predicted.cpu().numpy())
                all_labels.extend(labels_batch.cpu().numpy())

        val_loss /= len(val_loader)
        f1 = f1_score(all_labels, all_preds, average='macro')
        print(f"Validation Loss: {val_loss:.4f}, F1-Score: {f1:.4f}")

        unique_preds = np.unique(all_preds)
        unique_labels = np.unique(all_labels)
        print(f"Unique Predictions: {unique_preds}")
        print(f"Unique True Labels: {unique_labels}")

        pred_counts = Counter(all_preds)
        label_counts = Counter(all_labels)
        print(f"Prediction Counts: {pred_counts}")
        print(f"True Label Counts: {label_counts}")

        # Scheduler step
        scheduler.step(val_loss)

        # Save the best model
        if f1 > best_f1:
            best_f1 = f1
            model_save_path = f'custom_model_sarees_{attribute}_best.pth'
            # Save state_dict for multi-GPU compatibility
            if torch.cuda.device_count() > 1:
                torch.save(model.module.state_dict(), model_save_path)
            else:
                torch.save(model.state_dict(), model_save_path)
            print(f"Model saved for {attribute} with Validation F1-Score: {best_f1:.4f}")

# Load the test data
test_df = pd.read_csv('/kaggle/input/visual-taxonomy/test.csv')
test_df = test_df[test_df['Category'] == 'Sarees'].reset_index(drop=True)
test_image_dir = '/kaggle/input/visual-taxonomy/test_images'

# Create the test dataset and loader
test_dataset = TestTopsDataset(test_df, test_image_dir, image_processors)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=4)

# Initialize submission DataFrame
submission_df = pd.DataFrame({'id': test_df['id']})
submission_df['Category'] = 'Sarees'
submission_df['len'] = 10

# Prepare dictionaries to hold models and label mappings
models = {}
label_mappings = {}

# Load all models and label mappings
for attribute in attribute_cols:
    model_path = f'custom_model_sarees_{attribute}_best.pth'
    if os.path.exists(model_path):
        print(f"Loading model for {attribute}")
        df_attr = train_df[train_df[attribute].notna()].reset_index(drop=True)
        labels = sorted(df_attr[attribute].unique())
        num_classes = len(labels)
        idx_to_label = {idx: label for idx, label in enumerate(labels)}
        label_mappings[attribute] = idx_to_label

        # Load the custom model with the correct number of labels
        model = CustomModel(model_names=model_names, num_labels=num_classes)
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
            model.module.load_state_dict(torch.load(model_path))
        else:
            model.load_state_dict(torch.load(model_path))
        model = model.to(device)
        model.eval()
        models[attribute] = model
    else:
        print(f"No trained model for {attribute}, filling with 'dummy_value'")
        submission_df[attribute] = 'dummy_value'

# Initialize predictions dictionary
attribute_preds = {attr: [] for attr in models.keys()}

# Initialize single progress bar
total_steps = len(test_loader)
pbar = tqdm(total=total_steps, desc='Generating predictions', ncols=100)

# Make predictions for all attributes in a single loop
with torch.no_grad():
    for pixel_values_dict, img_ids in test_loader:
        pixel_values_dict = {k: v.to(device) for k, v in pixel_values_dict.items()}
        for attribute, model in models.items():
            logits = model(pixel_values_dict)
            _, predicted = torch.max(logits, dim=1)
            idx_to_label = label_mappings[attribute]
            predicted_labels = [idx_to_label[idx.item()] for idx in predicted]
            attribute_preds[attribute].extend(predicted_labels)
        pbar.update(1)

pbar.close()

# Assign predictions to submission DataFrame
for attribute, preds in attribute_preds.items():
    submission_df[attribute] = preds

# Fill any missing attributes with 'dummy_value'
for attr in attribute_cols:
    if attr not in submission_df.columns:
        submission_df[attr] = 'dummy_value'

# Reorder columns as per submission format
submission_df = submission_df[['id', 'Category', 'len'] + attribute_cols]

# Save to CSV
submission_df.to_csv('submission_sarees_custom_model.csv', index=False)
print("\nSubmission file 'submission_sarees_custom_model.csv' created successfully.")

# Display the submission DataFrame
submission_df.head()


preprocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/69.7k [00:00<?, ?B/s]

Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


preprocessor_config.json:   0%|          | 0.00/255 [00:00<?, ?B/s]


Training model for attr_7


model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

Some weights of ViTModel were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['vit.pooler.dense.bias', 'vit.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


config.json:   0%|          | 0.00/71.8k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/113M [00:00<?, ?B/s]

  self.pid = os.fork()
  self.pid = os.fork()
                                                                            

Epoch [1/10], Training Loss: 1.0699




Validation Loss: 1.0144, F1-Score: 0.4915
Unique Predictions: [0 1 2 3]
Unique True Labels: [0 1 2 3]
Prediction Counts: Counter({2: 605, 0: 480, 3: 455, 1: 240})
True Label Counts: Counter({2: 745, 3: 536, 1: 422, 0: 77})
Model saved for attr_7 with Validation F1-Score: 0.4915


  self.pid = os.fork()
  self.pid = os.fork()
                                                                            

Epoch [2/10], Training Loss: 0.9807




Validation Loss: 0.9742, F1-Score: 0.5476
Unique Predictions: [0 1 2 3]
Unique True Labels: [0 1 2 3]
Prediction Counts: Counter({2: 605, 3: 527, 1: 351, 0: 297})
True Label Counts: Counter({2: 745, 3: 536, 1: 422, 0: 77})
Model saved for attr_7 with Validation F1-Score: 0.5476


  self.pid = os.fork()
  self.pid = os.fork()
                                                                            

Epoch [3/10], Training Loss: 0.9304




Validation Loss: 0.9523, F1-Score: 0.5305
Unique Predictions: [0 1 2 3]
Unique True Labels: [0 1 2 3]
Prediction Counts: Counter({2: 563, 3: 485, 1: 403, 0: 329})
True Label Counts: Counter({2: 745, 3: 536, 1: 422, 0: 77})


  self.pid = os.fork()
  self.pid = os.fork()
                                                                            

Epoch [4/10], Training Loss: 0.9018




Validation Loss: 0.9946, F1-Score: 0.5499
Unique Predictions: [0 1 2 3]
Unique True Labels: [0 1 2 3]
Prediction Counts: Counter({2: 610, 3: 554, 1: 348, 0: 268})
True Label Counts: Counter({2: 745, 3: 536, 1: 422, 0: 77})
Model saved for attr_7 with Validation F1-Score: 0.5499


  self.pid = os.fork()
  self.pid = os.fork()
                                                                            

Epoch [5/10], Training Loss: 0.8819




Validation Loss: 1.0322, F1-Score: 0.5424
Unique Predictions: [0 1 2 3]
Unique True Labels: [0 1 2 3]
Prediction Counts: Counter({3: 692, 2: 540, 1: 367, 0: 181})
True Label Counts: Counter({2: 745, 3: 536, 1: 422, 0: 77})


  self.pid = os.fork()
  self.pid = os.fork()
                                                                            

Epoch [6/10], Training Loss: 0.8585




Validation Loss: 0.9806, F1-Score: 0.5449
Unique Predictions: [0 1 2 3]
Unique True Labels: [0 1 2 3]
Prediction Counts: Counter({3: 706, 2: 538, 1: 372, 0: 164})
True Label Counts: Counter({2: 745, 3: 536, 1: 422, 0: 77})


  self.pid = os.fork()
  self.pid = os.fork()
                                                                            

Epoch [7/10], Training Loss: 0.8133




Validation Loss: 0.9828, F1-Score: 0.5534
Unique Predictions: [0 1 2 3]
Unique True Labels: [0 1 2 3]
Prediction Counts: Counter({2: 611, 3: 561, 1: 374, 0: 234})
True Label Counts: Counter({2: 745, 3: 536, 1: 422, 0: 77})
Model saved for attr_7 with Validation F1-Score: 0.5534


  self.pid = os.fork()
  self.pid = os.fork()
                                                                            

Epoch [8/10], Training Loss: 0.8063




Validation Loss: 0.9883, F1-Score: 0.5616
Unique Predictions: [0 1 2 3]
Unique True Labels: [0 1 2 3]
Prediction Counts: Counter({2: 613, 3: 572, 1: 366, 0: 229})
True Label Counts: Counter({2: 745, 3: 536, 1: 422, 0: 77})
Model saved for attr_7 with Validation F1-Score: 0.5616


  self.pid = os.fork()
  self.pid = os.fork()
                                                                            

Epoch [9/10], Training Loss: 0.7982




Validation Loss: 0.9955, F1-Score: 0.5625
Unique Predictions: [0 1 2 3]
Unique True Labels: [0 1 2 3]
Prediction Counts: Counter({2: 614, 3: 524, 1: 417, 0: 225})
True Label Counts: Counter({2: 745, 3: 536, 1: 422, 0: 77})
Model saved for attr_7 with Validation F1-Score: 0.5625


  self.pid = os.fork()
  self.pid = os.fork()
                                                                             

Epoch [10/10], Training Loss: 0.7946




Validation Loss: 0.9921, F1-Score: 0.5597
Unique Predictions: [0 1 2 3]
Unique True Labels: [0 1 2 3]
Prediction Counts: Counter({2: 614, 3: 520, 1: 420, 0: 226})
True Label Counts: Counter({2: 745, 3: 536, 1: 422, 0: 77})

Training model for attr_8


Some weights of ViTModel were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['vit.pooler.dense.bias', 'vit.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  self.pid = os.fork()
  self.pid = os.fork()
                                                                            

Epoch [1/10], Training Loss: 0.9894




Validation Loss: 1.0047, F1-Score: 0.2863
Unique Predictions: [0 1 2 3 4]
Unique True Labels: [0 1 2 3 4]
Prediction Counts: Counter({4: 1828, 0: 765, 3: 439, 1: 255, 2: 6})
True Label Counts: Counter({4: 2707, 2: 221, 3: 161, 0: 158, 1: 46})
Model saved for attr_8 with Validation F1-Score: 0.2863


  self.pid = os.fork()
  self.pid = os.fork()
                                                                            

Epoch [2/10], Training Loss: 0.8643




Validation Loss: 0.7887, F1-Score: 0.5086
Unique Predictions: [0 1 2 3 4]
Unique True Labels: [0 1 2 3 4]
Prediction Counts: Counter({4: 1824, 0: 815, 2: 405, 3: 139, 1: 110})
True Label Counts: Counter({4: 2707, 2: 221, 3: 161, 0: 158, 1: 46})
Model saved for attr_8 with Validation F1-Score: 0.5086


  self.pid = os.fork()
  self.pid = os.fork()
                                                                            

Epoch [3/10], Training Loss: 0.8219




Validation Loss: 0.8091, F1-Score: 0.4871
Unique Predictions: [0 1 2 3 4]
Unique True Labels: [0 1 2 3 4]
Prediction Counts: Counter({4: 1815, 0: 731, 2: 408, 3: 201, 1: 138})
True Label Counts: Counter({4: 2707, 2: 221, 3: 161, 0: 158, 1: 46})


  self.pid = os.fork()
  self.pid = os.fork()
                                                                            

Epoch [4/10], Training Loss: 0.7893




Validation Loss: 0.8019, F1-Score: 0.5168
Unique Predictions: [0 1 2 3 4]
Unique True Labels: [0 1 2 3 4]
Prediction Counts: Counter({4: 1894, 0: 735, 2: 415, 1: 130, 3: 119})
True Label Counts: Counter({4: 2707, 2: 221, 3: 161, 0: 158, 1: 46})
Model saved for attr_8 with Validation F1-Score: 0.5168


  self.pid = os.fork()
  self.pid = os.fork()
                                                                            

Epoch [5/10], Training Loss: 0.7585




Validation Loss: 0.7786, F1-Score: 0.5148
Unique Predictions: [0 1 2 3 4]
Unique True Labels: [0 1 2 3 4]
Prediction Counts: Counter({4: 1830, 0: 810, 2: 408, 3: 129, 1: 116})
True Label Counts: Counter({4: 2707, 2: 221, 3: 161, 0: 158, 1: 46})


  self.pid = os.fork()
  self.pid = os.fork()
                                                                            

Epoch [6/10], Training Loss: 0.7660




Validation Loss: 0.7910, F1-Score: 0.5189
Unique Predictions: [0 1 2 3 4]
Unique True Labels: [0 1 2 3 4]
Prediction Counts: Counter({4: 1860, 0: 768, 2: 411, 1: 136, 3: 118})
True Label Counts: Counter({4: 2707, 2: 221, 3: 161, 0: 158, 1: 46})
Model saved for attr_8 with Validation F1-Score: 0.5189


  self.pid = os.fork()
  self.pid = os.fork()
                                                                            

Epoch [7/10], Training Loss: 0.7324




Validation Loss: 0.8696, F1-Score: 0.5163
Unique Predictions: [0 1 2 3 4]
Unique True Labels: [0 1 2 3 4]
Prediction Counts: Counter({4: 1874, 0: 770, 2: 416, 3: 123, 1: 110})
True Label Counts: Counter({4: 2707, 2: 221, 3: 161, 0: 158, 1: 46})


  self.pid = os.fork()
  self.pid = os.fork()
                                                                            

Epoch [8/10], Training Loss: 0.7159




Validation Loss: 0.8603, F1-Score: 0.5230
Unique Predictions: [0 1 2 3 4]
Unique True Labels: [0 1 2 3 4]
Prediction Counts: Counter({4: 2095, 0: 551, 2: 410, 3: 121, 1: 116})
True Label Counts: Counter({4: 2707, 2: 221, 3: 161, 0: 158, 1: 46})
Model saved for attr_8 with Validation F1-Score: 0.5230


  self.pid = os.fork()
  self.pid = os.fork()
                                                                            

Epoch [9/10], Training Loss: 0.6899




Validation Loss: 0.7907, F1-Score: 0.5173
Unique Predictions: [0 1 2 3 4]
Unique True Labels: [0 1 2 3 4]
Prediction Counts: Counter({4: 1880, 0: 743, 2: 413, 1: 136, 3: 121})
True Label Counts: Counter({4: 2707, 2: 221, 3: 161, 0: 158, 1: 46})


  self.pid = os.fork()
  self.pid = os.fork()
                                                                             

Epoch [10/10], Training Loss: 0.6593




Validation Loss: 0.8247, F1-Score: 0.5142
Unique Predictions: [0 1 2 3 4]
Unique True Labels: [0 1 2 3 4]
Prediction Counts: Counter({4: 1875, 0: 749, 2: 410, 1: 137, 3: 122})
True Label Counts: Counter({4: 2707, 2: 221, 3: 161, 0: 158, 1: 46})
Loading model for attr_7


Some weights of ViTModel were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['vit.pooler.dense.bias', 'vit.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  model.load_state_dict(torch.load(model_path))


Loading model for attr_8


Some weights of ViTModel were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['vit.pooler.dense.bias', 'vit.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  self.pid = os.fork()
  self.pid = os.fork()
Generating predictions: 100%|█████████████████████████████████████| 111/111 [02:02<00:00,  1.10s/it]


Submission file 'submission_sarees_custom_model.csv' created successfully.





Unnamed: 0,id,Category,len,attr_7,attr_8
0,3787,Sarees,10,same as saree,default
1,3788,Sarees,10,zari woven,default
2,3789,Sarees,10,default,solid
3,3790,Sarees,10,same as saree,zari woven
4,3791,Sarees,10,zari woven,default


In [2]:
# # Load the test data
# test_df = pd.read_csv('/kaggle/input/visual-taxonomy/test.csv')
# test_df = test_df[test_df['Category'] == 'Sarees'].reset_index(drop=True)
# test_image_dir = '/kaggle/input/visual-taxonomy/test_images'

# # Create the test dataset and loader
# test_dataset = TestTopsDataset(test_df, test_image_dir, image_processors)
# test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=4)

# # Initialize submission DataFrame
# submission_df = pd.DataFrame({'id': test_df['id']})
# submission_df['Category'] = 'Women Tops & Tunics'
# submission_df['len'] = 10

# # Prepare dictionaries to hold models and label mappings
# models = {}
# label_mappings = {}

# # Load all models and label mappings
# for attribute in attribute_cols:
#     model_path = f'custom_model_sarees_{attribute}_best.pth'
#     if os.path.exists(model_path):
#         print(f"Loading model for {attribute}")
#         df_attr = train_df[train_df[attribute].notna()].reset_index(drop=True)
#         labels = sorted(df_attr[attribute].unique())
#         num_classes = len(labels)
#         idx_to_label = {idx: label for idx, label in enumerate(labels)}
#         label_mappings[attribute] = idx_to_label

#         # Load the custom model with the correct number of labels
#         model = CustomModel(model_names=model_names, num_labels=num_classes)
#         if torch.cuda.device_count() > 1:
#             model = nn.DataParallel(model)
#             model.module.load_state_dict(torch.load(model_path))
#         else:
#             model.load_state_dict(torch.load(model_path))
#         model = model.to(device)
#         model.eval()
#         models[attribute] = model
#     else:
#         print(f"No trained model for {attribute}, filling with 'dummy_value'")
#         submission_df[attribute] = 'dummy_value'

# # Initialize predictions dictionary
# attribute_preds = {attr: [] for attr in models.keys()}

# # Initialize single progress bar
# total_steps = len(test_loader)
# pbar = tqdm(total=total_steps, desc='Generating predictions', ncols=100)

# with torch.no_grad():
#     for pixel_values_dict, img_ids in test_loader:
#         pixel_values_dict = {k: v.to(device) for k, v in pixel_values_dict.items()}
#         for attribute, model in models.items():
#             logits = model(pixel_values_dict)
#             _, predicted = torch.max(logits, dim=1)
#             idx_to_label = label_mappings[attribute]
#             predicted_labels = [idx_to_label[idx.item()] for idx in predicted]
#             attribute_preds[attribute].extend(predicted_labels)
#         pbar.update(1)

# pbar.close()

# # Assign predictions to submission DataFrame
# for attribute, preds in attribute_preds.items():
#     submission_df[attribute] = preds

# # Fill any missing attributes with 'dummy_value'
# for attr in attribute_cols:
#     if attr not in submission_df.columns:
#         submission_df[attr] = 'dummy_value'

# # Reorder columns as per submission format
# submission_df = submission_df[['id', 'Category', 'len'] + attribute_cols]

# # Save to CSV
# submission_df.to_csv('submission_tops_custom_model.csv', index=False)
# print("\nSubmission file 'submission_tops_custom_model.csv' created successfully.")

In [3]:
submission_df

Unnamed: 0,id,Category,len,attr_7,attr_8
0,3787,Sarees,10,same as saree,default
1,3788,Sarees,10,zari woven,default
2,3789,Sarees,10,default,solid
3,3790,Sarees,10,same as saree,zari woven
4,3791,Sarees,10,zari woven,default
...,...,...,...,...,...
7097,11150,Sarees,10,zari woven,default
7098,11151,Sarees,10,same as saree,default
7099,11152,Sarees,10,zari woven,zari woven
7100,11153,Sarees,10,same as saree,zari woven
