In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import os
from PIL import Image
from sklearn.metrics import accuracy_score, f1_score
import numpy as np
import torch.nn.functional as F

In [2]:
import warnings

# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation notices raised by
# the libraries used below; consider narrowing it while debugging.
warnings.filterwarnings("ignore")

In [3]:
# Load the visual-taxonomy train/test CSV tables from the Kaggle input directory.
# NOTE(review): df_test is not referenced again in the visible cells.
df_train = pd.read_csv('/kaggle/input/visual-taxonomy/train.csv')
df_test = pd.read_csv('/kaggle/input/visual-taxonomy/test.csv')

In [5]:
# Keep at most 1000 rows of the 'Women Tops & Tunics' category.
# The category filter is applied here, before truncating, so that this cell
# gives the same subset whether it runs before or after the separate category
# filter cell (that filter is idempotent). Truncating the unfiltered frame
# first would keep only the first 1000 rows of the whole table instead.
df_train = df_train[df_train['Category'] == 'Women Tops & Tunics'][:1000]

<h1>For Women Tops & Tunics</h1>

In [4]:
# Keep only the rows whose Category is 'Women Tops & Tunics'.
df_train = df_train[df_train['Category'] == 'Women Tops & Tunics']

In [6]:
# Display only: drop() returns a new frame that is shown as the cell output.
# The result is not assigned, so df_train itself still has the 'Category' column.
df_train.drop(columns=['Category'])

Unnamed: 0,id,len,attr_1,attr_2,attr_3,attr_4,attr_5,attr_6,attr_7,attr_8,attr_9,attr_10
51209,51375,10,black,regular,,,,,,,regular sleeves,
51210,51376,10,navy blue,fitted,crop,high,casual,default,solid,short sleeves,default,knitted
51211,51377,10,red,regular,regular,round neck,casual,printed,typography,sleeveless,sleeveless,
51212,51378,10,default,fitted,crop,stylised,casual,solid,solid,short sleeves,regular sleeves,default
51213,51379,10,default,boxy,regular,round neck,casual,printed,typography,short sleeves,default,
...,...,...,...,...,...,...,...,...,...,...,...,...
52204,52370,10,red,default,crop,round neck,,printed,,sleeveless,sleeveless,
52205,52371,10,white,regular,regular,round neck,casual,printed,graphic,short sleeves,regular sleeves,tie-ups
52206,52372,10,,,,square neck,,,,short sleeves,,
52207,52373,10,peach,fitted,regular,round neck,casual,solid,solid,three-quarter sleeves,,knitted


In [7]:
class LabelEncoderDict:
    """Collection of per-column ``LabelEncoder`` objects keyed by column name.

    Missing values are represented by the literal string ``'NaN'``, which is
    always added to the label set of every fitted column.
    """

    def __init__(self):
        # column name -> fitted LabelEncoder
        self.encoders = {}

    def fit(self, df, columns):
        """Fit one encoder per column on its non-null unique values plus 'NaN'.

        Args:
            df: DataFrame holding the raw attribute values.
            columns: Iterable of column names to fit.
        """
        for name in columns:
            observed = df[name].dropna().unique().tolist()
            # The placeholder label is always appended, even when the column
            # has no missing values.
            self.encoders[name] = LabelEncoder().fit(observed + ['NaN'])

    def transform(self, df, columns):
        """Encode the given columns with the previously fitted encoders.

        Args:
            df: DataFrame holding the raw attribute values.
            columns: Column names to encode; each must have been fitted.

        Returns:
            A float array of shape ``(len(df), len(columns))`` whose entries
            are the encoded class indices.
        """
        result = np.zeros((len(df), len(columns)))
        for position, name in enumerate(columns):
            # Missing values become the 'NaN' placeholder label before encoding.
            filled = df[name].fillna('NaN')
            result[:, position] = self.encoders[name].transform(filled)
        return result

    def get_num_classes(self, column):
        """Return how many classes the encoder of ``column`` knows (incl. 'NaN')."""
        encoder = self.encoders[column]
        return len(encoder.classes_)


class MultiLabelImageDataset(Dataset):
    """Dataset yielding ``(image, labels)`` pairs for multi-attribute classification.

    Images are looked up as ``<image_dir>/<id zero-padded to 6 chars>.jpg``.
    """

    def __init__(self, df, image_dir, transform=None, attr_columns=None):
        """
        Args:
            df: DataFrame containing an 'id' column and the attribute columns
                (attribute values must be castable to int)
            image_dir: Directory containing the images
            transform: torchvision transforms applied to each loaded image
            attr_columns: List of column names for attributes; defaults to
                'attr_1' .. 'attr_10'
        """
        self.df = df
        self.image_dir = image_dir
        self.transform = transform
        self.attr_columns = attr_columns if attr_columns else [f'attr_{i}' for i in range(1, 11)]

    def __len__(self):
        return len(self.df)

    def _image_path(self, idx):
        """Return the image file path for the row at position ``idx``."""
        # Read the id from its own column rather than from the row Series:
        # when every column is numeric, ``df.iloc[idx]`` upcasts the row to
        # float and the id would be rendered as e.g. '51375.0'.
        image_id = self.df['id'].iloc[idx]
        if isinstance(image_id, float) and image_id.is_integer():
            # The id column itself is float (e.g. it once held NaNs).
            image_id = int(image_id)
        img_name = str(image_id).zfill(6)
        return os.path.join(self.image_dir, f"{img_name}.jpg")

    def __getitem__(self, idx):
        """Load the image and integer label vector for the row at position ``idx``.

        Returns:
            Tuple ``(image, labels)`` where ``labels`` is a ``torch.long``
            tensor with one entry per attribute column.
        """
        img_path = self._image_path(idx)

        # Load image; the context manager releases the file handle right away.
        try:
            with Image.open(img_path) as handle:
                image = handle.convert('RGB')
        except Exception as e:
            # Deliberate best effort: report the problem and fall back to a
            # blank image so that a single bad file does not abort an epoch.
            print(f"Error loading image {img_path}: {e}")
            image = Image.new('RGB', (224, 224))  # Blank image on error

        if self.transform:
            image = self.transform(image)

        # Ensure labels are integers and convert to tensor
        labels = torch.tensor(self.df.iloc[idx][self.attr_columns].astype(int).values, dtype=torch.long)

        return image, labels


def prepare_data(df, image_dir, batch_size=32, test_size=0.2):
    """Encode the attribute labels, split the frame and build both data loaders.

    Args:
        df: DataFrame with an 'id' column and columns 'attr_1' .. 'attr_10'.
        image_dir: Directory holding the image files.
        batch_size: Batch size used by both loaders.
        test_size: Fraction of rows passed to ``train_test_split`` for validation.

    Returns:
        Tuple ``(train_loader, val_loader, label_encoders, num_classes_per_attr)``.
    """
    attr_columns = ['attr_{}'.format(n) for n in range(1, 11)]

    # Fit one encoder per attribute, then replace the raw values by their codes
    # on a copy so that the caller's frame is left untouched.
    label_encoders = LabelEncoderDict()
    label_encoders.fit(df, attr_columns)
    codes = label_encoders.transform(df, attr_columns)

    df_encoded = df.copy()
    for position, name in enumerate(attr_columns):
        df_encoded[name] = codes[:, position]

    # Fixed seed keeps the train/validation split reproducible.
    train_df, val_df = train_test_split(df_encoded, test_size=test_size, random_state=42)

    # Both splits share the same preprocessing (no augmentation).
    # NOTE(review): the mean/std values are presumably the usual ImageNet
    # statistics; confirm against the backbone's expected preprocessing.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    transform = transforms.Compose([
        transforms.Resize((256, 512)),
        transforms.ToTensor(),
        normalize,
    ])

    def make_dataset(frame):
        return MultiLabelImageDataset(
            frame,
            image_dir,
            transform=transform,
            attr_columns=attr_columns,
        )

    # Only the training loader shuffles.
    train_loader = DataLoader(make_dataset(train_df), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(make_dataset(val_df), batch_size=batch_size)

    # Output size of each classifier head.
    num_classes_per_attr = list(map(label_encoders.get_num_classes, attr_columns))

    return train_loader, val_loader, label_encoders, num_classes_per_attr

class MultiLabelClassifier(nn.Module):
    """ResNet-50 backbone followed by one small MLP head per attribute.

    Note: a class with this same name is defined again in a later cell of this
    notebook; once that cell has run, the name refers to the later definition.
    """

    def __init__(self, num_classes_per_attr, pretrained=True):
        """
        Args:
            num_classes_per_attr: Number of output classes for each attribute head.
            pretrained: Forwarded to ``models.resnet50``.
        """
        super().__init__()

        resnet = models.resnet50(pretrained=pretrained)
        num_features = resnet.fc.in_features
        # Keep every child module of the ResNet except the last one.
        self.backbone = nn.Sequential(*list(resnet.children())[:-1])

        # One independent classifier head per attribute.
        heads = []
        for num_classes in num_classes_per_attr:
            heads.append(
                nn.Sequential(
                    nn.Linear(num_features, 512),
                    nn.ReLU(),
                    nn.Dropout(0.3),
                    nn.Linear(512, num_classes),
                )
            )
        self.classifier_heads = nn.ModuleList(heads)

    def forward(self, x):
        """Return a list with one logits tensor per attribute head."""
        pooled = self.backbone(x)
        flat = pooled.view(pooled.size(0), -1)
        logits_per_attr = []
        for head in self.classifier_heads:
            logits_per_attr.append(head(flat))
        return logits_per_attr

class MultiLabelCELoss(nn.Module):
    """Mean of the per-attribute cross-entropy losses."""

    def __init__(self):
        super().__init__()
        self.criterion = nn.CrossEntropyLoss()

    def forward(self, outputs, targets):
        """Average the cross-entropy over all attribute heads.

        Args:
            outputs: List of logits tensors, one per attribute.
            targets: Tensor of shape (batch_size, num_labels); column ``k``
                holds the class indices for ``outputs[k]``.

        Returns:
            Scalar tensor: sum of the per-head losses divided by the number of heads.
        """
        per_attribute = [
            self.criterion(logits, targets[:, k])
            for k, logits in enumerate(outputs)
        ]
        return sum(per_attribute) / len(outputs)

In [8]:
class MultiLabelClassifier(nn.Module):
    """EfficientNet-B2 backbone with a shared trunk and one gated head per attribute.

    For every attribute a small gate (``Linear(512, 1)`` + sigmoid) produces a
    single scalar per sample that rescales the shared 512-d features before
    they are passed to that attribute's classifier head.
    """

    @staticmethod
    def _dense_block(in_features, out_features, dropout):
        """Return the layer list Linear -> ReLU -> BatchNorm1d -> Dropout."""
        return [
            nn.Linear(in_features, out_features),
            nn.ReLU(),
            nn.BatchNorm1d(out_features),
            nn.Dropout(dropout),
        ]

    def __init__(self, num_classes_per_attr, pretrained=True):
        """
        Args:
            num_classes_per_attr: Number of output classes for each attribute head.
            pretrained: Forwarded to ``models.efficientnet_b2``.
        """
        super().__init__()

        efficientnet = models.efficientnet_b2(pretrained=pretrained)
        num_features = efficientnet.classifier[1].in_features
        # Keep every child module except the last one.
        # NOTE(review): presumably that drops only the stock classifier and
        # keeps the conv features plus the built-in pooling; confirm against
        # the installed torchvision.
        self.backbone = nn.Sequential(*list(efficientnet.children())[:-1])

        # Pool + flatten, then normalise and project to 1024 features.
        self.feature_extractor = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.BatchNorm1d(num_features),
            nn.Dropout(0.5),
            *self._dense_block(num_features, 1024, 0.4),
        )

        # Trunk shared by all attribute heads.
        self.shared_features = nn.Sequential(*self._dense_block(1024, 512, 0.3))

        # One classifier head per attribute.
        heads = []
        for num_classes in num_classes_per_attr:
            heads.append(
                nn.Sequential(
                    *self._dense_block(512, 256, 0.2),
                    nn.Linear(256, num_classes),
                )
            )
        self.classifier_heads = nn.ModuleList(heads)

        # One scalar sigmoid gate per attribute.
        gates = [
            nn.Sequential(nn.Linear(512, 1), nn.Sigmoid())
            for _ in num_classes_per_attr
        ]
        self.attention_modules = nn.ModuleList(gates)

    def forward(self, x):
        """Return a list with one logits tensor per attribute head."""
        shared = self.shared_features(self.feature_extractor(self.backbone(x)))

        # For each attribute: scale the shared features by its gate, then classify.
        return [
            head(shared * gate(shared))
            for gate, head in zip(self.attention_modules, self.classifier_heads)
        ]

class WeightedMultiLabelLoss(nn.Module):
    """Per-attribute combination of class-weighted CE, a focal-style term and a
    uniform-target smoothing term, averaged over the attributes."""

    def __init__(self, num_classes_per_attr, device):
        """
        Args:
            num_classes_per_attr: Number of classes of each attribute head.
            device: Device on which the class-weight tensors are stored.
        """
        super(WeightedMultiLabelLoss, self).__init__()
        self.num_classes_per_attr = list(num_classes_per_attr)
        self.num_attributes = len(self.num_classes_per_attr)
        # No weighting until update_weights() has been called.
        self.class_weights = [None] * self.num_attributes
        self.device = device

    def update_weights(self, dataset):
        """Update class weights based on class distribution.

        Args:
            dataset: Iterable of ``(image, labels)`` items where ``labels``
                holds one class index per attribute.

        The dataset is walked exactly once (the labels of all attributes are
        collected in the same pass). Every weight vector has one entry per
        class of its head, and classes absent from ``dataset`` are counted as
        one occurrence so that the weights stay finite. An empty dataset
        leaves the current weights unchanged.
        """
        label_rows = [torch.as_tensor(item[1]).reshape(-1) for item in dataset]
        if not label_rows:
            return
        all_labels = torch.stack(label_rows).long().cpu()

        for i, num_classes in enumerate(self.num_classes_per_attr):
            # minlength keeps the vector aligned with the head's output size
            # even when the highest class index never occurs in this split.
            class_counts = torch.bincount(all_labels[:, i], minlength=num_classes)
            # clamp avoids 1/0 = inf (and NaN after normalisation).
            weights = 1.0 / class_counts.clamp(min=1).float()
            weights = weights / weights.sum()
            self.class_weights[i] = weights.to(self.device)

    def forward(self, outputs, targets):
        """Compute the combined loss.

        Args:
            outputs: List of logits tensors, one per attribute.
            targets: Tensor whose column ``i`` holds the class indices for
                ``outputs[i]``.

        Returns:
            Scalar tensor: sum of the per-attribute combined losses divided by
            ``self.num_attributes``.
        """
        loss = 0

        for i, (output, weights) in enumerate(zip(outputs, self.class_weights)):
            # Cross entropy with class weights
            ce_loss = F.cross_entropy(output, targets[:, i], weight=weights)

            # Focal-style modulation. Note that it is applied to the
            # batch-level loss value, not to each sample individually.
            pt = torch.exp(-ce_loss)
            focal_loss = (1 - pt) ** 2 * ce_loss

            # Smoothing term: cross entropy against a uniform target distribution
            smooth_loss = -torch.mean(torch.log_softmax(output, dim=1).mean(dim=1))

            # Combine losses
            combined_loss = 0.8 * focal_loss + 0.1 * ce_loss + 0.1 * smooth_loss
            loss += combined_loss

        return loss / self.num_attributes

def _count_correct(outputs, labels):
    """Return, for each attribute head, the number of correct predictions in the batch."""
    return [
        (logits.detach().argmax(dim=1) == labels[:, i]).sum().item()
        for i, logits in enumerate(outputs)
    ]


def train_model(model, train_loader, val_loader, num_epochs, num_classes_per_attr,
                patience=5, checkpoint_path='best_model.pth'):
    """Train ``model`` with early stopping and save the best weights.

    Args:
        model: Network returning a list of logits tensors (one per attribute).
        train_loader: Loader yielding ``(images, labels)`` training batches.
        val_loader: Loader yielding ``(images, labels)`` validation batches.
        num_epochs: Maximum number of epochs (also sizes the one-cycle schedule).
        num_classes_per_attr: Number of classes of each attribute head.
        patience: Number of consecutive epochs without validation-loss
            improvement after which training stops.
        checkpoint_path: File the best ``state_dict`` is written to.

    Returns:
        List with one dict per completed epoch holding the mean train/val loss
        and the overall train/val accuracy (in percent).

    Raises:
        ValueError: If either loader yields no batches.
    """
    if len(train_loader) == 0 or len(val_loader) == 0:
        raise ValueError("train_loader and val_loader must each contain at least one batch")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    num_attributes = len(num_classes_per_attr)

    # Initialize loss and optimizer
    criterion = WeightedMultiLabelLoss(num_classes_per_attr, device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-2)
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer,
        max_lr=1e-3,
        epochs=num_epochs,
        steps_per_epoch=len(train_loader)
    )

    # Update class weights based on training data
    criterion.update_weights(train_loader.dataset)

    # Mixed precision only makes sense on CUDA. The scaler protects fp16
    # gradients from underflow; both objects are no-ops when disabled.
    use_amp = device.type == "cuda"
    scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

    # Early stopping state
    best_val_loss = float('inf')
    patience_counter = 0
    history = []

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss = 0.0
        correct_predictions = [0] * num_attributes
        total_predictions = 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            with torch.cuda.amp.autocast(enabled=use_amp):
                outputs = model(images)
                loss = criterion(outputs, labels)

            scaler.scale(loss).backward()

            # Gradients must be unscaled before clipping so that max_norm
            # applies to their true magnitude.
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

            scaler.step(optimizer)
            scaler.update()
            # The one-cycle schedule advances once per batch.
            scheduler.step()

            train_loss += loss.item()

            # Calculate accuracy for each attribute
            for i, hits in enumerate(_count_correct(outputs, labels)):
                correct_predictions[i] += hits
            total_predictions += labels.size(0)

        # Validation phase
        model.eval()
        val_loss = 0.0
        val_correct_predictions = [0] * num_attributes
        val_total_predictions = 0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                val_loss += criterion(outputs, labels).item()

                for i, hits in enumerate(_count_correct(outputs, labels)):
                    val_correct_predictions[i] += hits
                val_total_predictions += labels.size(0)

        mean_train_loss = train_loss / len(train_loader)
        mean_val_loss = val_loss / len(val_loader)

        # Print metrics
        print(f'Epoch {epoch+1}/{num_epochs}')
        print(f'Training Loss: {mean_train_loss:.4f}')
        print(f'Validation Loss: {mean_val_loss:.4f}')

        for i in range(num_attributes):
            train_acc = 100 * correct_predictions[i] / total_predictions
            val_acc = 100 * val_correct_predictions[i] / val_total_predictions
            print(f'Attribute {i+1} - Train Acc: {train_acc:.2f}%, Val Acc: {val_acc:.2f}%')

        # Overall accuracy: correct predictions over all (sample, attribute) pairs
        overall_train_acc = 100 * sum(correct_predictions) / (total_predictions * num_attributes)
        overall_val_acc = 100 * sum(val_correct_predictions) / (val_total_predictions * num_attributes)

        print(f'Overall Train Accuracy: {overall_train_acc:.2f}%')
        print(f'Overall Validation Accuracy: {overall_val_acc:.2f}%')

        history.append({
            'epoch': epoch + 1,
            'train_loss': mean_train_loss,
            'val_loss': mean_val_loss,
            'train_acc': overall_train_acc,
            'val_acc': overall_val_acc,
        })

        # Early stopping check
        if mean_val_loss < best_val_loss:
            best_val_loss = mean_val_loss
            patience_counter = 0
            # Save best model
            torch.save(model.state_dict(), checkpoint_path)
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping triggered")
                break

    return history

In [9]:
def main(df, image_dir='/kaggle/input/visual-taxonomy/train_images', batch_size=64, num_epochs=50):
    """Run the whole pipeline: prepare the data, build the model and train it.

    Args:
        df: DataFrame with the 'id' column and the raw attribute columns.
        image_dir: Directory holding the training images.
        batch_size: Batch size passed to ``prepare_data``.
        num_epochs: Maximum number of epochs passed to ``train_model``.

    Side effects:
        Writes 'label_encoders.pkl' to the working directory and lets
        ``train_model`` write its checkpoint file.
    """
    import pickle

    # Prepare data
    train_loader, val_loader, label_encoders, num_classes_per_attr = prepare_data(
        df,
        image_dir,
        batch_size=batch_size
    )

    # Save label encoders for future use. They are written before training so
    # that an interrupted run still leaves encoders matching any saved checkpoint.
    with open('label_encoders.pkl', 'wb') as f:
        pickle.dump(label_encoders, f)

    # Initialize the model
    model = MultiLabelClassifier(num_classes_per_attr)

    # Train the model. The loss, optimizer and device are all set up inside
    # train_model, so none are created here.
    train_model(
        model,
        train_loader,
        val_loader,
        num_epochs=num_epochs,
        num_classes_per_attr=num_classes_per_attr
    )

In [10]:
# Run data preparation and training on the current df_train subset.
main(df_train)

Downloading: "https://download.pytorch.org/models/efficientnet_b2_rwightman-c35c1473.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b2_rwightman-c35c1473.pth
100%|██████████| 35.2M/35.2M [00:01<00:00, 36.4MB/s]


Epoch 1/50
Training Loss: 1.4367
Validation Loss: 1.2593
Attribute 1 - Train Acc: 7.00%, Val Acc: 6.00%
Attribute 2 - Train Acc: 23.25%, Val Acc: 9.00%
Attribute 3 - Train Acc: 33.12%, Val Acc: 31.50%
Attribute 4 - Train Acc: 11.50%, Val Acc: 3.50%
Attribute 5 - Train Acc: 32.00%, Val Acc: 69.50%
Attribute 6 - Train Acc: 25.75%, Val Acc: 25.50%
Attribute 7 - Train Acc: 15.50%, Val Acc: 8.50%
Attribute 8 - Train Acc: 22.50%, Val Acc: 42.50%
Attribute 9 - Train Acc: 20.38%, Val Acc: 15.50%
Attribute 10 - Train Acc: 13.62%, Val Acc: 5.50%
Overall Train Accuracy: 20.46%
Overall Validation Accuracy: 21.70%
Epoch 2/50
Training Loss: 1.4054
Validation Loss: 1.2598
Attribute 1 - Train Acc: 7.00%, Val Acc: 10.00%
Attribute 2 - Train Acc: 22.50%, Val Acc: 11.00%
Attribute 3 - Train Acc: 35.50%, Val Acc: 33.50%
Attribute 4 - Train Acc: 11.88%, Val Acc: 3.50%
Attribute 5 - Train Acc: 37.25%, Val Acc: 54.50%
Attribute 6 - Train Acc: 28.62%, Val Acc: 31.00%
Attribute 7 - Train Acc: 16.62%, Val Acc: 