In [2]:
import os
import pandas as pd
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import torchvision.models as models
import torch.nn as nn
import torch.optim as optim


In [None]:
import pandas as pd
import os


# Path of the styles CSV file in the Kaggle dataset.
csv_path = '/kaggle/input/fashion-product-images-dataset/fashion-dataset/styles.csv'
# Path of the images folder in the Kaggle dataset.
image_dir = '/kaggle/input/fashion-product-images-dataset/fashion-dataset/images'

# Malformed CSV rows are skipped instead of aborting the whole load.
df = pd.read_csv(csv_path, on_bad_lines='skip', engine='python')

# Remove stray whitespace around the column names.
df.columns = df.columns.str.strip()

# Keep only rows whose image exists on disk as "<id>.jpg", which is the exact
# file name FashionDataset builds. Checking the extension (instead of taking
# everything before the first dot) avoids keeping rows that would later fail
# to open because the file has a different extension.
available_images = {
    stem
    for stem, ext in map(os.path.splitext, os.listdir(image_dir))
    if ext == '.jpg'
}
df = df[df['id'].astype(str).isin(available_images)].reset_index(drop=True)

# encode labels
from sklearn.preprocessing import LabelEncoder
label_encoders = {}
for col in ['baseColour', 'articleType', 'season', 'gender']:
    le = LabelEncoder()
    # Missing values get an explicit 'Unknown' class: feeding NaN mixed with
    # strings to LabelEncoder either raises or produces a NaN class that is
    # awkward to decode, depending on the scikit-learn version.
    df[col + '_enc'] = le.fit_transform(df[col].fillna('Unknown').astype(str))
    label_encoders[col] = le

from sklearn.model_selection import train_test_split
# Fixed random_state keeps the 80/20 split reproducible across runs.
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, shuffle=True)
train_df = train_df.reset_index(drop=True)
val_df   = val_df.reset_index(drop=True)

print("Train size:", len(train_df))
print("Validation size:", len(val_df))


Train size: 35535
Validation size: 8884


# Dataset Creation
I created a `FashionDataset` that returns each image together with a dictionary of its four labels (season, base colour, gender, and article type). Data augmentation is applied to the training images only; validation images are just resized. Both data loaders use a batch size of 128.

In [4]:
class FashionDataset(Dataset):
    """Image dataset yielding ``(image, labels)`` pairs for multi-label training.

    Args:
        dataframe: Table with an ``id`` column and the encoded label columns
            ``baseColour_enc``, ``articleType_enc``, ``season_enc`` and
            ``gender_enc``.
        image_dir: Directory containing one ``<id>.jpg`` file per row.
        transform: Optional callable applied to the RGB PIL image.
    """

    # Maps the label name exposed to callers to its encoded dataframe column.
    LABEL_COLUMNS = {
        'baseColour': 'baseColour_enc',
        'articleType': 'articleType_enc',
        'season': 'season_enc',
        'gender': 'gender_enc',
    }

    def __init__(self, dataframe, image_dir, transform=None):
        self.df = dataframe
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the dataframe."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image for row ``idx`` and return it with its label dict.

        Returns:
            tuple: ``(image, labels)`` where ``image`` is the (optionally
            transformed) RGB image and ``labels`` maps ``'baseColour'``,
            ``'articleType'``, ``'season'`` and ``'gender'`` to integer ids.
        """
        row = self.df.iloc[idx]
        img_path = os.path.join(self.image_dir, str(row['id']) + '.jpg')

        # The context manager closes the underlying file as soon as the pixel
        # data has been copied by convert(); otherwise the handle stays open
        # until garbage collection.
        with Image.open(img_path) as source:
            image = source.convert('RGB')

        if self.transform:
            image = self.transform(image)

        # Plain Python ints collate to int64 tensors regardless of the dtype
        # pandas/numpy happened to use for the encoded columns.
        labels = {
            name: int(row[column])
            for name, column in self.LABEL_COLUMNS.items()
        }
        return image, labels


In [10]:
# Per-channel statistics used by torchvision's ImageNet-pretrained weights.
# The ResNet50 backbone is initialised from those weights, so inputs are
# normalized the same way for both training and validation/inference.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Training pipeline: resize, random augmentations, then tensor + normalization.
train_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

# Validation / inference pipeline: deterministic resize only (no randomness).
transform_val = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

train_dataset = FashionDataset(train_df, image_dir, train_transforms)
val_dataset = FashionDataset(val_df, image_dir, transform_val)

# Only the training loader shuffles; validation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False, num_workers=4)


# Model Definition

#### For this I used ResNet50 as the base model (with its final pooling and classification layers removed). After feature extraction, I apply average pooling, followed by a feed-forward layer with a ReLU activation, batch normalization to normalize the batch values, and dropout to prevent overfitting.
#### The output of the feed-forward layer (512) is passed to 4 separate output layers (feed-forward), each of which predicts one of the 4 labels.

#### I used cross-entropy to calculate the loss of each of the 4 output layers, and the optimizer is Adam.

In [None]:
import torch
import torch.nn as nn
import torchvision.models as models

class MultiOutputModel(nn.Module):
    """ResNet50 feature extractor with a shared layer and four classifier heads.

    Args:
        num_colors: Number of output classes for the ``baseColour`` head.
        num_types: Number of output classes for the ``articleType`` head.
        num_seasons: Number of output classes for the ``season`` head.
        num_genders: Number of output classes for the ``gender`` head.
    """

    def __init__(self, num_colors, num_types, num_seasons, num_genders):
        super().__init__()

        # Pretrained ResNet50 with its last two child modules dropped, so the
        # backbone ends at the final convolutional stage (conv5_x).
        resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)
        feature_layers = list(resnet.children())[:-2]
        self.backbone = nn.Sequential(*feature_layers)

        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.flatten = nn.Flatten()

        # Shared representation consumed by every head.
        hidden_dim = 512
        self.shared_fc = nn.Sequential(
            nn.Linear(2048, hidden_dim),
            nn.ReLU(),
            nn.BatchNorm1d(hidden_dim),
            nn.Dropout(0.3),
        )

        # One linear classifier per predicted attribute.
        self.color_head = nn.Linear(hidden_dim, num_colors)
        self.type_head = nn.Linear(hidden_dim, num_types)
        self.season_head = nn.Linear(hidden_dim, num_seasons)
        self.gender_head = nn.Linear(hidden_dim, num_genders)

    def forward(self, x):
        """Return a dict of logits keyed by label name for the image batch ``x``."""
        feature_map = self.backbone(x)             # [B, 2048, H/32, W/32]
        pooled = self.pool(feature_map)            # [B, 2048, 1, 1]
        flat = self.flatten(pooled)                # [B, 2048]
        shared = self.shared_fc(flat)              # [B, 512]

        logits = {}
        logits['baseColour'] = self.color_head(shared)
        logits['articleType'] = self.type_head(shared)
        logits['season'] = self.season_head(shared)
        logits['gender'] = self.gender_head(shared)
        return logits


In [7]:
# Size each head from the number of classes its label encoder learned.
class_counts = {name: len(encoder.classes_) for name, encoder in label_encoders.items()}

model = MultiOutputModel(
    num_colors=class_counts['baseColour'],
    num_types=class_counts['articleType'],
    num_seasons=class_counts['season'],
    num_genders=class_counts['gender'],
)
model = model.cuda()


Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 211MB/s]


In [8]:


# A single cross-entropy criterion is reused for all four classification heads.
criterion = nn.CrossEntropyLoss()

# Adam over every model parameter (backbone included).
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)


# Model Training 
#### used tqdm for showing the progress of each epoch

#### implemented early stopping based on validation loss

#### saved the model after each epoch and also the best model (based on validation loss)

In [None]:
from tqdm import tqdm
import time
import torch
import os

def _multi_task_loss(criterion, outputs, labels):
    """Sum ``criterion`` over the four prediction heads."""
    return (
        criterion(outputs['baseColour'], labels['baseColour']) +
        criterion(outputs['articleType'], labels['articleType']) +
        criterion(outputs['season'], labels['season']) +
        criterion(outputs['gender'], labels['gender'])
    )


def _run_epoch(model, loader, criterion, device, desc, optimizer=None):
    """Run one pass over ``loader`` and return the per-sample average loss.

    When ``optimizer`` is given the model is put in train mode and updated;
    otherwise it is put in eval mode and gradients are disabled.
    """
    training = optimizer is not None
    model.train(training)

    total_loss = 0.0
    total_samples = 0
    progress = tqdm(loader, desc=desc, leave=False)

    with torch.set_grad_enabled(training):
        for images, labels in progress:
            images = images.to(device)
            labels = {k: v.to(device) for k, v in labels.items()}

            if training:
                optimizer.zero_grad()

            outputs = model(images)
            loss = _multi_task_loss(criterion, outputs, labels)

            if training:
                loss.backward()
                optimizer.step()

            # Weight each batch by its size so a smaller final batch does not
            # count as much as a full one. This assumes the criterion returns
            # a per-batch mean (the default reduction of nn.CrossEntropyLoss).
            batch_loss = loss.item()
            batch_size = images.size(0)
            total_loss += batch_loss * batch_size
            total_samples += batch_size

            if training:
                progress.set_postfix(loss=batch_loss)

    if total_samples == 0:
        raise ValueError(f"{desc} loader produced no samples")
    return total_loss / total_samples


def train_model(model, train_loader, val_loader, criterion, optimizer, epochs, patience=6, model_path="best_model.pth", restore_best=True):
    """Train ``model`` with early stopping on the validation loss.

    A checkpoint is written after every epoch (``model_epoch_<n>.pth``) and the
    weights with the lowest validation loss are written to ``model_path``.

    Args:
        model: Network returning a dict with ``baseColour``, ``articleType``,
            ``season`` and ``gender`` logits.
        train_loader: Iterable of ``(images, labels_dict)`` training batches.
        val_loader: Iterable of ``(images, labels_dict)`` validation batches.
        criterion: Loss applied to each head; the four losses are summed.
        optimizer: Optimizer updating ``model``'s parameters.
        epochs: Maximum number of epochs.
        patience: Number of consecutive epochs without validation improvement
            tolerated before stopping.
        model_path: File receiving the best weights.
        restore_best: If true, reload the best weights into ``model`` before
            returning, so callers do not keep using the weights of the last
            (non-improving) epoch.

    Raises:
        ValueError: If a loader yields no samples.
    """
    # Fall back to CPU instead of crashing when no GPU is available.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    best_val_loss = float('inf')
    patience_counter = 0
    best_saved = False

    for epoch in range(epochs):
        print(f"\nEpoch {epoch+1}/{epochs}")

        start_time = time.time()
        avg_train_loss = _run_epoch(model, train_loader, criterion, device, "Training", optimizer=optimizer)
        print(f"Training Loss: {avg_train_loss:.4f} | Time: {(time.time() - start_time):.2f}s")

        avg_val_loss = _run_epoch(model, val_loader, criterion, device, "Validation")
        print(f"Validation Loss: {avg_val_loss:.4f}")

        # Per-epoch checkpoint, kept in addition to the best-model file.
        torch.save(model.state_dict(), f"model_epoch_{epoch+1}.pth")

        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            patience_counter = 0
            torch.save(model.state_dict(), model_path)  # Save best model
            best_saved = True
            print(" Best model updated and saved.")
        else:
            patience_counter += 1
            print(f"Early stopping patience: {patience_counter}/{patience}")
            if patience_counter >= patience:
                print(" Early stopping triggered.")
                break

    # Without this, the in-memory model holds the weights from the last epoch,
    # which after early stopping is by definition not the best one.
    if restore_best and best_saved:
        model.load_state_dict(torch.load(model_path, map_location=device))
        model.eval()


In [None]:
# Train for at most 40 epochs; the remaining options keep their defaults.
train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=40,
)



# Testing

In [None]:
img_path4="/kaggle/input/testing/41zOk4RLI-L.AC_SX250.jpg"

# Inference must run in eval mode: with a single-image batch, BatchNorm1d
# cannot compute batch statistics in train mode and Dropout would randomise
# the prediction. Do not rely on whatever mode an earlier cell left behind.
model.eval()

# Run on whichever device the model currently lives on.
device = next(model.parameters()).device

# Close the image file as soon as it has been decoded.
with Image.open(img_path4) as raw_image:
    image = transform_val(raw_image.convert('RGB')).unsqueeze(0).to(device)

with torch.no_grad():
    outputs = model(image)

# Class-name lookup tables, indexed by encoded label id.
baseColour_decoder = label_encoders['baseColour'].classes_
articleType_decoder = label_encoders['articleType'].classes_
season_decoder = label_encoders['season'].classes_
gender_decoder = label_encoders['gender'].classes_

# Highest-scoring class id for each head.
baseColour_pred = torch.argmax(outputs['baseColour'], dim=1).item()
articleType_pred = torch.argmax(outputs['articleType'], dim=1).item()
season_pred = torch.argmax(outputs['season'], dim=1).item()
gender_pred = torch.argmax(outputs['gender'], dim=1).item()

print("Predicted Base Colour:", baseColour_decoder[baseColour_pred])
print("Predicted Article Type:", articleType_decoder[articleType_pred])
print("Predicted Season:", season_decoder[season_pred])
print("Predicted Gender:", gender_decoder[gender_pred])


Predicted Base Colour: Black
Predicted Article Type: Tshirts
Predicted Season: Fall
Predicted Gender: Men
