In [28]:
import os
import cv2
import tqdm
import numpy as np
from typing import List
from PIL import Image
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import transforms, models
from sklearn.preprocessing import LabelEncoder, MultiLabelBinarizer
from sklearn.metrics import classification_report, accuracy_score
from warnings import filterwarnings
filterwarnings('ignore')

In [2]:
# import torch
# if torch.backends.mps.is_available():
#     mps_device = torch.device("mps")
#     x = torch.ones(1, device=mps_device)
#     print (x)
# else:
#     print ("MPS device not found.")

tensor([1.], device='mps:0')


## CNN

In [38]:
# Global image directory path.
# The default is the original author's local checkout; set the MEESHO_IMG_DIR
# environment variable to point at your own copy of the training images
# instead of editing this cell.
IMG_DIR = os.environ.get(
    "MEESHO_IMG_DIR",
    "/Users/susanketsarkar/Desktop/Code/Meesho/data/train_images",
)

class CenterCropWithMargin:
    """Callable transform that trims a fixed fraction off every side of an image.

    The image passed to ``__call__`` must expose ``size`` as ``(width, height)``
    and a ``crop((left, upper, right, lower))`` method (the PIL image interface).

    Args:
        margin_percent: Fraction of the width/height removed from *each* side.
            Must satisfy ``0 <= margin_percent < 0.5``; at 0.5 or above the two
            margins meet or cross and nothing would be left of the image.

    Raises:
        ValueError: If ``margin_percent`` is outside ``[0, 0.5)``.
    """

    def __init__(self, margin_percent=0.1):
        # Fail at construction time rather than with an empty/inverted crop box
        # somewhere inside a DataLoader worker.
        if not 0 <= margin_percent < 0.5:
            raise ValueError(
                f"margin_percent must be in [0, 0.5), got {margin_percent!r}"
            )
        self.margin_percent = margin_percent

    def __call__(self, img):
        """Return ``img`` cropped by the configured margin on all four sides."""
        width, height = img.size
        # int() truncates, so small images may lose slightly less than the
        # nominal fraction.
        margin_w = int(width * self.margin_percent)
        margin_h = int(height * self.margin_percent)

        return img.crop((margin_w, margin_h, width - margin_w, height - margin_h))
    
class CustomImageDataset(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs from a DataFrame.

    Each row of ``dataframe`` must provide an ``image_path`` column (file read
    with OpenCV) and a ``label`` column (returned unchanged).

    Args:
        dataframe: Frame with one row per sample.
        transform: Optional callable applied to the RGB image array.
    """

    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        # Positional lookup done once; both the path and the label come from it.
        row = self.dataframe.iloc[idx]
        img_path = row['image_path']

        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals a missing/corrupt file by returning None; without
            # this check the failure surfaces as an opaque cvtColor assertion.
            raise FileNotFoundError(f"Could not read image for row {idx}: {img_path}")
        # OpenCV decodes to BGR; the rest of the pipeline works on RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            image = self.transform(image)

        return image, row['label']

def load_data(csv_path, attr_to_predict, img_dir=None):
    """Read the attribute CSV and attach an image path to every usable row.

    Keeps only the ``id``, ``Category`` and ``attr_to_predict`` columns, drops
    rows where the target attribute is missing (printing how many were
    dropped), and adds an ``image_path`` column of the form
    ``<img_dir>/<id zero-padded to 6 digits>.jpg``.

    Args:
        csv_path: Path of the CSV file to read.
        attr_to_predict: Name of the target attribute column.
        img_dir: Directory containing the images. Defaults to the module-level
            ``IMG_DIR`` when omitted.

    Returns:
        A new DataFrame; the original row index of the CSV is preserved.
    """
    if img_dir is None:
        img_dir = IMG_DIR

    # Load data and extract relevant columns
    selected = pd.read_csv(csv_path)[['id', 'Category', attr_to_predict]]

    # Drop rows with missing values in the target attribute. Done without
    # inplace=True and followed by copy() so the column assignment below never
    # writes into a slice of the frame returned by read_csv.
    df = selected.dropna(subset=[attr_to_predict]).copy()
    print("Number of nan objects: ", len(selected) - len(df))

    # Create image paths
    df['image_path'] = df['id'].apply(
        lambda x: os.path.join(img_dir, f"{str(x).zfill(6)}.jpg")
    )

    return df

def preprocess_data(df, attr_to_predict):
    """Integer-encode the target attribute into a new ``label`` column.

    A fresh ``LabelEncoder`` is fitted on ``df[attr_to_predict]`` and its output
    is written to ``df['label']``. The frame passed in is modified in place and
    is also the frame that is returned.

    Returns:
        ``(df, encoder)`` - the same frame plus the fitted encoder.
    """
    encoder = LabelEncoder()
    target_values = df[attr_to_predict]
    df['label'] = encoder.fit_transform(target_values)

    return df, encoder

def build_cnn_model(num_classes, input_size=64):
    """Build the two-block convolutional classifier.

    Architecture: two ``Conv(3x3) -> ReLU -> BatchNorm -> MaxPool(2)`` stages
    (32 then 64 channels) followed by ``Flatten -> Linear(128) -> ReLU ->
    Dropout(0.2) -> Linear(num_classes)``. The layer order is unchanged, so
    ``state_dict`` keys stay the same.

    Args:
        num_classes: Number of output logits.
        input_size: Spatial size of the input images, either a single int for
            square images or a ``(height, width)`` pair. Defaults to 64, the
            size the classifier head was originally hard-coded for.

    Returns:
        An ``nn.Sequential`` expecting input of shape ``(batch, 3, H, W)``.

    Raises:
        ValueError: If the input is too small to survive the two poolings.
    """
    if isinstance(input_size, int):
        height = width = input_size
    else:
        height, width = input_size

    # The 3x3/pad-1 convolutions preserve spatial size; each 2x2 max-pool floors
    # it in half, so after both pools the feature map is (H // 4, W // 4).
    feat_h, feat_w = height // 4, width // 4
    if feat_h < 1 or feat_w < 1:
        raise ValueError(
            f"input_size {input_size!r} is too small; need at least 4x4 pixels"
        )

    model = nn.Sequential(
        nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.BatchNorm2d(32),
        nn.MaxPool2d(kernel_size=2, stride=2),

        nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.BatchNorm2d(64),
        nn.MaxPool2d(kernel_size=2, stride=2),

        nn.Flatten(),
        nn.Linear(64 * feat_h * feat_w, 128),
        nn.ReLU(),
        nn.Dropout(0.2),
        nn.Linear(128, num_classes)
    )
    return model

def train_cnn_on_attribute(csv_path, attr_to_predict, epochs=10, batch_size=4, seed=None):
    """Train the small CNN on one attribute and print test-split metrics.

    Steps: load the CSV via ``load_data``, label-encode the target via
    ``preprocess_data``, split 80/20 at random, train with Adam
    (lr=0.001) and cross-entropy, then print a classification report and the
    accuracy for the held-out 20%.

    Args:
        csv_path: Path of the attribute CSV.
        attr_to_predict: Name of the target attribute column.
        epochs: Number of passes over the training split.
        batch_size: Mini-batch size for both loaders.
        seed: Optional integer. When given, ``torch.manual_seed(seed)`` is
            called first so the random split, shuffling and weight
            initialisation are repeatable (accelerator kernels may still be
            non-deterministic).

    Returns:
        None. Results are printed; the trained model is not returned or saved.
    """
    if seed is not None:
        torch.manual_seed(seed)

    # Load and prepare data
    df = load_data(csv_path, attr_to_predict)
    df, le = preprocess_data(df, attr_to_predict)
    # Class names as strings: classification_report formats them as text.
    class_names = [str(name) for name in le.classes_]

    transform = transforms.Compose([
        transforms.ToPILImage(),
        CenterCropWithMargin(margin_percent=0.1),
        # 64x64 is the input size build_cnn_model's classifier head is sized for.
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
    ])

    dataset = CustomImageDataset(dataframe=df, transform=transform)

    # Split the dataset
    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size
    train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Build CNN model
    print("Building the model...")
    model = build_cnn_model(num_classes=len(class_names))

    # Move model to the Apple-silicon GPU backend if available, else stay on CPU
    device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
    model.to(device)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Train the model
    print(f"Training the model on {train_size} data points...")
    model.train()
    for epoch in range(epochs):
        print(f"Epoch {epoch} running...")
        for images, labels in tqdm.tqdm(train_loader):
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

    # Evaluate the model
    print("Evaluating the model...")
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            all_preds.append(preds.cpu().numpy())
            all_labels.append(labels.cpu().numpy())

    all_preds = np.concatenate(all_preds)
    all_labels = np.concatenate(all_labels)

    # Print metrics
    print("Classification Report:")
    # Pass the full label set explicitly: a rare class can be absent from the
    # random test split, and without `labels` the report would then have fewer
    # rows than `target_names` and either raise or attach names to wrong rows.
    print(classification_report(
        all_labels,
        all_preds,
        labels=np.arange(len(class_names)),
        target_names=class_names,
    ))
    print(f"Accuracy: {accuracy_score(all_labels, all_preds) * 100:.2f}%")

In [39]:
# Train the single-attribute CNN on the 'color' column of the Women Tops & Tunics CSV.
csv_path = "../data/cat_wise_csv/Women_Tops_&_Tunics_data.csv"
attr_to_predict = 'color'
train_cnn_on_attribute(
    csv_path=csv_path,
    attr_to_predict=attr_to_predict,
    epochs=10,
    batch_size=32,
)

Number of nan objects:  4946
Building the model...
Training the model on 11246 data points...
Epoch 0 running...


100%|██████████| 352/352 [00:45<00:00,  7.79it/s]


Epoch 1 running...


100%|██████████| 352/352 [00:47<00:00,  7.35it/s]


Epoch 2 running...


100%|██████████| 352/352 [01:02<00:00,  5.61it/s]


Epoch 3 running...


100%|██████████| 352/352 [00:41<00:00,  8.47it/s]


Epoch 4 running...


100%|██████████| 352/352 [00:49<00:00,  7.18it/s]


Epoch 5 running...


100%|██████████| 352/352 [00:45<00:00,  7.70it/s]


Epoch 6 running...


100%|██████████| 352/352 [00:48<00:00,  7.19it/s]


Epoch 7 running...


100%|██████████| 352/352 [00:39<00:00,  8.96it/s]


Epoch 8 running...


100%|██████████| 352/352 [00:46<00:00,  7.60it/s]


Epoch 9 running...


100%|██████████| 352/352 [00:50<00:00,  6.99it/s]


Evaluating the model...
Classification Report:
              precision    recall  f1-score   support

       black       0.87      0.89      0.88       538
        blue       0.67      0.77      0.72       201
     default       0.66      0.65      0.66       397
       green       0.75      0.79      0.77       144
      maroon       0.84      0.76      0.80       154
  multicolor       0.46      0.14      0.22        84
   navy blue       0.72      0.71      0.72        87
       peach       0.62      0.31      0.41        74
        pink       0.77      0.82      0.79       327
         red       0.77      0.91      0.84       140
       white       0.86      0.86      0.86       482
      yellow       0.74      0.82      0.78       184

    accuracy                           0.78      2812
   macro avg       0.73      0.70      0.70      2812
weighted avg       0.77      0.78      0.77      2812

Accuracy: 77.60%


## One-hot encoded CNN

In [35]:
# Image directory used to build the image_path column. The default is the
# original author's local checkout; set the MEESHO_IMG_DIR environment variable
# to point at your own copy of the training images instead of editing this cell.
IMG_DIR = os.environ.get(
    "MEESHO_IMG_DIR",
    "/Users/susanketsarkar/Desktop/Code/Meesho/data/train_images",
)

# Custom dataset to handle the images and attributes
class CustomImageDataset(Dataset):
    """Map-style dataset yielding ``(image, target_row)`` for multi-attribute training.

    The target matrix is built once at construction time and ``__getitem__``
    returns only the row belonging to the requested sample. (Previously the
    binarizer was refitted on the whole frame for every sample and the full
    N x K matrix was returned with each image.)

    Args:
        dataframe: One row per sample; must contain an ``image_path`` column.
            Every column not listed in ``NON_ATTRIBUTE_COLUMNS`` is treated as
            an attribute.
        transform: Optional callable applied to the RGB image array.
        labels: Optional precomputed target matrix with one row per dataframe
            row. When omitted, a ``MultiLabelBinarizer`` is fitted once over the
            attribute values of the whole frame.

    Attributes set on the instance:
        attributes: list of attribute column names.
        classes_: binarizer classes (``None`` when ``labels`` was supplied).
        labels: float32 tensor of targets, one row per sample.

    Raises:
        ValueError: If ``labels`` does not have one row per dataframe row.
    """

    # Columns that describe the sample rather than an attribute to predict.
    # Selecting by name avoids positional slicing, which breaks as soon as a
    # column such as 'len' is dropped or 'image_path' is appended.
    NON_ATTRIBUTE_COLUMNS = ('id', 'Category', 'len', 'image_path')

    def __init__(self, dataframe, transform=None, labels=None):
        self.dataframe = dataframe
        self.transform = transform
        self.attributes = [
            col for col in dataframe.columns if col not in self.NON_ATTRIBUTE_COLUMNS
        ]

        if labels is None:
            # Fit once here instead of on every __getitem__ call.
            mlb = MultiLabelBinarizer()
            labels = mlb.fit_transform(dataframe[self.attributes].values)
            self.classes_ = mlb.classes_
        else:
            self.classes_ = None

        # float32 is the dtype BCE-style losses expect for their targets.
        self.labels = torch.as_tensor(np.asarray(labels), dtype=torch.float32)
        if len(self.labels) != len(dataframe):
            raise ValueError(
                f"labels has {len(self.labels)} rows but dataframe has {len(dataframe)}"
            )

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        img_path = self.dataframe.iloc[idx]['image_path']
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread returns None for a missing or undecodable file.
            raise FileNotFoundError(f"Could not read image for row {idx}: {img_path}")
        # OpenCV decodes to BGR; convert to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            image = self.transform(image)

        return image, self.labels[idx]

def load_data(csv_path, attrs_to_predict:List=None, img_dir=None):
    """Read the multi-attribute CSV and attach an image path to every row.

    The helper ``len`` column is removed when present, and an ``image_path``
    column of the form ``<img_dir>/<id zero-padded to 6 digits>.jpg`` is added.
    Rows with missing attribute values are kept.

    Args:
        csv_path: Path of the CSV file to read.
        attrs_to_predict: Accepted for backward compatibility; currently not
            used to select columns.
        img_dir: Directory containing the images. Defaults to the module-level
            ``IMG_DIR`` when omitted.

    Returns:
        A new DataFrame with all remaining CSV columns plus ``image_path``.
    """
    if img_dir is None:
        img_dir = IMG_DIR

    # errors="ignore": a CSV without the 'len' column is fine, nothing to drop.
    df = pd.read_csv(csv_path).drop(columns=["len"], errors="ignore")

    # Create image paths
    df['image_path'] = df['id'].apply(
        lambda x: os.path.join(img_dir, f"{str(x).zfill(6)}.jpg")
    )

    return df

# Function to build the ResNet model
def build_resnet_model(num_attributes, max_classes, pretrained=True):
    """Build a ResNet-50 whose head emits ``num_attributes * max_classes`` logits.

    Args:
        num_attributes: Number of attributes predicted jointly (n).
        max_classes: Number of logit slots reserved per attribute (m).
        pretrained: Load ImageNet weights (default) or start from random
            initialisation.

    Returns:
        The torchvision ResNet-50 module with ``fc`` replaced by
        ``nn.Linear(in_features, num_attributes * max_classes)``.
    """
    if hasattr(models, "ResNet50_Weights"):
        # torchvision >= 0.13: `pretrained=` is deprecated in favour of `weights=`.
        # IMAGENET1K_V1 is the checkpoint `pretrained=True` used to load.
        weights = models.ResNet50_Weights.IMAGENET1K_V1 if pretrained else None
        resnet = models.resnet50(weights=weights)
    else:
        # Older torchvision without the weights enum.
        resnet = models.resnet50(pretrained=pretrained)

    num_ftrs = resnet.fc.in_features

    # Change the final layer to output n x m (n = num_attributes, m = max_classes)
    resnet.fc = nn.Linear(num_ftrs, num_attributes * max_classes)

    return resnet

# Function to prepare data and train the model
# Columns that describe a sample rather than an attribute to predict.
_METADATA_COLUMNS = ('id', 'Category', 'len', 'image_path')


def _encode_attribute_targets(df, attributes):
    """Build per-attribute one-hot targets padded to a common width.

    Returns ``(targets, class_names, max_classes)`` where ``targets`` is a
    float32 array of shape ``(len(df), len(attributes) * max_classes)``,
    ``class_names[i]`` lists the categories of ``attributes[i]`` and slot
    ``i * max_classes + k`` is 1 when the row's value is ``class_names[i][k]``.
    Rows with a missing value get all zeros for that attribute.
    """
    categoricals = [pd.Categorical(df[attr]) for attr in attributes]
    class_names = [list(cat.categories) for cat in categoricals]
    max_classes = max((len(names) for names in class_names), default=0)

    targets = np.zeros((len(df), len(attributes), max_classes), dtype=np.float32)
    for col, cat in enumerate(categoricals):
        codes = np.asarray(cat.codes)
        rows = np.flatnonzero(codes >= 0)  # code -1 marks a missing value
        targets[rows, col, codes[rows]] = 1.0

    return targets.reshape(len(df), -1), class_names, max_classes


class _AttributeImageDataset(Dataset):
    """Pairs each image path with its precomputed target row."""

    def __init__(self, image_paths, targets, transform=None):
        self.image_paths = list(image_paths)
        self.targets = torch.as_tensor(targets, dtype=torch.float32)
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread returns None for a missing or undecodable file.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform is not None:
            image = self.transform(image)
        return image, self.targets[idx]


def train_resnet_on_attributes(csv_path, img_dir, category, epochs=10, batch_size=32):
    """Fine-tune a ResNet-50 to predict every attribute column of a category CSV.

    Each attribute is one-hot encoded into ``max_classes`` slots (padded with
    zeros for attributes with fewer classes) and the slots are concatenated, so
    the target has exactly the ``num_attributes * max_classes`` layout produced
    by ``build_resnet_model``. Training uses ``BCEWithLogitsLoss`` and Adam
    (lr=0.001) on a random 80/20 split. Afterwards a classification report is
    printed per attribute and the weights are saved to
    ``"<category>_resnet_model.pth"`` in the working directory.

    Args:
        csv_path: Path of the attribute CSV (read through ``load_data``).
        img_dir: Directory containing ``<id zero-padded to 6 digits>.jpg``
            images. When ``None`` the paths produced by ``load_data`` are kept.
        category: Name used for the saved weights file.
        epochs: Number of passes over the training split.
        batch_size: Mini-batch size for both loaders.

    Raises:
        ValueError: If the CSV has no attribute columns.
    """
    # Load the data (load_data does not use its second argument to pick columns).
    df = load_data(csv_path, None)
    if img_dir is not None:
        # Honour the caller-supplied directory instead of silently ignoring it.
        df['image_path'] = df['id'].apply(
            lambda x: os.path.join(img_dir, f"{str(x).zfill(6)}.jpg")
        )

    # Select attribute columns by name: positional slicing would skip the first
    # attribute and pick up 'image_path' once 'len' has been dropped.
    attributes = [col for col in df.columns if col not in _METADATA_COLUMNS]
    if not attributes:
        raise ValueError(f"No attribute columns found in {csv_path}")
    num_attributes = len(attributes)
    targets, class_names, max_classes = _encode_attribute_targets(df, attributes)

    # Transformations for the images
    transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])

    # Create the dataset and dataloaders
    dataset = _AttributeImageDataset(df['image_path'].tolist(), targets, transform=transform)
    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size
    train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Build and train the model
    model = build_resnet_model(num_attributes, max_classes)
    device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
    model.to(device)

    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    model.train()
    for epoch in range(epochs):
        print(f"Epoch {epoch+1}/{epochs}")
        for images, labels in tqdm.tqdm(train_loader):
            images = images.to(device)
            labels = labels.to(device)  # already float32, shape (batch, n * m)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    # Evaluate the model
    model.eval()
    all_logits = []
    all_targets = []

    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images.to(device))
            all_logits.append(outputs.cpu().numpy())
            all_targets.append(labels.numpy())

    # Print classification report for each attribute
    print("Classification Report:")
    if all_logits:
        logits = np.concatenate(all_logits).reshape(-1, num_attributes, max_classes)
        truth = np.concatenate(all_targets).reshape(-1, num_attributes, max_classes)
        for i, attr in enumerate(attributes):
            print(f"\nAttribute: {attr}")
            n_classes = len(class_names[i])
            # Only the first n_classes slots are real classes; the rest is padding.
            attr_truth = truth[:, i, :n_classes]
            labelled = attr_truth.sum(axis=1) > 0  # skip rows with a missing value
            if not labelled.any():
                print("No labelled samples in the test split.")
                continue
            y_true = attr_truth[labelled].argmax(axis=1)
            # Sigmoid is monotonic, so argmax over logits picks the same class.
            y_pred = logits[labelled, i, :n_classes].argmax(axis=1)
            print(classification_report(
                y_true,
                y_pred,
                labels=np.arange(n_classes),
                target_names=[str(name) for name in class_names[i]],
            ))

    # Save the model
    model_save_path = f"{category}_resnet_model.pth"
    torch.save(model.state_dict(), model_save_path)
    print(f"Model saved to {model_save_path}")

In [36]:
# Example usage: fine-tune the multi-attribute ResNet on the Men_Tshirts CSV.
csv_path = "../data/filled_data/Men_Tshirts_data.csv"  # path to the attribute CSV
img_dir = "../data/train_images"  # directory containing images
category_to_predict = "Men_Tshirts"  # also names the saved weights file

train_resnet_on_attributes(
    csv_path=csv_path,
    img_dir=img_dir,
    category=category_to_predict,
    epochs=10,
    batch_size=32,
)

Epoch 1/10


  0%|          | 0/182 [00:00<?, ?it/s]

: 