In [6]:
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import torch
from torchvision import datasets, transforms, models
import torch.nn as nn
from torchvision.models import resnet50, ResNet50_Weights
from PIL import Image
from torch.utils.data import random_split
import os

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [7]:
dataframe = pd.read_csv('../del-3:2/Large-mixed-clothing-dataset/myntradataset/styles.csv', on_bad_lines='skip', header=None, names=["id", "gender", "masterCategory", "subCategory", "articleType", "baseColor", "season", "year", "usage", "productDisplayName"])

In [8]:
unique_classes = dataframe['articleType'].unique()
class_list = list(unique_classes)
print(len(class_list))
print(class_list)

144
['articleType', 'Shirts', 'Jeans', 'Watches', 'Track Pants', 'Tshirts', 'Socks', 'Casual Shoes', 'Belts', 'Flip Flops', 'Handbags', 'Tops', 'Bra', 'Sandals', 'Shoe Accessories', 'Sweatshirts', 'Deodorant', 'Formal Shoes', 'Bracelet', 'Lipstick', 'Flats', 'Kurtas', 'Waistcoat', 'Sports Shoes', 'Shorts', 'Briefs', 'Sarees', 'Perfume and Body Mist', 'Heels', 'Sunglasses', 'Innerwear Vests', 'Pendant', 'Nail Polish', 'Laptop Bag', 'Scarves', 'Rain Jacket', 'Dresses', 'Night suits', 'Skirts', 'Wallets', 'Blazers', 'Ring', 'Kurta Sets', 'Clutches', 'Shrug', 'Backpacks', 'Caps', 'Trousers', 'Earrings', 'Camisoles', 'Boxers', 'Jewellery Set', 'Dupatta', 'Capris', 'Lip Gloss', 'Bath Robe', 'Mufflers', 'Tunics', 'Jackets', 'Trunk', 'Lounge Pants', 'Face Wash and Cleanser', 'Necklace and Chains', 'Duffel Bag', 'Sports Sandals', 'Foundation and Primer', 'Sweaters', 'Free Gifts', 'Trolley Bag', 'Tracksuits', 'Swimwear', 'Shoe Laces', 'Fragrance Gift Set', 'Bangle', 'Nightdress', 'Ties', 'Baby D

In [9]:
items_to_drop = [
    "Ipad", "Body Wash and Scrub", "Mens Grooming Kit", "Face Serum and Gel", "Lip Plumper", "Makeup Remover", "Key chain",
    "Rucksacks", "Basketballs", "Lehenga Choli", "Clothing Set", "Mascara", "Toner", "Cushion Covers", "Tablet Sleeve",
    "Ties and Cufflinks", "Footballs", "Stoles", "Shapewear", "Nehru Jackets", "Salwar", "Cufflinks", "Jeggings", 
    "Hair Colour", "Concealer", "Rompers", "Body Lotion", "Sunscreen", "Mask and Peel", "Face Scrub and Exfoliator",
    "Lounge Shorts", "Nail Essentials", "Churidar", "Lounge Tshirts", "Eyeshadow", "Salwar and Dupatta", "Patiala",
    "Lip Liner", "Kajal and Eyeliner", "Highlighter and Blush", "Travel Accessory", "Kurtis", "Mobile Pouch", 
    "Messenger Bag", "Lip Care", "Face Moisturisers", "Compact", "Eye Cream", "Accessory Gift Set", "Baby Dolls",
    "Shoe Laces", "Fragrance Gift Set", "Bangle", "Free Gifts", "Duffel Bag", "Sports Sandals", 
    "Foundation and Primer", "Lounge Pants", "Face Wash and Cleanser", "Mufflers", "Jewellery Set", "Dupatta", 
    "Capris", "Lip Gloss", "Camisoles", "Kurta Sets", "Clutches", "Shrug", "Pendant", "Nail Polish", "Briefs", 
    "Sarees", "Perfume and Body Mist", "Lipstick", "Flats", "Kurtas", "Waistcoat", "Sports Shoes", "Deodorant", 
    "Shoe Accessories"
]

filtered_dataframe = dataframe[~dataframe['articleType'].isin(items_to_drop)]

unique_classes = filtered_dataframe['articleType'].unique()
class_list = list(unique_classes)
print(len(class_list))
print(class_list)

64
['articleType', 'Shirts', 'Jeans', 'Watches', 'Track Pants', 'Tshirts', 'Socks', 'Casual Shoes', 'Belts', 'Flip Flops', 'Handbags', 'Tops', 'Bra', 'Sandals', 'Sweatshirts', 'Formal Shoes', 'Bracelet', 'Shorts', 'Heels', 'Sunglasses', 'Innerwear Vests', 'Laptop Bag', 'Scarves', 'Rain Jacket', 'Dresses', 'Night suits', 'Skirts', 'Wallets', 'Blazers', 'Ring', 'Backpacks', 'Caps', 'Trousers', 'Earrings', 'Boxers', 'Bath Robe', 'Tunics', 'Jackets', 'Trunk', 'Necklace and Chains', 'Sweaters', 'Trolley Bag', 'Tracksuits', 'Swimwear', 'Nightdress', 'Ties', 'Leggings', 'Beauty Accessory', 'Jumpsuit', 'Water Bottle', 'Suspenders', 'Robe', 'Stockings', 'Headband', 'Tights', 'Gloves', 'Wristbands', 'Booties', 'Waist Pouch', 'Hair Accessory', 'Umbrellas', 'Hat', 'Rain Trousers', 'Suits']


In [10]:
label_mapping = {
    'Shirts': 'Shirts', 'Jeans': 'Pants', 'Watches': 'Watches', 'Track Pants': 'Pants',
    'Tshirts': 'T-shirts', 'Socks': 'Accessories', 'Casual Shoes': 'Shoes', 'Belts': 'Belts',
    'Flip Flops': 'Slippers', 'Handbags': 'Bags', 'Tops': 'Shirts', 'Bra': 'Accessories',
    'Sandals': 'Shoes', 'Sweatshirts': 'Sweaters', 'Formal Shoes': 'Shoes', 'Bracelet': 'Accessories',
    'Waistcoat': 'Outerwear', 'Sports Shoes': 'Shoes', 'Shorts': 'Shorts', 'Heels': 'Shoes',
    'Sunglasses': 'Accessories', 'Innerwear Vests': 'Accessories', 'Laptop Bag': 'Bags',
    'Scarves': 'Accessories', 'Rain Jacket': 'Outerwear', 'Dresses': 'Dresses', 'Night suits': 'Robe',
    'Skirts': 'Skirts', 'Wallets': 'Accessories', 'Blazers': 'Outerwear', 'Ring': 'Accessories',
    'Backpacks': 'Bags', 'Caps': 'Hats', 'Trousers': 'Pants', 'Earrings': 'Accessories',
    'Boxers': 'Accessories', 'Bath Robe': 'Robe', 'Tunics': 'Dresses', 'Jackets': 'Outerwear',
    'Trunk': 'Bags', 'Lounge Pants': 'Pants', 'Necklace and Chains': 'Accessories', 'Sports Sandals': 'Shoes',
    'Sweaters': 'Sweaters', 'Trolley Bag': 'Bags', 'Tracksuits': 'One-piece', 'Swimwear': 'Swimwear',
    'Nightdress': 'Robe', 'Ties': 'Ties', 'Leggings': 'Pants', 'Beauty Accessory': 'Accessories',
    'Jumpsuit': 'One-piece', 'Water Bottle': 'Accessories', 'Suspenders': 'Accessories',
    'Robe': 'Robe', 'Stockings': 'Pants', 'Headband': 'Headband', 'Tights': 'Pants',
    'Gloves': 'Gloves', 'Wristbands': 'Accessories', 'Nehru Jackets': 'Outerwear', 
    'Jeggings': 'Pants', 'Booties': 'Shoes', 'Waist Pouch': 'Accessories', 'Hair Accessory': 'Accessories',
    'Umbrellas': 'Umbrellas', 'Hat': 'Hats', 'Rain Trousers': 'Pants', 'Suits': 'Suits'
}

# Apply this dictionary to map your dataset's 'articleType' to a new 'general_articleType'
filtered_dataframe['general_articleType'] = filtered_dataframe['articleType'].map(label_mapping)

# Handling any potential unmapped items
filtered_dataframe['general_articleType'].fillna(filtered_dataframe['articleType'], inplace=True)

unique_classes = filtered_dataframe['general_articleType'].unique()
class_list = list(unique_classes)
print(len(class_list))
print(class_list)


24
['articleType', 'Shirts', 'Pants', 'Watches', 'T-shirts', 'Accessories', 'Shoes', 'Belts', 'Slippers', 'Bags', 'Sweaters', 'Shorts', 'Outerwear', 'Dresses', 'Robe', 'Skirts', 'Hats', 'One-piece', 'Swimwear', 'Ties', 'Headband', 'Gloves', 'Umbrellas', 'Suits']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_dataframe['general_articleType'] = filtered_dataframe['articleType'].map(label_mapping)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  filtered_dataframe['general_articleType'].fillna(filtered_dataframe['articleType'], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_g

In [11]:
########
batch_size = 256
learning_rate = 0.001
scheduler_step_size = 7
scheduler_gamma = 0.1
num_epochs = 20
degrees = 45
translate = (0.1, 0.3)
scale = (0.8, 1.2)
saturation = 0.5
num_workers = 2
########


class ClothingDataset(Dataset):
    def __init__(self, dataframe, root_dir):
        """
        Args:
            dataframe (pandas.DataFrame): Dataframe containing image info.
            root_dir (string): Directory with all the images.
        """
        self.dataframe = dataframe
        self.root_dir = root_dir
        self.label_map = {label: idx for idx, label in enumerate(
            dataframe['general_articleType'].unique())}

        self.dataframe = dataframe.copy()
        self.dataframe['exists'] = self.dataframe['id'].apply(
            lambda x: os.path.exists(os.path.join(root_dir, f"{x}.jpg")))
        self.dataframe = self.dataframe[self.dataframe['exists']]

    def __len__(self):
        return len(self.dataframe)

    def max_label(self):
        """Returns the maximum label integer from the dataframe."""
        return self.dataframe['general_articleType'].map(self.label_map).max()

    def __getitem__(self, idx):
        img_name = self.dataframe.iloc[idx]['id'] + ".jpg"
        img_path = os.path.join(self.root_dir, img_name)
        image = Image.open(img_path).convert('RGB')

        label_str = self.dataframe.iloc[idx]['general_articleType']
        label_int = self.label_map[label_str]

        label = torch.tensor(label_int, dtype=torch.long)

        return image, label


dataset = ClothingDataset(dataframe=filtered_dataframe,
                          root_dir='../del-3:2/Large-mixed-clothing-dataset/myntradataset/images')

print(len(dataset))

total_size = len(dataset)
train_size = int(0.8 * total_size)  # 80% for training
test_size = total_size - train_size  # 20% for testing
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])


train_transform = transforms.Compose([
    transforms.RandomAffine(degrees=degrees, translate=translate, scale=scale),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(saturation=saturation),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])


def train_collate_fn(batch):
    transformed_batch = [(train_transform(x), y) for x, y in batch]
    return torch.utils.data.dataloader.default_collate(transformed_batch)


def test_collate_fn(batch):
    transformed_batch = [(test_transform(x), y) for x, y in batch]
    return torch.utils.data.dataloader.default_collate(transformed_batch)


train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True,
                          num_workers=num_workers, pin_memory=True, collate_fn=train_collate_fn)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False,
                         num_workers=num_workers, pin_memory=True, collate_fn=test_collate_fn)

34034


In [12]:
class_list = dataset.dataframe['general_articleType'].unique()
print(len(class_list))
print(class_list)

22
['Shirts' 'Pants' 'Watches' 'T-shirts' 'Accessories' 'Shoes' 'Belts'
 'Slippers' 'Bags' 'Sweaters' 'Shorts' 'Outerwear' 'Dresses' 'Robe'
 'Skirts' 'Hats' 'One-piece' 'Swimwear' 'Ties' 'Headband' 'Gloves'
 'Umbrellas']


In [13]:
class CNN(nn.Module):
    def __init__(self):
      super(CNN,self).__init__()
      self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, stride=1)
      self.conv2 = nn.Conv2d(32, 64, 3, 1)
      self.bn1 = nn.BatchNorm2d(64)
      self.dropout1 = nn.Dropout2d(0.25)
      self.dropout2 = nn.Dropout2d(0.5)
      self.fc1 = nn.Linear(1600, 128)
      self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
      x = self.conv1(x)
      x = nn.functional.relu(x)
      x = nn.functional.max_pool2d(x, 2)
      x = self.dropout1(x)
      x = self.conv2(x)
      x = self.bn1(x)
      x = nn.functional.relu(x)
      x = nn.functional.max_pool2d(x, 2)
      x = self.dropout1(x)
      x = torch.flatten(x, 1)
      x = self.fc1(x)
      x = nn.functional.relu(x)
      x = self.dropout2(x)
      x = self.fc2(x)
      output = nn.functional.log_softmax(x, dim=1)
      return output
    
loss_history = []

model = CNN()

if torch.cuda.is_available():
    model.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

step_lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer, step_size=scheduler_step_size, gamma=scheduler_gamma)

n_total_steps = len(train_loader)
for epoch in range(num_epochs):
    print(f'Epoch [{epoch+1}/{num_epochs}]')
    for i, (images, labels) in enumerate(train_loader):
        try:
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if (i+1) % 694 == 0:
                print(
                    f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item()}')

        except FileNotFoundError as e:
            print(f"File not found: {e}, skipping...")
            continue

    step_lr_scheduler.step()

print('Finished Training')

Epoch [1/20]


Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/multiprocessing/spawn.py", line 122, in spawn_main
    exitcode = _main(fd, parent_sentinel)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/multiprocessing/spawn.py", line 132, in _main
    self = reduction.pickle.load(from_parent)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: Can't get attribute 'ClothingDataset' on <module '__main__' (built-in)>


KeyboardInterrupt: 

In [None]:
with torch.no_grad():
    model.eval()
    n_correct = 0
    n_samples = 0
    num_classes = len(dataset.dataframe)  # Assuming this attribute exists and is set correctly
    n_class_correct = [0 for _ in range(num_classes)]
    n_class_samples = [0 for _ in range(num_classes)]
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

        for i in range(labels.size(0)):
            label = labels[i]
            pred = predicted[i]
            if (label == pred):
                n_class_correct[label] += 1
            n_class_samples[label] += 1

acc = 100.0 * n_correct / n_samples
print(f'Accuracy of the network on the test images: {acc} %')

for i in range(num_classes):
    if n_class_samples[i] != 0:
        acc = 100.0 * n_class_correct[i] / n_class_samples[i]
        print(f'Accuracy of {class_list[i]}: {acc:.2f} %')
    else:
        print(f'Accuracy of {class_list[i]}: N/A (no samples)')