In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision import models
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import os
from PIL import Image


In [2]:
#csv_file = "/kaggle/input/fashion-product-images-dataset/fashion-dataset/sty.csv"

#df = pd.read_csv(csv_file, on_bad_lines="skip")
#print(df.head())  # Show the first few rows
#print(df.columns)  # Display all column names


root_dir = "/kaggle/input/fashion-product-images-dataset/fashion-dataset"
image_folder = "images"  # Update if necessary

# Check if the image folder exists
print("Does the image folder exist?", os.path.exists(os.path.join(root_dir, image_folder)))

# List some images
sample_images = os.listdir(os.path.join(root_dir, image_folder))[:10]
print("Sample image files:", sample_images)

Does the image folder exist? True
Sample image files: ['31973.jpg', '30778.jpg', '19812.jpg', '22735.jpg', '38246.jpg', '16916.jpg', '52876.jpg', '39500.jpg', '44758.jpg', '59454.jpg']


In [3]:
class ClothesDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        self.data_frame = pd.read_csv(csv_file, on_bad_lines='skip')  # Skips bad lines
        #self.data_frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform
        self.categories = sorted(self.data_frame["articleType"].unique())
        self.category_to_idx = {cat: idx for idx, cat in enumerate(self.categories)}
    
    def __len__(self):
        return len(self.data_frame)
    
    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, "images", str(self.data_frame.iloc[idx]["id"]) + ".jpg")
        
        if not os.path.exists(img_name):
             print(f"Warning: Image {img_name} not found. Skipping it.")
             return torch.zeros((3, 224, 224)), torch.tensor(-1, dtype=torch.long)  # Fix: Correct tensor creation
        image = Image.open(img_name).convert("RGB")
        label = self.category_to_idx[self.data_frame.iloc[idx]["articleType"]]
        
        if self.transform:
            image = self.transform(image)
        
        return image, label


In [4]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize( mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


In [5]:
dataset = ClothesDataset(csv_file="/kaggle/input/fashion-product-images-dataset/fashion-dataset/styles.csv", root_dir="/kaggle/input/fashion-product-images-dataset/fashion-dataset", transform=transform)

dataloader = DataLoader(dataset, batch_size=32, shuffle=True)


In [6]:
class ClothesClassifier(nn.Module):
    def __init__(self, num_classes):
        super(ClothesClassifier, self).__init__()
        self.model = models.resnet50(pretrained=True)
        self.model.fc = nn.Linear(self.model.fc.in_features, num_classes)
    
    def forward(self, x):
        return self.model(x)

num_classes = len(dataset.categories)
model = ClothesClassifier(num_classes)


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 190MB/s]


In [7]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)


In [9]:
import matplotlib.pyplot as plt
import numpy as np
import torch

num_epochs = 10  # Define number of epochs
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

for epoch in range(num_epochs):
    model.train()
    correct_train = 0
    total_train = 0
    running_loss = 0.0
    print(1)
    # Training Loop
    for images, labels in dataloader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # Compute accuracy
        _, predicted = torch.max(outputs, 1)
        correct_train += (predicted == labels).sum().item()
        total_train += labels.size(0)
        running_loss += loss.item()
    print(2)
    # Calculate train loss and accuracy
    train_loss = running_loss / len(dataloader)
    train_accuracy = 100 * correct_train / total_train
    train_losses.append(train_loss)
    train_accuracies.append(train_accuracy)
    
    # Validation Step
    model.eval()
    correct_val = 0
    total_val = 0
    val_running_loss = 0.0
    print(3)
    with torch.no_grad():
        for images, labels in val_dataloader:
            outputs = model(images)
            loss = criterion(outputs, labels)

            _, predicted = torch.max(outputs, 1)
            correct_val += (predicted == labels).sum().item()
            total_val += labels.size(0)
            val_running_loss += loss.item()
    print(4)
    # Calculate validation loss and accuracy
    val_loss = val_running_loss / len(val_dataloader)
    val_accuracy = 100 * correct_val / total_val
    val_losses.append(val_loss)
    val_accuracies.append(val_accuracy)
    print(5)
    # Print results for each epoch
    print("=" * 60)
    print(f"Epoch [{epoch+1}/{num_epochs}]")
    print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_accuracy:.2f}%")
    print(f"Val Loss: {val_loss:.4f} | Val Acc: {val_accuracy:.2f}%")
    print("=" * 60)


1


IndexError: Target -1 is out of bounds.

In [None]:

# Plot Training & Validation Loss
plt.figure(figsize=(10, 4))
plt.plot(np.arange(1, num_epochs+1), train_losses, label="Train Loss")
plt.plot(np.arange(1, num_epochs+1), val_losses, label="Val Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("Training & Validation Loss")
plt.legend()
plt.show()

# Plot Training & Validation Accuracy
plt.figure(figsize=(10, 4))
plt.plot(np.arange(1, num_epochs+1), train_accuracies, label="Train Accuracy")
plt.plot(np.arange(1, num_epochs+1), val_accuracies, label="Val Accuracy")
plt.xlabel("Epochs")
plt.ylabel("Accuracy (%)")
plt.title("Training & Validation Accuracy")
plt.legend()
plt.show()

In [None]:
torch.save(model.state_dict(), "clothes_classifier.pth")
print("Model saved successfully!")


In [None]:
#testing model

import torch
import torchvision.transforms as transforms
from PIL import Image
import pandas as pd
import os
from torchvision import models

# Define paths (model should be in the same directory)
model_path = "clothes_classifier.pth"  # Adjust this if your model has a different name
csv_path = "styles.csv"  # CSV file in the same directory

# Check if the model exists
if not os.path.exists(model_path):
    raise FileNotFoundError(f"Model file '{model_path}' not found. Make sure it's saved in the same directory.")

# Define the number of categories (match this with training)
num_classes = 50  # Change this if your dataset has a different number of categories
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the model architecture
model = models.resnet18(pretrained=False)
model.fc = torch.nn.Linear(model.fc.in_features, num_classes)

# Load the trained weights
model.load_state_dict(torch.load(model_path, map_location=device))
model.to(device)
model.eval()

print("✅ Model loaded successfully!")


In [None]:
# Define image transformations (must match training preprocessing)
transform = transforms.Compose([
    transforms.Resize((224, 224)),  
    transforms.ToTensor(),  
    transforms.Normalize([0.5], [0.5])  
])

# Load and preprocess your image
image_path = "/kaggle/input/testing-images/1.png"  # Change this to your test image
if not os.path.exists(image_path):
    raise FileNotFoundError(f"Test image '{image_path}' not found!")

image = Image.open(image_path).convert("RGB")
image = transform(image).unsqueeze(0)  # Add batch dimension
image = image.to(device)

print("✅ Image preprocessed successfully!")


In [None]:
# Run inference on the image
with torch.no_grad():
    output = model(image)
    _, predicted = torch.max(output, 1)

predicted_index = predicted.item()
print(f"Predicted class index: {predicted_index}")


In [None]:
# Load the dataset CSV
df = pd.read_csv(csv_path)

# Ensure column names match (modify if necessary)
category_list = sorted(df["subCategory"].dropna().unique())  
color_list = sorted(df["baseColour"].dropna().unique())  

# Get the predicted category and color
predicted_category = category_list[predicted_index % len(category_list)]
predicted_color = color_list[predicted_index % len(color_list)]

print(f"🎯 Predicted Category: {predicted_category}")
print(f"🎨 Predicted Color: {predicted_color}")
