# Fashion Product Image Classification
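This notebook loads the Kaggle Fashion Product Images dataset (the `styles.csv` metadata file plus the `images/` folder), keeps five article types (Shirts, Watches, Handbags, Casual Shoes, Sports Shoes) capped at 500 rows each, and trains a simple convolutional image classification model on them.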

In [36]:
import pandas as pd
import os

# Dataset layout: the metadata CSV and the image folder both live under base_dir
base_dir = 'fashion_product_images'
image_dir = os.path.join(base_dir, 'images')
csv_path = os.path.join(base_dir, 'styles.csv')

# Read the product metadata; lines pandas cannot parse are skipped instead of raising
df = pd.read_csv(csv_path, on_bad_lines='skip')
df.head()

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName
0,15970,Men,Apparel,Topwear,Shirts,Navy Blue,Fall,2011.0,Casual,Turtle Check Men Navy Blue Shirt
1,39386,Men,Apparel,Bottomwear,Jeans,Blue,Summer,2012.0,Casual,Peter England Men Party Blue Jeans
2,59263,Women,Accessories,Watches,Watches,Silver,Winter,2016.0,Casual,Titan Women Silver Watch
3,21379,Men,Apparel,Bottomwear,Track Pants,Black,Fall,2011.0,Casual,Manchester United Men Solid Black Track Pants
4,53759,Men,Apparel,Topwear,Tshirts,Grey,Summer,2012.0,Casual,Puma Men Grey T-shirt


In [37]:
# How many distinct article types does the metadata contain?
article_types = df['articleType']
print("Number of unique categories:", article_types.nunique())

# Row count per article type (value_counts sorts most frequent first); show the top 15
category_counts = article_types.value_counts()
print(category_counts.head(15))


Number of unique categories: 143
articleType
Tshirts         7067
Shirts          3217
Casual Shoes    2845
Watches         2542
Sports Shoes    2036
Kurtas          1844
Tops            1762
Handbags        1759
Heels           1323
Sunglasses      1073
Wallets          936
Flip Flops       914
Sandals          897
Briefs           849
Belts            813
Name: count, dtype: int64


In [38]:
# Article types kept for this experiment
selected_categories = ['Shirts', 'Watches', 'Handbags', 'Casual Shoes', 'Sports Shoes']
is_selected = df['articleType'].isin(selected_categories)

# One chain, no in-place mutation:
# filter -> drop rows missing id/label -> first 500 rows per class -> fresh 0..n-1 index
df_filtered = (
    df[is_selected]
    .dropna(subset=['id', 'articleType'])
    .groupby('articleType')
    .head(500)  # Limit to 500 per class (first rows in file order, not a random sample)
    .reset_index(drop=True)
)
df_filtered.head()

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName
0,15970,Men,Apparel,Topwear,Shirts,Navy Blue,Fall,2011.0,Casual,Turtle Check Men Navy Blue Shirt
1,59263,Women,Accessories,Watches,Watches,Silver,Winter,2016.0,Casual,Titan Women Silver Watch
2,30805,Men,Apparel,Topwear,Shirts,Green,Summer,2012.0,Ethnic,Fabindia Men Striped Green Shirt
3,26960,Women,Apparel,Topwear,Shirts,Purple,Summer,2012.0,Casual,Jealous 21 Women Purple Shirt
4,30039,Men,Accessories,Watches,Watches,Black,Winter,2016.0,Casual,Skagen Men Black Watch


In [39]:
from PIL import Image
import numpy as np

def load_images(df, img_dir, size=(128, 128)):
    """Load the images listed in ``df`` as RGB arrays together with their labels.

    Each row's image is looked up at ``<img_dir>/<id>.jpg``. Images that are
    missing or cannot be read are skipped (best effort); a single warning
    reports how many were skipped so the data loss is not silent.

    Args:
        df: DataFrame with an ``id`` column (image file stem) and an
            ``articleType`` column (class label).
        img_dir: Directory containing the ``.jpg`` files.
        size: ``(width, height)`` that every image is resized to.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: the stacked image arrays and the
        matching labels, both in row order of the images that loaded.
    """
    import warnings

    X, y = [], []
    skipped = []
    # Walk the two columns directly: unlike iterrows() this builds no Series
    # per row and cannot upcast an integer id to float (which would turn
    # "123.jpg" into "123.0.jpg" on an all-numeric frame).
    for image_id, label in zip(df['id'], df['articleType']):
        img_path = os.path.join(img_dir, str(image_id) + '.jpg')
        try:
            # The context manager releases the file handle even if decoding fails.
            with Image.open(img_path) as img:
                pixels = np.array(img.convert('RGB').resize(size))
        except (OSError, ValueError):
            # Missing, truncated or undecodable file: skip this image only.
            # Anything else (KeyboardInterrupt, NameError, ...) is deliberately
            # not caught so real bugs and Ctrl-C are not swallowed.
            skipped.append(img_path)
            continue
        X.append(pixels)
        y.append(label)

    if skipped:
        warnings.warn(
            f"load_images skipped {len(skipped)} unreadable image(s), "
            f"e.g. {skipped[0]}",
            stacklevel=2,
        )
    return np.array(X), np.array(y)

# Read every image referenced by the filtered metadata into memory, with its label
X, y = load_images(df=df_filtered, img_dir=image_dir)
print(f"Loaded {len(X)} images.")

Loaded 2499 images.


In [40]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Encode the string labels as integer class ids (le.classes_ keeps the id -> name mapping)
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Stratified 80/20 train/test split with a fixed seed so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, stratify=y_encoded, random_state=42)

# Normalize pixel values to [0, 1] as float32. Dividing the integer image
# arrays by a Python float directly would produce float64, doubling memory
# for precision that Keras (float32 by default) does not use.
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

print(f"Train set: {X_train.shape}, Test set: {X_test.shape}")

Train set: (1999, 128, 128, 3), Test set: (500, 128, 128, 3)


In [41]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation and
# batch shuffling are repeatable between runs.
# NOTE(review): bit-exact results on GPU may additionally need
# tf.config.experimental.enable_op_determinism() — confirm if required.
tf.keras.utils.set_random_seed(42)

# Build model: two Conv2D + MaxPooling2D stages, then a small dense classifier
# with one softmax output per encoded class
model = models.Sequential([
    layers.Input(shape=(128, 128, 3)),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(len(np.unique(y_encoded)), activation='softmax')
])

# Sparse categorical cross-entropy because the labels are integer class ids, not one-hot vectors
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train, holding out 10% of the training data for validation. Keep the History
# object so the loss/accuracy curves can be inspected later instead of only
# showing its repr as the cell output.
history = model.fit(X_train, y_train, epochs=20, validation_split=0.1)

Epoch 1/20
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 44ms/step - accuracy: 0.5025 - loss: 1.7827 - val_accuracy: 0.8650 - val_loss: 0.2761
Epoch 2/20
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 43ms/step - accuracy: 0.8912 - loss: 0.2756 - val_accuracy: 0.8850 - val_loss: 0.2453
Epoch 3/20
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 43ms/step - accuracy: 0.9249 - loss: 0.1867 - val_accuracy: 0.9100 - val_loss: 0.1887
Epoch 4/20
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 44ms/step - accuracy: 0.9401 - loss: 0.1580 - val_accuracy: 0.8700 - val_loss: 0.2711
Epoch 5/20
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 42ms/step - accuracy: 0.9443 - loss: 0.1458 - val_accuracy: 0.9000 - val_loss: 0.2154
Epoch 6/20
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 42ms/step - accuracy: 0.9505 - loss: 0.1316 - val_accuracy: 0.8900 - val_loss: 0.3017
Epoch 7/20
[1m57/57[0m [32m━━━━

<keras.src.callbacks.history.History at 0x2039bc18980>

In [42]:
# Score the trained model on the held-out test split
test_scores = model.evaluate(X_test, y_test)
loss, accuracy = test_scores
print(f"Test accuracy: {accuracy:.2f}")

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.9315 - loss: 0.2861
Test accuracy: 0.92


In [43]:
# Persist the trained model to disk (the .h5 extension selects the HDF5 format)
model_path = "model/fashion_model.h5"
model.save(model_path)


