In [5]:
import os
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')


In [6]:

# Dataset locations (Kaggle input mount)
data_dir = '/kaggle/input/fashion-product-images-small/'
styles_path = os.path.join(data_dir, 'styles.csv')  # CSV with one row of labels per product id
images_folder = os.path.join(data_dir, 'images')    # folder with one '<id>.jpg' per product


In [10]:

# Load the label table from styles.csv.
# on_bad_lines='skip' drops rows the parser cannot read (e.g. too many fields) instead of raising.
df = pd.read_csv(
    styles_path,
    on_bad_lines='skip',
)

In [13]:
# Build each product's image path exactly once, then keep only the rows whose file exists.
# The path is '<images_folder>/<id>.jpg', where <id> is the value of the 'id' column.
image_paths = df['id'].map(lambda product_id: os.path.join(images_folder, str(product_id) + '.jpg'))

# Boolean mask aligned with df's index; astype(bool) keeps the mask usable even for an empty frame.
has_image = image_paths.map(os.path.isfile).astype(bool)

# .assign returns a new DataFrame, so the 'image' column is never written onto a filtered
# slice of the original frame (the pattern that triggers SettingWithCopyWarning).
df = df[has_image].assign(image=image_paths[has_image])

In [14]:

# Keep only the model input (image path) and the target label (subCategory)
df = df.loc[:, ['image', 'subCategory']]

# Samples per category, printed for inspection
category_counts = df['subCategory'].value_counts()
print(category_counts)

# Keep only categories that have at least 800 samples
is_frequent = category_counts >= 800
valid_categories = category_counts[is_frequent].index
df = df[df['subCategory'].isin(valid_categories)]

# Stratified split: 90% train, then the remaining 10% is halved into validation and test.
# Both splits use random_state=42 so they are repeatable.
train_df, temp_df = train_test_split(
    df,
    test_size=0.1,
    stratify=df['subCategory'],
    random_state=42,
)
val_df, test_df = train_test_split(
    temp_df,
    test_size=0.5,
    stratify=temp_df['subCategory'],
    random_state=42,
)

subCategory
Topwear                     15398
Shoes                        7343
Bags                         3055
Bottomwear                   2693
Watches                      2542
Innerwear                    1808
Jewellery                    1079
Eyewear                      1073
Fragrance                    1011
Sandal                        963
Wallets                       933
Flip Flops                    913
Belts                         811
Socks                         698
Lips                          527
Dress                         478
Loungewear and Nightwear      470
Saree                         427
Nails                         329
Makeup                        307
Headwear                      293
Ties                          258
Accessories                   129
Scarves                       118
Cufflinks                     108
Apparel Set                   106
Free Gifts                    104
Stoles                         90
Skin Care                      77
Sk

In [15]:
# Data generators for training, validation and test.
# Only the training generator augments images; validation and test images are only rescaled by 1/255.
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    rotation_range=30,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)
val_datagen = ImageDataGenerator(rescale=1. / 255)
test_datagen = ImageDataGenerator(rescale=1. / 255)  # No augmentation for test set

# Pin the label -> index mapping to the classes seen in the training split.
# Without an explicit `classes` list each generator infers (and sorts) the labels of its own
# DataFrame, so a class missing from one split would silently shift that split's indices.
# Sorting reproduces the order Keras uses when it infers the classes itself.
class_names = sorted(train_df['subCategory'].unique())

# Arguments shared by all three generators
flow_kwargs = dict(
    x_col='image',
    y_col='subCategory',
    target_size=(160, 120),  # (height, width) every image is resized to
    batch_size=64,
    class_mode='categorical',
    classes=class_names,
)

train_generator = train_datagen.flow_from_dataframe(train_df, **flow_kwargs)
val_generator = val_datagen.flow_from_dataframe(val_df, **flow_kwargs)
# shuffle=False keeps the batch order fixed so predictions line up with test_generator.classes
test_generator = test_datagen.flow_from_dataframe(test_df, shuffle=False, **flow_kwargs)

Found 35659 validated image filenames belonging to 13 classes.
Found 1981 validated image filenames belonging to 13 classes.
Found 1982 validated image filenames belonging to 13 classes.


In [None]:
# Create a custom CNN model: three Conv -> BatchNorm -> MaxPool stages followed by a
# dense classifier head. The output layer has one softmax unit per class known to the
# training generator.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(160, 120, 3)),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu', kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu', kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(512, activation='relu', kernel_regularizer=l2(0.001)),
    Dropout(0.5),
    Dense(len(train_generator.class_indices), activation='softmax')
])

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.0001), metrics=['accuracy'])

# Callbacks for improved training:
# - stop after 4 epochs without val_loss improvement and restore the best weights
# - multiply the learning rate by 0.2 after 2 stagnant epochs, never going below 1e-6
early_stopping = EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=1e-6)

# Calculate class weights ('balanced' mode) from the integer labels of the training generator.
# The class ids are computed once and the result is stored as a plain {int: float} dict.
class_ids = np.unique(train_generator.classes)
balanced_weights = compute_class_weight('balanced', classes=class_ids, y=train_generator.classes)
class_weights = {int(class_id): float(weight) for class_id, weight in zip(class_ids, balanced_weights)}

# Train the model
history = model.fit(
    train_generator,
    epochs=25,
    validation_data=val_generator,
    verbose=1,
    callbacks=[early_stopping, reduce_lr],
    class_weight=class_weights  # Add class weights
)

# Saving is intentionally disabled. To persist the trained model in the .keras format, uncomment:
# model.save('model.keras')

Epoch 1/25
[1m558/558[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1229s[0m 2s/step - accuracy: 0.5348 - loss: 2.9766 - val_accuracy: 0.7259 - val_loss: 1.8306 - learning_rate: 1.0000e-04
Epoch 2/25
[1m558/558[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1206s[0m 2s/step - accuracy: 0.6868 - loss: 2.0276 - val_accuracy: 0.8420 - val_loss: 1.5408 - learning_rate: 1.0000e-04
Epoch 3/25
[1m558/558[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1213s[0m 2s/step - accuracy: 0.7356 - loss: 1.7733 - val_accuracy: 0.7224 - val_loss: 1.8237 - learning_rate: 1.0000e-04
Epoch 4/25
[1m558/558[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1211s[0m 2s/step - accuracy: 0.7643 - loss: 1.6465 - val_accuracy: 0.8950 - val_loss: 1.2695 - learning_rate: 1.0000e-04
Epoch 5/25
[1m558/558[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1237s[0m 2s/step - accuracy: 0.7962 - loss: 1.4833 - val_accuracy: 0.8440 - val_loss: 1.3398 - learning_rate: 1.0000e-04
Epoch 6/25
[1m558/558[0m [32m━━━━━━━━

In [None]:
# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(test_generator, verbose=1)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

# Plot training history: loss on the left, accuracy on the right
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

plt.tight_layout()
plt.savefig('training_history.png')
plt.show()

# Predictions on Test Set (the predicted class is the index of the largest softmax output)
Y_pred = model.predict(test_generator)
y_pred = np.argmax(Y_pred, axis=1)

# Get true classes and class labels.
# Labels come from the same generator as the ground truth and are ordered by class index.
# NOTE(review): this assumes the test generator's label -> index mapping matches the one
# the model was trained with (train_generator.class_indices) - confirm if the splits change.
true_classes = test_generator.classes
class_indices = test_generator.class_indices
class_labels = sorted(class_indices, key=class_indices.get)
label_ids = np.arange(len(class_labels))

# Classification Report
# Passing labels explicitly keeps target_names aligned even if a class is absent from the test split.
print("Classification Report:")
print(classification_report(true_classes, y_pred, labels=label_ids, target_names=class_labels))

# Confusion Matrix (rows: true label, columns: predicted label)
conf_matrix = confusion_matrix(true_classes, y_pred, labels=label_ids)

plt.figure(figsize=(10, 8))
plt.imshow(conf_matrix, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.colorbar()

tick_marks = label_ids
plt.xticks(tick_marks, class_labels, rotation=45)
plt.yticks(tick_marks, class_labels)

plt.ylabel('True Label')
plt.xlabel('Predicted Label')
# Lay out the figure only after the axis labels exist so they are not clipped in the saved file
plt.tight_layout()
plt.savefig('confusion_matrix.png')
plt.show()

In [None]:
# Print the class names that were used as the report row names and confusion-matrix tick labels
print(class_labels)