# Fashion Product Classifier

## Image name with Extension

In [1]:
import pandas as pd

In [2]:
# Read CSV file into DataFrame, skipping lines with errors
df = pd.read_csv("styles.csv",on_bad_lines='skip')


In [3]:

# Add a new column 'image' to the dataframe, consisting of the 'id' column values converted to string and appended with ".jpg"
df['image'] = df.apply(lambda row: str(row['id']) + ".jpg", axis=1)


In [4]:
df.head(10)

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,image
0,15970,Men,Apparel,Topwear,Shirts,Navy Blue,Fall,2011.0,Casual,Turtle Check Men Navy Blue Shirt,15970.jpg
1,39386,Men,Apparel,Bottomwear,Jeans,Blue,Summer,2012.0,Casual,Peter England Men Party Blue Jeans,39386.jpg
2,59263,Women,Accessories,Watches,Watches,Silver,Winter,2016.0,Casual,Titan Women Silver Watch,59263.jpg
3,21379,Men,Apparel,Bottomwear,Track Pants,Black,Fall,2011.0,Casual,Manchester United Men Solid Black Track Pants,21379.jpg
4,53759,Men,Apparel,Topwear,Tshirts,Grey,Summer,2012.0,Casual,Puma Men Grey T-shirt,53759.jpg
5,1855,Men,Apparel,Topwear,Tshirts,Grey,Summer,2011.0,Casual,Inkfruit Mens Chain Reaction T-shirt,1855.jpg
6,30805,Men,Apparel,Topwear,Shirts,Green,Summer,2012.0,Ethnic,Fabindia Men Striped Green Shirt,30805.jpg
7,26960,Women,Apparel,Topwear,Shirts,Purple,Summer,2012.0,Casual,Jealous 21 Women Purple Shirt,26960.jpg
8,29114,Men,Accessories,Socks,Socks,Navy Blue,Summer,2012.0,Casual,Puma Men Pack of 3 Socks,29114.jpg
9,30039,Men,Accessories,Watches,Watches,Black,Winter,2016.0,Casual,Skagen Men Black Watch,30039.jpg


In [5]:
# Randomly shuffle the DataFrame rows
df = df.sample(frac=1).reset_index(drop=True)

In [6]:
df.head(10)

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,image
0,48753,Women,Accessories,Jewellery,Bangle,Silver,Summer,2013.0,Casual,Lucera Women Silver Bangle,48753.jpg
1,44249,Women,Accessories,Bags,Handbags,Red,Summer,2012.0,Casual,Hidekraft Women Red Handbag,44249.jpg
2,19398,Men,Apparel,Topwear,Tshirts,Blue,Summer,2011.0,Sports,ADIDAS Men Solid Blue Jersey,19398.jpg
3,20457,Women,Accessories,Wallets,Wallets,Black,Winter,2015.0,Casual,Baggit Women Dhano Taj Black Wallet,20457.jpg
4,55288,Women,Personal Care,Makeup,Highlighter and Blush,Skin,Spring,2017.0,Casual,Lakme Sun Kissed Face Sheer,55288.jpg
5,58673,Women,Apparel,Topwear,Kurtas,White,Summer,2012.0,Ethnic,ALayna Women Printed White Kurta,58673.jpg
6,9993,Women,Apparel,Bottomwear,Leggings,Beige,Fall,2011.0,Casual,Urban Yoga Women Anga Beige Leggings,9993.jpg
7,33358,Men,Footwear,Shoes,Formal Shoes,Brown,Summer,2012.0,Formal,Homme Men Brown Semi Formal Shoes,33358.jpg
8,50689,Men,Apparel,Innerwear,Trunk,Black,Summer,2016.0,Casual,Chromozome Men Black Trunks,50689.jpg
9,57832,Women,Apparel,Topwear,Tops,Pink,Summer,2012.0,Casual,United Colors of Benetton Women Pink Top,57832.jpg


## Preparing the ImageDataGenerator for training the model

In [7]:
import tensorflow as tf

In [8]:

# Import the ImageDataGenerator class
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Set the batch size for the image data generator
batch_size = 256

# Create an instance of ImageDataGenerator for data augmentation/preprocessing. Here it's configured for a validation split of 20%
image_generator = ImageDataGenerator(validation_split=0.2)

# Create a training data generator. This will read images in batches from the 'images' directory, using the 'image' column from df for filenames
# and the 'masterCategory' column for labels. The target size for resizing images is set to (60,80), and the subset 'training' is selected.
training_generator = image_generator.flow_from_dataframe(
    dataframe=df,
    directory="images",
    x_col="image",
    y_col="masterCategory",
    target_size=(60,80),
    batch_size=batch_size,
    subset="training"
)

# Similar to the above, create a validation data generator. It also reads from the 'images' directory and df, but selects the 'validation' subset.
validation_generator = image_generator.flow_from_dataframe(
    dataframe=df,
    directory="images",
    x_col="image",
    y_col="masterCategory",
    target_size=(60,80),
    batch_size=batch_size,
    subset="validation"
)

# Store the number of classes (unique labels) present in the training data
classes = len(training_generator.class_indices)

Found 35536 validated image filenames belonging to 7 classes.




Found 8883 validated image filenames belonging to 7 classes.




In [9]:
classes

7

## Initializing the Model using Neural Network

In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense



In [11]:
#Initializing our model
classifier = Sequential()

classifier.add(Conv2D(32,(3,3),input_shape = (60,80,3), activation = 'relu'))

classifier.add(MaxPooling2D(pool_size=(3, 3)))

classifier.add(Conv2D(32,(3,3), activation = 'relu'))
classifier.add(MaxPooling2D(pool_size=(3, 3)))

classifier.add(Flatten())

classifier.add(Dense(units=32,activation = 'relu'))
classifier.add(Dense(units=64,activation = 'relu'))
classifier.add(Dense(units=128,activation = 'relu'))
classifier.add(Dense(units=256,activation = 'relu'))
classifier.add(Dense(units=512,activation = 'relu'))

classifier.add(Dense(units=7,activation = 'softmax'))

classifier.compile(optimizer='adam',
              loss="categorical_crossentropy",
              metrics=['accuracy'])

## Fitting Data in our Neural Network

In [12]:
# ! pip install pillow

In [13]:
import scipy
from tensorflow import keras

# Assuming you have already defined your model 'classifier'

# Set the number of epochs for training
epochs = 5

# Use the 'fit' method to train the model on the training data and validate on the validation data
history = classifier.fit(
    x=training_generator,
    steps_per_epoch=len(training_generator),  # Number of batches per epoch
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=len(validation_generator),  # Number of batches for validation
    verbose=1  # Set to 1 for progress updates during training
)

# After training, you can evaluate the model's performance on the test data (if available)
# Replace 'test_data' and 'test_labels' with your actual test data and labels
# Evaluate the model
# test_loss, test_accuracy = classifier.evaluate(test_data, test_labels)
# print("Test Loss:", test_loss)
# print("Test Accuracy:", test_accuracy)



Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [17]:

# After training, you can evaluate the model's performance on the validation data
# Use the 'evaluate' method to get the loss and accuracy
validation_loss, validation_accuracy = classifier.evaluate(
    x=validation_generator,
    steps=len(validation_generator),  # Number of batches for validation
    verbose=1  # Set to 1 for progress updates during evaluation
)

# Print the validation loss and accuracy
print("Validation Loss:", validation_loss)
print("Validation Accuracy:", validation_accuracy)

Validation Loss: 0.1453908085823059
Validation Accuracy: 0.9551953077316284


## Saving the Model for further use

In [14]:
classifier.save("model.h5")

  saving_api.save_model(


## Testing on our data

In [23]:

import numpy as np
from keras.preprocessing.image import load_img

filename = "images/15970.jpg"

from keras.models import load_model
new_model = load_model('model.h5')
new_model.summary()




Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 58, 78, 32)        896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 19, 26, 32)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 17, 24, 32)        9248      
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 5, 8, 32)          0         
 g2D)                                                            
                                                                 
 flatten (Flatten)           (None, 1280)              0         
                                                                 
 dense (Dense)               (None, 32)                4

In [24]:
import numpy as np
from keras.preprocessing.image import load_img, img_to_array
from keras.applications.imagenet_utils import preprocess_input

# Load the image
image = load_img(filename, target_size=(60, 80))  # Resize the image to match the model's input shape
image = img_to_array(image)  # Convert to numpy array
image = preprocess_input(image)  # Preprocess the image (scaling, etc.)

# Make predictions using your model
predictions = new_model.predict(np.expand_dims(image, axis=0))  # Predictions for a single image
print(predictions)
# The 'predictions' variable now contains the model's output probabilities for each class
# You can interpret these probabilities to determine the predicted class


[[7.1621507e-01 2.6835841e-01 1.2383981e-03 7.2822087e-03 8.5659522e-06
  6.8477220e-03 4.9668230e-05]]


In [25]:
val = np.argmax(predictions) # Returns the index of the class with the highest probability
print(val) # prints the predicted class label, which represents the category or class that the model predicts the input image belongs to

0


In [26]:
my_dict = training_generator.class_indices
key_list = list(my_dict.keys()) 
val_list = list(my_dict.values()) 
print(key_list[val])


Accessories


The model predicts the following probabilities for each class:\
Accessories: 71.62%\
Apparel: 26.84%\
Footwear: 0.12%\
Free Items: 0.73%\
Home: 0.01%\
Personal Care: 0.68%\
Sports: 0.00%

The predicted class index is 0 (corresponding to "Accessories").
Therefore, the model predicts that the input image most likely belongs to the "Accessories" class.

## Saving the Class IDs

In [27]:
import pickle
# save the model to disk
filename1 = 'key_list'
filename = 'val_list'
pickle.dump(key_list, open(filename1, 'wb'))
pickle.dump(val_list, open(filename, 'wb'))