# Creating a multi-label classifer to label watches

In [61]:
#!pip install scikit-learn

## Import Libraries

In [21]:
import os
import pathlib
from csv import DictReader
import glob
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import *

## Define a function to build the network architecture. First, implement the convolutional blocks

In [22]:
def build_network(width, height, depth, classes):
    input_layer = Input(shape = (width, height, depth))
    x = Conv2D(filters = 32,
              kernel_size = (3, 3),
              padding = "same") (input_layer)

    x = ReLU()(x)
    x = BatchNormalization(axis = -1) (x)
    x = Conv2D(filters = 32,
              kernel_size = (3, 3),
              padding = "same") (x)
    x = ReLU() (x)
    x = BatchNormalization(axis = -1) (x)
    x = MaxPooling2D(pool_size = (2, 2)) (x)
    x = Dropout(rate = 0.25) (x)

    x = Conv2D(filters = 64,
              kernel_size = (3, 3),
              padding = "same") (x)
    x = ReLU() (x)
    x = BatchNormalization(axis = -1) (x)
    x = Conv2D(filters = 64,
              kernel_size = (3, 3),
              padding = "same") (x)

    x = ReLU() (x)
    x = BatchNormalization(axis = -1) (x)
    x = MaxPooling2D(pool_size = (2, 2)) (x)
    x = Dropout(rate = 0.25) (x)
    
#Next, add the convolutional layers
    x = Flatten() (x)
    x = Dense(units = 512) (x)
    x = ReLU() (x)
    x = BatchNormalization(axis = -1) (x)
    x = Dropout(rate = 0.5) (x)
    
    x = Dense(units = classes) (x)
    output  = Activation("sigmoid") (x)

    return Model(input_layer, output)

    
    

## Define function to load all images and labels(gender and usage), given a list of image paths and a dictionary of metadata associated with each of them

In [23]:
def load_images_and_labels(image_paths, styles,
                          target_size):
    images = []
    labels = []
    for image_path in image_paths:
        image = load_img(image_path,
                        target_size = target_size)
        image = img_to_array(image)
        image_id = image_path.split(os.path.sep) [-1] [:-4]

        image_style = styles[image_id]
        label = (image_style["gender"],
                image_style["usage"])

        images.append(image)
        labels.append(label)
    return np.array(images), np.array(labels)

        

## Set the random seed to guarantee reproducibility

In [24]:
SEED = 999
np.random.seed(SEED)

## Define the path to the images and the styles.csv metadata file:

In [25]:
base_path = (pathlib.Path.home() / "C:/.keras" /
            "datasets" /
            "fashion-product-images-small")
styles_path = str(base_path / "styles.csv")
images_path_pattern = str(base_path / "images/*.jpg")
image_paths = glob.glob(images_path_pattern)

## Keep only the watches images for Casual, Smart Casual, and Formal usage, suited to Men and Women

In [26]:
with open(styles_path, "r") as f:
    dict_reader = DictReader(f)
    STYLES = [*dict_reader]

    article_type = "Watches"
    genders = {"Men", "Women"}
    usages = {"Casual", "Smart Casual", "Formal"}
    STYLES = {style["id"]: style
             for style in STYLES
             if (style["articleType"] == article_type
                and
                style["gender"] in genders and 
                style["usage"] in usages)}

    image_paths = [*filter(lambda p:
                          p.split(os.path.sep) [-1] [:-4]
                          in STYLES.keys(),
                          image_paths)]

## Load images and labels, resizing  the images into a 64x64x3 shape

In [27]:
x, y = load_images_and_labels(image_paths, STYLES,
                             (64, 64))

## Normalize the images and mult-hot encode the labels

In [28]:
x = x.astype("float") / 255.0
mlb = MultiLabelBinarizer()
y = mlb.fit_transform(y)

## Create the train, validation, and test splits

In [29]:
(x_train, x_test, y_train, y_test) = train_test_split(x, y,
                                                     stratify = y,
                                                     test_size = 0.2,
                                                     random_state = SEED)

(x_train, x_valid, y_train, y_valid) = train_test_split(x_train, y_train,
                                                       stratify = y_train,
                                                       test_size = 0.2,
                                                       random_state = SEED)

## Build and Compile the network

In [30]:
model = build_network(width = 64,
                     height = 64,
                     depth = 3,
                     classes = len(mlb.classes_))
model.compile(loss = "binary_crossentropy",
             optimizer = "rmsprop",
             metrics = ["accuracy"])


## Train the Model for 20 epochs, in batches of 64 images at a time

In [33]:
BATCH_SIZE = 64
EPOCHS = 20
model.fit(x_train, y_train,
         validation_data = (x_valid, y_valid),
         batch_size = BATCH_SIZE,
         epochs = EPOCHS)

Epoch 1/20
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 1s/step - accuracy: 0.2597 - loss: 0.4841 - val_accuracy: 0.9284 - val_loss: 0.6328
Epoch 2/20
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 1s/step - accuracy: 0.3877 - loss: 0.3479 - val_accuracy: 0.9761 - val_loss: 0.7172
Epoch 3/20
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 1s/step - accuracy: 0.4988 - loss: 0.2267 - val_accuracy: 0.9814 - val_loss: 0.6573
Epoch 4/20
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 1s/step - accuracy: 0.6375 - loss: 0.1824 - val_accuracy: 0.9814 - val_loss: 0.6149
Epoch 5/20
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 1s/step - accuracy: 0.6546 - loss: 0.1366 - val_accuracy: 0.9814 - val_loss: 0.6432
Epoch 6/20
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 1s/step - accuracy: 0.7298 - loss: 0.1348 - val_accuracy: 0.9814 - val_loss: 0.8243
Epoch 7/20
[1m24/24[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x1617f11b0d0>

## Evaluate the model on the test set

In [34]:
result = model.evaluate(x_test, y_test,
                       batch_size = BATCH_SIZE)
print(f"Test accuracy: {result[1]}")

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 215ms/step - accuracy: 0.7575 - loss: 0.2680
Test accuracy: 0.781316339969635


## Use the model to make predictions on a test image, displaying the probability of each label

In [36]:
test_image = np.expand_dims(x_test[0], axis = 0)
probabilities = model.predict(test_image) [0]
for label, p in zip(mlb.classes_, probabilities):
    print(f"{label}: {p * 100:.2f}%")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 523ms/step
Casual: 99.87%
Formal: 0.11%
Men: 19.03%
Smart Casual: 0.25%
Women: 80.86%


## Compare the ground truth labels with the network's prediction

In [37]:
ground_truth_labels = np.expand_dims(y_test[0],
                                    axis = 0)
ground_truth_labels = mlb.inverse_transform(ground_truth_labels)
print(f"Groung truth labels: {ground_truth_labels}")

Groung truth labels: [(np.str_('Casual'), np.str_('Women'))]
