In [6]:
import pandas as pd
import numpy as np
import tensorflow as tf
import os
import matplotlib.pyplot as plt
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras.models import load_model
import keras_tuner as kt
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
from tensorflow.keras.layers import RandomZoom
import logging
import pickle
import json


csv_file = r'F:\Dataset.csv'  # Write the CSV file path
image_dir = r'F:\Labesni\DataSet\images'  # Write the images file path

df = pd.read_csv(csv_file)

# Label columns, in the order in which they are one-hot encoded and split below.
label_columns = ['gender', 'masterCategory', 'subCategory', 'articleType',
                 'baseColour', 'season', 'usage']

file_paths = df['id'].values
labels = df[label_columns]
# Missing labels become their own 'Unknown' class instead of NaN.
labels = labels.fillna('Unknown')

# One-hot encode labels. The encoder emits one group of indicator columns per
# input column, laid out side by side in `label_columns` order.
encoder = OneHotEncoder(sparse_output=False)
labels_encoded = encoder.fit_transform(labels)

encoded_labels = tf.convert_to_tensor(labels_encoded, dtype=tf.float32)
# Image files are named "<id>.jpg" inside `image_dir`.
file_paths = [os.path.join(image_dir, str(file_name) + '.jpg') for file_name in file_paths]

# Number of classes for each category, read from the fitted encoder so the
# counts can never drift away from the actual width of each encoded group.
num_classes = {
    column: len(categories)
    for column, categories in zip(label_columns, encoder.categories_)
}

# Split encoded labels into one matrix per category by walking the column
# groups from left to right.
label_slices = {}
start = 0
for column in label_columns:
    stop = start + num_classes[column]
    label_slices[column] = labels_encoded[:, start:stop]
    start = stop

gender_labels = label_slices['gender']
masterCategory_labels = label_slices['masterCategory']
subCategory_labels = label_slices['subCategory']
articleType_labels = label_slices['articleType']
baseColour_labels = label_slices['baseColour']
season_labels = label_slices['season']
usage_labels = label_slices['usage']

# Image preprocessing functions
    
def load_and_preprocess_image(file_path):
    """Load one JPEG from disk as a 224x224 RGB float image scaled to [0, 1].

    The function calls ``.numpy()`` on its argument, so it has to run eagerly
    (for example wrapped in ``tf.py_function``).

    Args:
        file_path: Scalar string tensor holding the path of the image file.

    Returns:
        A float32 tensor of shape (224, 224, 3). An all-zero tensor of the
        same shape is returned when the file does not exist or cannot be
        read or decoded, so callers can detect and drop the sample.
    """
    file_path = file_path.numpy().decode('utf-8')
    if not tf.io.gfile.exists(file_path):
        return tf.zeros([224, 224, 3])
    try:
        image = tf.io.read_file(file_path)
        image = tf.image.decode_jpeg(image, channels=3)
    except tf.errors.OpError:
        # Unreadable or corrupt file: fall back to the zero placeholder.
        # Only TensorFlow op errors are handled here, so interrupts and
        # programming errors still surface.
        return tf.zeros([224, 224, 3])
    image = tf.image.resize(image, [224, 224])
    image = tf.cast(image, tf.float32) / 255.0
    return image


# Zoom augmentation layer; the same factor range is used for height and width.
random_zoom = RandomZoom(
    height_factor=(-0.2, 0.2),
    width_factor=(-0.2, 0.2),
)

batch_size = 32

# Each dataset element is (image path, one one-hot label vector per category).
dataset_components = (
    file_paths,
    gender_labels,
    masterCategory_labels,
    subCategory_labels,
    articleType_labels,
    baseColour_labels,
    season_labels,
    usage_labels,
)
dataset = tf.data.Dataset.from_tensor_slices(dataset_components)


IMAGE_SHAPE = [224, 224, 3]
# Fixed seed so the train/validation split is identical on every run and on
# every pass over the data.
SPLIT_SEED = 42


def _load_image(file_path):
    """Load the image stored at `file_path` via `load_and_preprocess_image`."""
    file_path = tf.strings.as_string(file_path)
    image = tf.py_function(func=load_and_preprocess_image, inp=[file_path], Tout=tf.float32)
    # py_function drops static shape information; restore it for later ops.
    image.set_shape(IMAGE_SHAPE)
    return image


def _augment_image(image):
    """Apply the random training augmentations to a single image."""
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_brightness(image, max_delta=0.1)
    image = tf.image.random_contrast(image, lower=0.8, upper=1.2)
    image = tf.image.random_saturation(image, lower=0.8, upper=1.2)
    image = tf.image.random_hue(image, max_delta=0.02)
    image = tf.image.random_jpeg_quality(image, min_jpeg_quality=75, max_jpeg_quality=100)
    # The zoom layer is called on a batch, so add and then remove a batch axis.
    image = random_zoom(tf.expand_dims(image, 0))
    return tf.squeeze(image, axis=0)


def preprocess_image(file_path, gender_label, masterCategory_label, subCategory_label, articleType_label, baseColour_label, season_label, usage_label):
    """Load and augment one image and group its labels for the model.

    Args:
        file_path: Scalar string tensor with the image path.
        gender_label, masterCategory_label, subCategory_label,
        articleType_label, baseColour_label, season_label, usage_label:
            One-hot label vector for the corresponding category.

    Returns:
        ``(image, labels)`` where ``image`` is the augmented (224, 224, 3)
        float image and ``labels`` is the 7-tuple of label vectors in the
        order of the parameters.

    Errors are allowed to propagate: returning ``None`` from a function used
    in ``Dataset.map`` only produces a harder-to-read failure later on.
    """
    image = _augment_image(_load_image(file_path))
    return image, (gender_label, masterCategory_label, subCategory_label, articleType_label, baseColour_label, season_label, usage_label)


def filter_invalid(image, *labels):
    """Return a scalar bool tensor: True when the sample should be kept.

    A sample is kept when the image has at least one non-zero value (the
    loader returns an all-zero image for missing/corrupt files) and at least
    one of the label vectors has a non-zero entry.
    """
    zero_tensor = tf.zeros_like(image)

    image_valid = tf.reduce_any(tf.not_equal(image, zero_tensor))

    labels_valid = tf.reduce_any([tf.reduce_any(tf.not_equal(label, tf.zeros_like(label))) for label in labels])

    return tf.logical_and(image_valid, labels_valid)


def _load_example(file_path, *labels):
    """Replace the path of a dataset element by the decoded image."""
    return (_load_image(file_path),) + labels


def _augment_example(image, *labels):
    """Augment the image and group the labels as the model expects."""
    return _augment_image(image), labels


def _plain_example(image, *labels):
    """Group the labels as the model expects without touching the image."""
    return image, labels


def _build_split(split, augment):
    """Turn a dataset of (path, *labels) into batches of (image, labels).

    Images are decoded first so that `filter_invalid` sees real pixel data
    and can drop the zero placeholders *before* augmentation makes them
    non-zero. Augmentation is applied only when `augment` is True.
    """
    split = split.map(_load_example, num_parallel_calls=tf.data.AUTOTUNE)
    split = split.filter(filter_invalid)
    split = split.map(_augment_example if augment else _plain_example,
                      num_parallel_calls=tf.data.AUTOTUNE)
    return split.batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)


total_size = len(file_paths)
train_size = int(total_size * 0.9)
val_size = total_size - train_size

# Shuffle once, with a fixed seed and without reshuffling, BEFORE splitting.
# Shuffling happens on the cheap (path, labels) records rather than on decoded
# images, and the fixed order guarantees that `take`/`skip` always yield the
# same disjoint train and validation sets.
dataset = dataset.shuffle(buffer_size=total_size, seed=SPLIT_SEED,
                          reshuffle_each_iteration=False)

# The training part is reshuffled on every pass; validation keeps its order.
train_dataset = dataset.take(train_size).shuffle(buffer_size=train_size)
val_dataset = dataset.skip(train_size)

# Only the training data is augmented, so validation metrics are measured on
# unmodified images.
train_dataset = _build_split(train_dataset, augment=True)
val_dataset = _build_split(val_dataset, augment=False)

# Step counts are based on the unfiltered sizes; samples dropped by
# `filter_invalid` make the real number of batches slightly smaller.
steps_per_epoch = train_size // batch_size
validation_steps = val_size // batch_size


print(f"train dataset size: {train_size}")
print(f"validation dataset size: {val_size}")


train dataset size: 33894
validation dataset size: 3766


In [7]:
# Count the elements of `val_dataset` by streaming over it once. The dataset
# is batched, so this is the number of batches, not of images. Counting this
# way avoids holding every decoded validation batch in memory at once.
val_dataset_size = sum(1 for _ in val_dataset)
print(f"Validation dataset size: {val_dataset_size}")

Validation dataset size: 118


In [2]:
def build_model(hp):
    """Build and compile the multi-output classifier for one tuner trial.

    A frozen, ImageNet-initialised ResNet50 backbone feeds a global average
    pooling layer and one shared dense layer, followed by one softmax head
    per label category (sizes taken from `num_classes`).

    Args:
        hp: KerasTuner hyperparameter container. Registers
            ``units`` (256..1024, step 128) for the shared dense layer and
            ``learning_rate`` (1e-5..1e-3, log-sampled) for Adam.

    Returns:
        The compiled Keras model with outputs named after the categories.
    """
    # Referenced through `tf.keras.applications` because ResNet50 is not
    # imported by name at the top of the file.
    # NOTE(review): the input pipeline scales images to [0, 1]; confirm this
    # matches the preprocessing expected by the ImageNet ResNet50 weights
    # (see tf.keras.applications.resnet50.preprocess_input).
    base_model = tf.keras.applications.ResNet50(
        weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    base_model.trainable = False  # Freeze the base model

    x = GlobalAveragePooling2D()(base_model.output)

    # Tune number of units in dense layer
    units = hp.Int('units', min_value=256, max_value=1024, step=128)
    x = Dense(units, activation='relu')(x)

    # One softmax head per category. The order here fixes the order of the
    # model outputs and must match the order of the label tuple in the data.
    head_names = ['gender', 'masterCategory', 'subCategory', 'articleType',
                  'baseColour', 'season', 'usage']
    outputs = [
        Dense(num_classes[name], activation='softmax', name=name)(x)
        for name in head_names
    ]

    model = Model(inputs=base_model.input, outputs=outputs)

    # Tune learning rate
    learning_rate = hp.Float('learning_rate', min_value=1e-5, max_value=1e-3, sampling='log')

    # Every head uses the same loss and metric.
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss={name: 'categorical_crossentropy' for name in head_names},
        metrics={name: 'accuracy' for name in head_names},
    )

    return model

In [None]:
## to delete tuner directory if needed
# import shutil

# tuner_dir = 'fine_tune_tuner_results'

# if os.path.exists(tuner_dir):
#     shutil.rmtree(tuner_dir)
#     print(f"Directory '{tuner_dir}' has been deleted.")    

In [None]:
# Initialize the tuner. Trials are ranked by the validation accuracy of the
# articleType head, so hyperparameters are selected for how well they
# generalise rather than for how well they fit the training data.
tuner = kt.RandomSearch(
    build_model,
    objective=kt.Objective("val_articleType_accuracy", direction="max"),
    max_trials=10,           # lower this for a quick test run
    executions_per_trial=1,
    directory='tuner_results',
    project_name='Labbesni'
)

# Display search space summary
tuner.search_space_summary()

# Define callbacks. All of them watch the same validation metric as the tuner
# objective; the validation metrics are available because every fit/search
# call passes `validation_data`.
checkpoint_callback = ModelCheckpoint(
    # Save the best model based on validation accuracy
    filepath='best_model_epoch_{epoch:02d}_val_articleType_acc_{val_articleType_accuracy:.2f}.keras',
    monitor='val_articleType_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1
)

reduce_lr_callback = ReduceLROnPlateau(
    monitor='val_articleType_accuracy',
    mode='max',
    factor=0.2,
    patience=3,  # Number of epochs with no improvement after which the learning rate will be reduced
    verbose=1
)

early_stopping_callback = EarlyStopping(
    monitor='val_articleType_accuracy',
    mode='max',
    patience=5,  # Number of epochs with no improvement after which training will stop
    verbose=1,
    restore_best_weights=True
)

In [None]:
# Perform the hyperparameter search
try:
    tuner.search(
        train_dataset.repeat(),  # infinite stream; epoch length is set by steps_per_epoch
        validation_data=val_dataset,
        epochs=25,                    # lower this for a quick test run
        steps_per_epoch=steps_per_epoch,
        validation_steps=validation_steps,
        callbacks=[checkpoint_callback, reduce_lr_callback, early_stopping_callback]
    )
except Exception as e:
    # Best effort: trials that completed before the failure can still be used.
    print(f"Error during hyperparameter search: {e}")

# Get the best hyperparameters. The list is empty when no trial completed, so
# fail with an explicit message instead of an IndexError.
best_trials = tuner.get_best_hyperparameters(num_trials=1)
if not best_trials:
    raise RuntimeError(
        "Hyperparameter search finished without a completed trial; "
        "no best hyperparameters are available."
    )
best_hps = best_trials[0]

# Print the best hyperparameters
print(f"Best number of units in dense layers: {best_hps.get('units')}")
print(f"Best learning rate: {best_hps.get('learning_rate')}")

# Build the model with the best hyperparameters and train it
model = tuner.hypermodel.build(best_hps)

In [None]:
try:
    history = model.fit(
        train_dataset.repeat(),
        validation_data=val_dataset,
        #initial_epoch=, # if training stopped at some epoch, put that epoch number here to resume from it
        epochs=30,                  # lower this for a quick test run
        steps_per_epoch=steps_per_epoch,
        validation_steps=validation_steps,
        callbacks=[checkpoint_callback, reduce_lr_callback, early_stopping_callback]
    )
except Exception as e:
    # Log with traceback and stop here. Without a completed `fit` there is no
    # `history` to pickle, and saving the model would store weights from an
    # unfinished run under the name of the finished one.
    logging.exception("An error occurred during training: %s", e)
    raise

model.save('best_initial_model.keras')
print("Model saved successfully.")

# Only the plain metrics dict is pickled, not the History object itself.
with open('history.pkl', 'wb') as f:
    pickle.dump(history.history, f)

In [None]:
# model = tf.keras.models.load_model('best_initial_model.keras')
# with open('history.pkl', 'rb') as f:
#     history = pickle.load(f)

In [None]:
def build_fine_tune_model(hp):
    """Load the initially trained model and prepare it for fine-tuning.

    All layers are frozen first, then the last ``unfreeze_layers`` layers are
    made trainable again and the model is recompiled with a small learning
    rate.

    Args:
        hp: KerasTuner hyperparameter container. Registers
            ``unfreeze_layers`` (10..number of layers, step 20) and
            ``fine_tune_learning_rate`` (1e-6..1e-4, log-sampled).

    Returns:
        The compiled model, or ``None`` when 'best_initial_model.keras'
        cannot be loaded (the reason is printed).
    """
    try:
        model = tf.keras.models.load_model('best_initial_model.keras')
    except OSError as e:
        print(f"Error: {e}. Model file not found or corrupted. Please check the path or model format.")
        return None
    except Exception as e:
        print(f"Unexpected error while loading the model: {e}")
        return None

    # Freeze all layers initially
    for layer in model.layers:
        layer.trainable = False

    # Determine the number of layers to unfreeze
    total_layers = len(model.layers)
    unfreeze_layers = hp.Int('unfreeze_layers', min_value=10, max_value=total_layers, step=20)

    # Unfreeze the specified number of layers from the top (end of the list)
    for layer in model.layers[-unfreeze_layers:]:
        layer.trainable = True

    fine_tune_lr = hp.Float('fine_tune_learning_rate', min_value=1e-6, max_value=1e-4, sampling='log')

    # Recompile so the changed `trainable` flags take effect; every head uses
    # the same loss and metric.
    head_names = ['articleType', 'baseColour', 'gender', 'masterCategory',
                  'season', 'subCategory', 'usage']
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=fine_tune_lr),
        loss={name: 'categorical_crossentropy' for name in head_names},
        metrics={name: 'accuracy' for name in head_names},
    )

    return model

# Initialize the tuner for fine-tuning. Trials are ranked by the validation
# accuracy of the articleType head: unfreezing more layers always helps the
# training accuracy, so only a validation metric can tell useful settings
# from overfitting ones.
fine_tune_tuner = kt.RandomSearch(
    build_fine_tune_model,
    objective=kt.Objective("val_articleType_accuracy", direction="max"),
    max_trials=10,            # lower this for a quick test run
    executions_per_trial=1,
    directory='fine_tune_tuner_results',
    project_name='Labbesni_fine_tuning'
)

fine_tune_tuner.search_space_summary()



In [None]:
# Callbacks for fine-tuning. All three watch the validation accuracy of the
# articleType head, so "best" means best on held-out data; the metric is
# available because the fit/search calls pass `validation_data`.

# Keep only the best model seen during fine-tuning.
checkpoint_callback = ModelCheckpoint(
    filepath='best_fine_tuned_model.keras',
    monitor='val_articleType_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1
)


# Multiply the learning rate by 0.2 after 3 epochs without improvement.
lr_scheduler = ReduceLROnPlateau(
    monitor='val_articleType_accuracy',
    mode='max',
    factor=0.2,
    patience=3,
    verbose=1
)


# Stop after 5 epochs without improvement and roll back to the best weights.
early_stopping = EarlyStopping(
    monitor='val_articleType_accuracy',
    mode='max',
    patience=5,
    verbose=1,
    restore_best_weights=True
)

In [None]:
# Perform the hyperparameter search for fine-tuning
try:
    fine_tune_tuner.search(
        # Repeat the training data, as in the other fit/search calls: with
        # `steps_per_epoch` set, a finite dataset is exhausted after the
        # first epoch.
        train_dataset.repeat(),
        validation_data=val_dataset,
        epochs=25,                # lower this for a quick test run
        steps_per_epoch=steps_per_epoch,
        validation_steps=validation_steps,
        callbacks=[checkpoint_callback, lr_scheduler, early_stopping]
    )
except Exception as e:
    # Best effort: trials that completed before the failure can still be used.
    print(f"Error during hyperparameter search: {e}")

# The list is empty when no trial completed; fail with an explicit message
# instead of an IndexError.
best_fine_tune_trials = fine_tune_tuner.get_best_hyperparameters(num_trials=1)
if not best_fine_tune_trials:
    raise RuntimeError(
        "Fine-tuning search finished without a completed trial; "
        "no best hyperparameters are available."
    )
best_hps_fine_tune = best_fine_tune_trials[0]


print(f"Best number of unfreeze layers: {best_hps_fine_tune.get('unfreeze_layers')}")
print(f"Best fine-tune learning rate: {best_hps_fine_tune.get('fine_tune_learning_rate')}")

# Unfreeze layers and fine-tune with the best hyperparameters
model = fine_tune_tuner.hypermodel.build(best_hps_fine_tune)

In [None]:
# Fine-tune the model
try:
    history_fine_tuning = model.fit(
        train_dataset.repeat(),
        validation_data=val_dataset,
        #initial_epoch=,  # set to the last finished epoch to resume an interrupted run
        epochs=30,
        steps_per_epoch=steps_per_epoch,
        validation_steps=validation_steps,
        callbacks=[checkpoint_callback, lr_scheduler, early_stopping]
    )
except Exception as e:
    # Log with traceback and stop here. Without a completed `fit` there is no
    # `history_fine_tuning` to pickle, and saving the model would store
    # weights from an unfinished run under the name of the finished one.
    logging.exception("An error occurred during training: %s", e)
    raise

model.save('tuned_model.keras')
print("Model saved successfully.")

# `pickle` is imported at the top of the file. Only the plain metrics dict is
# stored, not the History object itself.
with open('history_fine_tuning.pkl', 'wb') as f:
    pickle.dump(history_fine_tuning.history, f)

In [None]:
# model = tf.keras.models.load_model('tuned_model.keras')
# with open('history_fine_tuning.pkl', 'rb') as f:
#     history_fine_tuning = pickle.load(f)

In [None]:
# Score the current model on the validation batches and keep the raw values
# returned by Keras on disk for later inspection.
results = model.evaluate(
    val_dataset,
    steps=validation_steps,
)
print(f"Evaluation results: {results}")

with open('evaluation_results.pkl', 'wb') as results_file:
    pickle.dump(results, results_file)

In [None]:
# with open('evaluation_results.pkl', 'rb') as f:
#     results = pickle.load(f)

In [None]:
# The order of categories; it must match the order of the model outputs.
categories = ['gender', 'masterCategory', 'subCategory', 'articleType', 'baseColour', 'season', 'usage']

# `results` is read as: total loss, then one loss per category, then one
# accuracy per category. Check the length up front so a different layout
# fails with a clear message instead of an IndexError inside the loop.
num_categories = len(categories)
expected_length = 1 + 2 * num_categories
if len(results) < expected_length:
    raise ValueError(
        f"Expected at least {expected_length} evaluation values (total loss, "
        f"{num_categories} losses, {num_categories} accuracies) but got {len(results)}."
    )

total_loss = float(results[0])

# Unpack losses and accuracies based on the known order
losses = results[1:1 + num_categories]
accuracies = results[1 + num_categories:expected_length]

# Create a dictionary to store results
evaluation_results = {
    "Total Loss": total_loss,
}

# Print evaluation results and store them in the dictionary. Values are
# converted to plain floats so the dictionary can be written as JSON.
for category, loss, accuracy in zip(categories, losses, accuracies):
    loss = float(loss)
    accuracy = float(accuracy)
    evaluation_results[category] = {
        "Loss": loss,
        "Accuracy": accuracy
    }
    print(f"{category.capitalize()} Loss: {loss}, Accuracy: {accuracy}")
print(f"Total Loss: {total_loss}")

In [15]:
# Write the evaluation summary as indented, human-readable JSON.
with open('evaluation_results.json', 'w') as results_file:
    json.dump(
        evaluation_results,
        results_file,
        indent=4,
    )

In [None]:
# Read the evaluation summary back from disk.
with open('evaluation_results.json', 'r') as results_file:
    loaded_results = json.load(results_file)

# The loaded dictionary has the same keys as the one that was written.
loaded_total_loss = loaded_results['Total Loss']
print(f"Total Loss: {loaded_total_loss}")

In [None]:
# Plot training & validation accuracy for each category
def plot_accuracy(history, category):
    """Plot the training and validation accuracy curves of one output head.

    Args:
        history: Either the object returned by ``model.fit`` (its ``history``
            attribute is used) or the metrics dictionary itself, e.g. one
            re-loaded from a pickle of ``history.history``.
        category: Name of the model output whose accuracy is plotted.

    Raises:
        KeyError: If the metrics contain no ``<category>_accuracy`` or
            ``val_<category>_accuracy`` entry.
    """
    # Accept both the History object and the bare dict stored on disk.
    metrics = getattr(history, 'history', history)

    plt.plot(metrics[category + '_accuracy'], label=f'Training {category} Accuracy')
    plt.plot(metrics['val_' + category + '_accuracy'], label=f'Validation {category} Accuracy')
    plt.title(f'{category} Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()

# Output heads to plot, one figure per head, for the initial training run.
categories = [
    'gender',
    'masterCategory',
    'subCategory',
    'articleType',
    'baseColour',
    'season',
    'usage',
]

for category in categories:
    plot_accuracy(history, category)

In [None]:
# Plot the accuracy curves of the fine-tuning run, one figure per head.
# The loop is wrapped in `try` so the handler below is attached to something;
# a bare `except` without `try` is a syntax error.
try:
    for category in categories:
        plot_accuracy(history_fine_tuning, category)
except Exception as e:
    # Plotting is best effort; log the traceback and carry on.
    logging.exception("An error occurred while plotting fine-tuning accuracy: %s", e)

In [None]:
# print("Available keys in history.history:")
# print(history.history.keys())

# # Plotting accuracy for all available accuracies
# plt.figure(figsize=(12, 8))

# for key in history.history.keys():
#     if 'accuracy' in key:  
#         plt.plot(history.history[key], label=key)

# plt.title('Model Accuracy')
# plt.ylabel('Accuracy')
# plt.xlabel('Epoch')
# plt.legend(loc='upper left')
# plt.show()

# # Plotting loss for training and validation
# plt.figure(figsize=(12, 8))

# if 'loss' in history.history:
#     plt.plot(history.history['loss'], label='Training loss')

# if 'val_loss' in history.history:
#     plt.plot(history.history['val_loss'], label='Validation loss')

# plt.title('Model Loss')
# plt.ylabel('Loss')
# plt.xlabel('Epoch')
# plt.legend(loc='upper left')
# plt.show()

In [None]:
# if history_fine_tuning:
#     print("Available keys in history_fine_tuning.history:")
#     print(history_fine_tuning.history.keys())

#     # Plotting accuracy for all available accuracies
#     plt.figure(figsize=(12, 8))

#     for key in history_fine_tuning.history.keys():
#         if 'accuracy' in key: 
#             plt.plot(history_fine_tuning.history[key], label=key)

#     plt.title('Fine-Tuned Model Accuracy')
#     plt.ylabel('Accuracy')
#     plt.xlabel('Epoch')
#     plt.legend(loc='upper left')
#     plt.show()

#     # Plotting loss for training and validation
#     plt.figure(figsize=(12, 8))

#     if 'loss' in history_fine_tuning.history:
#         plt.plot(history_fine_tuning.history['loss'], label='Training loss')

#     if 'val_loss' in history_fine_tuning.history:
#         plt.plot(history_fine_tuning.history['val_loss'], label='Validation loss')

#     plt.title('Fine-Tuned Model Loss')
#     plt.ylabel('Loss')
#     plt.xlabel('Epoch')
#     plt.legend(loc='upper left')
#     plt.show()

In [None]:
# Categories in the order of the model outputs and of the label tuple.
category_order = ['gender', 'masterCategory', 'subCategory', 'articleType',
                  'baseColour', 'season', 'usage']

# Class names per category, read from the fitted one-hot encoder. Index i of
# each list is the class that column i of the one-hot labels stands for, so
# the names always line up with the label and prediction indices and a
# hand-maintained list cannot drift out of sync.
class_names = {
    category: [str(name) for name in names]
    for category, names in zip(category_order, encoder.categories_)
}

# Show the first image of one validation batch with its true and predicted
# class for every category.
for images, labels in val_dataset.take(1):

    predictions = model.predict(images)

    plt.imshow(images[0])
    plt.show()

    # `labels` and `predictions` each hold one array per category; take the
    # first sample of the batch and pick the index with the highest value.
    true_labels = {
        category: int(np.argmax(labels[position][0]))
        for position, category in enumerate(category_order)
    }

    pred_labels = {
        category: int(np.argmax(predictions[position][0]))
        for position, category in enumerate(category_order)
    }

    print("True Labels:")
    for category, index in true_labels.items():
        print(f"{category}: {class_names[category][index]}")

    print("\nPredicted Labels:")
    for category, index in pred_labels.items():
        print(f"{category}: {class_names[category][index]}")
        
        