<a href="https://colab.research.google.com/github/AndresPerelmuter/E-commerce-Product-Images/blob/main/E_commerce_Product_Images_Functional_API.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras import layers, Model, Input, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from keras.callbacks import ReduceLROnPlateau

from sklearn.model_selection import train_test_split

In [2]:
# Mount Google Drive at /content/drive so the files stored there can be read from this runtime
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [3]:
base_dir = "/content/drive/MyDrive/E-commerce Product Images/data"  # Replace with the path to your own 'data' folder

In [4]:
# Inspect the folder structure of the dataset
def explore_directory(path):
    """Print every directory found under ``path`` together with its contents.

    For each directory yielded by ``os.walk`` the function prints the directory
    path, the list of sub-directory names (when there are any) and a preview of
    the first five file names (when there are any).

    Args:
        path: Root directory to walk.
    """
    for current_dir, subdir_names, file_names in os.walk(path):
        report_lines = [f"Directorio: {current_dir}"]
        if subdir_names:
            report_lines.append(f"  Subdirectorios: {subdir_names}")
        if file_names:
            # Only the first five names are shown; the trailing "..." is always printed.
            preview = file_names[:5]
            report_lines.append(f"  Archivos: {preview}...")
        print("\n".join(report_lines))

# Walk the dataset base folder and print its layout
explore_directory(base_dir)

Directorio: /content/drive/MyDrive/E-commerce Product Images/data
  Subdirectorios: ['Footwear', 'Apparel', 'Imagenes']
  Archivos: ['fashion.csv', '.DS_Store']...
Directorio: /content/drive/MyDrive/E-commerce Product Images/data/Footwear
  Subdirectorios: ['Women', 'Men']
Directorio: /content/drive/MyDrive/E-commerce Product Images/data/Footwear/Women
  Subdirectorios: ['Images']
Directorio: /content/drive/MyDrive/E-commerce Product Images/data/Footwear/Women/Images
  Subdirectorios: ['images_with_product_ids']
Directorio: /content/drive/MyDrive/E-commerce Product Images/data/Footwear/Women/Images/images_with_product_ids
  Archivos: ['13042.jpg', '12902.jpg', '11511.jpg', '10292.jpg', '11497.jpg']...
Directorio: /content/drive/MyDrive/E-commerce Product Images/data/Footwear/Men
  Subdirectorios: ['Images']
Directorio: /content/drive/MyDrive/E-commerce Product Images/data/Footwear/Men/Images
  Subdirectorios: ['images_with_product_ids']
Directorio: /content/drive/MyDrive/E-commerce Pro

In [5]:
# Path of the fashion.csv table stored directly inside the dataset base folder
csv_path = os.path.join(base_dir, "fashion.csv")

# Load the table into a DataFrame
data = pd.read_csv(csv_path)

In [6]:
# Show the loaded DataFrame as the cell output
data

Unnamed: 0,ProductId,Gender,Category,SubCategory,ProductType,Colour,Usage,ProductTitle,Image,ImageURL
0,42419,Girls,Apparel,Topwear,Tops,White,Casual,Gini and Jony Girls Knit White Top,42419.jpg,http://assets.myntassets.com/v1/images/style/p...
1,34009,Girls,Apparel,Topwear,Tops,Black,Casual,Gini and Jony Girls Black Top,34009.jpg,http://assets.myntassets.com/v1/images/style/p...
2,40143,Girls,Apparel,Topwear,Tops,Blue,Casual,Gini and Jony Girls Pretty Blossom Blue Top,40143.jpg,http://assets.myntassets.com/v1/images/style/p...
3,23623,Girls,Apparel,Topwear,Tops,Pink,Casual,Doodle Kids Girls Pink I love Shopping Top,23623.jpg,http://assets.myntassets.com/v1/images/style/p...
4,47154,Girls,Apparel,Bottomwear,Capris,Black,Casual,Gini and Jony Girls Black Capris,47154.jpg,http://assets.myntassets.com/v1/images/style/p...
...,...,...,...,...,...,...,...,...,...,...
2901,51755,Women,Footwear,Shoes,Casual Shoes,Black,Casual,Catwalk Women Black Shoes,51755.jpg,http://assets.myntassets.com/v1/images/style/p...
2902,47630,Women,Footwear,Shoes,Flats,Blue,Casual,Carlton London Women Blue Shoes,47630.jpg,http://assets.myntassets.com/v1/images/style/p...
2903,32836,Women,Footwear,Shoes,Flats,Pink,Casual,Grendha Women Flori Pink Sandals,32836.jpg,http://assets.myntassets.com/v1/images/style/p...
2904,35821,Women,Footwear,Shoes,Heels,Black,Casual,Enroute Women Black Heels,35821.jpg,http://assets.myntassets.com/v1/images/style/p...


In [7]:
# Summarise the columns: names, non-null counts and dtypes
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2906 entries, 0 to 2905
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   ProductId     2906 non-null   int64 
 1   Gender        2906 non-null   object
 2   Category      2906 non-null   object
 3   SubCategory   2906 non-null   object
 4   ProductType   2906 non-null   object
 5   Colour        2906 non-null   object
 6   Usage         2906 non-null   object
 7   ProductTitle  2906 non-null   object
 8   Image         2906 non-null   object
 9   ImageURL      2906 non-null   object
dtypes: int64(1), object(9)
memory usage: 227.2+ KB


In [8]:
# Helper that builds the full path of a product image
def construct_image_path(row):
    """Return the path of the image file described by one metadata row.

    The path is assembled as
    ``<base_dir>/<Category>/<Gender>/Images/images_with_product_ids/<Image>``.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) exposing the keys
            ``"Category"``, ``"Gender"`` and ``"Image"``.

    Returns:
        The joined path as a string.
    """
    relative_parts = (
        row["Category"],
        row["Gender"],
        "Images",
        "images_with_product_ids",
        row["Image"],
    )
    # base_dir is the module-level dataset root defined earlier in the notebook.
    return os.path.join(base_dir, *relative_parts)

# Store the full image path of every row in a new "image_path" column
data["image_path"] = data.apply(construct_image_path, axis=1)

# Preview the first rows to check the new column
print(data.head())

   ProductId Gender Category SubCategory ProductType Colour   Usage  \
0      42419  Girls  Apparel     Topwear        Tops  White  Casual   
1      34009  Girls  Apparel     Topwear        Tops  Black  Casual   
2      40143  Girls  Apparel     Topwear        Tops   Blue  Casual   
3      23623  Girls  Apparel     Topwear        Tops   Pink  Casual   
4      47154  Girls  Apparel  Bottomwear      Capris  Black  Casual   

                                  ProductTitle      Image  \
0           Gini and Jony Girls Knit White Top  42419.jpg   
1                Gini and Jony Girls Black Top  34009.jpg   
2  Gini and Jony Girls Pretty Blossom Blue Top  40143.jpg   
3   Doodle Kids Girls Pink I love Shopping Top  23623.jpg   
4             Gini and Jony Girls Black Capris  47154.jpg   

                                            ImageURL  \
0  http://assets.myntassets.com/v1/images/style/p...   
1  http://assets.myntassets.com/v1/images/style/p...   
2  http://assets.myntassets.com/v1/ima

In [9]:
# Flag the rows whose image file is actually present on disk
data["image_exists"] = data["image_path"].apply(os.path.exists)

# Report the rows whose image file is missing
missing_images = data[~data["image_exists"]]
if not missing_images.empty:
    print(f"Faltan {len(missing_images)} imágenes:")
    print(missing_images)

# Keep only the rows with an existing image.
# The explicit .copy() makes `data` an independent DataFrame instead of a
# filtered slice of the original one, so the label columns that are added to
# it afterwards do not trigger pandas' SettingWithCopyWarning.
data = data[data["image_exists"]].copy()
print(f"Imágenes válidas: {len(data)}")


Imágenes válidas: 2906


In [10]:
# Convert the categorical labels to numeric values:
# each text column is cast to a pandas categorical and its integer codes are
# stored in a matching "<name>_label" column.
data["gender_label"] = data["Gender"].astype("category").cat.codes
data["category_label"] = data["Category"].astype("category").cat.codes
data["subcategory_label"] = data["SubCategory"].astype("category").cat.codes
data["producttype_label"] = data["ProductType"].astype("category").cat.codes
data["colour_label"] = data["Colour"].astype("category").cat.codes

# Loader used to build a single TensorFlow dataset of images and labels
def load_image_with_labels(image_path, gender_label, category_label, subcategory_label, producttype_label, colour_label):
    """Load one image and pair it with its five labels.

    Args:
        image_path: Path of the JPEG file to read.
        gender_label: Label returned under ``'gender_output'``.
        category_label: Label returned under ``'category_output'``.
        subcategory_label: Label returned under ``'subcategory_output'``.
        producttype_label: Label returned under ``'producttype_output'``.
        colour_label: Label returned under ``'colour_output'``.

    Returns:
        A ``(image, targets)`` tuple: the decoded 3-channel image resized to
        128x128 and divided by 255.0, and a dict mapping each output name to
        its label.
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, [128, 128])  # resize to 128x128 pixels

    targets = dict(
        gender_output=gender_label,
        category_output=category_label,
        subcategory_output=subcategory_label,
        producttype_output=producttype_label,
        colour_output=colour_label,
    )
    # Dividing by 255.0 normalises the pixel values to the 0-1 range.
    return resized / 255.0, targets

# Split into training and test sets (80% / 20%, fixed random_state for a repeatable split)
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    data["image_path"].values,
    data[["gender_label", "category_label", "subcategory_label", "producttype_label", "colour_label"]].values,
    test_size=0.2,
    random_state=42
)

# Build the TensorFlow datasets.
# The training set is shuffled BEFORE mapping and batching: shuffling after
# .batch() only reorders whole batches, so every batch would contain the same
# 32 samples in every epoch. Shuffling the (path, labels) pairs first mixes
# individual samples and keeps the shuffle buffer small, because it holds file
# paths rather than decoded images.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(buffer_size=len(x_train))
    .map(lambda x, y: load_image_with_labels(x, y[0], y[1], y[2], y[3], y[4]))
    .batch(32)
)

# The test set keeps its original order (no shuffle).
test_dataset = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .map(lambda x, y: load_image_with_labels(x, y[0], y[1], y[2], y[3], y[4]))
    .batch(32)
)


In [11]:
# Convert the categorical labels to numeric values, making sure they start from 0:
# each column listed below is cast to a pandas categorical and its integer
# codes are stored in a "<lower-cased column name>_label" column.
label_columns = ["Gender", "Category", "SubCategory", "ProductType", "Colour"]
for col in label_columns:
    data[f"{col.lower()}_label"] = data[col].astype("category").cat.codes


In [12]:
# Loader used to build a single TensorFlow dataset of images and labels
def load_image_with_labels(image_path, labels):
    """Load one image and split its label vector into named outputs.

    Args:
        image_path: Path of the JPEG file to read.
        labels: Indexable sequence holding, in this order, the gender,
            category, subcategory, product-type and colour labels.

    Returns:
        A ``(image, targets)`` tuple: the decoded 3-channel image resized to
        128x128 and divided by 255.0, and a dict mapping each output name to
        the corresponding entry of ``labels`` cast to ``tf.int32``.
    """
    # Load and preprocess the image
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, [128, 128])  # resize to 128x128 pixels
    image = resized / 255.0  # normalise to the 0-1 range

    # Make sure the labels are int32 tensors before slicing them
    label_vector = tf.cast(labels, tf.int32)

    output_names = (
        'gender_output',
        'category_output',
        'subcategory_output',
        'producttype_output',
        'colour_output',
    )
    targets = {name: label_vector[position] for position, name in enumerate(output_names)}
    return image, targets

In [13]:
# Split into training and test sets: 20% of the rows go to the test set and
# random_state=42 keeps the split repeatable.

x_train, x_test, y_train, y_test = train_test_split(
    data["image_path"].values,
    data[["gender_label", "category_label", "subcategory_label", "producttype_label", "colour_label"]].values,
    test_size=0.2,
    random_state=42
)

In [14]:
# Build the TensorFlow datasets.
# The training set is shuffled BEFORE mapping and batching: shuffling after
# .batch() only reorders whole batches, so every batch would contain the same
# 32 samples in every epoch. Shuffling the (path, labels) pairs first mixes
# individual samples and keeps the shuffle buffer small, because it holds file
# paths rather than decoded images.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(buffer_size=len(x_train))
    .map(load_image_with_labels)
    .batch(32)
)

# The test set keeps its original order (no shuffle), so predictions made on
# it line up row by row with y_test.
test_dataset = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .map(load_image_with_labels)
    .batch(32)
)

# Number of distinct label values for each model output,
# keyed by output name ("<label>_output").
num_classes = {
    f"{label_name}_output": data[f"{label_name}_label"].nunique()
    for label_name in ("gender", "category", "subcategory", "producttype", "colour")
}

In [15]:
from tensorflow.keras import models

# Build the model: one shared convolutional trunk feeding one softmax head per label.
inputs = layers.Input(shape=(128, 128, 3))

# Shared base: a single convolution + max-pooling stage, flattened for the dense heads
x = layers.Conv2D(32, (3, 3), activation='relu')(inputs)
x = layers.MaxPooling2D((2, 2))(x)
x = layers.Flatten()(x)

# One output per label. Each head is named after its key in num_classes and
# has as many units as that label has classes.
outputs = {
    head_name: layers.Dense(class_count, activation='softmax', name=head_name)(x)
    for head_name, class_count in num_classes.items()
}

model = models.Model(inputs=inputs, outputs=outputs)

# Compile with the same loss and metric for every output: the labels are
# integer codes, hence the sparse categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss={head_name: 'sparse_categorical_crossentropy' for head_name in outputs},
    metrics={head_name: ['accuracy'] for head_name in outputs},
)

# Model summary
model.summary()

In [None]:
# Train the model for 10 epochs, using the test dataset as validation data;
# the returned History object is kept in `history`.
history = model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=10
)

Epoch 1/10


In [None]:
# Evaluate the model on the test data.
# The model has five outputs, so evaluate() reports the total loss plus
# per-output values rather than a single (loss, accuracy) pair; unpacking the
# result into two names would raise "too many values to unpack".
# return_dict=True gives every value back under its metric name instead.
test_results = model.evaluate(test_dataset, return_dict=True)

# Total loss (sum over all outputs) and the accuracy of each individual output
test_loss = test_results["loss"]
test_accuracy = {
    metric_name: metric_value
    for metric_name, metric_value in test_results.items()
    if metric_name.endswith("accuracy")
}

print(f"Pérdida en el conjunto de prueba: {test_loss}")
for metric_name, metric_value in test_accuracy.items():
    print(f"Precisión en el conjunto de prueba ({metric_name}): {metric_value}")


In [None]:
# Save the model in the format recommended by Keras (.keras file)
model.save('cnn_functional.keras')


In [None]:


# Plot the training and validation accuracy and loss
def plot_training_history(history, output_name='gender_output'):
    """Plot accuracy curves for one model output next to the total loss curves.

    The left panel shows the training/validation accuracy of ``output_name``;
    the right panel shows the training/validation total loss.

    Args:
        history: Object whose ``history`` attribute is a dict of per-epoch
            metric lists (as returned by ``model.fit``). It must contain
            ``'<output_name>_accuracy'``, ``'val_<output_name>_accuracy'``,
            ``'loss'`` and ``'val_loss'``.
        output_name: Name of the model output whose accuracy is plotted.
            Defaults to ``'gender_output'``, the output that was previously
            hard-coded.

    Raises:
        KeyError: If the accuracy of ``output_name`` (or any of the other
            required keys) is not present in ``history.history``.
    """
    metrics = history.history
    accuracy_key = f'{output_name}_accuracy'
    if accuracy_key not in metrics:
        # Fail with the list of available keys instead of a bare KeyError.
        raise KeyError(
            f"'{accuracy_key}' not found in history; available keys: {sorted(metrics)}"
        )

    # (quantity shown on the y axis, panel title, training values, validation values)
    panels = [
        (
            'Accuracy',
            f'Training and Validation Accuracy ({output_name})',
            metrics[accuracy_key],
            metrics[f'val_{accuracy_key}'],
        ),
        ('Loss', 'Training and Validation Loss', metrics['loss'], metrics['val_loss']),
    ]

    _, axes = plt.subplots(1, 2, figsize=(14, 6))
    for ax, (quantity, panel_title, train_values, val_values) in zip(axes, panels):
        ax.plot(train_values, label=f'Training {quantity}')
        ax.plot(val_values, label=f'Validation {quantity}')
        ax.set_title(panel_title)
        ax.set_xlabel('Epochs')
        ax.set_ylabel(quantity)
        ax.legend()

    plt.show()

# Visualize the curves stored in the `history` object returned by model.fit
plot_training_history(history)


In [None]:
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Generic confusion-matrix plot (used below for the gender predictions)
def plot_confusion_matrix(y_true, y_pred, labels, title):
    """Draw the confusion matrix of ``y_true`` vs ``y_pred`` as an annotated heatmap.

    Args:
        y_true: True labels, passed as first argument to ``confusion_matrix``.
        y_pred: Predicted labels, passed as second argument to ``confusion_matrix``.
        labels: Tick labels used on both axes of the heatmap.
        title: Title of the figure.
    """
    matrix = confusion_matrix(y_true, y_pred)

    plt.figure(figsize=(8, 6))
    heatmap_options = dict(
        annot=True,  # write the count inside every cell
        fmt='d',     # counts are formatted as integers
        cmap='Blues',
        xticklabels=labels,
        yticklabels=labels,
    )
    sns.heatmap(matrix, **heatmap_options)

    # Columns of the matrix are predictions, rows are true labels.
    for apply_text, text in ((plt.title, title), (plt.xlabel, 'Predicted'), (plt.ylabel, 'True')):
        apply_text(text)
    plt.show()

# True gender codes are column 0 of y_test. test_dataset is built from
# x_test / y_test without shuffling, so predictions line up with y_test row by row.
y_true_gender = y_test[:, 0]  # True labels for gender
y_pred_gender = model.predict(test_dataset)['gender_output'].argmax(axis=1)  # Predicted labels

# Take the tick labels from the data instead of hard-coding ['Male', 'Female']:
# the Gender column holds other values (e.g. 'Girls', 'Women', 'Men'), and a
# label list of the wrong length or order mislabels the matrix.
# gender_label was produced with astype("category").cat.codes, so code i
# corresponds to cat.categories[i].
gender_categories = data["Gender"].astype("category").cat.categories

# confusion_matrix() only creates rows/columns for the codes that actually
# occur in y_true or y_pred (in sorted order), so name exactly those codes.
present_codes = np.union1d(y_true_gender, y_pred_gender)
gender_names = [gender_categories[code] for code in present_codes]

plot_confusion_matrix(y_true_gender, y_pred_gender, labels=gender_names, title='Confusion Matrix for Gender Output')
