In [None]:
import os
import zipfile

# Kaggle environment paths
dataset_path = "/kaggle/input/keras-multi-label/keras-multi-label"  

# Change directory to the unzipped dataset folder (if necessary)
dataset_folder = "/kaggle/input/keras-multi-label/keras-multi-label/dataset"
os.chdir(dataset_folder)

# Verify files
print("[INFO] Dataset files:", os.listdir(dataset_folder))

**Step 1: Prepare Data (Image and Labels)**

In [None]:
# Import necessary libraries for data loading and preprocessing
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from sklearn.preprocessing import MultiLabelBinarizer
import numpy as np
import glob

# Define image dimensions
IMAGE_DIMS = (64, 64, 3)  # You can adjust this based on your use case

# Initialize data and labels
data = []
labels = []

# List of classes (subdirectories in the dataset)
class_names = ["black_jeans", "blue_dress", "blue_jeans", "blue_shirt", "red_dress", "red_shirt"]

# Loop through each class (subdirectory)
for class_name in class_names:
    image_paths = glob.glob(os.path.join(dataset_folder, class_name, "*.jpg"))  # Adjust extension if needed
    for imagePath in image_paths:
        # Load the image, resize it, and convert to array
        image = load_img(imagePath, target_size=(IMAGE_DIMS[0], IMAGE_DIMS[1]))
        image = img_to_array(image)
        data.append(image)
        
        # Add the corresponding label (class name)
        labels.append([class_name])

# Convert data to a numpy array and normalize it
data = np.array(data, dtype="float") / 255.0

# Convert labels to numpy array
labels = np.array(labels)

# Display the number of images loaded
print(f"[INFO] Loaded {len(data)} images.")

**Step 2: Binarize Labels**

In [None]:
# Use MultiLabelBinarizer to one-hot encode the labels
mlb = MultiLabelBinarizer()
labels = mlb.fit_transform(labels)

# Display some label examples for verification
print("[INFO] Sample Labels after Binarization:")
print(labels[:5])
print("Class labels:", mlb.classes_)

**Step 3: Split the Data into Training and Testing Sets**

In [None]:
from sklearn.model_selection import train_test_split

# Split the dataset into training and testing sets (80% train, 20% test)
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.2, random_state=42)

print(f"[INFO] Training data shape: {trainX.shape}")
print(f"[INFO] Testing data shape: {testX.shape}")

**Step 4: Build the Model**

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Build a Sequential CNN model
model = Sequential()

# First convolutional layer
model.add(Conv2D(32, (3, 3), padding="same", input_shape=IMAGE_DIMS, activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Second convolutional layer
model.add(Conv2D(64, (3, 3), padding="same", activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Third convolutional layer
model.add(Conv2D(128, (3, 3), padding="same", activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Flatten and add fully connected layers
model.add(Flatten())
model.add(Dense(512, activation="relu"))
model.add(Dropout(0.5))

# Output layer (multi-label classification - sigmoid activation)
model.add(Dense(len(mlb.classes_), activation="sigmoid"))

# Compile the model
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Print model summary
print("[INFO] Model summary:")
model.summary()

**Step 5: Train the Model**

In [None]:
# Define batch size and epochs
EPOCHS = 25
BS = 32  # Batch size

# Train the model
history = model.fit(
    trainX, trainY, 
    validation_data=(testX, testY), 
    epochs=EPOCHS, 
    batch_size=BS, 
    verbose=1
)

# Save the model
model.save("/kaggle/working/multi_label_model.h5")

**Step 6: Evaluate the Model**

In [None]:
# Evaluate the model
print("[INFO] Evaluating the model...")
(loss, accuracy) = model.evaluate(testX, testY, batch_size=BS, verbose=1)
print(f"Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}")

**Step 7: Visualize the Training History**

In [None]:
import matplotlib.pyplot as plt

# Plot training and validation accuracy/loss
plt.figure(figsize=(12, 4))

# Accuracy plot
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='Train Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc='upper left')

# Loss plot
plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(loc='upper left')

plt.tight_layout()
plt.show()

**Make Predictions on New Images(Examples)**

In [17]:
# The model has already been trained and saved
model_path = "/kaggle/working/multi_label_model.h5"
model = load_model(model_path)

# 2. Load the MultiLabelBinarizer
mlb_path = '/kaggle/working/mlb.pickle'
with open(mlb_path, 'rb') as f:
    mlb = pickle.load(f)

# 3. Preprocess a new image for prediction
def prepare_image(image_path):
    image = cv2.imread(image_path)
    image = cv2.resize(image, (IMAGE_DIMS[0], IMAGE_DIMS[1]))
    image = np.array(image, dtype="float") / 255.0
    image = np.expand_dims(image, axis=0)  # Add batch dimension
    return image

# Example image path
image_path =  "/kaggle/input/keras-multi-label/keras-multi-label/examples/example_05.jpg"
image = prepare_image(image_path)

# 4. Make a prediction on the image
preds = model.predict(image)

# Keep only the first 5 columns of predictions
preds = preds[:, :5]  # Adjust to match the expected number of classes

# Print raw predictions for debugging
print(f"[DEBUG] Raw predictions: {preds}")

# Convert predicted probabilities to binary predictions with a lower threshold
threshold = 0.3  # Consider lowering the threshold to see if it helps
binary_preds = (preds >= threshold).astype(int)  # Apply threshold

# 5. Decode the prediction
predicted_labels = mlb.inverse_transform(binary_preds)
print(f"[INFO] Predicted labels: {predicted_labels}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step
[DEBUG] Raw predictions: [[0.16190077 0.04690529 0.42183858 0.0091376  0.01166103]]
[INFO] Predicted labels: [('blue_jeans',)]


In [22]:
# The model has already been trained and saved
model_path = "/kaggle/working/multi_label_model.h5"
model = load_model(model_path)

# Load the MultiLabelBinarizer
mlb_path = '/kaggle/working/mlb.pickle'
with open(mlb_path, 'rb') as f:
    mlb = pickle.load(f)

# Preprocess a new image for prediction
def prepare_image(image_path):
    image = cv2.imread(image_path)
    image = cv2.resize(image, (IMAGE_DIMS[0], IMAGE_DIMS[1]))
    image = np.array(image, dtype="float") / 255.0
    image = np.expand_dims(image, axis=0)  # Add batch dimension
    return image

# Example image path
image_path = "/kaggle/input/keras-multi-label/keras-multi-label/examples/example_05.jpg"
image = prepare_image(image_path)

# Make a prediction on the image
preds = model.predict(image)

# Adjust the predictions based on the number of classes
num_classes = len(mlb.classes_)
preds = preds[:, :num_classes]  # Ensure we're using the correct number of classes

# Define a range of thresholds to test
thresholds = [0.3, 0.4]  # Add more thresholds as needed

# Loop through each threshold and make predictions
for threshold in thresholds:
    binary_preds = (preds >= threshold).astype(int)  # Apply threshold
    predicted_labels = mlb.inverse_transform(binary_preds)
    print(f"[INFO] Predicted labels at threshold {threshold}: {predicted_labels}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 106ms/step
[INFO] Predicted labels at threshold 0.3: [('blue_jeans',)]
[INFO] Predicted labels at threshold 0.4: [('blue_jeans',)]


In [21]:
# The model has already been trained and saved
model_path = "/kaggle/working/multi_label_model.h5"
model = load_model(model_path)

# Load the MultiLabelBinarizer
mlb_path = '/kaggle/working/mlb.pickle'
with open(mlb_path, 'rb') as f:
    mlb = pickle.load(f)

# Preprocess a new image for prediction
def prepare_image(image_path):
    image = cv2.imread(image_path)
    image = cv2.resize(image, (IMAGE_DIMS[0], IMAGE_DIMS[1]))
    image = np.array(image, dtype="float") / 255.0
    image = np.expand_dims(image, axis=0)  # Add batch dimension
    return image

# Example image path
image_path = "/kaggle/input/keras-multi-label/keras-multi-label/examples/example_04.jpg"
image = prepare_image(image_path)

# Make a prediction on the image
preds = model.predict(image)

# Adjust the predictions based on the number of classes
num_classes = len(mlb.classes_)
preds = preds[:, :num_classes]  # Ensure we're using the correct number of classes

# Define a range of thresholds to test
thresholds = [0.3, 0.4]  # Add more thresholds as needed

# Loop through each threshold and make predictions
for threshold in thresholds:
    binary_preds = (preds >= threshold).astype(int)  # Apply threshold
    predicted_labels = mlb.inverse_transform(binary_preds)
    print(f"[INFO] Predicted labels at threshold {threshold}: {predicted_labels}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step
[INFO] Predicted labels at threshold 0.3: [('blue_dress',)]
[INFO] Predicted labels at threshold 0.4: [('blue_dress',)]
