In [1]:
import os
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input, BatchNormalization
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ReduceLROnPlateau
import matplotlib.pyplot as plt

# Paths
train_img_folder = '../Penyisihan_Hology_DataMining/train'  # Update with the path to your train folder
csv_file = '../Penyisihan_Hology_DataMining/train.csv'  # Update with the path to your train.csv

# Image preprocessing constants
IMG_SIZE = (128, 128)

# Load train.csv
train_df = pd.read_csv(csv_file)

# Helper function to load and resize images
def load_images_from_folder(folder, img_ids, img_extensions=['.jpg', '.JPG', '.png']):
    images = []
    valid_img_ids = []  # To store ids of images with correct shape
    for img_id in img_ids:
        img_path = None
        for ext in img_extensions:
            potential_path = os.path.join(folder, f"{img_id}{ext}")
            if os.path.exists(potential_path):
                img_path = potential_path
                break

        if img_path:
            try:
                # Load and resize the image
                img = load_img(img_path, target_size=IMG_SIZE)
                img_array = img_to_array(img) / 255.0  # Normalize
                images.append(img_array)
                valid_img_ids.append(img_id)  # Store valid image ids
            except Exception as e:
                print(f"Failed to process image {img_id}: {str(e)}")
        else:
            print(f"Image {img_id} not found.")
    
    return np.array(images), valid_img_ids

# Load images and check shapes
images, valid_img_ids = load_images_from_folder(train_img_folder, train_df['id'].values)

# Filter the labels based on valid image ids
train_df_filtered = train_df[train_df['id'].isin(valid_img_ids)]
jenis_labels_filtered = train_df_filtered['jenis'].values
warna_labels_filtered = train_df_filtered['warna'].values

# Convert the filtered labels into categorical format
jenis_labels_cat = to_categorical(jenis_labels_filtered, num_classes=2)
warna_labels_cat = to_categorical(warna_labels_filtered, num_classes=5)

# Check the shape of the data
print(f'Bentuk X_train: {images.shape}')
print(f'Bentuk jenis_labels_cat: {jenis_labels_cat.shape}')
print(f'Bentuk warna_labels_cat: {warna_labels_cat.shape}')

# Split the data into train and validation sets
X_train, X_val, y_train_jenis, y_val_jenis, y_train_warna, y_val_warna = train_test_split(
    images, jenis_labels_cat, warna_labels_cat, test_size=0.2, random_state=42
)

# Data Augmentation
from tensorflow.keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)
datagen.fit(X_train)

# Model architecture: Multi-output CNN
input_layer = Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 3))

# Convolutional layers with Batch Normalization and Dropout
conv1 = Conv2D(32, (3, 3), activation='relu')(input_layer)
conv1 = BatchNormalization()(conv1)
pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

conv2 = Conv2D(64, (3, 3), activation='relu')(pool1)
conv2 = BatchNormalization()(conv2)
pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

conv3 = Conv2D(128, (3, 3), activation='relu')(pool2)
conv3 = BatchNormalization()(conv3)
pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)

conv4 = Conv2D(256, (3, 3), activation='relu')(pool3)
conv4 = BatchNormalization()(conv4)
pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)

conv5 = Conv2D(512, (3, 3), activation='relu')(pool4)
conv5 = BatchNormalization()(conv5)
pool5 = MaxPooling2D(pool_size=(2, 2))(conv5)

# Flatten layer
flat = Flatten()(pool5)
dropout = Dropout(0.5)(flat)

# Output for 'jenis' (T-shirt or Hoodie)
jenis_output = Dense(2, activation='softmax', name='jenis_output')(dropout)

# Output for 'warna' (Red, Yellow, Blue, Black, White)
warna_output = Dense(5, activation='softmax', name='warna_output')(dropout)

# Define model
model = Model(inputs=input_layer, outputs=[jenis_output, warna_output])

# Compile the model
model.compile(optimizer='adam', 
              loss={'jenis_output': 'categorical_crossentropy', 'warna_output': 'categorical_crossentropy'}, 
              metrics={'jenis_output': 'accuracy', 'warna_output': 'accuracy'})

# Learning Rate Reduction
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.00001)

# Prepare labels as a dictionary
y_train_dict = {'jenis_output': y_train_jenis, 'warna_output': y_train_warna}

# Helper function for data generator to handle multi-output
def generator_with_multiple_outputs(image_generator, X, y_dict):
    gen = image_generator.flow(X, y_dict, batch_size=32)
    while True:
        X_batch, y_batch = gen.next()
        yield X_batch, y_batch  # y_batch is already a dictionary

# Use the generator to fit the model
train_gen = generator_with_multiple_outputs(datagen, X_train, y_train_dict)

# Calculate steps_per_epoch
steps_per_epoch = len(X_train) // 32

# Validation data
val_data = (X_val, {'jenis_output': y_val_jenis, 'warna_output': y_val_warna})

# Train the model using the generator
history = model.fit(train_gen,
                    steps_per_epoch=steps_per_epoch,
                    validation_data=val_data,
                    epochs=20, callbacks=[reduce_lr])

# Save the model
model.save('./improved_model/multilabel_model.h5')

# Plotting training history
plt.figure(figsize=(12, 5))

# Plot accuracy
plt.subplot(1, 2, 1)
plt.plot(history.history['jenis_output_accuracy'], label='Jenis Accuracy')
plt.plot(history.history['warna_output_accuracy'], label='Warna Accuracy')
plt.plot(history.history['val_jenis_output_accuracy'], label='Val Jenis Accuracy')
plt.plot(history.history['val_warna_output_accuracy'], label='Val Warna Accuracy')
plt.title('Accuracy')
plt.legend()

# Plot loss
plt.subplot(1, 2, 2)
plt.plot(history.history['jenis_output_loss'], label='Jenis Loss')
plt.plot(history.history['warna_output_loss'], label='Warna Loss')
plt.plot(history.history['val_jenis_output_loss'], label='Val Jenis Loss')
plt.plot(history.history['val_warna_output_loss'], label='Val Warna Loss')
plt.title('Loss')
plt.legend()

plt.show()


Bentuk X_train: (777, 128, 128, 3)
Bentuk jenis_labels_cat: (777, 2)
Bentuk warna_labels_cat: (777, 5)


ValueError: `x` (images tensor) and `y` (labels) should have the same length. Found: x.shape = (621, 128, 128, 3), y.shape = ()

In [4]:
print(f'Bentuk X_train: {images.shape}')
print(f'Bentuk jenis_labels_cat: {jenis_labels_cat.shape}')
print(f'Bentuk warna_labels_cat: {warna_labels_cat.shape}')


Bentuk X_train: (777, 128, 128, 3)
Bentuk jenis_labels_cat: (777, 2)
Bentuk warna_labels_cat: (777, 5)
