In [1]:
import os
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input, GlobalAveragePooling2D
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Paths
train_img_folder = '../Penyisihan_Hology_DataMining/train'  # Update with the path to your train folder
csv_file = '../Penyisihan_Hology_DataMining/train.csv'  # Update with the path to your train.csv

# Image preprocessing constants
IMG_SIZE = (224, 224)  # EfficientNetB0 expects 224x224 images

# Load train.csv
train_df = pd.read_csv(csv_file)

# Helper function to load images and preprocess
def load_images_from_folder(folder, img_ids, img_extensions=['.jpg', '.JPG', '.png']):
    images = []
    for img_id in img_ids:
        img_path = None
        for ext in img_extensions:
            potential_path = os.path.join(folder, f"{img_id}{ext}")
            if os.path.exists(potential_path):
                img_path = potential_path
                break

        if img_path:
            try:
                img = load_img(img_path, target_size=IMG_SIZE)  # EfficientNetB0 size
                img_array = img_to_array(img) / 255.0  # Normalize
                images.append(img_array)
                print(f"Image {img_id} loaded successfully from {img_path}.")
            except Exception as e:
                print(f"Error loading image {img_id} from {img_path}: {str(e)}")
        else:
            print(f"Image {img_id} not found in any supported format.")
    
    return np.array(images)


# Load image ids and labels from the CSV file
img_ids = train_df['id'].values
jenis_labels = train_df['jenis'].values
warna_labels = train_df['warna'].values

# Load images
images = load_images_from_folder(train_img_folder, img_ids)

# Convert the labels into categorical format
jenis_labels_cat = to_categorical(jenis_labels, num_classes=2)  # Binary: T-shirt (0) or Hoodie (1)
warna_labels_cat = to_categorical(warna_labels, num_classes=5)  # Colors: 1=Red, 2=Yellow, 3=Blue, 4=Black, 5=White

# Split the data into train and validation sets
X_train, X_val, y_train_jenis, y_val_jenis, y_train_warna, y_val_warna = train_test_split(
    images, jenis_labels_cat, warna_labels_cat, test_size=0.2, random_state=42
)

# Load EfficientNetB0 model pretrained on ImageNet
base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3))

# Freeze the base model
base_model.trainable = False

# Add custom layers on top for multi-label classification
input_layer = base_model.input
x = GlobalAveragePooling2D()(base_model.output)

# Output for 'jenis' (T-shirt or Hoodie)
jenis_output = Dense(2, activation='softmax', name='jenis_output')(x)

# Output for 'warna' (Red, Yellow, Blue, Black, White)
warna_output = Dense(5, activation='softmax', name='warna_output')(x)

# Define the full model
model = Model(inputs=input_layer, outputs=[jenis_output, warna_output])

# Compile the model
model.compile(optimizer='adam', 
              loss={'jenis_output': 'categorical_crossentropy', 'warna_output': 'categorical_crossentropy'}, 
              metrics={'jenis_output': 'accuracy', 'warna_output': 'accuracy'})

# Train the model
history = model.fit(X_train, {'jenis_output': y_train_jenis, 'warna_output': y_train_warna},
                    validation_data=(X_val, {'jenis_output': y_val_jenis, 'warna_output': y_val_warna}),
                    epochs=10, batch_size=32)

# Save the fine-tuned model
model.save('./base3_result/efficientnet_multilabel_model.h5')

# Plotting training history
plt.figure(figsize=(12, 5))

# Plot accuracy
plt.subplot(1, 2, 1)
plt.plot(history.history['jenis_output_accuracy'], label='Jenis Accuracy')
plt.plot(history.history['warna_output_accuracy'], label='Warna Accuracy')
plt.plot(history.history['val_jenis_output_accuracy'], label='Val Jenis Accuracy')
plt.plot(history.history['val_warna_output_accuracy'], label='Val Warna Accuracy')
plt.title('Accuracy')
plt.legend()

# Plot loss
plt.subplot(1, 2, 2)
plt.plot(history.history['jenis_output_loss'], label='Jenis Loss')
plt.plot(history.history['warna_output_loss'], label='Warna Loss')
plt.plot(history.history['val_jenis_output_loss'], label='Val Jenis Loss')
plt.plot(history.history['val_warna_output_loss'], label='Val Warna Loss')
plt.title('Loss')
plt.legend()

plt.show()


Image 1 loaded successfully from ../Penyisihan_Hology_DataMining/train\1.jpg.
Image 2 loaded successfully from ../Penyisihan_Hology_DataMining/train\2.jpg.
Image 3 loaded successfully from ../Penyisihan_Hology_DataMining/train\3.jpg.
Image 4 loaded successfully from ../Penyisihan_Hology_DataMining/train\4.jpg.
Image 5 loaded successfully from ../Penyisihan_Hology_DataMining/train\5.jpg.
Image 6 loaded successfully from ../Penyisihan_Hology_DataMining/train\6.jpg.
Image 7 loaded successfully from ../Penyisihan_Hology_DataMining/train\7.jpg.
Image 8 loaded successfully from ../Penyisihan_Hology_DataMining/train\8.jpg.
Image 9 loaded successfully from ../Penyisihan_Hology_DataMining/train\9.jpg.
Image 10 loaded successfully from ../Penyisihan_Hology_DataMining/train\10.jpg.
Image 11 loaded successfully from ../Penyisihan_Hology_DataMining/train\11.jpg.
Image 12 loaded successfully from ../Penyisihan_Hology_DataMining/train\12.jpg.
Image 13 loaded successfully from ../Penyisihan_Hology_Dat

KeyboardInterrupt: 

In [2]:
from tensorflow.keras.models import load_model

# Load the saved model
model = load_model('./base3_result/efficientnet_multilabel_model.h5')

# Folder for test images
test_img_folder = '../Penyisihan_Hology_DataMining/test'  # Update with your actual path to test folder

# Load test images similar to how we loaded training images
test_img_ids = [file.split('.')[0] for file in os.listdir(test_img_folder)]  # Get image ids from test folder

# Load test images with the same function as before
test_images = load_images_from_folder(test_img_folder, test_img_ids)

# Perform predictions
predictions = model.predict(test_images)

# Split the predictions for 'jenis' and 'warna'
jenis_predictions = predictions[0].argmax(axis=1)  # Convert from one-hot to labels
warna_predictions = predictions[1].argmax(axis=1)  # Convert from one-hot to labels

# Create a DataFrame to store the results
result_df = pd.DataFrame({
    'id': range(778, 778 + len(test_img_ids)),  # Start IDs from 778
    'jenis': jenis_predictions,
    'warna': warna_predictions
})

# Save the results to CSV
result_df.to_csv('./base3_result/test_predictions.csv', index=False)

print("Predictions saved to 'test_predictions.csv'.")


Predictions saved to 'test_predictions.csv'.
