In [None]:
# Load the TensorBoard notebook extension so the %tensorboard line magic can be used.
%load_ext tensorboard

In [None]:
# Launch TensorBoard pointed at output_ResNet50/logs.
# NOTE(review): the training cell writes its logs under os.path.join(output_folder, "logs");
# this logdir only matches if output_folder is set to 'output_ResNet50' — confirm.
%tensorboard --logdir output_ResNet50/logs

In [1]:
import datetime
import glob
import os
import shutil
import time

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedShuffleSplit
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.utils import to_categorical

In [2]:
# Wall-clock reference point; the last cell subtracts this to report total runtime.
start_time = time.time()

# --- Input geometry -------------------------------------------------------
# Images are resized to INPUT_IMAGE_SIZE x INPUT_IMAGE_SIZE with COLOR_CHANNEL
# channels (3 = RGB, 1 = grayscale).
INPUT_IMAGE_SIZE, COLOR_CHANNEL = 224, 3

# --- Training hyperparameters ---------------------------------------------
# class_num : size of the label set (0 = male, 1 = female)
# epochs    : passes over the training split
# batch_size: samples per gradient update
class_num = 2
epochs, batch_size = 1000, 32

In [None]:
# Locations used by the rest of the notebook. All four are empty placeholders
# and must be filled in before this cell is run.
male_folder = ''      # source *.jpg files for the male class
female_folder = ''    # source *.jpg files for the female class
id_image_folder = ''  # root for the resized, ID-renamed copies
output_folder = ''    # root for per-fold outputs, logs and the result summary

# The ID-renamed copies are kept in one sub-folder per class.
male_id_folder, female_id_folder = (
    os.path.join(id_image_folder, class_name) for class_name in ('male', 'female')
)

# Create the root first, then the two class sub-folders (no error if they already exist).
for directory in (id_image_folder, male_id_folder, female_id_folder):
    os.makedirs(directory, exist_ok=True)

def _load_labeled_images(source_folder, id_folder, prefix, label):
    """Load every ``*.jpg`` in ``source_folder`` as one class of the dataset.

    Each file is decoded as RGB and resized to
    ``INPUT_IMAGE_SIZE x INPUT_IMAGE_SIZE``; the resized copy is also saved to
    ``id_folder`` under the ID name ``<prefix>_<n>.jpg`` (``n`` counts from 1).

    Args:
        source_folder: Directory that is searched (non-recursively) for ``*.jpg``.
        id_folder: Directory that receives the resized, ID-renamed copies.
        prefix: Filename prefix for the ID names (e.g. ``'male'``).
        label: Integer class id assigned to every image from this folder.

    Returns:
        Tuple ``(source_files, arrays, class_labels, id_filenames)`` of four
        equally long lists, all in the same (sorted-by-path) order.
    """
    # glob does not guarantee any ordering; sorting keeps the ID numbering and
    # therefore the membership of the seeded splits the same on every run.
    source_files = sorted(glob.glob(f"{source_folder}/*.jpg"))
    arrays = []
    id_filenames = []
    for number, path in enumerate(source_files, start=1):
        img = load_img(path, color_mode="rgb", target_size=(INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE))
        id_filename = f"{prefix}_{number}.jpg"
        img.save(os.path.join(id_folder, id_filename))
        arrays.append(img_to_array(img))
        id_filenames.append(id_filename)
    return source_files, arrays, [label] * len(source_files), id_filenames


# Male images get label 0, female images get label 1.
male_images, male_arrays, male_labels, male_ids = _load_labeled_images(
    male_folder, male_id_folder, 'male', 0)
female_images, female_arrays, female_labels, female_ids = _load_labeled_images(
    female_folder, female_id_folder, 'female', 1)

# Fail here with a clear message instead of later inside the split/training code.
if not male_images and not female_images:
    raise ValueError(
        f"No .jpg images found in {male_folder!r} or {female_folder!r}; "
        "check male_folder / female_folder."
    )

# ID filename of every sample, index-aligned with image_data and labels
# (males first, then females).
image_paths = male_ids + female_ids

# Stack into one float32 array and scale pixel values from [0, 255] to [0, 1].
image_data = np.array(male_arrays + female_arrays).astype('float32')
image_data /= 255.0

# One-hot encode: column 0 is male, column 1 is female.
labels = to_categorical(np.array(male_labels + female_labels), class_num)

# Output the number of loaded images
print(f"Male images: {len(male_images)}")
print(f"Female images: {len(female_images)}")
print(f"Total images: {len(male_images) + len(female_images)}")

In [None]:
# Five independent stratified shuffles, each holding out 20% of the samples
# for testing; random_state pins the splits.
sss = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=0)

# Per-fold results, appended in fold order:
#   result    - test accuracy in percent
#   f1_scores - weighted F1 on the test split
result = []
f1_scores = []

# Log directory for TensorBoard. The timestamp gives every run its own
# sub-directory under <output_folder>/logs so event files from reruns are not
# mixed together (an empty strftime format would collapse all runs into logs/).
log_dir = os.path.join(output_folder, "logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
os.makedirs(log_dir, exist_ok=True)

# Train and evaluate one model per stratified split.
# The splitter needs integer class ids, so the one-hot labels are collapsed with argmax.
for fold, (train_index, test_index) in enumerate(sss.split(image_data, np.argmax(labels, axis=1))):
    # Everything produced by this fold (images, model, plots) goes under fold_<n>/
    fold_folder = os.path.join(output_folder, f'fold_{fold+1}')
    os.makedirs(fold_folder, exist_ok=True)

    # Each fold logs to its own TensorBoard sub-directory
    tensorboard_callback = TensorBoard(log_dir=os.path.join(log_dir, f"fold_{fold+1}"), histogram_freq=1)

    # Keep a copy of the exact images used for training and testing in this fold
    train_folder = os.path.join(fold_folder, 'train_images')
    test_folder = os.path.join(fold_folder, 'test_images')
    for split_indices, split_folder in ((train_index, train_folder), (test_index, test_folder)):
        os.makedirs(split_folder, exist_ok=True)
        for idx in split_indices:
            # One-hot column 0 set -> class 0 (male); otherwise female
            class_dir = 'male' if labels[idx][0] == 1 else 'female'
            src_path = os.path.join(id_image_folder, class_dir, image_paths[idx])
            shutil.copy(src_path, os.path.join(split_folder, image_paths[idx]))

    # Build the model: ImageNet-initialised ResNet50 backbone + global average
    # pooling + softmax head with one unit per class.
    ResNet50_base = ResNet50(weights='imagenet', include_top=False,
                             input_shape=(INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE, COLOR_CHANNEL))
    x = GlobalAveragePooling2D()(ResNet50_base.output)
    # class_num is the class-count constant from the parameter cell
    x = Dense(class_num, activation='softmax')(x)
    model = Model(inputs=ResNet50_base.input, outputs=x)

    # Compile the model
    model.compile(optimizer=SGD(learning_rate=0.01),
                  loss='categorical_crossentropy', metrics=['accuracy'])

    model.summary()

    # Training; validation_split holds out 10% of this fold's training data for validation
    history = model.fit(image_data[train_index], labels[train_index],
                        batch_size=batch_size, epochs=epochs, verbose=1,
                        validation_split=0.1, callbacks=[tensorboard_callback])

    # Evaluate on test data; test_score is [loss, accuracy]
    test_score = model.evaluate(image_data[test_index], labels[test_index], verbose=1)
    result.append(test_score[1] * 100)

    # Weighted F1 on the predicted vs. true class ids
    y_pred = model.predict(image_data[test_index])
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_test_classes = np.argmax(labels[test_index], axis=1)
    f1 = f1_score(y_test_classes, y_pred_classes, average='weighted')
    f1_scores.append(f1)
    print(f"Fold {fold+1} - Accuracy: {test_score[1] * 100:.3f}%, F1 Score: {f1:.4f}")

    # Save the model
    model.save(os.path.join(fold_folder, f'model_fold_{fold+1}.h5'))

    # Save training curves. The x-axis is taken from the recorded history rather
    # than `epochs`, so the plots stay valid if training ever stops early.
    epochs_run = range(len(history.history['loss']))
    for metric, y_label, train_label, val_label, plot_name in (
        ('loss', 'Loss', 'Train loss', 'Validation Loss', 'loss_plot.png'),
        ('accuracy', 'Accuracy', 'Train Accuracy', 'Validation Accuracy', 'accuracy_plot.png'),
    ):
        plt.figure()
        plt.plot(epochs_run, history.history[metric], label=train_label)
        plt.plot(epochs_run, history.history[f'val_{metric}'], label=val_label)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.legend()
        plt.savefig(os.path.join(fold_folder, plot_name))
        plt.close()

    # Save confusion matrix. labels=[0, 1] pins the matrix to 2x2 (male, female)
    # so it always matches the two display labels.
    cm = confusion_matrix(y_test_classes, y_pred_classes, labels=[0, 1])
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['male', 'female'])
    disp.plot(cmap='Greens', values_format='d')
    plt.savefig(os.path.join(fold_folder, 'confusion_matrix.png'))
    plt.close()

# Write the per-fold and averaged scores to result_summary.txt and echo them to the cell output.
summary_path = os.path.join(output_folder, 'result_summary.txt')

# One report line per fold, numbered from 1, in the order the folds were run.
fold_lines = [
    f'No.{fold_no} test score = {accuracy:.3f} %, F1 Score = {fold_f1:.4f}'
    for fold_no, (accuracy, fold_f1) in enumerate(zip(result, f1_scores), start=1)
]

with open(summary_path, 'w') as f:
    # Each line goes to the file (print appends the newline) and to stdout.
    for line in fold_lines:
        print(line, file=f)
        print(line)

    # Arithmetic means over all folds
    average_score = sum(result) / len(result)
    average_f1 = sum(f1_scores) / len(f1_scores)
    average_text = f'average score = {average_score:.3f} %, average F1 Score = {average_f1:.4f}'
    print(average_text, file=f)
    print(average_text)

# Stop the wall clock started in the parameter cell and report the total runtime.
end_time = time.time()
elapsed_time = end_time - start_time
print(f"Total processing time: {elapsed_time:.2f} seconds")