In [1]:
import os
import shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import ssl

import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import classification_report, confusion_matrix

In [2]:
try:
    _create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
    pass
else:
    ssl._create_default_https_context = _create_unverified_https_context

print("TensorFlow Version:", tf.__version__)

TensorFlow Version: 2.20.0


In [3]:
ORIGINAL_TRAIN_DIR = '../dataset/train'
ORIGINAL_TEST_DIR = '../dataset/test'

In [4]:
PROCESSED_TRAIN_DIR = 'train_sorted'
PROCESSED_TEST_DIR = 'test_sorted'

In [5]:
IMG_SIZE = 224
BATCH_SIZE = 32
EPOCHS = 50
NUM_CLASSES = 3
CLASS_LABELS = ['Minor', 'Middle-Aged', 'Senior']

In [6]:
def get_class_name_from_age(age):
    if 1 <= age <= 17:
        return 'Minor'
    elif 18 <= age <= 60:
        return 'Middle-Aged'
    else:
        return 'Senior'

def restructure_data(original_dir, new_dir):
    if os.path.exists(new_dir):
        print(f"Directory '{new_dir}' already exists. Skipping restructuring.")
        return

    print(f"Creating new sorted directory: {new_dir}")
    os.makedirs(new_dir, exist_ok=True)
    for label in CLASS_LABELS:
        os.makedirs(os.path.join(new_dir, label), exist_ok=True)

    folders = sorted(os.listdir(original_dir))
    for folder_name in tqdm(folders, desc=f"Restructuring {os.path.basename(original_dir)}"):
        if folder_name.isdigit():
            age = int(folder_name)
            class_name = get_class_name_from_age(age)
            
            original_folder_path = os.path.join(original_dir, folder_name)
            for image_name in os.listdir(original_folder_path):
                original_image_path = os.path.join(original_folder_path, image_name)
                new_image_path = os.path.join(new_dir, class_name, image_name)
                shutil.copy(original_image_path, new_image_path)

# Run the restructuring process
restructure_data(ORIGINAL_TRAIN_DIR, PROCESSED_TRAIN_DIR)
restructure_data(ORIGINAL_TEST_DIR, PROCESSED_TEST_DIR)

Directory 'train_sorted' already exists. Skipping restructuring.
Directory 'test_sorted' already exists. Skipping restructuring.


In [7]:
print("Creating data generators...")
train_ds = tf.keras.utils.image_dataset_from_directory(
    PROCESSED_TRAIN_DIR,
    validation_split=0.2,
    subset="training",
    seed=123,
    image_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE
)

val_ds = tf.keras.utils.image_dataset_from_directory(
    PROCESSED_TRAIN_DIR,
    validation_split=0.2,
    subset="validation",
    seed=123,
    image_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE
)


test_ds = tf.keras.utils.image_dataset_from_directory(
    PROCESSED_TEST_DIR,
    image_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    shuffle=False  # Keep shuffle False for evaluation
)


class_names = train_ds.class_names
print("Classes found by generator:", class_names)


AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.cache().prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.cache().prefetch(buffer_size=AUTOTUNE)

print("Data generators created successfully.")

Creating data generators...
Found 46584 files belonging to 3 classes.
Using 37268 files for training.
Found 46584 files belonging to 3 classes.
Using 9316 files for validation.
Found 46584 files belonging to 3 classes.
Classes found by generator: ['Middle-Aged', 'Minor', 'Senior']
Data generators created successfully.


In [8]:
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3))

# Freeze the layers of the base model
base_model.trainable = False

# Create the full model
inputs = tf.keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
x = tf.keras.applications.resnet50.preprocess_input(inputs)
x = base_model(x, training=False)
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.5)(x)
outputs = Dense(NUM_CLASSES, activation='softmax')(x)

model = Model(inputs, outputs)

model.summary()

In [9]:
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

model_checkpoint = ModelCheckpoint('best_resnet_model.keras', save_best_only=True, monitor='val_loss')

history = model.fit(train_ds,
                    epochs=EPOCHS,
                    validation_data=val_ds,
                    callbacks=[early_stopping, model_checkpoint])

Epoch 1/50
[1m1165/1165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1638s[0m 1s/step - accuracy: 0.8882 - loss: 0.3786 - val_accuracy: 0.9032 - val_loss: 0.3236
Epoch 2/50
[1m1165/1165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1641s[0m 1s/step - accuracy: 0.8967 - loss: 0.3271 - val_accuracy: 0.9051 - val_loss: 0.3143
Epoch 3/50
[1m1165/1165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1623s[0m 1s/step - accuracy: 0.9019 - loss: 0.3106 - val_accuracy: 0.9074 - val_loss: 0.3098
Epoch 4/50
[1m1165/1165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1599s[0m 1s/step - accuracy: 0.9050 - loss: 0.2951 - val_accuracy: 0.9061 - val_loss: 0.3181
Epoch 5/50
[1m1165/1165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1477s[0m 1s/step - accuracy: 0.9070 - loss: 0.2812 - val_accuracy: 0.9079 - val_loss: 0.3154
Epoch 6/50
[1m1165/1165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1475s[0m 1s/step - accuracy: 0.9111 - loss: 0.2662 - val_accuracy: 0.9089 - val_loss: 0.3108
Epoc