In [27]:
import os
import cv2
import numpy as np
import pandas as pd
import time

from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam


In [None]:
import shutil
import random

# Paths
base_dir = "Face_dataset/Resized_rgb(64X64)"
train_dir = "Face_dataset/Train"
test_dir = "Face_dataset/Test"

split_ratio = 0.8  # 80% train, 20% test

# Make folders
for d in [train_dir, test_dir]:
    os.makedirs(d, exist_ok=True)

# For each person folder
for person in os.listdir(base_dir):
    person_path = os.path.join(base_dir, person)
    if not os.path.isdir(person_path):
        continue

    images = [f for f in os.listdir(person_path) if f.lower().endswith(".jpg")]
    random.shuffle(images)

    split_point = int(len(images) * split_ratio)
    train_imgs = images[:split_point]
    test_imgs = images[split_point:]

    # Create person folders inside train/test
    os.makedirs(os.path.join(train_dir, person), exist_ok=True)
    os.makedirs(os.path.join(test_dir, person), exist_ok=True)

    # Copy files
    for img in train_imgs:
        shutil.copy(os.path.join(person_path, img),
                    os.path.join(train_dir, person, img))
    for img in test_imgs:
        shutil.copy(os.path.join(person_path, img),
                    os.path.join(test_dir, person, img))

print("\n Dataset split done successfully!")



 Dataset split done successfully!


In [21]:
def load_images_from_folder(folder, target_size=(64,64)):
    persons = sorted([d for d in os.listdir(folder) if os.path.isdir(os.path.join(folder, d))])
    X, y = [], []
    for idx, p in enumerate(persons):
        p_folder = os.path.join(folder, p)
        for fname in os.listdir(p_folder):
            if not fname.lower().endswith(('.jpg','.jpeg','.png')):
                continue
            path = os.path.join(p_folder, fname)
            img = cv2.imread(path)
            if img is None:
                continue
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)          # convert BGR->RGB
            img = cv2.resize(img, (target_size[1], target_size[0]))  # cv2 resize w,h
            X.append(img)
            y.append(idx)
    X = np.array(X, dtype=np.float32)
    y = np.array(y, dtype=np.int32)
    return X, y, persons


In [36]:
def build_cnn(input_shape, num_classes, num_conv_blocks=3, dropout_rate=0.0):
    model = Sequential()
    filters = 32
    for i in range(num_conv_blocks):
        if i == 0:
            model.add(Conv2D(filters, (3,3), activation='relu', padding='same', input_shape=input_shape))
        else:
            model.add(Conv2D(filters, (3,3), activation='relu', padding='same'))
        model.add(BatchNormalization())
        model.add(MaxPooling2D((2,2)))
        filters *= 2

    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    if dropout_rate > 0:
        model.add(Dropout(dropout_rate))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(optimizer=Adam(1e-3),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model


In [37]:
def train_and_eval(X_train, y_train, X_test, y_test,
                   num_conv_blocks=3, dropout_rate=0.0,
                   epochs=10, batch_size=32, verbose=1):
    input_shape = X_train.shape[1:]
    num_classes = len(np.unique(y_train))

    # normalize pixel values 0..1
    X_train_norm = X_train.astype(np.float32) / 255.0
    X_test_norm = X_test.astype(np.float32) / 255.0

    model = build_cnn(input_shape, num_classes, num_conv_blocks=num_conv_blocks, dropout_rate=dropout_rate)

    start = time.time()
    history = model.fit(X_train_norm, y_train,
                        validation_data=(X_test_norm, y_test),
                        epochs=epochs, batch_size=batch_size, verbose=verbose)
    end = time.time()

    runtime = end - start

    train_loss, train_acc = model.evaluate(X_train_norm, y_train, verbose=0)
    test_loss, test_acc = model.evaluate(X_test_norm, y_test, verbose=0)

    y_pred = np.argmax(model.predict(X_test_norm), axis=1)
    report = classification_report(y_test, y_pred, zero_division=0)

    return {
        "model": model,
        "history": history,
        "runtime_sec": runtime,
        "train_acc": float(train_acc),
        "test_acc": float(test_acc),
        "report": report
    }


In [24]:
train_root = "Face_dataset/Train"   # path to Train folders
test_root  = "Face_dataset/Test"    # path to Test folders

sizes = [(32,32), (64,64), (128,128)]
conv_blocks_options = [2, 3, 4]
dropout_options = [0.0, 0.5]
EPOCHS = 10
BATCH_SIZE = 32

results = []

for size in sizes:
    print("\n" + "="*60)
    print(f"Starting experiments for image size: {size}")
    # Load train and test for this size
    X_train, y_train, persons = load_images_from_folder(train_root, target_size=size)
    X_test, y_test, _ = load_images_from_folder(test_root, target_size=size)

    # Check counts
    print(f"Loaded Train: {X_train.shape}, Test: {X_test.shape} for size {size}")

    for num_conv in conv_blocks_options:
        for drop in dropout_options:
            print(f"\nRunning: size={size} conv_blocks={num_conv} dropout={drop}")
            res = train_and_eval(X_train, y_train, X_test, y_test,
                                 num_conv_blocks=num_conv, dropout_rate=drop,
                                 epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=0)
            results.append({
                "image_size": f"{size[0]}x{size[1]}",
                "conv_blocks": num_conv,
                "dropout": drop,
                "runtime_sec": round(res["runtime_sec"], 1),
                "train_acc": round(res["train_acc"]*100, 2),
                "test_acc": round(res["test_acc"]*100, 2),
                "report": res["report"]
            })
            # free some memory (optional)
            tf.keras.backend.clear_session()



Starting experiments for image size: (32, 32)
Loaded Train: (2132, 32, 32, 3), Test: (1241, 32, 32, 3) for size (32, 32)

Running: size=(32, 32) conv_blocks=2 dropout=0.0
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step


Running: size=(32, 32) conv_blocks=2 dropout=0.5


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step

Running: size=(32, 32) conv_blocks=3 dropout=0.0


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step

Running: size=(32, 32) conv_blocks=3 dropout=0.5


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 29ms/step

Running: size=(32, 32) conv_blocks=4 dropout=0.0


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step

Running: size=(32, 32) conv_blocks=4 dropout=0.5


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 31ms/step

Starting experiments for image size: (64, 64)
Loaded Train: (2132, 64, 64, 3), Test: (1241, 64, 64, 3) for size (64, 64)

Running: size=(64, 64) conv_blocks=2 dropout=0.0


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 47ms/step

Running: size=(64, 64) conv_blocks=2 dropout=0.5


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 48ms/step

Running: size=(64, 64) conv_blocks=3 dropout=0.0


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 65ms/step

Running: size=(64, 64) conv_blocks=3 dropout=0.5


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 45ms/step

Running: size=(64, 64) conv_blocks=4 dropout=0.0


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 70ms/step

Running: size=(64, 64) conv_blocks=4 dropout=0.5


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 65ms/step

Starting experiments for image size: (128, 128)
Loaded Train: (2132, 128, 128, 3), Test: (1241, 128, 128, 3) for size (128, 128)

Running: size=(128, 128) conv_blocks=2 dropout=0.0


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 635ms/step

Running: size=(128, 128) conv_blocks=2 dropout=0.5


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 423ms/step

Running: size=(128, 128) conv_blocks=3 dropout=0.0


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 377ms/step

Running: size=(128, 128) conv_blocks=3 dropout=0.5


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 536ms/step

Running: size=(128, 128) conv_blocks=4 dropout=0.0


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 498ms/step

Running: size=(128, 128) conv_blocks=4 dropout=0.5


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 810ms/step


In [25]:
df = pd.DataFrame(results)
df = df[["image_size","conv_blocks","dropout","runtime_sec","train_acc","test_acc"]]
df_sorted = df.sort_values(["image_size","conv_blocks","dropout"])
print("\n Experiment summary:  \n")
display(df_sorted)

# Find best by test accuracy
best_idx = df_sorted["test_acc"].idxmax()
best_row = df_sorted.loc[best_idx]
print("\nBest setting by test accuracy:")
print(best_row.to_dict())

# Print classification report for that best setting
print("\nClassification report for best setting:")
# find matching report in results list
for r in results:
    if (r["image_size"] == best_row["image_size"] and
        r["conv_blocks"] == best_row["conv_blocks"] and
        r["dropout"] == best_row["dropout"]):
        print(r["report"])
        break



 Experiment summary:  



Unnamed: 0,image_size,conv_blocks,dropout,runtime_sec,train_acc,test_acc
12,128x128,2,0.0,992.4,100.0,100.0
13,128x128,2,0.5,906.6,99.67,99.68
14,128x128,3,0.0,908.1,99.72,99.68
15,128x128,3,0.5,1177.1,96.2,96.21
16,128x128,4,0.0,1059.8,98.78,98.79
17,128x128,4,0.5,1492.4,98.36,98.47
0,32x32,2,0.0,78.4,100.0,100.0
1,32x32,2,0.5,65.7,99.95,100.0
2,32x32,3,0.0,65.2,100.0,100.0
3,32x32,3,0.5,64.6,99.91,99.92



Best setting by test accuracy:
{'image_size': '128x128', 'conv_blocks': 2, 'dropout': 0.0, 'runtime_sec': 992.4, 'train_acc': 100.0, 'test_acc': 100.0}

Classification report for best setting:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       264
           1       1.00      1.00      1.00       166
           2       1.00      1.00      1.00       336
           3       1.00      1.00      1.00       357
           4       1.00      1.00      1.00       118

    accuracy                           1.00      1241
   macro avg       1.00      1.00      1.00      1241
weighted avg       1.00      1.00      1.00      1241

