<h3>Check all Images are of a valid format</h3>

In [None]:
import os
from PIL import Image

def check_images(directory):
    """Walk ``directory`` and collect the files Pillow cannot verify.

    Every file found under ``directory`` (recursively) is opened with
    ``Image.open`` and checked with ``verify()``. Any exception raised by
    either call marks the file as invalid: a message is printed and the
    path is recorded.

    Args:
        directory: Root folder to scan.

    Returns:
        list[str]: Paths of the files that failed, in walk order. Empty when
        nothing failed or when the directory contains no files.
    """
    bad_images = []
    for subdir, _, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(subdir, file)
            try:
                # The context manager closes the underlying file handle even
                # when verify() raises, so scanning a large dataset does not
                # accumulate open files.
                with Image.open(file_path) as img:
                    img.verify()
            except Exception as e:
                print(f"Invalid image: {file_path} - {e}")
                bad_images.append(file_path)
    return bad_images

# Folder holding the LFW subset to validate
lfw_path = r"C:\College_programs\MVT_CNN\lfw_subset"
bad_images = check_images(lfw_path)

# Summarise the scan: either a clean bill of health or the list of offenders
if not bad_images:
    print("All images are valid.")
else:
    print("Found invalid images:")
    print("\n".join(bad_images))

<h3>Delete Invalid Images</h3>

In [13]:
import os
import tensorflow as tf

def delete_invalid_images(image_dir, log_file="deleted_images.log"):
    """Delete every file under ``image_dir`` that fails JPEG decoding.

    Each file found by walking ``image_dir`` is read with ``tf.io.read_file``
    and decoded with ``tf.io.decode_jpeg``. If either call raises, the file
    is removed from disk and its path is appended to ``log_file``.

    WARNING: this is destructive. Files are removed permanently.

    Args:
        image_dir: Root folder to scan recursively.
        log_file: Text file (overwritten on each call) that receives one
            deleted path per line.

    Returns:
        None.
    """
    # Resolve the log location once so the walk can recognise and skip it:
    # when the log lives inside image_dir it would otherwise fail to decode
    # and be deleted while it is still open for writing.
    log_path = os.path.normcase(os.path.abspath(log_file))

    with open(log_file, "w") as log:
        for subdir, _, files in os.walk(image_dir):
            for file in files:
                file_path = os.path.join(subdir, file)
                if os.path.normcase(os.path.abspath(file_path)) == log_path:
                    continue
                try:
                    img = tf.io.read_file(file_path)
                    tf.io.decode_jpeg(img)  # Adjust based on your image format
                except Exception as e:
                    print(f"Deleting invalid image: {file_path} ({e})")
                    try:
                        os.remove(file_path)
                    except OSError as remove_error:
                        # Report and keep going instead of aborting the
                        # whole scan on one locked or protected file.
                        print(f"Could not delete {file_path}: {remove_error}")
                        continue
                    log.write(f"{file_path}\n")  # Log deleted image path

# Usage: scans the full LFW folder and permanently deletes every file that
# fails JPEG decoding. Deleted paths are written to the default log file,
# deleted_images.log, in the current working directory.
delete_invalid_images(r"C:\College_programs\MVT_CNN\lfw")

<h3>Create an LFW subset of 500 random classes</h3>

In [None]:
import os
import random
import shutil

# Define path to your original LFW dataset
lfw_path = r"C:\College_programs\MVT_CNN\lfw"  # Replace with your actual path
# Define path for your new subset dataset
subset_path = r"C:\College_programs\MVT_CNN\lfw_subset_5"  # Path for new subset

# Number of identities (class folders) to draw for the subset
LFW_NUM_CLASSES = 500

# Step 1: Select random unique classes.
# Only real directories are candidates, so stray files in the dataset root
# can neither be sampled nor inflate the final count.
all_classes = [
    entry for entry in os.listdir(lfw_path)
    if os.path.isdir(os.path.join(lfw_path, entry))
]
# random.sample raises ValueError when asked for more items than exist,
# so cap the request at the number of available classes.
random_classes = random.sample(all_classes, min(LFW_NUM_CLASSES, len(all_classes)))

# Step 2: Create new directory structure and move images
os.makedirs(subset_path, exist_ok=True)

for class_name in random_classes:
    class_dir = os.path.join(lfw_path, class_name)
    # Create a corresponding directory in the subset path
    new_class_dir = os.path.join(subset_path, class_name)
    os.makedirs(new_class_dir, exist_ok=True)

    # Move (not copy) every image: the files leave the original dataset
    for img_file in os.listdir(class_dir):
        src_file = os.path.join(class_dir, img_file)
        shutil.move(src_file, new_class_dir)

print(f"Moved {len(random_classes)} classes to {subset_path}")

Moved 18 classes to C:\College_programs\MVT_CNN\lfw_subset


<h3>Preprocess the LFW Subset, rescale, train test split, load generators and get the names of the classes</h3>

In [29]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Root folder of the LFW subset used for training
lfw_path = r"C:\College_programs\MVT_CNN\lfw_subset"  # Replace with your actual path

# Multiply pixel values by 1/255 and hold out 40% of the images for validation
lfw_datagen = ImageDataGenerator(validation_split=0.4, rescale=1./255)

# Settings shared by the training and validation iterators
lfw_flow_options = dict(
    target_size=(224, 224),
    batch_size=16,
    class_mode='categorical',
)

# Training split
lfw_train_generator = lfw_datagen.flow_from_directory(
    lfw_path, subset='training', **lfw_flow_options
)

# Validation split
lfw_validation_generator = lfw_datagen.flow_from_directory(
    lfw_path, subset='validation', **lfw_flow_options
)

# Class-name -> index mapping discovered by the training iterator
class_indices = lfw_train_generator.class_indices
num_classes = len(class_indices)

# Persist the mapping as "index: name" lines for later lookup
output_file_path = r"C:\College_programs\MVT_CNN\lfw_subset_class_indices.txt"  # Replace with your desired output path

mapping_lines = [
    f"{class_index}: {class_name}\n"
    for class_name, class_index in class_indices.items()
]
with open(output_file_path, 'w') as f:
    f.writelines(mapping_lines)

print(f"Class indices saved to {output_file_path}")

Found 895 images belonging to 500 classes.
Found 224 images belonging to 500 classes.
Class indices saved to C:\College_programs\MVT_CNN\lfw_subset_class_indices.txt


<h3>Create a QMUL-SurvFace Subset of 100 random classes</h3>

In [None]:
import os
import random
import shutil

# Define path to your original QMUL-SurvFace dataset
qmul_path = r"C:\College_programs\MVT_CNN\QMUL-SurvFace\Training_Set"  # Replace with your actual path
# Define path for your new subset dataset
subset_path = r"C:\College_programs\MVT_CNN\qmul_subset"  # Path for new subset

# Number of identities (class folders) to draw for the subset
QMUL_NUM_CLASSES = 100

# Step 1: Select random unique classes (directories).
# Only real directories are candidates, so stray files in the dataset root
# can neither be sampled nor inflate the final count.
all_classes = [
    entry for entry in os.listdir(qmul_path)
    if os.path.isdir(os.path.join(qmul_path, entry))
]
# random.sample raises ValueError when asked for more items than exist,
# so cap the request at the number of available classes.
random_classes = random.sample(all_classes, min(QMUL_NUM_CLASSES, len(all_classes)))

# Step 2: Create new directory structure and move images
os.makedirs(subset_path, exist_ok=True)

for class_name in random_classes:
    class_dir = os.path.join(qmul_path, class_name)
    # Create a corresponding directory in the subset path
    new_class_dir = os.path.join(subset_path, class_name)
    os.makedirs(new_class_dir, exist_ok=True)

    # Move (not copy) every image: the files leave the original dataset
    for img_file in os.listdir(class_dir):
        src_file = os.path.join(class_dir, img_file)
        shutil.move(src_file, new_class_dir)

print(f"Moved {len(random_classes)} classes to {subset_path}")

Moved 13 classes to C:\College_programs\MVT_CNN\qmul_subset


<h3>Preprocess the QMUL-SurvFace Subset, rescale, train test split, load generators and get the names of the classes</h3>

In [30]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Root folder of the QMUL-SurvFace subset
qmul_subset_path = r"C:\College_programs\MVT_CNN\qmul_subset"  # Replace with your actual path

# Step 1: Preprocessing - multiply pixel values by 1/255, hold out 30% for validation
qmul_datagen = ImageDataGenerator(validation_split=0.3, rescale=1./255)

# Step 2: Build the training and validation iterators from shared settings
qmul_flow_options = dict(
    target_size=(224, 224),
    batch_size=16,
    class_mode='categorical',
)

qmul_train_generator = qmul_datagen.flow_from_directory(
    qmul_subset_path, subset='training', **qmul_flow_options
)

qmul_validation_generator = qmul_datagen.flow_from_directory(
    qmul_subset_path, subset='validation', **qmul_flow_options
)

# Step 3: Report how many classes and samples were found
num_classes = len(qmul_train_generator.class_indices)
print(f"Number of classes: {num_classes}")
print(f"Number of training samples: {qmul_train_generator.samples}")
print(f"Number of validation samples: {qmul_validation_generator.samples}")

# Keep an "index: name" listing of the classes on disk for reference
output_file_path = r"C:\College_programs\MVT_CNN\qmul_class_indices.txt"  # Replace with your desired output path

mapping_lines = [
    f"{class_index}: {class_name}\n"
    for class_name, class_index in qmul_train_generator.class_indices.items()
]
with open(output_file_path, 'w') as f:
    f.writelines(mapping_lines)

print(f"Class indices saved to {output_file_path}")

Found 14835 images belonging to 500 classes.
Found 5980 images belonging to 500 classes.
Number of classes: 500
Number of training samples: 14835
Number of validation samples: 5980
Class indices saved to C:\College_programs\MVT_CNN\qmul_class_indices.txt


<h3>Model architecture</h3>

In [None]:
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import BatchNormalization
import tensorflow as tf

# ImageNet-pretrained MobileNetV2 backbone without its classification head
base_model = MobileNetV2(input_shape=(224, 224, 3), include_top=False, weights='imagenet')

# Freeze every backbone layer except the final 10, which stay trainable
frozen_layers = base_model.layers[:-10]
for layer in frozen_layers:
    layer.trainable = False

# Classification head for face recognition:
# pooling -> dropout -> dense(128) -> batch norm -> dropout -> softmax
pooled = GlobalAveragePooling2D()(base_model.output)  # Collapse the spatial dimensions
pooled = Dropout(0.2)(pooled)  # Adjust dropout rate if needed

hidden = Dense(128, activation='relu')(pooled)
hidden = BatchNormalization()(hidden)  # Normalize activations
hidden = Dropout(0.25)(hidden)  # Adjust dropout rate if needed

# One softmax unit per class, with L2 weight regularization
predictions = Dense(num_classes, activation='softmax', kernel_regularizer=l2(0.01))(hidden)

# Assemble backbone + head into the final model
model = Model(inputs=base_model.input, outputs=predictions)

# Low learning rate for fine-tuning
optimizer = Adam(learning_rate=0.0001)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Stop when val_loss has not improved for 5 epochs and roll back to the best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

from tensorflow.keras.utils import plot_model

# Write a diagram of the architecture (with shapes and layer names) to disk
plot_model(
    model,
    to_file='model_architecture.png',
    show_shapes=True,
    show_layer_names=True,
)

<h3>Train the MobileNetV2 model with ImageNet weights on the LFW subset</h3>

In [32]:
import os
import tensorflow as tf
from tensorflow.keras.callbacks import TensorBoard

# Set up TensorBoard logging directory
log_dir = os.path.join("logs", "fit", "lfw_model")  # Change 'lfw_model' as needed
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Train on the LFW generators.
# batch_size is deliberately not passed to fit(): the generators already
# yield batches, and Keras documents that batch_size must not be specified
# for generator / PyDataset inputs.
# The History object is kept so the training curves can be inspected later.
lfw_history = model.fit(
    lfw_train_generator,
    validation_data=lfw_validation_generator,
    epochs=40,
    callbacks=[early_stopping, tensorboard_callback],
)

Epoch 1/40
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m168s[0m 2s/step - accuracy: 0.0037 - loss: 8.3740 - val_accuracy: 0.2009 - val_loss: 7.5730
Epoch 2/40
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m116s[0m 2s/step - accuracy: 0.0694 - loss: 7.7422 - val_accuracy: 0.2188 - val_loss: 6.9061
Epoch 3/40
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 2s/step - accuracy: 0.1126 - loss: 7.2594 - val_accuracy: 0.2321 - val_loss: 6.5708
Epoch 4/40
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m123s[0m 2s/step - accuracy: 0.1449 - loss: 6.8657 - val_accuracy: 0.2500 - val_loss: 6.4539
Epoch 5/40
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m159s[0m 3s/step - accuracy: 0.2017 - loss: 6.5754 - val_accuracy: 0.2768 - val_loss: 6.2775
Epoch 6/40
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 3s/step - accuracy: 0.2596 - loss: 6.3027 - val_accuracy: 0.3170 - val_loss: 5.9575
Epoch 7/40
[1m56/56[0m [32m━━━━

<keras.src.callbacks.history.History at 0x15294769700>

<h3>Train the MobileNetV2 model with ImageNet weights on the QMUL subset</h3>

In [35]:
import os
import tensorflow as tf
from tensorflow.keras.callbacks import TensorBoard

# Set up TensorBoard logging directory
log_dir = os.path.join("logs", "fit", "qmul_model")  # Change 'qmul_model' as needed
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Train on the QMUL generators.
# batch_size is deliberately not passed to fit(): the generators already
# yield batches, and Keras documents that batch_size must not be specified
# for generator / PyDataset inputs.
# The History object is kept so the training curves can be inspected later.
qmul_history = model.fit(
    qmul_train_generator,
    validation_data=qmul_validation_generator,
    epochs=25,
    callbacks=[early_stopping, tensorboard_callback],
)

  self._warn_if_super_not_called()


Epoch 1/25
[1m928/928[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1947s[0m 2s/step - accuracy: 0.0723 - loss: 6.6401 - val_accuracy: 0.0766 - val_loss: 6.2234
Epoch 2/25
[1m928/928[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1753s[0m 2s/step - accuracy: 0.2614 - loss: 4.8975 - val_accuracy: 0.1388 - val_loss: 5.4723
Epoch 3/25
[1m928/928[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1792s[0m 2s/step - accuracy: 0.3640 - loss: 4.0573 - val_accuracy: 0.1828 - val_loss: 4.9999
Epoch 4/25
[1m928/928[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1848s[0m 2s/step - accuracy: 0.4442 - loss: 3.5176 - val_accuracy: 0.2502 - val_loss: 4.4221
Epoch 5/25
[1m928/928[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1873s[0m 2s/step - accuracy: 0.5101 - loss: 3.1579 - val_accuracy: 0.3226 - val_loss: 3.9219


<keras.src.callbacks.history.History at 0x1529609e600>

<h3>Check the accuracy of the model on the validation data from LFW and QMUL</h3>

In [37]:
# Score the model on each dataset's validation split, LFW first, then QMUL
validation_sets = (
    ("LFW", lfw_validation_generator),
    ("QMUL", qmul_validation_generator),
)
for dataset_label, generator in validation_sets:
    val_loss, val_accuracy = model.evaluate(generator)
    print(f'{dataset_label}_Validation accuracy: {val_accuracy:.4f}, {dataset_label}_Validation loss: {val_loss:.4f}')

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 1s/step - accuracy: 0.2346 - loss: 5.4356
LFW_Validation accuracy: 0.2679, LFW_Validation loss: 5.3242
[1m374/374[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m470s[0m 1s/step - accuracy: 0.0764 - loss: 6.2715
QMUL_Validation accuracy: 0.0766, QMUL_Validation loss: 6.2234


<h3>Save the model in Keras 3 format</h3>

In [18]:
# Save the model to a single .keras file (the Keras 3 format).
# The face-identification cell later loads the model from this same path.
model.save(r"C:\College_programs\MVT_CNN\lfw_mnv2.keras")

<h3>Detect Faces in a Video using pretrained YOLOv5Face model</h3>

In [34]:
import cv2
from yolo5face.get_model import get_model

# Load the YOLOv5Face detector under its own name so it does not overwrite
# the Keras classifier that the training, evaluation and save cells refer to
# as `model`.
face_detector = get_model("yolov5n", device="cpu")

# Open the video file
video_path = r"C:\College_programs\MVT_CNN\Adam Sandler Funniest Moments.mp4"
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Without this message the loop below would end silently on the first read
    print(f"Could not open video: {video_path}")

try:
    while True:
        ret, frame = cap.read()

        if not ret:
            break

        # Resize frame for faster processing (optional)
        frame = cv2.resize(frame, (224, 224))

        # Convert frame to RGB as the model expects RGB input
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Perform inference with the specified target size
        boxes, key_points, scores = face_detector(rgb_frame, target_size=224)

        # Draw every detection that clears the confidence threshold
        for box, points, conf in zip(boxes, key_points, scores):
            if conf > 0.5:  # Confidence threshold
                x1, y1, x2, y2 = box
                # Rectangle around the detected face
                cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (255, 0, 0), 2)

                # Key points returned for this detection
                for point in points:
                    cv2.circle(frame, (int(point[0]), int(point[1])), 3, (0, 255, 0), -1)

        # Display the resulting frame with detected faces
        cv2.imshow('Video Face Detection', frame)

        # Press 'v' to stop playback early
        if cv2.waitKey(1) & 0xFF == ord('v'):
            break
finally:
    # Release the capture and close the window even if the loop raised
    cap.release()
    cv2.destroyAllWindows()

<h3>Detect Faces in a Video using pretrained YOLOv5Face model and identify with my custom trained model</h3>

In [None]:
import cv2
import numpy as np
from yolo5face.get_model import get_model
from keras.models import load_model

# Load the YOLOv5Face detector under its own name so it does not overwrite
# the Keras classifier that other cells refer to as `model`.
face_detector = get_model("yolov5n", device="cpu")

# Load your identification model (LFW model)
identification_model = load_model(r"C:\College_programs\MVT_CNN\lfw_mnv2.keras")

# Load the "index: name" mapping written by the LFW preprocessing cell
class_indices_path = r"C:\College_programs\MVT_CNN\lfw_subset_class_indices.txt"
class_names = {}

with open(class_indices_path, 'r') as f:
    for line in f:
        line = line.strip()
        if not line:
            continue  # tolerate blank lines
        # Split on the first separator only, in case a name contains ': '
        index, name = line.split(': ', 1)
        class_names[int(index)] = name

# Open the video file
video_path = r"C:\College_programs\MVT_CNN\Adam Sandler Funniest Moments.mp4"
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Without this message the loop below would end silently on the first read
    print(f"Could not open video: {video_path}")

try:
    while True:
        ret, frame = cap.read()

        if not ret:
            break

        # Original dimensions, used to map detections back from 224x224
        original_height, original_width = frame.shape[:2]
        scale_x = original_width / 224
        scale_y = original_height / 224

        # Resize frame for faster processing
        frame_resized = cv2.resize(frame, (224, 224))

        # Convert frame to RGB as the model expects RGB input
        rgb_frame = cv2.cvtColor(frame_resized, cv2.COLOR_BGR2RGB)

        # Perform inference with the specified target size
        boxes, key_points, scores = face_detector(rgb_frame, target_size=224)

        detected_faces = []  # preprocessed crops for one batched predict call
        face_boxes = []      # matching (x1, y1, x2, y2) in original-frame pixels

        for box, conf in zip(boxes, scores):
            if conf > 0.5:  # Confidence threshold
                x1, y1, x2, y2 = box

                # Scale back to the original frame and clamp to its bounds;
                # negative or oversized coordinates would give a wrong or
                # empty slice.
                x1 = min(max(int(x1 * scale_x), 0), original_width)
                y1 = min(max(int(y1 * scale_y), 0), original_height)
                x2 = min(max(int(x2 * scale_x), 0), original_width)
                y2 = min(max(int(y2 * scale_y), 0), original_height)
                if x2 <= x1 or y2 <= y1:
                    continue  # empty crop: cv2.resize would raise on it

                # Crop BEFORE drawing so the box outline is not part of the
                # face, then match the training preprocessing: RGB channel
                # order and pixel values multiplied by 1/255.
                cropped_face = frame[y1:y2, x1:x2]
                face_rgb = cv2.cvtColor(cropped_face, cv2.COLOR_BGR2RGB)
                face_rgb = cv2.resize(face_rgb, (224, 224))
                detected_faces.append(face_rgb.astype(np.float32) / 255.0)
                face_boxes.append((x1, y1, x2, y2))

        # All crops are taken; now it is safe to draw on the frame
        for (x1, y1, x2, y2) in face_boxes:
            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)

        # If there are detected faces, run identification on the batch
        if detected_faces:
            faces_array = np.array(detected_faces)  # shape: (num_faces, 224, 224, 3)

            # verbose=0 keeps predict() from printing a progress bar per frame
            predictions = identification_model.predict(faces_array, verbose=0)

            # Overlay the best class and its probability next to each face
            for (x1, y1, _, _), prediction in zip(face_boxes, predictions):
                predicted_class_index = int(np.argmax(prediction))
                confidence_score = np.max(prediction)

                # Default to "Unknown" if the index is not in the mapping
                name = class_names.get(predicted_class_index, "Unknown")
                label = f"{name}, Conf: {confidence_score:.2f}"

                # Keep the label on screen for faces touching the top edge
                cv2.putText(frame, label, (x1, max(y1 - 10, 15)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

        # Display the resulting frame with detected faces and labels
        cv2.imshow('Video Face Detection', frame)

        # Press 'v' to stop playback early
        if cv2.waitKey(1) & 0xFF == ord('v'):
            break
finally:
    # Release the capture and close the window even if the loop raised
    cap.release()
    cv2.destroyAllWindows()