# Model-1 (Training on Entire Dataset)

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
# suppress display of warnings
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Fix the NumPy and TensorFlow random generators to one shared seed so that
# repeated runs of the notebook draw the same random numbers.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
from google.colab import drive


# Mount Google Drive at /content/drive; the dataset paths used later in this
# notebook all live under this mount point.
drive.mount('/content/drive')

In [None]:
# Root folder of the original (uncropped) images on the mounted Drive.
# The data loaders below read from this directory; adjust it to your own Drive layout.
train_dir = '/content/drive/My Drive/Computer Vision/Face Recognition/Original Images'

In [None]:
# Preprocessing and augmentation settings shared by the loaders built from
# this generator: pixel values are scaled to [0, 1], images are randomly
# sheared, zoomed and mirrored, and 10% of the files are reserved for the
# "validation" subset.
augmentation_options = {
    'rescale': 1./255,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
    'validation_split': 0.1,
}
generator = ImageDataGenerator(**augmentation_options)

In [None]:
# Training loader: the "training" share of train_dir, resized to 128x128 and
# delivered in batches of 32.
train_ds = generator.flow_from_directory(
    directory=train_dir,
    subset="training",
    target_size=(128, 128),
    batch_size=32,
)

In [None]:
# Validation loader.
# The validation images must not be randomly sheared/zoomed/flipped, otherwise
# the validation metrics are measured on distorted inputs and vary from run to
# run. A second generator is therefore used that only rescales the pixels.
# Its validation_split MUST stay equal to the one of `generator` (0.1): the
# split is taken from the file ordering, so equal fractions give the same
# training/validation partition.
val_generator = ImageDataGenerator(rescale=1./255, validation_split=0.1)

val_ds = val_generator.flow_from_directory(
    train_dir,
    target_size=(128, 128),
    batch_size=32,
    subset="validation",  # the held-out 10% of train_dir
    shuffle=False,  # fixed order makes evaluation reproducible
)

In [None]:
# Class names as discovered by the training loader (the keys of its
# name -> index mapping), printed for a quick sanity check.
classes = [*train_ds.class_indices]
print(classes)

In [None]:
import tensorflow as tf

# Detect the TPU, connect this runtime to it and initialise the TPU system.
tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)
# Distribution strategy that replicates the model across the TPU cores.
tpu_strategy = tf.distribute.TPUStrategy(tpu)

# Instantiating the model inside the strategy scope creates it on the TPU.
with tpu_strategy.scope():
    from keras.models import Model
    from keras.layers import Flatten, Dense
    from keras.applications import VGG16

    # Must match the target_size used by the data loaders (128x128).
    IMAGE_SIZE = [128, 128]
    # One output unit per class found by the training loader, instead of a
    # hard-coded count that silently breaks when the dataset changes.
    NUM_CLASSES = len(train_ds.class_indices)

    # VGG16 convolutional base with ImageNet weights and without its
    # classification head; input shape is (128, 128, 3).
    vgg = VGG16(input_shape=IMAGE_SIZE + [3], weights='imagenet', include_top=False)

    # Freeze the pre-trained base so only the new head is trained.
    for layer in vgg.layers:
        layer.trainable = False

    # New classification head on top of the frozen base.
    x = Flatten()(vgg.output)
    x = Dense(128, activation='relu')(x)
    # Softmax output: single-label, multi-class classification.
    x = Dense(NUM_CLASSES, activation='softmax')(x)

    model = Model(inputs=vgg.input, outputs=x)

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'], steps_per_execution=32)
    print("model created")

In [None]:
# Print the layer-by-layer architecture and parameter counts of the model.
model.summary()

In [None]:
# 16 samples per TPU replica. Kept as a notebook-level constant, but NOT passed
# to fit(): see below.
BATCH_SIZE = 16 * tpu_strategy.num_replicas_in_sync

# train_ds / val_ds are generators that already yield complete batches (their
# batch size was fixed when they were created with flow_from_directory), and
# Keras requires that `batch_size` is not specified for generator input.
# To change the batch size, change it where the loaders are built.
history = model.fit(train_ds, epochs=50, validation_data=val_ds)

In [None]:
# Two side-by-side panels: accuracy on the left, loss on the right, each with
# the training curve and the validation curve over the epochs.
fig, ax = plt.subplots(1, 2, figsize=(10, 3))
ax = ax.ravel()

for panel, met in zip(ax, ['accuracy', 'loss']):
    train_curve = history.history[met]
    val_curve = history.history['val_' + met]
    panel.plot(train_curve)
    panel.plot(val_curve)
    panel.set_title('Model {}'.format(met))
    panel.set_xlabel('epochs')
    panel.set_ylabel(met)
    panel.legend(['train', 'val'])

In [None]:
# Score the trained model on the training loader and report its accuracy
# as a percentage.
train_metrics = model.evaluate(train_ds)
train_loss, train_accuracy = train_metrics
print(f"Training Accuracy: {train_accuracy*100: .2f}")

In [None]:
# Score the trained model on the validation loader.
validation_loss, validation_accuracy = model.evaluate(val_ds)
# Report the validation accuracy (the original printed train_accuracy here,
# so the displayed number was the training accuracy under the wrong label).
print(f"Validation Accuracy: {validation_accuracy*100: .2f}")

In [None]:
# Loader used for the "test" evaluation below.
# NOTE(review): this reads the "validation" subset of train_dir through the
# same augmenting generator, i.e. the same files the validation loader uses.
# It is therefore not an independent test set; treat the resulting
# "test accuracy" as a second validation score.
test_ds = generator.flow_from_directory(
    directory=train_dir,
    subset="validation",
    target_size=(128, 128),
    batch_size=32,
)

In [None]:
# Score the model on test_ds and print the accuracy as a percentage.
test_metrics = model.evaluate(test_ds)
test_loss, test_accuracy = test_metrics
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

In [None]:
from IPython.display import FileLink

# Write the trained model to an HDF5 file in the working directory.
saved_model_file = 'FRM.h5'
model.save(saved_model_file)

# Last expression of the cell: the notebook renders it as a download link.
FileLink(saved_model_file)

# Model-2 (Face-Cropped & Split Dataset)

In [None]:
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

# suppress display of warnings
import warnings
warnings.filterwarnings('ignore')

In [None]:
import urllib.request

# Directory in which OpenCV ships its Haar cascade files
cascade_dir = cv2.data.haarcascades

# Path to the Haar cascade file for frontal face detection
cascade_file = os.path.join(cascade_dir, 'haarcascade_frontalface_default.xml')

# Use the bundled cascade when present; otherwise fetch it from the OpenCV
# repository. The download is done with urllib (no shell command, no
# dependency on wget) and a failure is reported instead of printing a
# success message unconditionally.
if os.path.isfile(cascade_file):
    print("Haar cascade file found:", cascade_file)
else:
    print("Haar cascade file not found. Downloading...")
    cv2_base_url = "https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/"
    cascade_url = cv2_base_url + 'haarcascade_frontalface_default.xml'
    os.makedirs(cascade_dir, exist_ok=True)
    try:
        urllib.request.urlretrieve(cascade_url, cascade_file)
    except OSError as err:  # urllib.error.URLError is a subclass of OSError
        # Do not leave a partial file behind: the next run would treat it as valid.
        if os.path.exists(cascade_file):
            os.remove(cascade_file)
        raise RuntimeError(f"Could not download Haar cascade from {cascade_url}") from err
    print("Haar cascade file downloaded successfully.")

# Now, you can use cascade_file as the filter_path in your code.
filter_path = cascade_file

In [None]:
# Loaded classifiers keyed by cascade path, so the cascade XML is parsed once
# instead of once per image.
_FACE_CASCADES = {}


def detect_and_crop_faces(image):
    """Detect frontal faces in ``image`` and return one crop per detection.

    Args:
        image: Image array as returned by ``cv2.imread``. ``None`` (which
            ``cv2.imread`` returns when a file cannot be read) is accepted.

    Returns:
        A list of sub-arrays ``image[y:y+h, x:x+w]``, one per detected face
        rectangle. The list is empty when ``image`` is ``None`` or no face
        is detected.
    """
    if image is None:
        return []

    # The cascade path comes from the notebook-level ``filter_path``.
    face_cascade = _FACE_CASCADES.get(filter_path)
    if face_cascade is None:
        face_cascade = cv2.CascadeClassifier(filter_path)
        _FACE_CASCADES[filter_path] = face_cascade

    # Positional arguments are scaleFactor=1.3 and minNeighbors=5.
    faces = face_cascade.detectMultiScale(image, 1.3, 5)
    return [image[y:y + h, x:x + w] for (x, y, w, h) in faces]


In [None]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split

# Path to the dataset directory (one sub-folder per person)
dataset_dir = "/content/drive/My Drive/Computer Vision/Face Recognition/Original Images"
# Path to store the cropped images
cropped_dataset_dir = "/content/drive/My Drive/Computer Vision/Face Recognition/CroppedImages"
# Paths to store the split train and test sets
train_dir = os.path.join(cropped_dataset_dir, "train")
test_dir = os.path.join(cropped_dataset_dir, "test")

# Create directories for train and test sets
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# Common size to which all face images will be resized
common_size = (128, 128)
# Source-image extensions that are processed (matched case-insensitively)
image_extensions = ('.jpg', '.jpeg', '.png')

# Iterate through each person's folder. Folders and files are visited in
# sorted order so that, for a fixed NumPy seed, the random train/test
# assignment is the same on every run (os.listdir order is arbitrary).
for subdir in sorted(os.listdir(dataset_dir)):
    subdir_path = os.path.join(dataset_dir, subdir)
    if not os.path.isdir(subdir_path):
        continue

    # Create corresponding subdirectories in train and test folders
    train_subdir_path = os.path.join(train_dir, subdir)
    test_subdir_path = os.path.join(test_dir, subdir)
    os.makedirs(train_subdir_path, exist_ok=True)
    os.makedirs(test_subdir_path, exist_ok=True)

    image_files = sorted(
        f for f in os.listdir(subdir_path) if f.lower().endswith(image_extensions)
    )

    for image_name in image_files:
        image_path = os.path.join(subdir_path, image_name)
        img = cv2.imread(image_path)
        # cv2.imread returns None for unreadable/corrupt files; skip them
        # instead of crashing the whole run.
        if img is None:
            print(f"Skipping unreadable image: {image_path}")
            continue

        for idx, face in enumerate(detect_and_crop_faces(img)):
            if face is None or face.size == 0:
                continue

            face_name = f"{image_name}_{idx}.jpg"
            train_path = os.path.join(train_subdir_path, face_name)
            test_path = os.path.join(test_subdir_path, face_name)
            # A face that was already written by an earlier run keeps its
            # split. Re-drawing the split could put the same face in both
            # train and test, leaking test data into training.
            if os.path.exists(train_path) or os.path.exists(test_path):
                continue

            resized_face = cv2.resize(face, common_size)
            # 90% train, 10% test
            save_path = train_path if np.random.rand() < 0.9 else test_path
            # cv2.imwrite reports failure through its return value
            if not cv2.imwrite(save_path, resized_face):
                print(f"Failed to write: {save_path}")
print('dataset created')

In [None]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline
# Display one cropped training image as a visual check of the cropping step.
sample_path = '/content/drive/My Drive/Computer Vision/Face Recognition/CroppedImages/train/Elizabeth Olsen/Elizabeth Olsen_64.jpg_0.jpg'
img = mpimg.imread(sample_path)
imgplot = plt.imshow(img)

In [None]:
!pip install anytree

In [None]:
import os
from anytree import Node, RenderTree

def create_directory_tree(root_path, parent=None):
    """
    Recursively build an AnyTree node hierarchy mirroring ``root_path``.

    Args:
        root_path: File or directory path to represent.
        parent: Node to attach the new node to, or None for a tree root.

    Returns:
        The Node created for ``root_path``. Directory entries are attached
        below it in sorted order.
    """
    # normpath drops a trailing separator, which would otherwise make
    # basename return an empty name; fall back to the path itself for "/".
    name = os.path.basename(os.path.normpath(root_path)) or root_path
    node = Node(name, parent=parent)
    if os.path.isdir(root_path):
        for item in sorted(os.listdir(root_path)):
            create_directory_tree(os.path.join(root_path, item), parent=node)
    return node


def print_directory_tree(root_path):
    """
    Print the directory tree below ``root_path`` using AnyTree's renderer.

    Args:
        root_path: Directory (or file) whose tree is printed.
    """
    # create_directory_tree already creates the node for root_path itself;
    # creating a separate root here as well would print the root name twice.
    root = create_directory_tree(root_path)
    for pre, _, node in RenderTree(root):
        print("%s%s" % (pre, node.name))

# Root of the cropped dataset on the mounted Drive (the folder that holds the
# train/ and test/ splits). "/CroppedImages" at the filesystem root is not
# where the dataset was written, so listing it shows nothing.
root_dir = "/content/drive/My Drive/Computer Vision/Face Recognition/CroppedImages"

# Print the directory tree
print_directory_tree(root_dir)

In [None]:
train_dir = '/content/drive/My Drive/Computer Vision/Face Recognition/CroppedImages/train'
test_dir = '/content/drive/My Drive/Computer Vision/Face Recognition/CroppedImages/test'

# Fraction of train_dir held out for validation. Both generators that read
# train_dir must use the same value so that they agree on the partition.
VALIDATION_SPLIT = 0.1

# Training generator: rescaling plus random augmentation.
generator = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=VALIDATION_SPLIT
)

# Evaluation generators only rescale. Augmenting validation/test images would
# make the reported metrics noisy and not comparable between runs.
eval_generator = ImageDataGenerator(rescale=1./255, validation_split=VALIDATION_SPLIT)
test_generator = ImageDataGenerator(rescale=1./255)

# Training subset of train_dir (augmented)
train_ds = generator.flow_from_directory(
    train_dir,
    target_size=(128, 128),
    batch_size=32,
    subset="training"
)

# Validation subset of train_dir (not augmented)
val_ds = eval_generator.flow_from_directory(
    train_dir,
    target_size=(128, 128),
    batch_size=32,
    subset="validation",
    shuffle=False
)

# Get the list of classes
classes = list(train_ds.class_indices.keys())
print("Classes in training data:", classes)

# Test data: every image in test_dir. No `subset` is given, because a subset
# of a split generator would silently drop part of the test images.
test_ds = test_generator.flow_from_directory(
    test_dir,
    target_size=(128, 128),
    batch_size=32,
    shuffle=False
)

# Get the list of classes in test data
test_classes = list(test_ds.class_indices.keys())
print("Classes in test data:", test_classes)

# The label indices are only comparable when both loaders see the same classes.
if test_ds.class_indices != train_ds.class_indices:
    print("WARNING: train and test class mappings differ; test metrics will be wrong.")


In [None]:
import tensorflow as tf

# Detect the TPU, connect this runtime to it and initialise the TPU system
# (same sequence as the Model-1 setup cell, which also connects to the cluster).
tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)
# Distribution strategy that replicates the model across the TPU cores.
tpu_strategy = tf.distribute.TPUStrategy(tpu)

# Instantiating the model inside the strategy scope creates it on the TPU.
with tpu_strategy.scope():
    from keras.models import Model
    from keras.layers import Flatten, Dense
    from keras.applications import VGG16

    # Must match the target_size used by the data loaders (128x128).
    IMAGE_SIZE = [128, 128]
    # One output unit per class found by the training loader, instead of a
    # hard-coded count that silently breaks when the dataset changes.
    NUM_CLASSES = len(train_ds.class_indices)

    # VGG16 convolutional base with ImageNet weights and without its
    # classification head; input shape is (128, 128, 3).
    vgg = VGG16(input_shape=IMAGE_SIZE + [3], weights='imagenet', include_top=False)

    # Freeze the pre-trained base so only the new head is trained.
    for layer in vgg.layers:
        layer.trainable = False

    # New classification head on top of the frozen base.
    x = Flatten()(vgg.output)
    x = Dense(128, activation='relu')(x)
    # Softmax output: single-label, multi-class classification.
    x = Dense(NUM_CLASSES, activation='softmax')(x)

    model = Model(inputs=vgg.input, outputs=x)

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    print("model created")

In [None]:
# Print the layer-by-layer architecture and parameter counts of the model.
model.summary()

In [None]:
# 16 samples per TPU replica. Kept as a notebook-level constant, but NOT passed
# to fit(): see below.
BATCH_SIZE = 16 * tpu_strategy.num_replicas_in_sync

# train_ds / val_ds are generators that already yield complete batches (their
# batch size was fixed when they were created with flow_from_directory), and
# Keras requires that `batch_size` is not specified for generator input.
# To change the batch size, change it where the loaders are built.
history = model.fit(train_ds, epochs=100, validation_data=val_ds)

In [None]:
# Score the trained model on the training loader and report its accuracy
# as a percentage.
train_metrics = model.evaluate(train_ds)
train_loss, train_accuracy = train_metrics
print(f"Training Accuracy: {train_accuracy*100: .2f}")

In [None]:
# Score the trained model on the validation loader.
validation_loss, validation_accuracy = model.evaluate(val_ds)
# Report the validation accuracy (the original printed train_accuracy here,
# so the displayed number was the training accuracy under the wrong label).
print(f"Validation Accuracy: {validation_accuracy*100: .2f}")

In [None]:
# Evaluate the model on the held-out test split.
# The loader reads test_dir (not the validation subset of train_dir, which
# the model selection already used) and only rescales the pixels: no random
# augmentation and no validation split, so every test image is scored once.
test_ds = ImageDataGenerator(rescale=1./255).flow_from_directory(
    test_dir,
    target_size=(128, 128),
    batch_size=32,
    shuffle=False  # fixed order makes evaluation reproducible
)

In [None]:
# Score the model on test_ds and print the accuracy as a percentage.
test_metrics = model.evaluate(test_ds)
test_loss, test_accuracy = test_metrics
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

In [None]:
# Draw all four training-history curves (accuracy and loss, for training and
# validation) on one set of axes, in the same order as the legend entries.
curve_names = ['accuracy', 'val_accuracy', 'loss', 'val_loss']
for curve_name in curve_names:
    plt.plot(history.history[curve_name])
plt.xlabel('epochs')
plt.legend(curve_names)
plt.show()

In [None]:
# Two side-by-side panels: accuracy on the left, loss on the right, each with
# the training curve and the validation curve over the epochs.
fig, ax = plt.subplots(1, 2, figsize=(10, 3))
ax = ax.ravel()

for panel, met in zip(ax, ['accuracy', 'loss']):
    train_curve = history.history[met]
    val_curve = history.history['val_' + met]
    panel.plot(train_curve)
    panel.plot(val_curve)
    panel.set_title('Model {}'.format(met))
    panel.set_xlabel('epochs')
    panel.set_ylabel(met)
    panel.legend(['train', 'val'])