In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import os
import glob as gb
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras.models import Model

In [None]:
# Training image iterator: reads images from `train_dir`, rescales pixel values
# by 1/255 and resizes every image to 224 x 224 (the input size of `vgg_face`).
# shuffle=False keeps the batches in file order, so predictions made from this
# iterator line up with `Train_Data.classes`.
# NOTE(review): horizontal_flip is a *random* augmentation; during a single
# feature-extraction pass it presumably makes the embeddings non-reproducible
# rather than augmenting anything - confirm it is intended.
# NOTE(review): no `validation_split` is configured on the generator, so
# subset="training" presumably selects every image - confirm.
train_dir = '../images/train'
train_augmenter = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1 / 255.0,
    horizontal_flip=True,
)
Train_Data = train_augmenter.flow_from_directory(
    directory=train_dir,
    target_size=(224, 224),
    batch_size=16,
    shuffle=False,
    subset="training",
)

In [None]:
# Class names known to the training iterator, in class-index dictionary order
[class_name for class_name in Train_Data.class_indices]

In [None]:
# Display (up to) the first 16 images of the training dataset with their class names
classes = list(Train_Data.class_indices.keys())

# Index the iterator instead of looping over it: batch 0 is always the same
# first `batch_size` files (shuffle=False), so re-running this cell shows the
# same images instead of moving on to the next batch.
X_batch, y_batch = Train_Data[0]

# The first batch holds fewer than 16 images when the dataset itself is smaller,
# so never index past what was actually returned.
n_shown = min(16, len(X_batch))

fig = plt.figure(figsize=(30, 30))
for i in range(n_shown):
    ax = fig.add_subplot(4, 4, i + 1)
    ax.imshow(X_batch[i])
    # Labels are one-hot rows; argmax gives the position of the hot entry
    ax.set_title(classes[int(np.argmax(y_batch[i]))])
plt.show()


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import ZeroPadding2D, Convolution2D, MaxPooling2D, Dropout, Flatten, Activation

def vgg_face():
    """Build the VGG-Face style network as a Keras ``Sequential`` model.

    The network takes 224 x 224 x 3 images and consists of five convolutional
    blocks (zero-padded 3x3 ReLU convolutions followed by 2x2 max-pooling) and
    a convolutional classifier head that ends in a 2622-way softmax.

    Layers are added in exactly the same order as the reference definition so
    that weight files loaded by layer order still match.

    Returns:
        The un-compiled ``Sequential`` model (no weights loaded).
    """
    # (number of filters, number of 3x3 convolutions) for each block
    conv_blocks = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))

    net = Sequential()

    # Only the very first layer declares the input shape
    padding_kwargs = {'input_shape': (224, 224, 3)}
    for n_filters, n_convs in conv_blocks:
        for _ in range(n_convs):
            net.add(ZeroPadding2D((1, 1), **padding_kwargs))
            padding_kwargs = {}
            net.add(Convolution2D(n_filters, (3, 3), activation='relu'))
        net.add(MaxPooling2D((2, 2), strides=(2, 2)))

    # Classifier head expressed as convolutions: after five 2x2 poolings the
    # 224 x 224 input is 7 x 7, so the 7x7 kernel collapses it to 1 x 1.
    for kernel_size in ((7, 7), (1, 1)):
        net.add(Convolution2D(4096, kernel_size, activation='relu'))
        net.add(Dropout(0.5))

    net.add(Convolution2D(2622, (1, 1)))
    net.add(Flatten())
    net.add(Activation('softmax'))
    return net

In [None]:
# Instantiate the network, then restore the pre-trained weights from disk
vgg_weights_path = '../models/vgg_face_weights.h5'
model = vgg_face()
model.load_weights(vgg_weights_path)

In [None]:
# Turn the classifier into a feature extractor (a common transfer-learning step):
#   - drop the final softmax `Activation` layer
#   - use the output of the layer just before it (the `Flatten` layer) as the
#     feature vector of an input image
#
# The Flatten layer is looked up by type rather than by position (`layers[-2]`).
# This cell rebinds `model`, so with a positional index every re-run would
# strip one more layer and silently change the embedding shape; selecting by
# type makes the cell safe to re-run.
flatten_layer = next(
    layer for layer in reversed(model.layers)
    if isinstance(layer, tf.keras.layers.Flatten)
)
model = Model(inputs=model.inputs[0], outputs=flatten_layer.output)
model.summary()

In [None]:
# Run every training batch through the truncated network to obtain one
# feature vector per image.
n_batches = len(Train_Data)  # number of batches needed to see the whole dataset once
embedding_vector = model.predict(Train_Data, steps=n_batches, verbose=1)  # verbose=1: progress bar

In [None]:
import os

# np.save does not create missing directories, so make sure the target exists
os.makedirs("../dumped_model", exist_ok=True)

# Save the features
np.save("../dumped_model/features.npy", embedding_vector)

# Save the labels (integer class index of every file, in iterator order)
np.save("../dumped_model/labels.npy", Train_Data.classes)

In [None]:
# Reload the cached features and labels from the directory they are saved to
# ("../dumped_model/"); loading bare 'features.npy' / 'labels.npy' would look
# in the current working directory instead.
embedding_vector = np.load('../dumped_model/features.npy')
y_train = np.load('../dumped_model/labels.npy')

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the samples for testing. The split is shuffled with a fixed
# seed and stratified on the labels so both parts keep the class distribution.
# NOTE: `y_train` holds *all* labels on entry and only the training labels on
# exit, so this cell cannot be re-run without reloading the labels first.
split_options = dict(
    test_size=0.1,
    shuffle=True,
    stratify=y_train,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    embedding_vector,  # features
    y_train,           # labels
    **split_options,
)

In [None]:
from sklearn.utils import shuffle

# Shuffle features and labels together. The seed is fixed so that the sample
# order, and everything fitted on it afterwards, is the same on every run.
X_train, y_train = shuffle(X_train, y_train, random_state=42)

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize every feature to zero mean and unit variance. The statistics are
# learned from the training split only and then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [None]:
from sklearn.decomposition import PCA

# Principal component analysis: linear dimensionality reduction to 128 components.
# random_state is fixed because PCA may pick a randomized SVD solver for large
# inputs; without a seed the fitted components (and the model saved later)
# would differ from run to run.
pca = PCA(n_components=128, random_state=42)
X_train = pca.fit_transform(X_train)  # fit on the training split only
X_test = pca.transform(X_test)

In [None]:
from sklearn.svm import SVC

# Support Vector Classification
clf = SVC(
    kernel='linear', # Linear kernel
    C=2., # Regularization parameter
    class_weight='balanced', # Adjust weights inversely proportional to class frequencies
    decision_function_shape='ovo', # One-vs-one decision function
    probability=True, # Enable probability estimates
    random_state=42 # probability=True shuffles data internally; seed it for reproducible estimates
)

clf.fit(X_train, y_train)

In [None]:
# Classification accuracy on the held-out split
from sklearn.metrics import accuracy_score

y_predict = clf.predict(X_test)

# Peek at the first five predictions, then at the first five true labels
for label_preview in (y_predict, y_test):
    print(label_preview[:5])

print(f'The Accuracy of VGGFace2 is {accuracy_score(y_test,y_predict)*100} %')

In [None]:
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Confusion matrix of the test predictions: rows are true labels, columns are
# predicted labels. Axis labels and a title are set so the figure can be read
# on its own.
cm = confusion_matrix(y_test, y_predict)
fig, ax = plt.subplots(figsize=(10, 10))
sns.heatmap(cm, annot=True, fmt="d", cmap="YlGnBu", ax=ax)
ax.set_xlabel("Predicted label")
ax.set_ylabel("True label")
ax.set_title("Confusion matrix on the test split")
plt.show()

In [None]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 summary for the test predictions
report = classification_report(y_test, y_predict)
print(report)

In [None]:
from joblib import dump

# Persist the fitted preprocessing steps and the classifier so they can be
# reloaded together later. The directory is created first because `dump`
# fails if it does not exist.
os.makedirs('../dumped_model', exist_ok=True)

fitted_artifacts = (
    (scaler, '../dumped_model/scaler.joblib'),
    (pca, '../dumped_model/pca_model.joblib'),
    (clf, '../dumped_model/SVC.joblib'),
)
for artifact, artifact_path in fitted_artifacts:
    dump(artifact, artifact_path)