In [None]:
!pip install mtcnn

In [None]:
import cv2 as cv
import numpy as np
import os
import tensorflow as tf
import matplotlib.pyplot as plt
from google.colab.patches import cv2_imshow
from sklearn.model_selection import GridSearchCV


os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' #This variable determines the minimum severity of log messages that TensorFlow’s C++ backend will display.

In [None]:
import os

dataset_path = "/content/drive/MyDrive/Face_Recognition/Dataset"
print("Files in directory:", os.listdir(dataset_path))

# **Dataset Creation**

In [None]:
# Updated code
from mtcnn.mtcnn import MTCNN

class FACELOADING:
    def __init__(self, directory):
        self.directory = directory
        self.target_size = (160, 160)
        self.X = []
        self.Y = []

    def extract_face(self, filename):
        # Since the images are already pre-cropped and resized, we only load and convert the image.
        img = cv.imread(filename)
        # Check if image was loaded correctly.
        if img is None:
            raise ValueError(f"Image {filename} cannot be loaded.")
        # Convert BGR to RGB.
        img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
        # Optionally, you can enforce the target size:
        # img = cv.resize(img, self.target_size)
        return img

    def load_faces(self, dir):
        FACES = []
        for im_name in os.listdir(dir):
            try:
                path = os.path.join(dir, im_name)
                single_face = self.extract_face(path)
                FACES.append(single_face)
            except Exception as e:
                print(f"Error loading image {im_name}: {e}")
                pass
        return FACES

    def load_classes(self):
        for sub_dir in os.listdir(self.directory):
            path = os.path.join(self.directory, sub_dir)
            if os.path.isdir(path):
                FACES = self.load_faces(path)
                labels = [sub_dir for _ in range(len(FACES))]
                print(f"Loaded successfully: {len(labels)} images from {sub_dir}")
                self.X.extend(FACES)
                self.Y.extend(labels)

        return np.asarray(self.X), np.asarray(self.Y)

    def plot_images(self):
        plt.figure(figsize=(18,16))
        ncols = 3
        nrows = len(self.Y) // ncols + 1
        for num, image in enumerate(self.X):
            plt.subplot(nrows, ncols, num+1)
            plt.imshow(image)
            plt.axis('off')
        plt.show()




1.   X contains all the images in the form of array
2.   Y conatins all the labels associated with the image



In [None]:
faceloading=FACELOADING("/content/drive/MyDrive/Face_Recognition/Dataset")
X,Y= faceloading.load_classes()

Loaded successfully: 182 images from Musfira_Sayyed
Loaded successfully: 182 images from Yaseen Ashraf Sayyed
Loaded successfully: 154 images from Vedant Jitendra Thakur
Loaded successfully: 175 images from Taha Ashraf Sayyed
Loaded successfully: 63 images from Ritvik Santosh Shetty
Loaded successfully: 161 images from Priyanshu Rupesh Jaiswal
Loaded successfully: 84 images from Parth Sandeep Keskar
Loaded successfully: 168 images from Ayush Kantilal Patil
Loaded successfully: 175 images from Vinay Bharat Pokharkar
Loaded successfully: 189 images from Sainath Vijay Redekar
Loaded successfully: 161 images from Neeraj Nimbadas Patil
Loaded successfully: 182 images from Yash Gangasagar Verma


In [None]:
print(Y.shape)
print(X.shape)


(1876,)
(1876, 160, 160, 3)


In [None]:
!pip install keras-facenet

# **Generating Embedding of the images using pre-trained FaceNet model**

In [None]:
from keras_facenet import FaceNet
embedder = FaceNet()

def get_embedding(face_img):
    face_img = face_img.astype('float32') # 3D(160x160x3)
    face_img = np.expand_dims(face_img, axis=0)
    # 4D (Nonex160x160x3)
    yhat= embedder.embeddings(face_img)
    return yhat[0] # 512D image (1x1x512)


In [None]:
EMBEDDED_X = [] #Empty list

for img in X:
    EMBEDDED_X.append(get_embedding(img))

#After collecting all embeddings, the list is converted into a NumPy array since numpy array are faster to process

EMBEDDED_X = np.asarray(EMBEDDED_X)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 10s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46

*Compressing all the embeddings in .npz format*

In [None]:
np.savez_compressed('face_embeddig_for_4_class',EMBEDDED_X,Y)

*Converting labels into int format using label encoder*

In [None]:
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
encoder.fit(Y)
Y = encoder.transform(Y)

# **Building and Training the model**

In [None]:
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(EMBEDDED_X, Y, shuffle=True, random_state=17)

In [None]:
print(X_train.shape)
print(X_test.shape)
print(Y_train.shape)
print(Y_test.shape)

(876, 512)
(293, 512)
(876,)
(293,)


In [None]:
from sklearn.svm import SVC
model = SVC(kernel='linear', probability=True)
model.fit(X_train, Y_train)

In [None]:
import pickle
#save the model
with open('svm_model_updated.pkl','wb') as f:
    pickle.dump(model,f)

In [None]:
from sklearn.svm import SVC
# Define the parameter grid for tuning
param_grid = {
    'C': [0.1, 1, 10, 100],  # Regularization parameter
    'kernel': ['linear', 'rbf', 'poly'],  # Different kernel types
    'gamma': ['scale', 'auto']  # Kernel coefficient for 'rbf' and 'poly'
}

# Perform Grid Search Cross Validation
grid_search = GridSearchCV(SVC(probability=True), param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, Y_train)

# Print the best hyperparameters and best accuracy
print("Best Parameters:", grid_search.best_params_)
print("Best Accuracy:", grid_search.best_score_)

# Use the best model
best_model = grid_search.best_estimator_


Best Parameters: {'C': 0.1, 'gamma': 'scale', 'kernel': 'linear'}
Best Accuracy: 1.0


In [None]:
#Evaluate on Test Data:
#Now, test the best model on your test set to check if it generalizes well:
test_accuracy = best_model.score(X_test, Y_test)
print("Test Accuracy:", test_accuracy)


Test Accuracy: 1.0


In [None]:
#To check for overfitting, compare the training accuracy and test accuracy
train_accuracy = best_model.score(X_train, Y_train)
print("Training Accuracy:", train_accuracy)


Training Accuracy: 1.0


✅ Your model is performing perfectly on both train and test data. Because Training accuracy and Test accuracy is equal to 1

🚀 No signs of overfitting (since the model generalizes well to unseen data)

In [None]:
import pickle
#save the model
with open('best_model_2.pkl','wb') as f:
    pickle.dump(best_model,f)

In [None]:
# import joblib
# joblib.dump(best_model, "best_svm_model.pkl")
