In [48]:
import os,csv
import numpy as np
import tensorflow as tf
import keras
from mtcnn import MTCNN
from numpy import asarray
import matplotlib.pyplot as plt
from PIL import Image

from keras_vggface.vggface import VGGFace
from tensorflow.keras.layers import Dropout,Dense
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from keras_vggface.utils import preprocess_input
from PIL import Image
import hashlib
import io
import threading
from skimage.filters import threshold_otsu


In [8]:
def extract_face(filename, required_size=(224, 224)):
    
    pixels = plt.imread(filename)
    
    detector = MTCNN()
	
    results = detector.detect_faces(pixels)
    x1, y1, width, height = results[0]['box']
    x2, y2 = x1 + width, y1 + height
	
    face = pixels[y1:y2, x1:x2]
	
    image = Image.fromarray(face)
    image = image.resize(required_size)
    face_array = asarray(image)
    face_array = face_array.astype('float32')
    
    img_array = np.expand_dims(face_array, axis=0)
    img_array = preprocess_input(img_array, version=2)
    
    return img_array,image


* **Model Definition**

In [70]:
# Charger le modèle SENet50 pré-entraîné
model = VGGFace(model='senet50', include_top=False, input_shape=(224, 224, 3), pooling='max')
model.trainable = False

dropout1 = Dropout(0.3)(model.output)
    
latent_layer = Dense(64,activation='sigmoid',name='latent')(dropout1)
    
dropout2 = Dropout(0.2)(latent_layer)
    
dense_layer = Dense(53, activation='softmax',name='softmax')(dropout2)
    
vgg_face = Model(inputs=model.input,outputs=dense_layer)

* **Data Preparation** (X and Y)

In [None]:
X = []
Y = []

def save_only_faces(root_dir, target_dir):
    for folder_name in os.listdir(root_dir):
        folder_path = os.path.join(root_dir, folder_name)
        if os.path.isdir(folder_path):
            for file in os.listdir(folder_path):
                if file.lower().endswith(('.png', '.jpg', '.jpeg')):
                    img_path = os.path.join(folder_path, file)
                    
                    # Construct the relative path for saving
                    img_path_without_root = os.path.join(folder_name, file)
                    save_folder = os.path.join(target_dir, folder_name)
                    save_path = os.path.join(save_folder, file)
                    
                    # Ensure the target folder exists
                    os.makedirs(save_folder, exist_ok=True)
                    
                    # Extract the face and save it
                    face_array, face = extract_face(img_path)
                    face.save(save_path)
                    X.append(face_array)

# Directories
root_directory = 'gt_db'  
target_dir = "faces"

# Run the function
save_only_faces(root_directory, target_dir)




In [67]:
from tensorflow.keras.utils import to_categorical

Y = [i for i in range(0, 53) for _ in range(15)]
print(Y)
del Y[753:756]
del Y[763:768]
del Y[776:781]
print(Y)

num_classes = list(set(Y))
num_classes = (len(num_classes))

# ----------------- X -----------------
X = np.array(X)
# X = np.squeeze(X, axis=1)
print(len(X))
print("New shape of X:", X.shape)
# ----------------- Y -----------------
Y = np.array(Y)  # Ensure labels are in array format
Y = to_categorical(Y, num_classes=num_classes)
print(len(Y))
#--------------------------------------




[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 1

* **Model Training**

In [68]:
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)


In [71]:
vgg_face.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

history = vgg_face.fit(X_train, Y_train, shuffle = True, epochs=10, batch_size=32, validation_data=(X_test, Y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


* Saving the model's weights


In [79]:
vgg_face.save('vgg_face_model.h5')


* **Make Prediction**

In [78]:
model = tf.keras.models.load_model('vgg_face_model.h5')
# Load a new image and preprocess it
new_image_path = r'C:\Users\LENOVO\Desktop\ETUDES\Master DS\S2\Deep Learning\Face-Recognition--DL\Model\gt_db\s01\04.jpg'
new_face_array, _ = extract_face(new_image_path)

# Make a prediction
prediction = model.predict(new_face_array)

# Get the predicted class
predicted_class = np.argmax(prediction, axis=1)
print(f'Predicted class: {predicted_class}')


Predicted class: [0]


* **Removing classification layer**

In [None]:
vgg_face = tf.keras.models.load_model('vgg_face_model.h5')
# Remove the classification part from the trained model
vgg_face_64 = Model(inputs=vgg_face.input, outputs=vgg_face.get_layer('latent').output)

# ------------------ Save the model ------------------
# vgg_face_64.save('vgg_face_model_64.h5')


* Predict vectors of 64 

In [319]:
vgg_face_64 = tf.keras.models.load_model('vgg_face_model_64.h5')

def hashcode_predict(img_path):
    new_face_array, _ = extract_face(img_path)
    vect = vgg_face_64.predict(new_face_array)
    hashcode = np.where(vect >= 0.5, 1, 0)
    return hashcode

# Load a new image and preprocess it
new_image_path1 = r"C:\Users\LENOVO\Desktop\ETUDES\Master DS\S2\Deep Learning\Face-Recognition--DL\src\main\resources\static\image\gt_db\s50\07.jpg"
new_image_path2 = r"C:\Users\LENOVO\Desktop\ETUDES\Master DS\S2\Deep Learning\Face-Recognition--DL\src\main\resources\static\image\gt_db\s50\08.jpg"

vect1 = hashcode_predict(new_image_path1)
vect2 = hashcode_predict(new_image_path2)

print(vect1)
print(vect2)




[[1 0 0 0 0 1 0 1 1 0 1 1 0 0 1 0 0 0 1 1 0 0 1 0 1 1 1 1 1 1 1 0 1 1 1 0
  0 1 0 0 0 1 0 0 0 1 1 0 0 1 1 1 0 1 0 0 1 0 1 0 0 0 0 1]]
[[1 0 0 0 0 1 0 1 1 0 1 1 0 1 1 0 0 0 1 0 0 0 1 0 1 1 1 0 1 1 1 0 1 1 1 1
  0 1 0 0 0 1 0 0 0 1 1 0 0 1 1 1 0 1 0 0 1 1 1 0 0 0 0 1]]


In [328]:
import reedsolo

# Initialiser le codeur Reed-Solomon
rs = reedsolo.RSCodec(64)  # Capable de corriger 10 erreurs

# Convert numpy array to bytes
hashcode_bytes = vect1.tobytes()
hashcode_bytes2 = vect2.tobytes()
print(len(hashcode_bytes))
print(vect1.tobytes())
# Encodage du hashcode
encoded_hashcode = rs.encode(hashcode_bytes)  # hashcode est une chaîne de 128 bits
encoded_hashcode2 = rs.encode(hashcode_bytes2)  # hashcode est une chaîne de 128 bits
print(len(encoded_hashcode))

print("Hashcode encodé avec Reed-Solomon :", encoded_hashcode)
diff = [i for i in range(len(encoded_hashcode)) if encoded_hashcode[i] != encoded_hashcode2[i]]
print("Bits différents entre les deux hashcodes :", diff)

256
b'\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x

Compare

In [304]:
diff = np.sum(vect1 != vect2)
print(diff) 
    
print(type(vect1))

18
<class 'numpy.ndarray'>


Generate the hashcode for each person 

In [290]:
results = {}
# Directories
root_dir = 'gt_db'
    
for folder_name in os.listdir(root_dir):
        folder_path = os.path.join(root_dir, folder_name)
        if os.path.isdir(folder_path):
            vectors = []
            for file in os.listdir(folder_path):
                if file.lower().endswith(('.png', '.jpg', '.jpeg')):
                    img_path = os.path.join(folder_path, file)
                    
                    # Extract the face and get the 64-dimensional vector
                    face_array, _ = extract_face(img_path)
                    vector_64 = vgg_face_64.predict(face_array)
                    vectors.append(vector_64[0])
            
            if vectors:
                # Calculate the mean vector for the subdirectory
                mean_vector = np.mean(vectors, axis=0)
                
                # Binarize the vector
                binarized_vector = np.where(mean_vector >= 0.5, 1, 0)
                
                # Store the result
                results[folder_path] = binarized_vector
    






infos


In [291]:
names = [
    "Abdelaziz", "Abdeljalil", "Karim", "Abdelkhalek", "Aziza", "Abdelmajid", "Aymane", "Abdelmounaim", "Abdelouahab",
    "Sara", "Abdelrazak", "Abderrahim", "Abderrazak", "Abdeslam", "Nour", "Abir", "Achraf", "Adil", "Afif", "Ahmed",
    "Salma", "Akram", "Alaeddine", "Alae", "Ali", "Sanae", "Anas", "Anouar", "Assem", "Boutaina",
    "Aziz", "Badr", "Bahir", "Bashir", "Bilal", "Chafik", "Chakib", "Driss", "Elhadi", "Elmehdi",
    "Faouzi", "Fouad", "karima", "Hamid", "Hamza", "Hassan", "Hatim", "Hicham", "Idriss", "Ilyas","MAROUAN CHOTTA","YASSINE OUTSILA","OUSSAMA ETTAOUIL","YASSINE BOUHARMACHE"
]

jobs= [
    "Project Manager","Business Analyst","Operations Manager","Marketing Manager","Sales Representative","Customer Service Manager","Financial Analyst",
    "Human Resources Coordinator","Product Developer","Logistics Specialist","Supply Chain Manager","Quality Control Inspector",
    "Research Scientist","Regulatory Affairs Specialist","Compliance Officer","Data Analyst","Operations Coordinator",
    "Inventory Manager","Procurement Specialist","Production Supervisor","Environmental Health and Safety Manager",
    "Industrial Engineer","Maintenance Technician","Packaging Designer","Technical Support Specialist","IT Systems Administrator","Software Developer",
    "Network Engineer","Cybersecurity Analyst","UX/UI Designer","Content Manager","Brand Strategist","Social Media Coordinator",
    "Event Planner","Public Relations Specialist","Legal Counsel","Risk Manager","Business Development Manager","Project Coordinator","Training Specialist",
    "Health and Wellness Coordinator","Account Manager","Financial Planner","Customer Success Manager","Sales Engineer","Operations Analyst",
    "Procurement Manager","Quality Assurance Engineer","Research Analyst","Regulatory Compliance Manager","DATA SCIENTIST","DATA SCIENTIST","DATA SCIENTIST","DATA SCIENTIST"
]

ages = [
    33, 26, 27, 25, 62, 30, 23, 28, 33, 26,
    25, 29, 37, 24, 39, 34, 41, 23, 27, 37,
    35, 35, 26, 36, 40, 26, 24, 31, 32, 25,
    34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
    44, 45, 25, 26, 27, 28, 29, 30, 29, 41,
    22,22,22,24
]





* **Create the Database** (save to CVS)

In [293]:
import csv

# Define the CSV file path
csv_file = 'results.csv'
i=53
# Write the results dictionary to the CSV file
with open(csv_file, mode='a', newline='') as file:
        writer = csv.writer(file)
        for folder_path, vector in results.items():
            writer.writerow([f"{folder_path}",vector,names[i], ages[i], jobs[i]])
            i+=1


* **Find a match** (for future input images)

In [298]:
import numpy as np
import os
from tqdm import tqdm
import pandas as pd


csv_file = 'results.csv'
data = pd.read_csv(csv_file)
matched_person = {}

# Function to compare vectors and show information if the difference is less than the threshold
def compare_and_show_info(image_path, threshold=5):
    
    vect_64 = hashcode_predict(image_path)
    
    for index, row in (data.iterrows()):
        csv_vector = row[1]
        vector_str_clean = csv_vector.replace("\n", "").replace("[", "").replace("]", "").strip()
        csv_vector = np.array(list(map(int, vector_str_clean.split())), dtype=int)
        
        differences = np.sum(vect_64 != csv_vector) 
        
        if differences < threshold:
            matched_person = {
            'Name': row[2],
            'Age': row[3],
            'Job': row[4]
            }
            return matched_person
    return None

# Example 
image_path = r"C:\Users\LENOVO\Desktop\ETUDES\Master DS\S2\Deep Learning\Face-Recognition--DL\Model\gt_db\s54\01.jpg"
person = compare_and_show_info(image_path)
print(person)

None


  csv_vector = row[1]
