In [10]:
from deepface import DeepFace
import pandas as pd
from tqdm import tqdm

In [11]:
import os

# Available at: https://github.com/serengil/deepface/tree/master/tests/dataset
# Recursively collect the path of every file under 'dataset' whose name ends in '.jpg',
# in the order os.walk yields them.
facial_img_paths = [
    os.path.join(folder, filename)
    for folder, _subdirs, filenames in os.walk('dataset')
    for filename in filenames
    if filename.endswith('.jpg')
]
facial_img_paths

['dataset/img15.jpg',
 'dataset/img38.jpg',
 'dataset/img26.jpg',
 'dataset/img24.jpg',
 'dataset/img49.jpg',
 'dataset/img21.jpg',
 'dataset/img5.jpg',
 'dataset/img34.jpg',
 'dataset/img9.jpg',
 'dataset/img4.jpg',
 'dataset/img59.jpg',
 'dataset/img41.jpg',
 'dataset/img42.jpg',
 'dataset/img29.jpg',
 'dataset/img53.jpg',
 'dataset/img6.jpg',
 'dataset/img55.jpg',
 'dataset/img12.jpg',
 'dataset/img51.jpg',
 'dataset/selfie-many-people.jpg',
 'dataset/img3.jpg',
 'dataset/img20.jpg',
 'dataset/img62.jpg',
 'dataset/img8.jpg',
 'dataset/img56.jpg',
 'dataset/img31.jpg',
 'dataset/img18.jpg',
 'dataset/img11_reflection.jpg',
 'dataset/img39.jpg',
 'dataset/img61.jpg',
 'dataset/img19.jpg',
 'dataset/img22.jpg',
 'dataset/img67.jpg',
 'dataset/img23.jpg',
 'dataset/img16.jpg',
 'dataset/img36.jpg',
 'dataset/img47.jpg',
 'dataset/img58.jpg',
 'dataset/img30.jpg',
 'dataset/img7.jpg',
 'dataset/couple.jpg',
 'dataset/img50.jpg',
 'dataset/img27.jpg',
 'dataset/img44.jpg',
 'dataset/img2

In [12]:
# Build one [image path, embedding] pair per image. For each path, DeepFace.represent is
# called with the Facenet model and the "embedding" entry of its first result is kept.
instances = [
    [
        img_path,
        DeepFace.represent(img_path=img_path, model_name="Facenet")[0]["embedding"],
    ]
    for img_path in tqdm(facial_img_paths)
]

# Tabulate the pairs; the columns are the image path and its embedding
df = pd.DataFrame(instances, columns=['img_name', 'embedding'])
df

100%|██████████| 64/64 [01:09<00:00,  1.09s/it]


Unnamed: 0,img_name,embedding
0,dataset/img15.jpg,"[-1.101804494857788, -0.8272173404693604, -1.6..."
1,dataset/img38.jpg,"[-1.37093985080719, 0.675896406173706, -1.7434..."
2,dataset/img26.jpg,"[-0.8583675622940063, 1.235460638999939, 0.251..."
3,dataset/img24.jpg,"[-0.1612120121717453, 0.265657901763916, -1.17..."
4,dataset/img49.jpg,"[-0.8870744705200195, 1.1872832775115967, -0.5..."
...,...,...
59,dataset/img14.jpg,"[-1.1351802349090576, -0.8694204092025757, -1...."
60,dataset/img35.jpg,"[0.5659513473510742, 0.9556666612625122, -0.18..."
61,dataset/img48.jpg,"[-1.201542854309082, -0.05737030878663063, 0.4..."
62,dataset/img57.jpg,"[-0.7170809507369995, -0.5054607391357422, -0...."


In [13]:
from sqlalchemy import create_engine

# Create the engine for the SQLite database
engine = create_engine('sqlite:///facialdb.db')

# Open a connection only to confirm the database can be reached. Nothing in the notebook
# uses this connection afterwards (all later work goes through an ORM session), so the
# context manager releases it immediately instead of leaving it open for the kernel's lifetime.
with engine.connect() as connection:
    pass

In [19]:
from sqlalchemy import create_engine, Column, Integer, String, LargeBinary, DECIMAL, Float, ForeignKey
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship, sessionmaker

# Create an engine for the SQLite database
# NOTE(review): this rebinds `engine` to a second Engine for the same
# 'sqlite:///facialdb.db' URL that the earlier connection cell already created;
# one of the two create_engine calls looks redundant — confirm before removing.
engine = create_engine('sqlite:///facialdb.db')

# Declare a base for your classes
# FaceMeta and FaceEmbedding below subclass it, and Base.metadata is what gets
# passed to create_all further down.
Base = declarative_base()

# Define the face_meta table
class FaceMeta(Base):
    """One row per indexed image: its path plus the raw bytes of its embedding."""
    __tablename__ = 'face_meta'

    # Primary key; the insert cell sets it explicitly from the DataFrame index
    ID = Column(Integer, primary_key=True)
    # Image path, e.g. 'dataset/img11_reflection.jpg'. The previous String(10) was shorter
    # than every stored path; SQLite ignores the declared length, but a backend that
    # enforces it would truncate or reject the value.
    IMG_NAME = Column(String(255))
    # Embedding serialized with ndarray.tobytes() by the insert cell
    EMBEDDING = Column(LargeBinary)  # Equivalent to BLOB

# Define the face_embeddings table
class FaceEmbedding(Base):
    """One row per (face, dimension): a single component of a face's embedding vector."""
    __tablename__ = 'face_embeddings'

    # Surrogate primary key; never set explicitly by the insert cell
    ID = Column(Integer, primary_key=True)
    # The face_meta row this component belongs to
    FACE_ID = Column(Integer, ForeignKey('face_meta.ID'))
    # Position of this component within the embedding vector
    DIMENSION = Column(Integer)
    # Component value. The previous DECIMAL(5, 30) declared a scale (30) larger than its
    # precision (5), which is not a valid fixed-point type, and the values are floating
    # point anyway, so a Float column is the matching type.
    VALUE = Column(Float)

    # Establish a relationship between FaceMeta and FaceEmbedding (Optional)
    # The backref exposes the component rows on FaceMeta as `embeddings`.
    face_meta = relationship("FaceMeta", backref="embeddings")

# Create all tables in the database
# NOTE(review): create_all presumably skips tables that already exist in facialdb.db,
# so editing the column definitions above would not alter an existing file — confirm
# against the SQLAlchemy docs and delete the file if the schema changes.
Base.metadata.create_all(engine)

# Session to interact with the database
# `session` is the single module-level session that the later insert and query cells
# use; the final cell closes it.
Session = sessionmaker(bind=engine)
session = Session()

# Now the tables are created, and you can start adding data to them.

In [20]:
import numpy as np

# Load every DataFrame row into face_meta, plus one face_embeddings row per dimension.
# The whole load is a single transaction: it commits on success and rolls back on any
# failure, so the session is never left in a half-failed state.
try:
    # face_meta.ID is set explicitly from the DataFrame index, so running this cell a
    # second time against the persisted facialdb.db would violate the primary key.
    # Clearing both tables first makes the cell safe to re-run (child rows first).
    session.query(FaceEmbedding).delete()
    session.query(FaceMeta).delete()

    # Loop through the DataFrame rows
    for index, instance in tqdm(df.iterrows(), total=df.shape[0]):
        face_id = int(index)  # plain Python int for the DB driver
        img_name = instance['img_name']
        # assumes the embedding is a flat list of floats (so the array is 1-D) — TODO confirm
        embeddings = np.array(instance['embedding'])

        # Insert into face_meta
        session.add(FaceMeta(ID=face_id, IMG_NAME=img_name, EMBEDDING=embeddings.tobytes()))

        # Insert into face_embeddings for each embedding dimension.
        # float(...) converts the numpy scalar to a builtin float before binding.
        session.add_all([
            FaceEmbedding(FACE_ID=face_id, DIMENSION=i, VALUE=float(embedding))
            for i, embedding in enumerate(embeddings)
        ])

    # Commit the changes to the database
    session.commit()
except Exception:
    # Undo the partial load (including the deletes above) and surface the error
    session.rollback()
    raise

100%|██████████| 64/64 [00:00<00:00, 308.62it/s]


In [22]:
# Path of the query image to look up in the database
target_img_path = 'target.png'

# Run face extraction on the target image and keep the "face" entry of the first result
faces_in_target = DeepFace.extract_faces(img_path=target_img_path)
target_img = faces_in_target[0]["face"]

# Compute the Facenet embedding of the target. represent() is given the image path,
# not the extracted target_img above; the first result's "embedding" entry is kept.
target_representations = DeepFace.represent(img_path=target_img_path, model_name="Facenet")
target_embedding = target_representations[0]["embedding"]

In [23]:
from sqlalchemy import text
from sqlalchemy.orm import sessionmaker

# Render the target embedding as an inline SQL table: one
# "SELECT <dimension>, <value>" per component, glued together with UNION ALL.
target_statement = " UNION ALL ".join(
    f"SELECT {i} AS dimension, {value} AS value"  # sqlite version
    for i, value in enumerate(target_embedding)
)

# Main query, read from the innermost subquery outwards:
#   1. pair every stored embedding component with the target component of the same dimension
#   2. square the per-dimension difference
#   3. sum the squares per image to get the squared Euclidean distance
#   4. keep images whose squared distance is below 100, closest first
select_statement = f'''
    SELECT *
    FROM (
        SELECT img_name, SUM(subtract_dims) AS distance_squared
        FROM (
            SELECT img_name, (source - target) * (source - target) AS subtract_dims
            FROM (
                SELECT meta.img_name, emb.value AS source, target.value AS target
                FROM face_meta meta
                LEFT JOIN face_embeddings emb
                ON meta.id = emb.face_id
                LEFT JOIN (
                    {target_statement}  
                ) target
                ON emb.dimension = target.dimension
            )
        )
        GROUP BY img_name
    )
    WHERE distance_squared < 100
    ORDER BY distance_squared ASC
'''

# Run the query through the shared session and print each (img_name, distance_squared) row
result = session.execute(text(select_statement))
for row in result:
    print(row)

('dataset/img2.jpg', 48.28122236091167)
('dataset/img10.jpg', 56.29306353671703)
('dataset/img5.jpg', 61.69127490665663)
('dataset/img7.jpg', 63.07487524950536)
('dataset/img4.jpg', 66.80818870755718)
('dataset/img11.jpg', 73.03898150872323)
('dataset/img6.jpg', 73.70621119502981)
('dataset/img1.jpg', 81.80284892899088)
('dataset/img11_reflection.jpg', 82.4653085681509)


In [24]:
import math
import pandas as pd
from sqlalchemy import text

# Run the similarity query again to get a fresh result set
results = session.execute(text(select_statement))

# Each row is (img_name, distance_squared); take the square root to get the distance
instances = [
    [img_name, math.sqrt(distance_squared)]
    for img_name, distance_squared in results
]

# Tabulate the matches, then release the session now that all rows have been read
result_df = pd.DataFrame(instances, columns=['img_name', 'distance'])
session.close()

result_df

Unnamed: 0,img_name,distance
0,dataset/img2.jpg,6.948469
1,dataset/img10.jpg,7.50287
2,dataset/img5.jpg,7.854379
3,dataset/img7.jpg,7.941969
4,dataset/img4.jpg,8.173628
5,dataset/img11.jpg,8.546285
6,dataset/img6.jpg,8.585232
7,dataset/img1.jpg,9.044493
8,dataset/img11_reflection.jpg,9.081041
