In [1]:
import os
import numpy as np
import pandas as pd
from PIL import Image
from keras_facenet import FaceNet
from keras.preprocessing.image import load_img, img_to_array

In [2]:
# Compute the embedding of a single image file
def extract_embedding(image_path, model):
    """Return the embedding of one image as a single-row DataFrame.

    Parameters
    ----------
    image_path : str
        Path of the image file; it is loaded as RGB and resized to 160x160.
    model : object
        Embedder exposing ``embeddings(batch)``; it is called with a batch
        holding just this image.

    Returns
    -------
    pandas.DataFrame
        One row containing the embedding values, one value per column.
    """
    # Read the file straight into a pixel array at the expected input size
    pixels = img_to_array(
        load_img(image_path, target_size=(160, 160), color_mode='rgb')
    )

    # The model works on batches, so wrap the image in a batch of one and
    # keep the only result that comes back
    batch = np.array([pixels])
    vector = model.embeddings(batch)[0]

    return pd.DataFrame([vector])

In [3]:
# Root directory of the face-image dataset, walked recursively when the image
# filenames are collected (the output CSV path, csv_path, is defined in a later cell)
dataset_path = "Faces_Dataset_processed"


In [4]:
# Instantiate the FaceNet embedder from keras_facenet; it is passed to
# extract_embedding() as `model` when the embeddings are computed
embedder = FaceNet()

In [5]:
# Paths of every readable RGB image found under the dataset directory
filenames = []

# Traverse the directory tree and keep only files that open as RGB images
for root, dirs, files in os.walk(dataset_path):
    for file in files:
        filename = os.path.relpath(os.path.join(root, file), start=".")
        try:
            # Use a context manager so the file handle is released as soon as
            # the mode has been checked, instead of keeping one handle open
            # per image for the whole walk
            with Image.open(filename) as image:
                if image.mode == 'RGB':
                    filenames.append(filename)
        except IOError:
            # Unreadable / non-image files are reported and left out
            print(f'Error opening file: {filename}')

# Print how many usable images were collected
print(len(filenames))

# 1094

Error opening file: Faces_Dataset_processed\not_allowed\not_allowed_99.jpg
1093


In [7]:
csv_path = "celeb_embeddings.csv"

# Binary target assigned to each class sub-directory of the dataset
label_by_dir = {"allowed": 1, "not_allowed": 0}

# Reuse the cached CSV when it exists and is not empty; otherwise compute the
# embeddings for every collected image and write the cache
if os.path.isfile(csv_path) and os.stat(csv_path).st_size != 0:
    print('The CSV file already exists and is not empty. Loading the embeddings.')
    # Read the cache itself (csv_path), not the leftover `filename` loop
    # variable, which holds the path of the last image visited
    embeddings_df = pd.read_csv(csv_path)
else:
    print('The CSV file does not exist or is empty. Creating a new one.')
    embedding_frames = []
    for filename in filenames:
        # The label comes from the first directory below dataset_path.
        # Splitting on os.sep instead of matching a hard-coded backslash
        # prefix keeps this working on non-Windows systems too.
        class_dir = os.path.relpath(filename, start=dataset_path).split(os.sep)[0]
        target = label_by_dir.get(class_dir)
        if target is None:  # 0 is a valid label, so compare against None explicitly
            continue  # Skip files that are not in 'allowed' or 'not_allowed' directories

        # Run the model only for files that will actually be kept
        embedding = extract_embedding(filename, embedder)
        embedding['target'] = target
        embedding_frames.append(embedding)

    # Concatenate once at the end rather than growing the frame inside the
    # loop; pd.concat raises on an empty list, so fall back to an empty frame
    if embedding_frames:
        embeddings_df = pd.concat(embedding_frames, ignore_index=True)
    else:
        embeddings_df = pd.DataFrame()

    # Store the embeddings in a CSV file
    embeddings_df.to_csv(csv_path, index=False)

# 1m40s

The CSV file does not exist or is empty. Creating a new one.


In [12]:
# Load the embeddings back from the CSV cache on disk and display the
# frame's (rows, columns) shape
embeddings_df = pd.read_csv(csv_path)
embeddings_df.shape

(1093, 513)

In [10]:
# Spot-check five randomly chosen values of the 'target' label column
embeddings_df['target'].sample(5)
# Alternative check, listing the distinct labels: embeddings_df['target'].unique()

145    1
783    0
139    1
607    0
452    1
Name: target, dtype: int64