In [2]:
import os
import numpy as np
import pandas as pd
import cv2
from sklearn.decomposition import PCA
from deepface import DeepFace
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

# Function to preprocess images for DeepFace

def preprocess_image(image_path, target_size=(224, 224)):
    img = cv2.imread(image_path)
    if img is None:
        return None
    img_resized = cv2.resize(img, target_size)
    return img_resized

# Path to the cropped faces folder
data_folder = "../data/faces3"

def create_face_dataset(data_folder):
    face_dataset = []
    labels = []
    label_map = {}
    current_label = 0

    for person_name in os.listdir(data_folder):
        person_folder = os.path.join(data_folder, person_name)
        if os.path.isdir(person_folder):
            label_map[current_label] = person_name
            for filename in os.listdir(person_folder):
                img_path = os.path.join(person_folder, filename)
                processed_img = preprocess_image(img_path)
                if processed_img is not None:
                    face_dataset.append(processed_img.flatten())
                    labels.append(current_label)
            current_label += 1
    
    return np.array(face_dataset), np.array(labels), label_map

print("Preparing data...")
images, labels, label_map = create_face_dataset(data_folder)
print(f"Dataset loaded: {len(images)} images.")
print("Label Map:", label_map)

# Apply PCA to create Eigenfaces
pca = PCA(n_components=100, whiten=True, random_state=42)
images_pca = pca.fit_transform(images)

# Prepare labels for training
num_classes = len(label_map)
labels_categorical = to_categorical(labels, num_classes)

# Load the DeepFace Facenet model
base_model = DeepFace.build_model("Facenet")

# Add a custom classification layer
input_layer = base_model.input
x = Dense(64, activation='relu')(base_model.output)
output_layer = Dense(num_classes, activation='softmax')(x)
model = Model(inputs=input_layer, outputs=output_layer)

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

print("Training model...")
history = model.fit(images_pca, labels_categorical, epochs=20, batch_size=32, validation_split=0.2)

# Save the trained model
model_path = "../data/deepface_facenet_model.h5"
model.save(model_path)
print(f"Model saved to {model_path}")

2025-01-10 15:20:29.436314: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-10 15:20:29.715586: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-10 15:20:29.717060: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Preparing data...
Dataset loaded: 611 images.
Label Map: {0: 'matthias', 1: 'lasse', 2: 'akif', 3: 'bart', 4: 'florian', 5: 'daiane', 6: 'konrad', 7: 'senne', 8: 'michiel', 9: 'seppe', 10: 'youssef', 11: 'raul', 12: 'alper', 13: 'nelli'}


AttributeError: 'FaceNet128dClient' object has no attribute 'input'

In [None]:
# Function to predict using the model
def predict_faces(test_folder, model, pca, label_map):
    results = []

    for filename in sorted(os.listdir(test_folder)):
        img_path = os.path.join(test_folder, filename)
        processed_img = preprocess_image(img_path)
        if processed_img is None:
            continue

        img_pca = pca.transform([processed_img.flatten()])
        predictions = model.predict(img_pca)
        predicted_label = np.argmax(predictions)
        person_name = label_map[predicted_label]

        results.append({
            "filename": filename,
            "predicted_label": person_name,
            "confidence": np.max(predictions)
        })

    return results

# Paths for prediction
test_folder = "../data/faces4_test"
results = predict_faces(test_folder, model, pca, label_map)


In [None]:
# Group results by image
def group_results_by_image(results):
    grouped_data = {}
    for result in results:
        base_filename = result["filename"].split("_face")[0]
        if base_filename not in grouped_data:
            grouped_data[base_filename] = []
        grouped_data[base_filename].append(result["predicted_label"].lower())

    grouped_results = [{"filename": filename, "predicted_labels": labels} for filename, labels in grouped_data.items()]
    return grouped_results

grouped_results = group_results_by_image(results)

# Create submission file
def create_submission_csv(grouped_results, output_csv_path, test_images_folder):
    all_filenames = sorted(
        [os.path.splitext(filename)[0] for filename in os.listdir(test_images_folder) 
         if filename.endswith(('.jpg', '.jpeg', '.png'))]
    )

    grouped_dict = {group["filename"].split('.')[0]: ";".join(group["predicted_labels"]) for group in grouped_results}

    submission_data = []
    for filename in all_filenames:
        label_name = grouped_dict.get(filename, "nothing")
        submission_data.append({"image": filename, "label_name": label_name})

    submission_df = pd.DataFrame(submission_data)
    submission_df.to_csv(output_csv_path, index=False)
    print(f"Submission file saved to {output_csv_path}")

# Example usage
test_images_folder = "../data/images/test_images/cleaned_images"
output_csv_path = "../submission.csv"
create_submission_csv(grouped_results, output_csv_path, test_images_folder)


In [6]:
import os
import numpy as np
import pandas as pd
import cv2
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from deepface import DeepFace

# Function to preprocess images for DeepFace
def preprocess_image(image_path, target_size=(224, 224)):
    img = cv2.imread(image_path)
    if img is None:
        return None
    img_resized = cv2.resize(img, target_size)
    return img_resized

# Function to extract embeddings using DeepFace
def extract_embeddings(images, model_name="Facenet"):
    embeddings = []
    for img in images:
        embedding = DeepFace.represent(img, model_name=model_name, enforce_detection=False)
        embeddings.append(embedding[0]["embedding"])
    return np.array(embeddings)

# Path to the cropped faces folder
data_folder = "../data/faces3"

def create_face_dataset(data_folder):
    images = []
    labels = []
    label_map = {}
    current_label = 0

    for person_name in os.listdir(data_folder):
        person_folder = os.path.join(data_folder, person_name)
        if os.path.isdir(person_folder):
            label_map[current_label] = person_name
            for filename in os.listdir(person_folder):
                img_path = os.path.join(person_folder, filename)
                processed_img = preprocess_image(img_path)
                if processed_img is not None:
                    images.append(processed_img)
                    labels.append(person_name)
            current_label += 1

    return images, labels, label_map

print("Preparing data...")
images, labels, label_map = create_face_dataset(data_folder)
print(f"Dataset loaded: {len(images)} images.")
print("Label Map:", label_map)

# Extract embeddings using DeepFace
print("Extracting embeddings...")
embeddings = extract_embeddings(images)

# Apply PCA for dimensionality reduction
pca = PCA(n_components=100, whiten=True, random_state=42)
embeddings_pca = pca.fit_transform(embeddings)

# Encode labels
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# Train a classifier
print("Training classifier...")
classifier = LogisticRegression(max_iter=1000, random_state=42)
classifier.fit(embeddings_pca, labels_encoded)

# Save the PCA and classifier
import joblib
joblib.dump(pca, "../data/pca_model.pkl")
joblib.dump(classifier, "../data/classifier_model.pkl")
joblib.dump(label_encoder, "../data/label_encoder.pkl")
print("Models saved.")

# Function to predict using the trained classifier
def predict_faces(test_folder, pca, classifier, label_encoder):
    results = []

    for filename in sorted(os.listdir(test_folder)):
        img_path = os.path.join(test_folder, filename)
        processed_img = preprocess_image(img_path)
        if processed_img is None:
            continue

        embedding = extract_embeddings([processed_img])[0]
        embedding_pca = pca.transform([embedding])
        predicted_label = classifier.predict(embedding_pca)[0]
        person_name = label_encoder.inverse_transform([predicted_label])[0]

        results.append({
            "filename": filename,
            "predicted_label": person_name
        })

    return results

# Load saved models for prediction
pca = joblib.load("../data/pca_model.pkl")
classifier = joblib.load("../data/classifier_model.pkl")
label_encoder = joblib.load("../data/label_encoder.pkl")

# Paths for prediction
test_folder = "../data/faces4_test"
results = predict_faces(test_folder, pca, classifier, label_encoder)

# Create submission file
def create_submission_csv(results, output_csv_path, test_images_folder):
    all_filenames = sorted(
        [os.path.splitext(filename)[0] for filename in os.listdir(test_images_folder) 
         if filename.endswith(('.jpg', '.jpeg', '.png'))]
    )

    grouped_dict = {result["filename"].split('.')[0]: result["predicted_label"] for result in results}

    submission_data = []
    for filename in all_filenames:
        label_name = grouped_dict.get(filename, "nothing")
        submission_data.append({"image": filename, "label_name": label_name})

    submission_df = pd.DataFrame(submission_data)
    submission_df.to_csv(output_csv_path, index=False)
    print(f"Submission file saved to {output_csv_path}")

# Example usage
test_images_folder = "../data/images/test_images/cleaned_images"
output_csv_path = "../submission.csv"
create_submission_csv(results, output_csv_path, test_images_folder)


Preparing data...
Dataset loaded: 611 images.
Label Map: {0: 'matthias', 1: 'lasse', 2: 'akif', 3: 'bart', 4: 'florian', 5: 'daiane', 6: 'konrad', 7: 'senne', 8: 'michiel', 9: 'seppe', 10: 'youssef', 11: 'raul', 12: 'alper', 13: 'nelli'}
Extracting embeddings...


ValueError: ('Confirm that opencv is installed on your environment! Expected path ', '/home/neyon/anaconda3/lib/python3.9/site-packages/data/haarcascade_frontalface_default.xml', ' violated.')