In [8]:
import os
import numpy as np
from keras_facenet import FaceNet
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Function to preprocess images for TensorFlow
def preprocess_image(image_path, target_size=(160, 160)):
    """Read one image file and return its pixels scaled by 1/255.

    Args:
        image_path: Path of the image file to load.
        target_size: Forwarded to ``load_img`` as ``target_size``;
            defaults to ``(160, 160)``.

    Returns:
        The array produced by ``img_to_array`` with every value divided
        by 255.0.
    """
    keras_image = tf.keras.preprocessing.image
    loaded = keras_image.load_img(image_path, target_size=target_size)
    # Normalize pixel values by dividing by 255.0.
    return keras_image.img_to_array(loaded) / 255.0

# Load dataset and preprocess
def load_data(data_folder, target_size=(160, 160)):
    """Load a folder-per-person image dataset.

    Every visible sub-directory of ``data_folder`` is one class (person).
    Sub-directories are visited in sorted order so the integer label given to
    each person is the same on every run and machine; ``os.listdir`` on its
    own returns entries in arbitrary order, which would make a saved model's
    output indices impossible to map back to names reliably.

    Hidden entries (names starting with ".", such as ``.ipynb_checkpoints``
    or ``.DS_Store``) are ignored, as is anything inside a person folder that
    is not a regular file. Otherwise they would either become a bogus class
    or be handed to the image loader.

    Args:
        data_folder: Directory containing one sub-directory per person.
        target_size: Passed through to ``preprocess_image``.

    Returns:
        A tuple ``(images, labels, label_map)``:
        ``images`` is ``np.array`` of the preprocessed images,
        ``labels`` is ``np.array`` of integer class ids (one per image), and
        ``label_map`` maps each integer class id to the person's folder name.
    """
    images = []
    labels = []
    label_map = {}

    person_names = sorted(
        name
        for name in os.listdir(data_folder)
        if not name.startswith(".") and os.path.isdir(os.path.join(data_folder, name))
    )

    for current_label, person_name in enumerate(person_names):
        person_folder = os.path.join(data_folder, person_name)
        label_map[current_label] = person_name
        # Sorted so the sample order is reproducible as well.
        for filename in sorted(os.listdir(person_folder)):
            img_path = os.path.join(person_folder, filename)
            if filename.startswith(".") or not os.path.isfile(img_path):
                continue
            images.append(preprocess_image(img_path, target_size))
            labels.append(current_label)

    return np.array(images), np.array(labels), label_map

# Root of the training set; load_data treats each sub-directory as one person (class).
data_folder = "../data/faces3"
print("Loading dataset...")
# images: array of preprocessed images; labels: one integer class id per image;
# label_map: integer class id -> person (folder) name.
images, labels, label_map = load_data(data_folder)
print(f"Loaded {len(images)} images across {len(label_map)} classes.")


Loading dataset...
Loaded 611 images across 14 classes.


In [9]:
# Load the pre-trained FaceNet model (wrapper object from keras_facenet).
facenet = FaceNet()
# Keep a handle on the wrapper's `.model` attribute; later cells read its
# `.layers`, `.input` and `.output` to attach a classification head.
base_model = facenet.model
# Uncomment to print the architecture:
# base_model.summary()


In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout, Input
from tensorflow.keras.optimizers import Adam

# Transfer learning: keep the FaceNet weights fixed so that only the new
# classification head below is updated during training.
for layer in base_model.layers:
    layer.trainable = False

# Classification head stacked on the FaceNet output:
# flatten -> 128-unit ReLU layer -> 50% dropout -> softmax over the known people.
flattened = Flatten()(base_model.output)
hidden = Dense(128, activation='relu')(flattened)
x = Dropout(0.5)(hidden)
output_layer = Dense(len(label_map), activation='softmax')(x)

# Full network: FaceNet input through to the per-person probabilities.
model = Model(inputs=base_model.input, outputs=output_layer)

# Sparse categorical cross-entropy because the labels are integer class ids,
# not one-hot vectors.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)


In [30]:
from sklearn.model_selection import train_test_split

# Split data into training and validation sets.
# Stratify on the labels so every person appears in both splits in proportion
# to their number of images; with only a few dozen images per class a purely
# random split can leave a class under-represented in validation.
# Stratification needs at least 2 samples per class and at least one
# validation/training slot per class, so fall back to a plain random split
# when the dataset is too small for that.
class_counts = np.unique(labels, return_counts=True)[1]
can_stratify = bool(
    class_counts.size > 0
    and class_counts.min() >= 2
    and int(0.2 * len(labels)) >= class_counts.size
)
X_train, X_val, y_train, y_val = train_test_split(
    images,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels if can_stratify else None,
)

# Create data generators: light augmentation for training, none for validation.
train_datagen = ImageDataGenerator(rotation_range=10, width_shift_range=0.1, height_shift_range=0.1, horizontal_flip=True)
val_datagen = ImageDataGenerator()

# Build the batch iterators.
# The training iterator is seeded so shuffling and augmentation are repeatable;
# the validation iterator keeps the original order (metrics do not depend on
# order, and batches then line up with y_val).
train_generator = train_datagen.flow(X_train, y_train, batch_size=32, seed=42)
val_generator = val_datagen.flow(X_val, y_val, batch_size=32, shuffle=False)

# Train the model
print("Training model...")
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=20
)



Training model...
Epoch 1/20


  self._warn_if_super_not_called()


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 438ms/step - accuracy: 0.2907 - loss: 2.5644 - val_accuracy: 0.7724 - val_loss: 2.2885
Epoch 2/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 313ms/step - accuracy: 0.8405 - loss: 2.1149 - val_accuracy: 0.7967 - val_loss: 1.8376
Epoch 3/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 320ms/step - accuracy: 0.8806 - loss: 1.5874 - val_accuracy: 0.8211 - val_loss: 1.4002
Epoch 4/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 327ms/step - accuracy: 0.8964 - loss: 1.1237 - val_accuracy: 0.8374 - val_loss: 1.0497
Epoch 5/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 317ms/step - accuracy: 0.9117 - loss: 0.8168 - val_accuracy: 0.8780 - val_loss: 0.8068
Epoch 6/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 317ms/step - accuracy: 0.9590 - loss: 0.5837 - val_accuracy: 0.8943 - val_loss: 0.6543
Epoch 7/20
[1m16/16[0m [32m━━━━━━━━

In [31]:
# Save the trained model.
# Make sure the destination directory exists first, so a fresh checkout
# without ../models does not lose a finished training run to a
# missing-directory error at save time.
model_path = "../models/facenet_transfer_learning.keras"
os.makedirs(os.path.dirname(model_path), exist_ok=True)
model.save(model_path)
print("Model saved.")


Model saved.


In [32]:
# Load and preprocess test data
def load_test_data(test_folder, target_size=(160, 160)):
    """Load every image file directly inside ``test_folder``.

    Entries are processed in sorted filename order. Hidden entries (names
    starting with ".", such as ``.DS_Store`` or ``.ipynb_checkpoints``) and
    anything that is not a regular file are skipped instead of being passed
    to the image loader.

    Args:
        test_folder: Directory containing the test images.
        target_size: Passed through to ``preprocess_image``.

    Returns:
        A tuple ``(test_images, filenames)``: ``np.array`` of the
        preprocessed images and the list of their filenames, in the same
        order.
    """
    test_images = []
    filenames = []
    for filename in sorted(os.listdir(test_folder)):
        img_path = os.path.join(test_folder, filename)
        if filename.startswith(".") or not os.path.isfile(img_path):
            continue
        test_images.append(preprocess_image(img_path, target_size))
        filenames.append(filename)
    return np.array(test_images), filenames

# Read the test images; test_filenames is in the same order as test_images.
test_folder = "../data/faces4_test"
test_images, test_filenames = load_test_data(test_folder)

# Run the classifier and take the highest-scoring class index for each image.
predictions = model.predict(test_images)
predicted_labels = np.argmax(predictions, axis=1)

# label_map already maps class index -> person name; this copy only coerces
# the keys to plain ints so lookups with the predicted indices cannot miss.
label_map_reverse = dict((int(class_id), person) for class_id, person in label_map.items())
predicted_names = [label_map_reverse[class_id] for class_id in predicted_labels.tolist()]

# Show the predicted person for every test image.
print(f"Predicted Names: {predicted_names}")


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 804ms/step
Predicted Names: ['senne', 'senne', 'youssef', 'konrad', 'seppe', 'alper', 'senne', 'seppe', 'raul', 'raul', 'seppe', 'bart', 'lasse', 'seppe', 'florian', 'alper', 'daiane', 'raul', 'youssef', 'youssef', 'florian', 'akif', 'konrad', 'bart', 'akif', 'alper', 'seppe', 'konrad', 'senne', 'michiel', 'matthias', 'nelli', 'senne', 'youssef', 'matthias', 'seppe', 'bart', 'alper', 'akif', 'raul', 'youssef', 'seppe', 'senne', 'senne', 'akif', 'seppe', 'michiel', 'raul', 'youssef', 'seppe', 'nelli', 'seppe', 'matthias', 'daiane', 'alper', 'raul', 'youssef', 'youssef', 'seppe', 'alper', 'alper', 'lasse', 'michiel', 'lasse', 'lasse', 'raul', 'florian', 'matthias', 'bart', 'senne', 'lasse', 'senne', 'youssef', 'seppe', 'konrad', 'lasse', 'michiel', 'seppe', 'nelli', 'florian', 'senne', 'nelli', 'seppe', 'michiel', 'raul', 'youssef', 'lasse', 'youssef', 'akif', 'florian', 'matthias', 'matthias', 'matthias', 'florian', 'youssef'

In [33]:
import os
import pandas as pd

# Create submission file with grouped predictions and include missing images
def _base_image_name(face_filename, marker="_face"):
    """Return the source-image name that a face-crop filename was derived from."""
    # Split on the LAST marker so names that themselves contain "_face"
    # (e.g. "group_faces_face_0.jpg") keep their full stem.
    stem, found, _ = face_filename.rpartition(marker)
    if found:
        return stem
    # No crop suffix: drop the extension so the name can still match an image.
    return os.path.splitext(face_filename)[0]


def create_submission(filenames, predictions, output_path, image_folder, extensions=(".jpg",)):
    """Write a submission CSV with one row per image in ``image_folder``.

    Face-crop predictions are grouped by the image they came from (the part
    of the crop filename before the last ``"_face"``, e.g. ``"0037"`` for
    ``"0037_face_0.jpg"``). Each image found in ``image_folder`` gets a row
    whose ``label_name`` is its predictions joined with ``";"`` in input
    order, or ``"nothing"`` when no prediction belongs to it.

    Args:
        filenames: Face-crop filenames, one per prediction.
        predictions: Predicted label for each entry of ``filenames``.
        output_path: Destination CSV path; its directory is created if needed.
        image_folder: Folder whose image files define the rows of the CSV.
        extensions: File extension(s) that count as images in
            ``image_folder``; matched case-insensitively.

    Raises:
        ValueError: If ``filenames`` and ``predictions`` differ in length.
    """
    filenames = list(filenames)
    predictions = list(predictions)
    if len(filenames) != len(predictions):
        raise ValueError(
            f"filenames and predictions must have the same length "
            f"({len(filenames)} != {len(predictions)})"
        )

    # Group predictions by base filename
    grouped_results = {}
    for filename, prediction in zip(filenames, predictions):
        grouped_results.setdefault(_base_image_name(filename), []).append(str(prediction))

    # Ensure all images in the image folder are included
    if isinstance(extensions, str):
        extensions = (extensions,)
    wanted = tuple(ext.lower() for ext in extensions)
    all_filenames = sorted(
        os.path.splitext(name)[0]
        for name in os.listdir(image_folder)
        if name.lower().endswith(wanted)
    )

    # Prepare submission data; images without any face prediction get "nothing"
    submission_data = [
        {
            "image": name,
            "label_name": ";".join(grouped_results[name]) if name in grouped_results else "nothing",
        }
        for name in all_filenames
    ]

    # Save to CSV. Explicit columns keep the header even when there are no rows.
    submission_df = pd.DataFrame(submission_data, columns=["image", "label_name"])
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    submission_df.to_csv(output_path, index=False)
    print(f"Submission saved to {output_path}")

# Write the submission: one row per .jpg in image_folder, labelled with the
# names predicted for that image's face crops (or "nothing" when it has none).
image_folder = "../data/images/test_images/cleaned_images"
output_path = "../submission.csv"
create_submission(test_filenames, predicted_names, output_path, image_folder)


Submission saved to ../submission.csv
