In [3]:
import os
import cv2
import numpy as np
import pandas as pd

# Function to apply random transformations to an image
def random_transform(img):
    rows, cols = img.shape

    # Random rotation
    angle = np.random.uniform(-30, 30)
    M = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, 1)
    img = cv2.warpAffine(img, M, (cols, rows), borderMode=cv2.BORDER_REFLECT)

    # Random translation
    tx = np.random.uniform(-0.3 * cols, 0.3 * cols)
    ty = np.random.uniform(-0.3 * rows, 0.3 * rows)
    M = np.float32([[1, 0, tx], [0, 1, ty]])
    img = cv2.warpAffine(img, M, (cols, rows), borderMode=cv2.BORDER_REFLECT)

    return img

# Function to load and preprocess training data
def load_training_data(data_folder, target_size=(224, 224)):
    images = []
    labels = []
    label_map = {}
    current_label = 0

    for person_name in os.listdir(data_folder):
        person_folder = os.path.join(data_folder, person_name)
        if os.path.isdir(person_folder):
            label_map[current_label] = person_name
            for filename in os.listdir(person_folder):
                img_path = os.path.join(person_folder, filename)
                img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                if img is not None:
                    img_resized = cv2.resize(img, target_size)
                    augmented_imgs = [img_resized] + [
                        random_transform(img_resized) for _ in range(5)
                    ]
                    images.extend(augmented_imgs)
                    labels.extend([current_label] * len(augmented_imgs))
            current_label += 1

    return np.array(images), np.array(labels), label_map

# Path to the cropped faces folder
data_folder = "../data/faces3"

# Load training data
images, labels, label_map = load_training_data(data_folder)

print(f"Loaded {len(images)} images for training.")
print("Label Map:", label_map)

# Ensure images are in uint8 format (required for OpenCV)
images = images.astype('uint8')

# Train the FisherFaceRecognizer model
recognizer = cv2.face.FisherFaceRecognizer_create()

print("Training the FisherFaceRecognizer model...")
recognizer.train(images, np.array(labels))

# Save the trained model
model_path = "../data/fisherface_model.yml"
recognizer.write(model_path)
print(f"Model saved to {model_path}")

Loaded 3666 images for training.
Label Map: {0: 'matthias', 1: 'lasse', 2: 'akif', 3: 'bart', 4: 'florian', 5: 'daiane', 6: 'konrad', 7: 'senne', 8: 'michiel', 9: 'seppe', 10: 'youssef', 11: 'raul', 12: 'alper', 13: 'nelli'}
Training the FisherFaceRecognizer model...
Model saved to ../data/fisherface_model.yml


In [4]:
# Predict on test data
def predict_faces_on_folder(model_path, test_faces_folder, label_map, confidence_threshold=500):
    recognizer = cv2.face.FisherFaceRecognizer_create()
    recognizer.read(model_path)

    results = []

    for filename in os.listdir(test_faces_folder):
        test_image_path = os.path.join(test_faces_folder, filename)
        test_img = cv2.imread(test_image_path, cv2.IMREAD_GRAYSCALE)
        if test_img is None:
            print(f"Failed to read {test_image_path}, skipping.")
            continue

        test_img_resized = cv2.resize(test_img, (224, 224))
        label, confidence = recognizer.predict(test_img_resized)
        if confidence < confidence_threshold:
            person_name = label_map[label]
            results.append({
                "filename": filename,
                "predicted_label": person_name,
                "confidence": confidence
            })
        else:
            results.append({
                "filename": filename,
                "predicted_label": "unknown",
                "confidence": confidence
            })

    return results

# Paths
model_path = "../data/fisherface_model.yml"
test_faces_folder = "../data/faces4_test"
results = predict_faces_on_folder(model_path, test_faces_folder, label_map)

In [5]:
# Group results by image
def group_results_by_image(results):
    grouped_data = {}
    for result in results:
        base_filename = result["filename"].split("_face")[0]
        if base_filename not in grouped_data:
            grouped_data[base_filename] = []
        if result["predicted_label"] != "unknown":
            grouped_data[base_filename].append(result["predicted_label"].lower())

    grouped_results = [{"filename": filename, "predicted_labels": labels} for filename, labels in grouped_data.items()]
    return grouped_results

grouped_results = group_results_by_image(results)

# Create submission file
def create_submission_csv_from_grouped_results(grouped_results, output_csv_path, test_images_folder):
    all_filenames = sorted(
        [os.path.splitext(filename)[0] for filename in os.listdir(test_images_folder) 
         if filename.endswith(('.jpg', '.jpeg', '.png'))]
    )

    grouped_dict = {group["filename"].split('.')[0]: ";".join(group["predicted_labels"]) for group in grouped_results}

    submission_data = []
    for filename in all_filenames:
        label_name = grouped_dict.get(filename, "nothing")
        submission_data.append({"image": filename, "label_name": label_name})

    submission_df = pd.DataFrame(submission_data)
    submission_df.to_csv(output_csv_path, index=False)
    print(f"Submission file saved to {output_csv_path}")

# Example usage
test_images_folder = "../data/images/test_images/cleaned_images"
output_csv_path = "../submission.csv"
create_submission_csv_from_grouped_results(grouped_results, output_csv_path, test_images_folder)

Submission file saved to ../submission.csv
