In [1]:
### Cell 1: Import Libraries and Load Training Data
import pandas as pd
import os
import cv2
import numpy as np
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator

# Function to load and preprocess training data
def load_training_data(data_folder, target_size=(224, 224)):
    images = []
    labels = []
    label_map = {}
    current_label = 0
    data_gen = ImageDataGenerator(
        rotation_range=30,
        width_shift_range=0.3,
        height_shift_range=0.3,
        shear_range=0.3,
        zoom_range=0.3,
        horizontal_flip=True,
        fill_mode='nearest'
    )

    for person_name in os.listdir(data_folder):
        person_folder = os.path.join(data_folder, person_name)
        if os.path.isdir(person_folder):
            label_map[current_label] = person_name
            for filename in os.listdir(person_folder):
                img_path = os.path.join(person_folder, filename)
                img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                if img is not None:
                    img_resized = cv2.resize(img, target_size)
                    img_rgb = cv2.cvtColor(img_resized, cv2.COLOR_GRAY2RGB)
                    augmented_imgs = [img_rgb] + [
                        data_gen.random_transform(img_rgb) for _ in range(5)
                    ]
                    images.extend(augmented_imgs)
                    labels.extend([current_label] * len(augmented_imgs))
            current_label += 1

    return np.array(images), np.array(labels), label_map

# Load training data
data_folder = "../data/faces3"
images, labels, label_map = load_training_data(data_folder)

print(f"Loaded {len(images)} images for training.")

### Cell 2: Create and Train the Enhanced Model
def create_efficientnet_model(num_classes, input_shape=(224, 224, 3)):
    base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=input_shape)
    for layer in base_model.layers:
        layer.trainable = False

    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.4)(x)
    predictions = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=base_model.input, outputs=predictions)
    model.compile(optimizer=Adam(learning_rate=0.0001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Create and train the model
num_classes = len(label_map)
model = create_efficientnet_model(num_classes)

print("Training the EfficientNet model...")
history = model.fit(images, labels, batch_size=32, epochs=10, validation_split=0.2)

# Save the trained model
model_path = "../data/efficientnet_model.h5"
model.save(model_path)
print(f"Model saved to {model_path}")




2025-01-10 22:24:43.585268: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


ImportError: cannot import name '_initialize_variables' from 'keras.src.backend' (/home/neyon/anaconda3/lib/python3.9/site-packages/keras/src/backend/__init__.py)

In [None]:
### Cell 3: Load Model and Predict on Test Data
from tensorflow.keras.models import load_model

def predict_faces_on_folder(model_path, test_faces_folder, label_map, target_size=(224, 224)):
    model = load_model(model_path)

    results = []

    for filename in os.listdir(test_faces_folder):
        test_image_path = os.path.join(test_faces_folder, filename)

        test_img = cv2.imread(test_image_path)
        if test_img is None:
            print(f"Failed to read {test_image_path}, skipping.")
            continue

        test_img_resized = cv2.resize(test_img, target_size)
        test_img_preprocessed = np.expand_dims(test_img_resized, axis=0) / 255.0

        predictions = model.predict(test_img_preprocessed)
        label_idx = np.argmax(predictions)
        confidence = predictions[0][label_idx]
        person_name = label_map[label_idx]

        results.append({
            "filename": filename,
            "predicted_label": person_name,
            "confidence": confidence
        })

    return results

# Predict on test data
model_path = "../data/efficientnet_model.h5"
test_faces_folder = "../data/faces4_test"
results = predict_faces_on_folder(model_path, test_faces_folder, label_map)




In [None]:
print(results)

[{'filename': '0039_face_0.jpg', 'predicted_label': 'daiane', 'confidence': 0.21396542}, {'filename': '0381_face_1.jpg', 'predicted_label': 'daiane', 'confidence': 0.21287395}, {'filename': '0779_face_0.jpg', 'predicted_label': 'daiane', 'confidence': 0.21369146}, {'filename': '0174_face_0.jpg', 'predicted_label': 'daiane', 'confidence': 0.21420985}, {'filename': '0045_face_0.jpg', 'predicted_label': 'daiane', 'confidence': 0.21395609}, {'filename': '0676_face_1.jpg', 'predicted_label': 'daiane', 'confidence': 0.21331804}, {'filename': '0794_face_0.jpg', 'predicted_label': 'daiane', 'confidence': 0.21370153}, {'filename': '0615_face_1.jpg', 'predicted_label': 'daiane', 'confidence': 0.21378538}, {'filename': '0676_face_0.jpg', 'predicted_label': 'daiane', 'confidence': 0.21490164}, {'filename': '0472_face_1.jpg', 'predicted_label': 'daiane', 'confidence': 0.21427628}, {'filename': '0770_face_0.jpg', 'predicted_label': 'daiane', 'confidence': 0.21452568}, {'filename': '0245_face_0.jpg',

In [None]:
### Cell 4: Group Results by Image
def group_results_by_image(results, confidence_threshold=0.01):
    grouped_data = {}
    for result in results:
        base_filename = result["filename"].split("_face")[0]
        if result["confidence"] < confidence_threshold:
            continue
        if base_filename not in grouped_data:
            grouped_data[base_filename] = []
        grouped_data[base_filename].append(result["predicted_label"].lower())

    grouped_results = [{"filename": filename, "predicted_labels": labels} for filename, labels in grouped_data.items()]
    return grouped_results

grouped_results = group_results_by_image(results)

### Cell 5: Create Submission File
def create_submission_csv_from_grouped_results(grouped_results, output_csv_path, test_images_folder):
    all_filenames = sorted(
        [os.path.splitext(filename)[0] for filename in os.listdir(test_images_folder) 
         if filename.endswith((".jpg", ".jpeg", ".png"))]
    )

    grouped_dict = {group["filename"].split(".")[0]: ";".join(group["predicted_labels"]) for group in grouped_results}

    submission_data = []
    for filename in all_filenames:
        label_name = grouped_dict.get(filename, "nothing")
        submission_data.append({"image": filename, "label_name": label_name})

    submission_df = pd.DataFrame(submission_data)
    submission_df.to_csv(output_csv_path, index=False)
    print(f"Submission file saved to {output_csv_path}")

# Example usage
test_images_folder = "../data/images/test_images/cleaned_images"
output_csv_path = "../submission.csv"
create_submission_csv_from_grouped_results(grouped_results, output_csv_path, test_images_folder)


Submission file saved to ../submission.csv
