In [None]:
import os
import pandas as pd
import numpy as np
import cv2
from mtcnn import MTCNN
from deepface import DeepFace
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Dataset locations, all resolved relative to the directory the notebook is
# started from so the notebook is not tied to one machine's home directory.
current_dir = os.getcwd()
crops_v_dir = os.path.join(current_dir, "gmdb_crops_v1.1.0/gmdb_crops")
images_v_dir = os.path.join(current_dir, "gmdb_images_v1.1.0/gmdb_images")
metadata_v_dir = os.path.join(current_dir, "gmdb_metadata_v1.1.0/gmdb_metadata")

# Logical table name -> file name inside `metadata_v_dir`.
# These must be bare file names: os.path.join() silently discards
# `metadata_v_dir` when the second argument is an absolute path.
metadata_files = {
    "frequent_gallery_images": "gmdb_frequent_gallery_images_v1.1.0.csv",
    "frequent_test_images": "gmdb_frequent_test_images_v1.1.0.csv",
    "rare_gallery_images": "gmdb_rare_gallery_images_v1.1.0.csv",
    "rare_test_images": "gmdb_rare_test_images_v1.1.0.csv",
    "syndromes": "gmdb_syndromes_v1.1.0.tsv",
    "test_images": "gmdb_test_images_v1.1.0.csv",
    "train_images": "gmdb_train_images_v1.1.0.csv",
    "val_images": "gmdb_val_images_v1.1.0.csv",
    "metadata": "image_metadata_v1.1.0.tsv",
}

# Load every table into a DataFrame; .tsv files are tab-separated, the rest
# are comma-separated.
metadata = {
    name: pd.read_csv(
        os.path.join(metadata_v_dir, fname),
        sep="\t" if fname.endswith(".tsv") else ",",
    )
    for name, fname in metadata_files.items()
}

# Syndromes included in this experiment: syndrome name -> numeric id.
# The ids are the values matched against the `label` column of the splits.
syndrome_id = {
    "KBG SYNDROME": 5,
    "ANGELMAN SYNDROME": 6,
    "22q11.2 DELETION SYNDROME": 61,
    "COFFIN-LOWRY SYNDROME": 76,
    "Cornelia de Lange syndrome": 0,
    "CROUZON SYNDROME": 33,
    "DOWN SYNDROME": 101,
    "FRAGILE X SYNDROME": 39,
    "Kabuki syndrome": 3,
    "MOWAT-WILSON SYNDROME": 23,
    "Noonan syndrome": 2,
    "PITT-HOPKINS SYNDROME": 16,
    "SMITH-LEMLI-OPITZ SYNDROME": 12,
    "WILLIAMS-BEUREN SYNDROME": 1,
    "WIEDEMANN-STEINER SYNDROME": 7,
}

# Rows of the syndrome table whose name is one of the selected syndromes,
# plus the name -> id pairs found in those rows.
syndrome_df = metadata["syndromes"]
syndrome_df = syndrome_df.loc[syndrome_df["syndrome_name"].isin(list(syndrome_id))]
syndrome_mapping = dict(zip(syndrome_df["syndrome_name"], syndrome_df["syndrome_id"]))

# Turn the image ids of each split into file names by appending ".jpg".
for key in ('train_images', 'val_images', 'test_images'):
    metadata[key]['image_id'] = metadata[key]['image_id'].map("{}.jpg".format)

filtered_syndrome_ids = set(syndrome_id.values())

# Keep only the rows of each split whose label is one of the selected ids.
filtered_train_images, filtered_val_images, filtered_test_images = (
    metadata[split][metadata[split]['label'].isin(filtered_syndrome_ids)]
    for split in ('train_images', 'val_images', 'test_images')
)

# Single MTCNN instance shared by every detect_and_align_face() call.
detector = MTCNN()

def detect_and_align_face(image_path):
    """Read an image, crop the first face MTCNN reports and resize it to 160x160.

    Despite the name, no rotation/landmark alignment is performed: the face
    bounding box is cropped and resized.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A 160x160 RGB ``numpy`` array with the face crop, or ``None`` when the
        image cannot be read, no face is found, the reported box has no
        overlap with the image, or any other error occurs (the reason is
        printed).
    """
    try:
        img = cv2.imread(image_path)
        if img is None:
            print(f"Failed to read image: {image_path}")
            return None
        # OpenCV loads BGR; the detector is fed RGB.
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        faces = detector.detect_faces(img_rgb)
        if len(faces) == 0:
            print(f"No face detected in: {image_path}")
            return None

        face = faces[0]
        x, y, w, h = (int(v) for v in face['box'])

        # The reported box may extend past the image border. A negative start
        # index would make the slice wrap around (usually producing an empty
        # crop), so clamp the box to the image before cropping.
        img_h, img_w = img_rgb.shape[:2]
        x0, y0 = max(x, 0), max(y, 0)
        x1, y1 = min(x + w, img_w), min(y + h, img_h)
        if x1 <= x0 or y1 <= y0:
            print(f"Face box lies outside the image in: {image_path}")
            return None

        face_img = img_rgb[y0:y1, x0:x1]
        return cv2.resize(face_img, (160, 160))
    except Exception as e:
        # Best-effort processing: report the problem and let the caller skip
        # this image instead of aborting the whole run.
        print(f"Error processing {image_path}: {e}")
        return None

def extract_embeddings(face_img):
    """Compute the ArcFace embedding of a face crop with DeepFace.

    Args:
        face_img: Face crop as an RGB ``numpy`` array, which is what
            ``detect_and_align_face`` returns.

    Returns:
        The embedding of the first face DeepFace reports, or ``None`` when
        DeepFace returns nothing or raises (the reason is printed).
    """
    try:
        # DeepFace documents ndarray input as BGR (the OpenCV convention), so
        # the RGB crop has to be converted back before it is embedded;
        # otherwise the red and blue channels are swapped for the model.
        if isinstance(face_img, np.ndarray) and face_img.ndim == 3 and face_img.shape[2] == 3:
            face_img = cv2.cvtColor(face_img, cv2.COLOR_RGB2BGR)

        embeddings = DeepFace.represent(face_img, model_name='ArcFace', enforce_detection=False)
        if not embeddings:
            print("Error in embedding extraction: DeepFace returned no representation")
            return None
        return embeddings[0]['embedding']
    except Exception as e:
        # Best-effort: the caller skips images for which no embedding exists.
        print(f"Error in embedding extraction: {e}")
        return None

def process_images_and_labels(image_dir, metadata_df):
    """Build embedding and label arrays for every usable image of a split.

    Each row of ``metadata_df`` supplies an ``image_id`` (file name inside
    ``image_dir``) and a ``label``. Images for which no face crop or no
    embedding can be produced are reported and skipped.

    Args:
        image_dir: Directory that contains the image files.
        metadata_df: DataFrame with ``image_id`` and ``label`` columns.

    Returns:
        Tuple ``(embeddings, labels)`` of ``numpy`` arrays holding one entry
        per successfully embedded image, in row order.
    """
    collected_vectors, collected_labels = [], []

    for _, record in metadata_df.iterrows():
        path = os.path.join(image_dir, record['image_id'])

        face_crop = detect_and_align_face(path)
        if face_crop is None:
            print(f"No face detected in: {path}")
            continue

        vector = extract_embeddings(face_crop)
        if vector is None:
            print(f"Failed to extract embedding for: {path}")
            continue

        collected_vectors.append(vector)
        collected_labels.append(record['label'])

    return np.array(collected_vectors), np.array(collected_labels)

# Embed every split. Images without a usable face or embedding are skipped,
# so each array can be shorter than its metadata frame.
train_embeddings, train_labels = process_images_and_labels(images_v_dir, filtered_train_images)
val_embeddings, val_labels = process_images_and_labels(images_v_dir, filtered_val_images)
test_embeddings, test_labels = process_images_and_labels(images_v_dir, filtered_test_images)

# Fail early with a clear message instead of an obscure shape error from
# scikit-learn when a split produced no embeddings at all.
assert len(train_embeddings) > 0, "No embeddings were extracted for the training split"
assert len(val_embeddings) > 0, "No embeddings were extracted for the validation split"
assert len(test_embeddings) > 0, "No embeddings were extracted for the test split"

# probability=True fits the probability model with an internal, shuffled
# cross-validation; random_state pins that shuffling so predict_proba (and
# every metric derived from it) is reproducible across runs.
classifier = SVC(kernel='rbf', probability=True, random_state=42)
classifier.fit(train_embeddings, train_labels)

val_preds = classifier.predict(val_embeddings)
test_preds = classifier.predict(test_embeddings)

print("Validation Accuracy:", accuracy_score(val_labels, val_preds))
print("Test Accuracy:", accuracy_score(test_labels, test_preds))
print("\nClassification Report:\n", classification_report(test_labels, test_preds))
print("\nConfusion Matrix:\n", confusion_matrix(test_labels, test_preds))


In [None]:
from sklearn.metrics import top_k_accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

# Headline test-set metrics. The classification report is built once and its
# macro-average row reused, instead of recomputing the report per metric.
accuracy = accuracy_score(test_labels, test_preds)
macro_avg_scores = classification_report(test_labels, test_preds, output_dict=True)['macro avg']
macro_precision = macro_avg_scores['precision']
macro_recall = macro_avg_scores['recall']
macro_f1 = macro_avg_scores['f1-score']

# The columns of predict_proba follow classifier.classes_. Passing them as
# `labels` keeps the score columns matched to the right classes even when
# the test split does not contain every class the classifier was trained on.
top5_accuracy = top_k_accuracy_score(
    test_labels,
    classifier.predict_proba(test_embeddings),
    k=5,
    labels=classifier.classes_,
)

print(f"Overall Accuracy: {accuracy:.4f}")
print(f"Macro Precision: {macro_precision:.4f}")
print(f"Macro Recall: {macro_recall:.4f}")
print(f"Macro F1-Score: {macro_f1:.4f}")
print(f"Top-5 Accuracy: {top5_accuracy:.4f}")

# Class ids that actually occur in the test labels or predictions, in sorted
# order. The same ordered list is passed as `labels` to both the report and
# the confusion matrix, so every name, row and column refers to the same
# class. (Iterating the `filtered_syndrome_ids` set gives an arbitrary order
# and attaches names to the wrong classes.)
class_ids = np.unique(np.concatenate([test_labels, test_preds]))
class_labels = [str(i) for i in class_ids]

class_report = classification_report(
    test_labels,
    test_preds,
    labels=class_ids,
    target_names=class_labels,
    output_dict=True,
)

conf_matrix = confusion_matrix(test_labels, test_preds, labels=class_ids)
plt.figure(figsize=(10, 7))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", xticklabels=class_labels, yticklabels=class_labels)
plt.title("Confusion Matrix")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()

# Per-class metrics, listed in the same order as `class_labels`.
precision = [class_report[label]['precision'] for label in class_labels]
recall = [class_report[label]['recall'] for label in class_labels]
f1_score = [class_report[label]['f1-score'] for label in class_labels]
support = [class_report[label]['support'] for label in class_labels]

# One bar chart per metric, side by side.
fig, ax = plt.subplots(1, 3, figsize=(18, 6))

ax[0].bar(class_labels, precision, color='royalblue')
ax[0].set_title('Precision by Class')
ax[0].set_xlabel('Class')
ax[0].set_ylabel('Precision')

ax[1].bar(class_labels, recall, color='orange')
ax[1].set_title('Recall by Class')
ax[1].set_xlabel('Class')
ax[1].set_ylabel('Recall')

ax[2].bar(class_labels, f1_score, color='green')
ax[2].set_title('F1-Score by Class')
ax[2].set_xlabel('Class')
ax[2].set_ylabel('F1-Score')

plt.tight_layout()
plt.show()

# Collect the three macro-averaged test metrics and show them in one bar chart.
macro_avg = dict(
    zip(
        ('precision', 'recall', 'f1-score'),
        (macro_precision, macro_recall, macro_f1),
    )
)

fig, ax = plt.subplots(figsize=(6, 6))
ax.bar(
    list(macro_avg.keys()),
    list(macro_avg.values()),
    color=['royalblue', 'orange', 'green'],
)
ax.set(title='Macro Averages', ylabel='Score')
plt.show()