In [None]:
from deepface import DeepFace
import pandas as pd
import os
import cv2
import matplotlib.pyplot as plt

# Switch the working directory first: the relative paths below are resolved
# against it.
os.chdir('/folder_path')

# Face library directory and the CSV file that receives the results.
face_library = "/face_library_folder"
csv_output_filename = "./face_tags.csv"

# Every .jpg/.jpeg/.png entry directly inside the input folder (extension
# matched case-insensitively), as joined paths in sorted order.
image_input_folder = "./image_folder"
image_input_files = sorted(
    os.path.join(image_input_folder, entry_name)
    for entry_name in os.listdir(image_input_folder)
    if entry_name.lower().endswith(('.jpg', '.jpeg', '.png'))
)

# Recognition model names; the script selects one by list index.
models = [
    "VGG-Face", "Facenet", "Facenet512", "OpenFace", "DeepFace",
    "DeepID", "ArcFace", "Dlib", "SFace", "GhostFaceNet",
]

# Face-detector backend names; also selected by list index.
backends = [
    'opencv', 'ssd', 'dlib', 'mtcnn', 'fastmtcnn', 'retinaface', 'mediapipe',
    'yolov8', 'yolov11s', 'yolov11n', 'yolov11m', 'yunet', 'centerface',
]

# Choices for the `align` argument; index 0 enables alignment.
alignment_modes = [True, False]

# A dominant gender/emotion label is kept only if its probability reaches
# the matching threshold; otherwise it is reported as "Uncertain".
GENDER_THRESHOLD = 50.0
EMOTION_THRESHOLD = 20.0

# Number of per-face column groups written for each image.
MAX_FACES = 20

def extract_attributes(df, *, emotion_threshold=None, gender_threshold=None):
    """Summarise one face's attribute-analysis result as a flat dict.

    Args:
        df: Dict-like result for a single face. The keys read are
            'dominant_emotion', 'emotion', 'dominant_gender', 'gender',
            'age' and 'race'; 'emotion' and 'gender' are expected to map a
            label to its probability. ``None`` and missing keys are tolerated.
        emotion_threshold: Minimum probability for the dominant emotion to be
            kept. Defaults to the module-level ``EMOTION_THRESHOLD``.
        gender_threshold: Minimum probability for the dominant gender to be
            kept. Defaults to the module-level ``GENDER_THRESHOLD``.

    Returns:
        A dict with the keys 'dominant_emotion', 'emotion_probability',
        'dominant_gender', 'gender_probability', 'age' and
        'race_probabilities'. A label whose probability is missing or below
        its threshold is reported as "Uncertain" with probability ``None``.
    """
    if df is None:
        return {
            'dominant_emotion': "Uncertain", 'emotion_probability': None,
            'dominant_gender': "Uncertain", 'gender_probability': None,
            'age': None,
            'race_probabilities': {}
        }

    # Resolve the defaults at call time so later changes to the module-level
    # constants are honoured.
    if emotion_threshold is None:
        emotion_threshold = EMOTION_THRESHOLD
    if gender_threshold is None:
        gender_threshold = GENDER_THRESHOLD

    dominant_emotion = df.get('dominant_emotion', "Uncertain")
    dominant_gender = df.get('dominant_gender', "Uncertain")

    # Read the probability maps defensively: a result without 'emotion' or
    # 'gender' yields "Uncertain" instead of raising KeyError.
    emotion_scores = df.get('emotion') or {}
    gender_scores = df.get('gender') or {}
    emotion_probability = emotion_scores.get(dominant_emotion)
    gender_probability = gender_scores.get(dominant_gender)

    if emotion_probability is None or emotion_probability < emotion_threshold:
        dominant_emotion = "Uncertain"
        emotion_probability = None
    if gender_probability is None or gender_probability < gender_threshold:
        dominant_gender = "Uncertain"
        gender_probability = None

    return {
        'dominant_emotion': dominant_emotion,
        'emotion_probability': emotion_probability,
        'dominant_gender': dominant_gender,
        'gender_probability': gender_probability,
        'age': df.get('age', None),
        'race_probabilities': df.get('race') or {}
    }

# Largest distance still accepted as a match; anything farther is "Unknown".
MATCH_THRESHOLD = 1.0


def clean_identity_and_extract_best_match(df, library_root=None):
    """Return ``(label, distance)`` taken from the first row of a match table.

    The label is the first path component of the row's 'identity' value once
    the library root has been removed from it.

    Args:
        df: pandas DataFrame with 'identity' and 'distance' columns, or
            ``None``. Only the first row is read, and the frame is not
            modified. Assumes the first row is the closest match — TODO
            confirm against the ordering DeepFace.find guarantees.
        library_root: Path text removed from the identity before the label is
            taken. Defaults to the module-level ``face_library``.

    Returns:
        ``(None, None)`` when ``df`` is ``None`` or empty;
        ``("Unknown", None)`` when the first row's distance exceeds
        ``MATCH_THRESHOLD``; otherwise ``(label, distance)``, where label is
        ``None`` if the identity has no usable path component.
    """
    if df is None or df.empty:
        return None, None
    if library_root is None:
        library_root = face_library

    # Read the top row directly instead of rewriting the whole 'identity'
    # column: the caller's frame stays untouched and no per-row work is wasted.
    top_row = df.iloc[0]
    identity = top_row['identity']
    distance = top_row['distance']
    if distance > MATCH_THRESHOLD:
        return "Unknown", None

    best_match = None
    if isinstance(identity, str):
        # Normalise backslashes so Windows-style identity paths split as well.
        relative = identity.replace(library_root, "").replace("\\", "/")
        best_match = next((part for part in relative.split("/") if part), None)
    return best_match, distance

# Race labels that each get a probability column in every face slot.
race_labels = ["asian", "indian", "black", "white", "middle eastern", "latino hispanic"]

# CSV header: the image name, then one group of columns per face slot
# (numbered from 1) holding the match, the attributes and the race scores.
columns = ["image_name"]
for i in range(1, MAX_FACES + 1):
    columns += [
        f"best_match_{i}",
        f"distance_{i}",
        f"emotion_{i}",
        f"emotion_probability_{i}",
        f"gender_{i}",
        f"gender_probability_{i}",
        f"age_{i}",
    ]
    for race in race_labels:
        columns += [f"{race}_probability_{i}"]

# Write a header-only CSV the first time; an existing file is left as is.
csv_already_present = os.path.exists(csv_output_filename)
if not csv_already_present:
    header_only = pd.DataFrame(columns=columns)
    header_only.to_csv(csv_output_filename, index=False)

# Main pass: for every input image, run attribute analysis and a face-library
# lookup, flatten up to MAX_FACES faces into one row and append it to the CSV.
# Any failure for one image is reported and the loop moves on to the next.
for img_path in image_input_files:
    try:
        try:
            demography = DeepFace.analyze(
                img_path,
                detector_backend=backends[5],
                align=alignment_modes[0],
                enforce_detection=False,
            )
        except Exception as e:
            # Report the real cause: enforce_detection=False is passed above,
            # so a failure here is not necessarily a missing face.
            print(f"Warning: attribute analysis failed for {img_path}: {e}. Skipping analysis.")
            demography = []

        # NOTE(review): find() is called without enforce_detection=False, so it
        # presumably still raises for images with no detectable face, and the
        # outer handler then skips the row — confirm this is intended.
        df = DeepFace.find(
            img_path,
            face_library,
            detector_backend=backends[5],
            align=alignment_modes[0],
            model_name=models[2],
            threshold=0.4,
        )

        image_name = os.path.basename(img_path)

        # Make truncation visible instead of silently dropping extra faces.
        detected = max(len(demography), len(df))
        if detected > MAX_FACES:
            print(f"Warning: {image_name} has {detected} faces; only the first {MAX_FACES} are recorded.")

        result = {"image_name": image_name}
        for i in range(MAX_FACES):
            # Attributes and identity are filled independently, so a failed
            # analysis no longer throws away the recognition result.
            # NOTE(review): assumes analyze() and find() list the faces in the
            # same order — confirm.
            attributes = extract_attributes(demography[i]) if i < len(demography) else None
            best_match = clean_identity_and_extract_best_match(df[i]) if i < len(df) else (None, None)
            result.update({
                f"best_match_{i+1}": best_match[0],
                f"distance_{i+1}": best_match[1],
                f"emotion_{i+1}": attributes['dominant_emotion'] if attributes else None,
                f"emotion_probability_{i+1}": attributes['emotion_probability'] if attributes else None,
                f"gender_{i+1}": attributes['dominant_gender'] if attributes else None,
                f"gender_probability_{i+1}": attributes['gender_probability'] if attributes else None,
                f"age_{i+1}": attributes['age'] if attributes else None
            })
            for race in race_labels:
                result[f"{race}_probability_{i+1}"] = attributes['race_probabilities'].get(race, None) if attributes else None

        # Pin the field order to the CSV header rather than relying on the
        # insertion order of the dict; the row is appended without a header.
        df_temp = pd.DataFrame([result], columns=columns)
        df_temp.to_csv(csv_output_filename, mode='a', header=False, index=False)
        print(f"Processed {image_name} and updated {csv_output_filename}")
    except Exception as e:
        print(f"Error processing {img_path}: {e}")
