---
# Library

In [None]:
#pip install opencv-python numpy face_recognition dlib-bin face_recognition_models scikit-image deepface

In [1]:
import argparse
import cv2
import face_recognition
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import random
import re
import zipfile
from skimage import color
from numpy.linalg import norm
import math

from functions import (get_face_embedding_fr, change_skin_color, 
                        cosine_similarity)


In [None]:
def _parse_target_rgb(spec):
    """Parse an 'R,G,B' string into a tuple of three ints, each in 0..255.

    Raises:
        ValueError: if a component is not an integer, if there are not
            exactly three components, or if any component is outside 0..255.
    """
    channels = tuple(int(part) for part in spec.split(','))
    if len(channels) != 3:
        raise ValueError(
            f"target must have exactly 3 comma-separated values (R,G,B), got {spec!r}"
        )
    if any(not 0 <= channel <= 255 for channel in channels):
        raise ValueError(f"target RGB values must be within 0-255, got {spec!r}")
    return channels


def main(args):
    """Tint the skin of the face in one image and compare face embeddings.

    Reads ``args.input``, computes a baseline embedding, applies
    ``change_skin_color`` with the requested target colour and strength,
    recomputes the embedding on the tinted image and prints the cosine
    similarity between the two. Intermediate images are written next to the
    ``args.out`` prefix.

    Args:
        args: object with the attributes ``input`` (image path), ``out``
            (output file prefix), ``target`` ('R,G,B' string) and
            ``strength`` (float passed through to ``change_skin_color``).

    Raises:
        ValueError: if ``args.target`` is not three integers in 0..255.
    """
    img_path = args.input
    out_prefix = args.out
    # Validate up front so a malformed colour fails before any file is written.
    target_rgb = _parse_target_rgb(args.target)
    strength = args.strength

    img = cv2.imread(img_path)
    if img is None:
        print("Could not read image", img_path)
        return

    # 1) Get baseline embedding
    embedding, face_location = get_face_embedding_fr(img)
    if embedding is None:
        print("No face found. Try another image.")
        return
    print("Baseline embedding found. Face location:", face_location)

    # Save a crop of the face for inspection.
    # NOTE(review): presumably (top, right, bottom, left) pixel coordinates,
    # as the slicing below uses them -- confirm against get_face_embedding_fr.
    t, r, b, l = face_location
    face_crop = img[t:b, l:r]
    cv2.imwrite(f"{out_prefix}_face_orig.jpg", face_crop)

    # 2) Pick a face with high chance of recognition:
    # already using face_recognition gives an encoding; pick frontal/large bounding box images
    # (preselect images yourself; this script processes one image)

    # 3) Modify skin color
    new_img = change_skin_color(img, face_location, target_rgb=target_rgb, strength=strength)
    # Both files receive the same full tinted image; both names are kept so
    # existing consumers of either output keep working.
    cv2.imwrite(f"{out_prefix}_tinted.jpg", new_img)
    cv2.imwrite(f"{out_prefix}_full_tinted.jpg", new_img)

    # 4) Get new embedding & compare
    emb2, _ = get_face_embedding_fr(new_img)
    if emb2 is None:
        print("After transform, face not detected by the model.")
        # The tinted image was already saved above, so just exit.
        return

    # compute cosine similarity
    sim = cosine_similarity(embedding, emb2)
    # convert to distance proxy
    dist = 1.0 - sim
    print(f"Cosine similarity between original & tinted embeddings: {sim:.4f}  (1-sim = {dist:.4f})")

    # Save the tinted face crop too, using the original face box so both
    # crops cover the same region.
    face_crop2 = new_img[t:b, l:r]
    cv2.imwrite(f"{out_prefix}_face_tinted.jpg", face_crop2)

    # Optional: print simple threshold check
    threshold = 0.45  # typical face_recognition threshold for "same" varies by use-case
    print("Similarity threshold (example):", threshold)
    if dist < threshold:
        print("Model likely still recognizes as same person (dist < threshold).")
    else:
        print("Model may no longer consider it the same (dist >= threshold).")

if __name__ == "__main__":
    # Command-line entry point: two positional paths plus optional tint settings.
    parser = argparse.ArgumentParser()
    argument_specs = (
        (("input",), {"help": "input image path"}),
        (("out",), {"help": "output prefix"}),
        (("--target",), {"default": "255,200,0", "help": "target RGB as 'R,G,B' (0-255)"}),
        (("--strength",), {"type": float, "default": 0.85, "help": "0..1 how strong the tint is"}),
    )
    for flags, options in argument_specs:
        parser.add_argument(*flags, **options)
    args = parser.parse_args()
    main(args)

---

# CREATE DATASET 

## Get default faces

Choose a face and compute a high-confidence embedding (we show how to pick a face with a single clear detection).

Dataset: UTKFace (easiest to use and labeled by ethnicity). Image files are named `[age]_[gender]_[race]_[date&time].jpg`:
- [age]: integer from 0 to 116, indicating age
- [gender]: either 0 (male) or 1 (female)
- [race]: integer from 0 to 4, denoting White, Black, Asian, Indian, and Others (e.g. Hispanic, Latino, Middle Eastern)
- [date&time]: format of yyyymmddHHMMSSFFF, showing the date and time an image was collected to UTKFace


Sample size: 25 (5 White, 5 Black, 5 Asian, 5 Indian, 5 Latino)

Control: Same sex (male), similar age range (28–34)

Use case: Color variation bias experiment

Ethical note: Only public research datasets, no scraped Google images

In [None]:
# Extract the matching images from the zipped dataset.

# --- Configs --- #
df_path = r"C:/Users/Daniela/Desktop/Fac/M.IA/ano_1/semestre_1/IAS/Projeto_Individual/UTKFace_zipedfolder.zip"

filteed_folder = "../UTK_filtered"
target_age_range = (28, 34)  # inclusive (min, max) age
target_gender = 0  # compared against the second field of the file name
SAMPLE_SIZE = 5

# Race code (third field of the file name) -> name of the output sub-folder.
race_labels = {
    0: 'white',
    1: 'Black',
    2: 'Asian',
    3: 'Indian',
    4: 'Others'
}

os.makedirs(filteed_folder, exist_ok=True)
# Captures age, gender and race from the start of the file name.
pattern = re.compile(r"^(\d+)_([01])_([0-4])_")

selected_files = []

# --- Main --- #
with zipfile.ZipFile(df_path, 'r') as zf:
    # Pass 1: collect every archive member that matches the filters.
    for member in zf.namelist():
        member_lc = member.lower()
        if not member_lc.endswith(".jpg") or 'utkface_aligned_cropped' not in member_lc:
            continue

        parsed = pattern.match(os.path.basename(member))
        if parsed is None:
            continue

        age, gender, race = (int(field) for field in parsed.groups())
        if gender != target_gender:
            continue
        if not (target_age_range[0] <= age <= target_age_range[1]):
            continue
        selected_files.append((member, race))

    print(f"Found {len(selected_files)} images of ({target_age_range[0]} to {target_age_range[1]})-year-old males.")

    # Pass 2: copy each selected member into a per-race sub-folder.
    for member, race in selected_files:
        dest_dir = os.path.join(filteed_folder, race_labels.get(race, 'unknown'))
        os.makedirs(dest_dir, exist_ok=True)

        dest_path = os.path.join(dest_dir, os.path.basename(member))
        with zf.open(member) as src, open(dest_path, 'wb') as dst:
            dst.write(src.read())


print(f" Extracted {len(selected_files)} images to '{filteed_folder}'")


Found 2197 images of (28 to 34)-year-old males.
✅ Extracted 2197 images to '../UTK_filtered'


When checking the extracted images, I noticed that a small subset of them were misclassified (there were images of females, and some images would be classified as a different race; for example, Indians appeared in the Asian folder). For that reason, instead of randomly selecting 5 photos from each group, the images were selected manually.

Note: even though India is in Asia, because of the marked differences observed between Arabs, Indians and Russians and the rest of Asia (Indonesia, China, Mongolia, ...), the former are classified into different categories (Others, Indian and White, respectively).

In [None]:

# Pick up to SAMPLE_SIZE random images per race group and preview them in a grid.
#
# NOTE(review): df_path is set to the zip archive in the config cell, and
# os.listdir() cannot list a zip file. When df_path is not a directory the
# images are read from the folder the extraction cell wrote to instead --
# confirm this is the intended image source.
source_dir = df_path if os.path.isdir(df_path) else filteed_folder

race_groups = {key: [] for key in race_labels}

# Walk recursively because the extraction cell stores images in per-race
# sub-folders.
for root, _dirs, files in os.walk(source_dir):
    for f in files:
        if not f.endswith(".jpg"):
            continue
        try:
            # race_groups is keyed by int, so the file-name field must be
            # converted before the membership test.
            race_code = int(f.split("_")[2])
        except (IndexError, ValueError):
            # File name does not follow the age_gender_race_date pattern.
            continue
        if race_code in race_groups:
            race_groups[race_code].append(os.path.join(root, f))

sample_images = {}
for race_code, paths in race_groups.items():
    label = race_labels[race_code]
    if not paths:
        print(f"No images found for race group '{label}'; skipping.")
        continue
    if len(paths) < SAMPLE_SIZE:
        print(f"Only {len(paths)} images available for race group '{label}'.")
    # random.sample raises if asked for more items than are available.
    sample_images[label] = random.sample(paths, min(SAMPLE_SIZE, len(paths)))


# --- Show Samples Selected ---
if sample_images:
    # squeeze=False keeps axes two-dimensional even with a single row or column.
    fig, axes = plt.subplots(len(sample_images), SAMPLE_SIZE, figsize=(15, 10), squeeze=False)
    fig.suptitle("Random UTKFace Samples (5 per race group)", fontsize=16)

    # Hide ticks and frames by hand: axis("off") would also hide the row
    # label set with set_ylabel below.
    for ax in axes.flat:
        ax.set_xticks([])
        ax.set_yticks([])
        for spine in ax.spines.values():
            spine.set_visible(False)

    for i, (race, imgs) in enumerate(sample_images.items()):
        axes[i, 0].set_ylabel(race, rotation=0, labelpad=40, fontsize=12)
        for j, img_path in enumerate(imgs):
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread returns None for unreadable files.
                print("Could not read image", img_path)
                continue
            axes[i, j].imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    plt.tight_layout()
    plt.show()
else:
    print("No images available to sample from", source_dir)

In [None]:
# Turn the {race: [paths]} mapping into one row per image and save it as CSV.
records = [
    {"race": race, "path": path}
    for race, imgs in sample_images.items()
    for path in imgs
]

df = pd.DataFrame(records)
df.to_csv("selected_utkface_sample.csv", index=False)

print("✅ Saved selected sample to selected_utkface_sample.csv")
display(df)

## Change face colours

Change the skin color programmatically with a function that (a) finds a skin-region mask from facial landmarks and (b) shifts the skin pixels toward a target color (any RGB you pass).

---
# Test Facial Recognition of Models

Do per model:
- for the same colour but different tonalities
- check the different colours together
- compare lighter and darker tones
- compare the same colours but different races

For all models:
- check ability to predict