<a href="https://colab.research.google.com/github/Devasy23/FaceRec/blob/feat%2FModel-evaluation/Model-Training/Evaluate_model_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Pre-setup


In [1]:
!pip install deepface

Collecting deepface
  Downloading deepface-0.0.92-py3-none-any.whl (105 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.5/105.5 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
Collecting mtcnn>=0.1.0 (from deepface)
  Downloading mtcnn-0.1.1-py3-none-any.whl (2.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting retina-face>=0.0.1 (from deepface)
  Downloading retina_face-0.0.17-py3-none-any.whl (25 kB)
Collecting fire>=0.4.0 (from deepface)
  Downloading fire-0.6.0.tar.gz (88 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m88.4/88.4 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gunicorn>=20.1.0 (from deepface)
  Downloading gunicorn-22.0.0-py3-none-any.whl (84 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.4/84.4 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
Build

## Downloading Data

In [2]:
!wget http://vis-www.cs.umass.edu/lfw/lfw.tgz

--2024-07-16 14:54:42--  http://vis-www.cs.umass.edu/lfw/lfw.tgz
Resolving vis-www.cs.umass.edu (vis-www.cs.umass.edu)... 128.119.244.95
Connecting to vis-www.cs.umass.edu (vis-www.cs.umass.edu)|128.119.244.95|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 180566744 (172M) [application/x-gzip]
Saving to: ‘lfw.tgz’


2024-07-16 14:56:11 (1.95 MB/s) - ‘lfw.tgz’ saved [180566744/180566744]



In [None]:
!tar -xzvf lfw.tgz

In [8]:
import os
import shutil

# Identities with fewer images than this are removed from the dataset.
MIN_IMAGES_PER_PERSON = 50
LFW_DIR = "lfw"

count = 0
for folder in os.listdir(LFW_DIR):
    person_dir = os.path.join(LFW_DIR, folder)
    # Skip stray non-directory entries: os.listdir() on a plain file raises.
    if not os.path.isdir(person_dir):
        continue
    if len(os.listdir(person_dir)) < MIN_IMAGES_PER_PERSON:
        shutil.rmtree(person_dir)
        count += 1
print(f"Removed {count} folders")
print(f"Remaining folders: {len(os.listdir('lfw'))}")

Removed 411 folders
Remaining folders: 12


## Downloading Model

In [6]:
from huggingface_hub import snapshot_download

# Local directory that receives the files of the fine-tuned model repository.
finetuned_model_dir = "model"

# Left as the last expression so the cell displays the resolved download path.
snapshot_download(
    repo_id="DnD11/FaceNet_Finetuned",
    repo_type="model",
    local_dir=finetuned_model_dir,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

variables/variables.index:   0%|          | 0.00/61.2k [00:00<?, ?B/s]

.gitattributes:   0%|          | 0.00/1.59k [00:00<?, ?B/s]

saved_model.pb:   0%|          | 0.00/18.1M [00:00<?, ?B/s]

keras_metadata.pb:   0%|          | 0.00/2.52M [00:00<?, ?B/s]

fingerprint.pb:   0%|          | 0.00/56.0 [00:00<?, ?B/s]

variables.data-00000-of-00001:   0%|          | 0.00/282M [00:00<?, ?B/s]

'/content/model'

# Evaluation

In [12]:
import os
import numpy as np
from tqdm import tqdm_notebook
from keras.models import load_model
from keras.preprocessing import image
from sklearn.metrics.pairwise import euclidean_distances
from deepface import DeepFace
import tensorflow as tf


# Function to load and preprocess images
def load_and_preprocess_image(img_path, target_size=(160, 160)):
    """Read an image from disk and turn it into a single-item, scaled batch.

    Args:
        img_path: Path of the image file to load.
        target_size: Size passed to ``image.load_img`` as ``target_size``.

    Returns:
        The image array with an extra leading axis, divided by 255.0.
    """
    pil_image = image.load_img(img_path, target_size=target_size)
    # Prepend a batch axis, then scale in place
    # (presumably from 0-255 pixel values -- TODO confirm).
    batch = np.expand_dims(image.img_to_array(pil_image), axis=0)
    batch /= 255.0
    return batch


# Function to generate embeddings
def generate_embeddings(model, dataset_path, deepface=False):
    """Compute one embedding vector per image for every class sub-directory.

    Args:
        model: Object exposing ``predict(batch)``. Only used when ``deepface``
            is False.
        dataset_path: Directory whose sub-directories each hold the images of
            one class; non-directory entries are ignored.
        deepface: When True, embeddings come from ``DeepFace.represent`` with
            the "Facenet512" model and ``model`` is ignored.

    Returns:
        dict mapping each class name to an array with one flattened embedding
        per row, i.e. shape ``(num_images, embedding_dim)``.
    """
    embeddings = {}

    for class_name in tqdm_notebook(os.listdir(dataset_path)):
        class_path = os.path.join(dataset_path, class_name)
        if not os.path.isdir(class_path):
            continue

        class_embeddings = []
        for img_name in os.listdir(class_path):
            img_path = os.path.join(class_path, img_name)
            # Nested directories cannot be read as images; skip them.
            if not os.path.isfile(img_path):
                continue
            if deepface:
                embedding = DeepFace.represent(
                    img_path=img_path, model_name="Facenet512", enforce_detection=False
                )[0]["embedding"]
            else:
                # Only the Keras path needs the preprocessed array; DeepFace
                # reads the file itself.
                img_array = load_and_preprocess_image(img_path)
                embedding = model.predict(img_array)
            # predict() returns a batch-shaped result; flatten every embedding
            # to 1-D so the per-class array is 2-D, which the distance
            # computations downstream require.
            class_embeddings.append(np.asarray(embedding).reshape(-1))
        embeddings[class_name] = np.array(class_embeddings)

    return embeddings


# Function to calculate intra-cluster distances
def calculate_intra_cluster_metrics(embeddings):
    """Summarise how tightly each cluster's embeddings sit around their mean.

    Args:
        embeddings: dict mapping a label to an array-like of embeddings, one
            per sample. Extra singleton axes such as ``(N, 1, D)`` are
            flattened to ``(N, D)``.

    Returns:
        dict mapping each label to a dict with the cluster ``center`` (mean
        embedding), ``max_radius`` / ``min_radius`` (largest / smallest
        Euclidean distance to the center), ``num_samples``, and the
        ``variance``, ``standard_deviation`` and 25th / 50th / 75th
        percentiles of those distances.

    Raises:
        ValueError: If a cluster contains no embeddings.
    """
    metrics = {}

    for label, cluster_embeddings in embeddings.items():
        points = np.asarray(cluster_embeddings, dtype=float)
        if points.size == 0:
            raise ValueError(f"Cluster {label!r} contains no embeddings")
        # One row per sample, whatever extra axes the producer left in place.
        points = points.reshape(len(points), -1)

        avg_embedding = points.mean(axis=0)
        # Direct per-row norm of the difference to the cluster center.
        distances = np.linalg.norm(points - avg_embedding, axis=1)
        percentile_25, percentile_50, percentile_75 = np.percentile(
            distances, [25, 50, 75]
        )

        metrics[label] = {
            "center": avg_embedding,
            "max_radius": np.max(distances),
            "min_radius": np.min(distances),
            "num_samples": len(points),
            "variance": np.var(distances),
            "standard_deviation": np.std(distances),
            "percentile_25": percentile_25,
            "percentile_50": percentile_50,
            "percentile_75": percentile_75,
        }

    return metrics

  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

AttributeError: '_UserObject' object has no attribute 'predict'

In [None]:
# Path to the dataset
dataset_path = "lfw"
# The baseline embeddings are produced by DeepFace, so generate_embeddings never
# touches the model argument; None makes that explicit instead of a dummy value.
model = None
# Generate embeddings for the original model
embeddings_original = generate_embeddings(model, dataset_path, deepface=True)

In [None]:
# Load the fine-tuned model from the directory the snapshot was downloaded to,
# rather than from a hard-coded absolute Colab path.
finetuned_h5_path = os.path.join(finetuned_model_dir, "embedding_trial3.h5")
# The download log above lists only SavedModel files (saved_model.pb, variables/...),
# so fall back to the snapshot directory itself when the .h5 file is absent.
# NOTE(review): loading a SavedModel directory via load_model may require
# Keras 2 / tf-keras -- confirm against the runtime's Keras version.
finetuned_model_source = (
    finetuned_h5_path if os.path.isfile(finetuned_h5_path) else finetuned_model_dir
)
finetuned_model = tf.keras.models.load_model(finetuned_model_source)
embeddings_finetuned = generate_embeddings(finetuned_model, dataset_path)

## Calculate Metrics

In [None]:
# Per-label spread statistics for the baseline embeddings ...
metrics_original = calculate_intra_cluster_metrics(embeddings=embeddings_original)
# ... and for the fine-tuned embeddings.
metrics_finetuned = calculate_intra_cluster_metrics(embeddings=embeddings_finetuned)

In [None]:
import matplotlib.pyplot as plt


def plot_histogram(metrics, key, title, xlabel):
    """Draw a 20-bin histogram of one metric taken across all labels.

    Args:
        metrics: dict mapping a label to a dict of metric values.
        key: Name of the metric to read from each label's dict.
        title: Figure title.
        xlabel: Label for the x axis.
    """
    values = [per_label[key] for per_label in metrics.values()]
    _, ax = plt.subplots()
    ax.hist(values, bins=20, alpha=0.7)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel("Frequency")
    plt.show()


# Distribution of the largest intra-cluster distance, baseline model first.
plot_histogram(
    metrics=metrics_original,
    key="max_radius",
    title="Histogram of Max Radius (Original Model)",
    xlabel="Max Radius",
)
# Same view for the fine-tuned model.
plot_histogram(
    metrics=metrics_finetuned,
    key="max_radius",
    title="Histogram of Max Radius (Fine-tuned Model)",
    xlabel="Max Radius",
)

In [None]:
def plot_boxplot(metrics, key, title, ylabel):
    """Draw a box plot of one metric taken across all labels.

    Args:
        metrics: dict mapping a label to a dict of metric values.
        key: Name of the metric to read from each label's dict.
        title: Figure title.
        ylabel: Label for the y axis.
    """
    values = [per_label[key] for per_label in metrics.values()]
    _, ax = plt.subplots()
    ax.boxplot(values)
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    plt.show()


# Spread of the per-label median distance to the cluster center, baseline first.
plot_boxplot(
    metrics=metrics_original,
    key="percentile_50",
    title="Boxplot of Median Distances (Original Model)",
    ylabel="Median Distance",
)
# Same view for the fine-tuned model.
plot_boxplot(
    metrics=metrics_finetuned,
    key="percentile_50",
    title="Boxplot of Median Distances (Fine-tuned Model)",
    ylabel="Median Distance",
)

In [None]:
def compare_metrics(metrics_original, metrics_finetuned, key):
    """Return the per-label change (fine-tuned minus original) of one metric.

    Args:
        metrics_original: dict mapping a label to a dict of metric values.
        metrics_finetuned: dict with the same layout for the fine-tuned model.
        key: Name of the metric to compare.

    Returns:
        dict mapping each label present in both inputs to the difference
        ``metrics_finetuned[label][key] - metrics_original[label][key]``.
    """
    return {
        label: metrics_finetuned[label][key] - original_values[key]
        for label, original_values in metrics_original.items()
        if label in metrics_finetuned
    }


max_radius_change = compare_metrics(metrics_original, metrics_finetuned, "max_radius")
# compare_metrics returns a flat {label: change} mapping, whereas plot_histogram
# reads metrics[label][key]. Wrap each change in a one-entry dict so that lookup
# succeeds instead of failing on a scalar.
plot_histogram(
    {label: {"max_radius": change} for label, change in max_radius_change.items()},
    "max_radius",
    "Histogram of Max Radius Change",
    "Max Radius Change",
)

In [None]:
def calculate_inter_cluster_metrics(embeddings):
    """Compute the Euclidean distance between every pair of cluster centers.

    Args:
        embeddings: dict mapping a label to a dict that has a ``"center"``
            entry (the layout returned by ``calculate_intra_cluster_metrics``).
            Centers are flattened, so a ``(1, D)`` center is accepted as well
            as ``(D,)``.

    Returns:
        dict mapping each unordered label pair, stored as a sorted tuple, to
        the distance between the two centers. Empty when fewer than two
        clusters are given.
    """
    labels = list(embeddings.keys())
    centers = [
        np.asarray(embeddings[label]["center"], dtype=float).ravel()
        for label in labels
    ]

    metrics = {}
    # Visit each unordered pair exactly once (i < j).
    for i in range(len(labels)):
        for j in range(i + 1, len(labels)):
            pair = tuple(sorted((labels[i], labels[j])))
            metrics[pair] = np.linalg.norm(centers[i] - centers[j])

    return metrics


# Pairwise center-to-center distances, computed from the per-label metric dicts
# (which carry each cluster's "center").
inter_cluster_metrics_original = calculate_inter_cluster_metrics(
    embeddings=metrics_original
)
inter_cluster_metrics_finetuned = calculate_inter_cluster_metrics(
    embeddings=metrics_finetuned
)

In [None]:
def compare_inter_cluster_metrics(metrics_original, metrics_finetuned):
    """Return the per-pair change (fine-tuned minus original) in distance.

    Args:
        metrics_original: dict mapping a label pair to a distance.
        metrics_finetuned: dict with the same layout for the fine-tuned model.

    Returns:
        dict holding, for every pair present in both inputs, the value
        ``metrics_finetuned[pair] - metrics_original[pair]``.
    """
    return {
        pair: metrics_finetuned[pair] - original_distance
        for pair, original_distance in metrics_original.items()
        if pair in metrics_finetuned
    }


# Change in center-to-center distance for every label pair (fine-tuned - original).
inter_cluster_distance_change = compare_inter_cluster_metrics(
    metrics_original=inter_cluster_metrics_original,
    metrics_finetuned=inter_cluster_metrics_finetuned,
)


def plot_inter_cluster_changes(inter_cluster_distance_change, title, xlabel):
    """Draw a 20-bin histogram of the values of a flat ``{pair: change}`` dict.

    Args:
        inter_cluster_distance_change: dict whose values are plotted.
        title: Figure title.
        xlabel: Label for the x axis.
    """
    _, ax = plt.subplots()
    ax.hist(list(inter_cluster_distance_change.values()), bins=20, alpha=0.7)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel("Frequency")
    plt.show()


# Histogram of how far apart cluster centers moved after fine-tuning.
plot_inter_cluster_changes(
    inter_cluster_distance_change=inter_cluster_distance_change,
    title="Histogram of Inter-Cluster Distance Changes",
    xlabel="Inter-Cluster Distance Change",
)

## Impact on faces

In [None]:
def classify_impact(
    intra_metrics_original,
    intra_metrics_finetuned,
    inter_metrics_original,
    inter_metrics_finetuned,
    threshold=0.01,
):
    """Bucket every metric change into positive / negative / average impact.

    Intra-cluster metrics (``max_radius``, ``variance``) count as improved when
    they shrink, i.e. a cluster became tighter. Inter-cluster distances count
    as improved when they grow, i.e. two clusters moved further apart.

    Args:
        intra_metrics_original: dict mapping a label to a dict with at least
            ``"max_radius"`` and ``"variance"``.
        intra_metrics_finetuned: Same layout for the fine-tuned model.
        inter_metrics_original: dict mapping a label pair to a distance.
        inter_metrics_finetuned: Same layout for the fine-tuned model.
        threshold: Absolute change that must be exceeded for a change to count
            as positive or negative; smaller changes are "average".

    Returns:
        dict with the keys ``"positively_impacted"``, ``"negatively_impacted"``
        and ``"averagely_impacted"``, each holding a list of
        ``(label_or_pair, metric_name)`` tuples. Only labels / pairs present in
        both the original and the fine-tuned input are classified.
    """
    impact = {
        "positively_impacted": [],
        "negatively_impacted": [],
        "averagely_impacted": [],
    }

    # Intra-cluster metrics: a decrease means a tighter cluster.
    for label, original in intra_metrics_original.items():
        if label not in intra_metrics_finetuned:
            continue
        finetuned = intra_metrics_finetuned[label]
        for metric_name in ("max_radius", "variance"):
            change = finetuned[metric_name] - original[metric_name]
            bucket = _classify_change(change, threshold, lower_is_better=True)
            impact[bucket].append((label, metric_name))

    # Inter-cluster distances: an increase means better separated identities.
    for pair, original_distance in inter_metrics_original.items():
        if pair not in inter_metrics_finetuned:
            continue
        change = inter_metrics_finetuned[pair] - original_distance
        bucket = _classify_change(change, threshold, lower_is_better=False)
        impact[bucket].append((pair, "inter_cluster_distance"))

    return impact


def _classify_change(change, threshold, lower_is_better):
    """Return the impact bucket name for a single metric change."""
    # Normalise so that a negative value always means "improved".
    signed_change = change if lower_is_better else -change
    if signed_change < -threshold:
        return "positively_impacted"
    if signed_change > threshold:
        return "negatively_impacted"
    return "averagely_impacted"


# metrics_original / metrics_finetuned hold the per-label metric dicts and the
# inter_cluster_* dicts hold the per-pair distances computed in earlier cells.
impact = classify_impact(
    intra_metrics_original=metrics_original,
    intra_metrics_finetuned=metrics_finetuned,
    inter_metrics_original=inter_cluster_metrics_original,
    inter_metrics_finetuned=inter_cluster_metrics_finetuned,
)

# Summary: number of entries that landed in each category.
print(f"Positively Impacted Faces: {len(impact['positively_impacted'])}")
print(f"Negatively Impacted Faces: {len(impact['negatively_impacted'])}")
print(f"Averagely Impacted Faces: {len(impact['averagely_impacted'])}")

# Detail: list every (label or pair, metric) entry under its category heading.
for category in impact:
    changes = impact[category]
    print(f"\n{category.capitalize()} Faces:")
    for change in changes:
        print(change)