## EXPERIMENT 1: MagFace

In [1]:
## Load libraries

import numpy as np
import cv2
from matplotlib import pyplot as plt
import torch
import seaborn as sns
import pandas as pd
from sklearn.preprocessing import normalize
import sys
sns.set(style="white")
%matplotlib inline
sys.path.append('../../utils')
from MagFace_utils.MagFace_funcs import *
from Data_proc_utils.Data_proc_funcs import *

### MagFace Results

This notebook loads the feature vectors from MagFace and run them through the results metrics:
FNIR, FPIR, FND, FPD, and GARBE

### Load data

In [2]:
# Get all features from feature lists from magface model
feature_list_children = '../../data/feat_img_children_full.list'
feature_list_adults = '../../data/feat_img_adults_full.list'

image_names_c, ids_c, num_ids_c, norm_feats_c = load_magface_vectors(feature_list_children)
image_names_a, ids_a, num_ids_a, norm_feats_a = load_magface_vectors(feature_list_adults)

# Similarity matrices from magface - all
sim_mat_c = np.dot(norm_feats_c, norm_feats_c.T)
sim_mat_a = np.dot(norm_feats_a, norm_feats_a.T)

# Dataframes with info, removes names not in magface results
children_all = pd.read_csv('../../data/YLFW_full_info_excluding_adults.csv')
children_all = children_all[children_all.image_name.isin(image_names_c)]
adults_all = pd.read_csv('../../data/RFW_full_info_excluding_children.csv')
adults_all = adults_all[adults_all.image_name.isin(image_names_a)] # OBS, this operation because some magface images has not been through all adults images.


### GET METRICS 10 TIMES

In [3]:
### Load children and adults balanced data ###
children_balanced_df_i = balance_child_data(children_all, print_stats=True, random_state=0)

### All reference image names, enrolled and non-enrolled image names - children ###
c_mates = children_balanced_df_i.groupby("identity_name").agg({'identity_name': ['count']})
enrolled_identity_names_c = c_mates[c_mates[('identity_name', 'count')] > 1].index
enrolled_image_names_c = list(children_balanced_df_i[children_balanced_df_i["identity_name"].isin(enrolled_identity_names_c)].image_name)
non_enrolled_identity_names_c = c_mates[c_mates[('identity_name', 'count')] == 1].index
non_enrolled_image_names_c = list(children_balanced_df_i[children_balanced_df_i["identity_name"].isin(non_enrolled_identity_names_c)].image_name)
all_reference_image_names_c = list(children_balanced_df_i.image_name)


minority age group from childrens data:  16-18 
number of images:  383 

racial distribution: ethnicity
African      148
Indian       134
Asian         65
Caucasian     36
Name: image_name, dtype: int64 

All new groups should have same distribution

Other age group stats: 
0-3
 ethnicity
African      148
Indian       134
Asian         65
Caucasian     36
Name: image_name, dtype: int64

16-18 ethnicity
African      148
Indian       134
Asian         65
Caucasian     36
Name: image_name, dtype: int64
Balanced data?: children_agegroup
16-18    383
0-3      383
4-6      383
7-9      383
10-12    383
13-15    383
Name: count, dtype: int64


In [9]:

### Similarity matrices for enrolled ids ###
# Get indices of all feature and numerical id elements that are enrolled ids
indices_c_enrolled = [image_names_c.index(name) for name in enrolled_image_names_c]

# Extract corresponding rows from the similarity matrix
sim_mat_c_enrolled = sim_mat_c[np.ix_(indices_c_enrolled, indices_c_enrolled)] # only enrolled columns and rows

# Extract corresponding rows from the numerical ids
num_ids_c_enrolled = num_ids_c[indices_c_enrolled]


In [8]:
random_states = [3]#[1,2,3,4,5,6,7,8,9,10]
FNIR_c_list=[]
FNIR_a_list=[]
FPIR_c_list=[]
FPIR_a_list=[]
FPD_list=[]
FND_list=[]
GARBE_list=[]

for random_state_i in random_states:

    ### Load children and adults balanced data ###
    children_balanced_df_i = balance_child_data(children_all, print_stats=False, random_state=random_state_i)
    adults_balanced_df_i = balance_adults_data_enrolled(children_balanced_df_i, adults_all, print_stats=False, random_state=random_state_i)


    ### All reference image names, enrolled and non-enrolled image names - children ###
    c_mates = children_balanced_df_i.groupby("identity_name").agg({'identity_name': ['count']})
    enrolled_identity_names_c = c_mates[c_mates[('identity_name', 'count')] > 1].index
    enrolled_image_names_c = list(children_balanced_df_i[children_balanced_df_i["identity_name"].isin(enrolled_identity_names_c)].image_name)
    non_enrolled_identity_names_c = c_mates[c_mates[('identity_name', 'count')] == 1].index
    non_enrolled_image_names_c = list(children_balanced_df_i[children_balanced_df_i["identity_name"].isin(non_enrolled_identity_names_c)].image_name)
    all_reference_image_names_c = list(children_balanced_df_i.image_name)

    ### All reference image names, enrolled and non-enrolled image names - adults ###
    a_mates = adults_balanced_df_i.groupby("identity_name").agg({'identity_name': ['count']})
    enrolled_identity_names_a = a_mates[a_mates[('identity_name', 'count')] > 1].index
    enrolled_image_names_a = list(adults_balanced_df_i[adults_balanced_df_i["identity_name"].isin(enrolled_identity_names_a)].image_name)
    non_enrolled_identity_names_a = a_mates[a_mates[('identity_name', 'count')] == 1].index
    non_enrolled_image_names_a = list(adults_balanced_df_i[adults_balanced_df_i["identity_name"].isin(non_enrolled_identity_names_a)].image_name)
    all_reference_image_names_a = list(adults_balanced_df_i.image_name)

    ### Similarity matrices for ids in reference database ###
    indices_c_all_reference = [image_names_c.index(name) for name in all_reference_image_names_c]
    indices_a_all_reference = [image_names_a.index(name) for name in all_reference_image_names_a]

    # Extract corresponding columns from the similarity matrix
    sim_mat_c_reference_cols = sim_mat_c[:, indices_c_all_reference]
    sim_mat_a_reference_cols = sim_mat_a[:, indices_a_all_reference]

    # Extract corresponding rows from the numerical ids
    num_ids_c_reference = num_ids_c[indices_c_all_reference]
    num_ids_a_reference = num_ids_a[indices_a_all_reference]


    ### Similarity matrices for non-enrolled ids ###
    # Get indices of all feature and numerical id elements that are non-enrolled  ids
    indices_c_non_enrolled = [image_names_c.index(name) for name in non_enrolled_image_names_c]
    indices_a_non_enrolled = [image_names_a.index(name) for name in non_enrolled_image_names_a]

    # Extract corresponding rows from the similarity matrix
    sim_mat_c_non_enrolled = sim_mat_c_reference_cols[indices_c_non_enrolled]
    sim_mat_a_non_enrolled = sim_mat_a_reference_cols[indices_a_non_enrolled]

    # Extract corresponding rows from the numerical ids
    num_ids_c_non_enrolled = num_ids_c[indices_c_non_enrolled]
    num_ids_a_non_enrolled = num_ids_a[indices_a_non_enrolled]

    ### Similarity matrices for enrolled ids ###
    # Get indices of all feature and numerical id elements that are enrolled ids
    indices_c_enrolled = [image_names_c.index(name) for name in enrolled_image_names_c]
    indices_a_enrolled = [image_names_a.index(name) for name in enrolled_image_names_a]

    # Extract corresponding rows from the similarity matrix
    sim_mat_c_enrolled = sim_mat_c[np.ix_(indices_c_enrolled, indices_c_enrolled)] # only enrolled columns and rows
    sim_mat_a_enrolled = sim_mat_a[np.ix_(indices_a_enrolled, indices_a_enrolled)]

    # Extract corresponding rows from the numerical ids
    num_ids_c_enrolled = num_ids_c[indices_c_enrolled]
    num_ids_a_enrolled = num_ids_a[indices_a_enrolled]


    ### DET THINGS ###
    thold = 0.43

    ### Evaluation metrics ###
    # FNIR
    FNIR_c = compute_fnir(sim_mat_c_enrolled, num_ids_c_enrolled, thold=thold)
    FNIR_a = compute_fnir(sim_mat_a_enrolled, num_ids_a_enrolled, thold=thold)
    # FPIR
    FPIR_c = compute_fpir(sim_mat_c_non_enrolled, num_ids_c_non_enrolled, num_ids_c_reference, thold=thold)
    FPIR_a = compute_fpir(sim_mat_a_non_enrolled, num_ids_a_non_enrolled, num_ids_a_reference, thold=thold)


    # OBS maybe compute Garbe outside funtion to choose a good alpha?
    alpha_garbe = 0.5
    FPD_i, FND_i, GARBE_i = GARBE(FNIR_c, FNIR_a, FPIR_c, FPIR_a, alpha=alpha_garbe)

    FNIR_c_list.append(FNIR_c)
    FNIR_a_list.append(FNIR_a)
    FPIR_c_list.append(FPIR_c)
    FPIR_a_list.append(FPIR_a)
    FPD_list.append(FPD_i)
    FND_list.append(FND_i)
    GARBE_list.append(GARBE_i)
    print("done")

# Create a DataFrame from the lists
data = {'Iteration': random_states,'FNIR_c': FNIR_c_list, 'FNIR_a': FNIR_a_list, "FPIR_c": FPIR_c_list, "FPIR_a": FPIR_a_list, "FPD": FPD_list, "FND": FND_list, "GARBE": GARBE_list, "Threshold": thold}
df_all_results = pd.DataFrame(data)


FPD result:  0.9594272076372315
FND result:  0.18745416689803857
GARBE result, GARBE close to 1 means more unfair:  0.5734406872676351
done


#### Similarity matrix distrobutions

In [1]:
# Remove ones from similarity matrix - theese corresponds to the probe
children_scores_non_enrolled = remove_ones(sim_mat_c_non_enrolled, reshape=False)
adults_scores_non_enrolled = remove_ones(sim_mat_a_non_enrolled, reshape=False)

# Plot histograms for both groups
plt.hist(children_scores_non_enrolled, bins=len(sim_mat_c_non_enrolled), color='blue', alpha=0.7, label='Children')
plt.hist(adults_scores_non_enrolled, bins=len(sim_mat_a_non_enrolled), color='red', alpha=0.7, label='Adults')
plt.title('Histogram of non-enrolled similarity scores')
plt.xlabel('Similarity score value')
plt.ylabel('Frequency')
plt.legend()
plt.show()

NameError: name 'remove_ones' is not defined

#### Canonical/Non-canonical

In [3]:
df_canonical = pd.read_csv("../../data/OFIQ_results/canonical_children_final.csv", sep=";")

In [4]:
image_names_can, ids_can, num_ids_can, norm_feats_can = load_magface_vectors(feature_list_children, canonical=True, df_c_can=df_canonical)
# Similarity matrices from magface - canonical
sim_mat_canonical = np.dot(norm_feats_can, norm_feats_can.T)

In [5]:
random_states = [3]#[1,2,3,4,5,6,7,8,9,10]
FNIR_c_list=[]
FNIR_can_list=[]
FPIR_c_list=[]
FPIR_can_list=[]


for random_state_i in random_states:

    ### Load children and canonical balanced data ###
    children_balanced_df_i = balance_child_data(children_all, print_stats=False, random_state=random_state_i)
    canonical_names = list(df_canonical.Filename.apply(lambda x: x[:-4]))
    canonical_children_balanced_df_i = children_balanced_df_i[children_balanced_df_i.image_name.isin(canonical_names)]

    ### All reference image names, enrolled and non-enrolled image names - children ###
    c_mates = children_balanced_df_i.groupby("identity_name").agg({'identity_name': ['count']})
    enrolled_identity_names_c = c_mates[c_mates[('identity_name', 'count')] > 1].index
    enrolled_image_names_c = list(children_balanced_df_i[children_balanced_df_i["identity_name"].isin(enrolled_identity_names_c)].image_name)
    non_enrolled_identity_names_c = c_mates[c_mates[('identity_name', 'count')] == 1].index
    non_enrolled_image_names_c = list(children_balanced_df_i[children_balanced_df_i["identity_name"].isin(non_enrolled_identity_names_c)].image_name)
    all_reference_image_names_c = list(children_balanced_df_i.image_name)


    ### All reference image names, enrolled and non-enrolled image names - canonical ###
    can_mates = canonical_children_balanced_df_i.groupby("identity_name").agg({'identity_name': ['count']})
    enrolled_identity_names_can = can_mates[can_mates[('identity_name', 'count')] > 1].index
    enrolled_image_names_can = list(canonical_children_balanced_df_i[canonical_children_balanced_df_i["identity_name"].isin(enrolled_identity_names_can)].image_name)
    non_enrolled_identity_names_can = can_mates[can_mates[('identity_name', 'count')] == 1].index
    non_enrolled_image_names_can = list(canonical_children_balanced_df_i[canonical_children_balanced_df_i["identity_name"].isin(non_enrolled_identity_names_can)].image_name)
    all_reference_image_names_can = list(canonical_children_balanced_df_i.image_name)


    ### Similarity matrices for ids in reference database ###
    indices_c_all_reference = [image_names_c.index(name) for name in all_reference_image_names_c]
    indices_can_all_reference = [image_names_can.index(name) for name in all_reference_image_names_can]

    # Extract corresponding columns from the similarity matrix
    sim_mat_c_reference_cols = sim_mat_c[:, indices_c_all_reference]
    sim_mat_can_reference_cols = sim_mat_canonical[:, indices_can_all_reference]

    # Extract corresponding rows from the numerical ids
    num_ids_c_reference = num_ids_c[indices_c_all_reference]
    num_ids_can_reference = num_ids_can[indices_can_all_reference]


    ### Similarity matrices for non-enrolled ids ###
    # Get indices of all feature and numerical id elements that are non-enrolled  ids
    indices_c_non_enrolled = [image_names_c.index(name) for name in non_enrolled_image_names_c]
    indices_can_non_enrolled = [image_names_can.index(name) for name in non_enrolled_image_names_can]

    # Extract corresponding rows from the similarity matrix
    sim_mat_c_non_enrolled = sim_mat_c_reference_cols[indices_c_non_enrolled]
    sim_mat_can_non_enrolled = sim_mat_can_reference_cols[indices_can_non_enrolled]

    # Extract corresponding rows from the numerical ids
    num_ids_c_non_enrolled = num_ids_c[indices_c_non_enrolled]
    num_ids_can_non_enrolled = num_ids_can[indices_can_non_enrolled]

    ### Similarity matrices for enrolled ids ###
    # Get indices of all feature and numerical id elements that are enrolled ids
    indices_c_enrolled = [image_names_c.index(name) for name in enrolled_image_names_c]
    indices_can_enrolled = [image_names_can.index(name) for name in enrolled_image_names_can]

    # Extract corresponding rows from the similarity matrix
    sim_mat_c_enrolled = sim_mat_c[np.ix_(indices_c_enrolled, indices_c_enrolled)] # only enrolled columns and rows
    sim_mat_can_enrolled = sim_mat_a[np.ix_(indices_can_enrolled, indices_can_enrolled)]

    # Extract corresponding rows from the numerical ids
    num_ids_c_enrolled = num_ids_c[indices_c_enrolled]
    num_ids_can_enrolled = num_ids_can[indices_can_enrolled]


    ### DET THINGS ###
    thold = 0.43

    ### Evaluation metrics ###
    # FNIR
    FNIR_c = compute_fnir(sim_mat_c_enrolled, num_ids_c_enrolled, thold=thold)
    FNIR_can = compute_fnir(sim_mat_can_enrolled, num_ids_can_enrolled, thold=thold)
    # FPIR
    FPIR_c = compute_fpir(sim_mat_c_non_enrolled, num_ids_c_non_enrolled, num_ids_c_reference, thold=thold)
    FPIR_can = compute_fpir(sim_mat_can_non_enrolled, num_ids_can_non_enrolled, num_ids_can_reference, thold=thold)


    FNIR_c_list.append(FNIR_c)
    FNIR_can_list.append(FNIR_can)
    FPIR_c_list.append(FPIR_c)
    FPIR_can_list.append(FPIR_can)

    print("done")

# Create a DataFrame from the lists
data_can = {'Iteration': random_states,'FNIR_c': FNIR_c_list, 'FNIR_can': FNIR_can_list, "FPIR_c": FPIR_c_list, "FPIR_can": FPIR_can_list, "Threshold": thold}
df_can_results = pd.DataFrame(data_can)


done


In [6]:
df_can_results

Unnamed: 0,Iteration,FNIR_c,FNIR_can,FPIR_c,FPIR_can,Threshold
0,3,0.1291,0.75441,0.92948,0.929577,0.43
