In [1]:
import math
import operator
from pathlib import Path

from ipywidgets import fixed, interact
import ipywidgets as widgets
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import describe, gaussian_kde
import seaborn as sns

import sys
sys.path.insert(0, '../../utils')




# Notebook-wide plotting configuration. None of these names is used in the
# cells shown here; presumably they are consumed by plotting cells/helpers
# elsewhere in the notebook — TODO confirm.

# Colour and legend label for the mated and the non-mated score series.
mated_colour = "green"
mated_label = "Mated scores"
nonmated_colour = "red"
nonmated_label = "Non-mated scores"

# Figure geometry, transparency, line and legend/label styling.
figure_size = (12,6)
alpha_shade = 0.25
alpha_fill = 1.0
linewidth = 2
legend_loc = "upper left"
legend_anchor = (1.0, 1.02)
legend_cols = 1
legend_fontsize = 12
label_fontsize = 16

# Styling for a decision-threshold marker and the rounding precision
# (presumably for printed metrics — TODO confirm).
threshold_colour = "black"
threshold_style = "--"
round_digits = 5
# Global seaborn theme and matplotlib rc default (axes.axisbelow=True); both
# calls change process-wide plotting state for every later figure.
sns.set(style="white", palette="muted", color_codes=True)
plt.rc("axes", axisbelow=True)

# load utils
# NOTE(review): `sys` is already imported earlier in this cell, where
# '../../utils' was put on sys.path. This inserts a second, different relative
# path ('../utils') in front of it — confirm which one actually contains
# Model_utils and drop the other.
import sys
sys.path.insert(0, '../utils')
# Star import: names used later without a visible definition (e.g.
# compute_fnir, compute_fpir) presumably come from here — TODO confirm and
# import them explicitly.
from Model_utils.Model_funcs import *

# Convert child ids to real ids as in child_balanced.

# For every image path, take the file name and strip its trailing suffix to get the id shared by images of the same person
def convert_unique_ids(ids):
    unique_ids_list = []
    for id in ids:
        im_name = id.split("/")[-1][:-4]
        if '.' in im_name:
            un_id = im_name[:-5]
        else:
            un_id = "_".join(im_name.split("_")[:-1])

        unique_ids_list.append(un_id)
    return unique_ids_list

# Assign each distinct id an integer code, numbered in order of first appearance
def factorize_ids(ids):
    """Encode ids as consecutive integers in order of first appearance.

    Args:
        ids: iterable of hashable id values.

    Returns:
        (factors, unique_ids): ``factors`` is a list holding the integer code
        of every input entry; ``unique_ids`` is the dict mapping each distinct
        id to its code (0 for the first distinct id seen, 1 for the next, ...).
    """
    code_of = {}
    # setdefault only stores the new code (the current dict size) when the id
    # has not been seen before; either way it returns the id's code.
    codes = [code_of.setdefault(label, len(code_of)) for label in ids]
    return codes, code_of


In [2]:
# NOTE(review): the children feature list is disabled here, yet later cells
# reference children variables (ids_c, sim_scores_c, plot_sims_c) that no
# visible cell defines — confirm whether this path should be re-enabled.
# path_children_feature_vector = '../../data/data_full/feature_vectors/magface_feature_vectors/feat_img_children_full.list'

# MagFace feature-vector list for the adult images; opened by the loading cell below.
path_adults_feature_vector = '../../data/data_full/feature_vectors/magface_feature_vectors/feat_img_adults_full.list'

In [3]:
# from Data_proc_utils.Data_proc_funcs import *
# random_states = [42]
# a_df = pd.read_csv('../../data/image_info_csvs/final_filtered_adults_df_BIBEL.csv')
# adults_all = a_df
# children_all = pd.read_csv('../../data/image_info_csvs/final_filtered_children_df_BIBEL.csv')
# children_balanced_df_1 = balance_child_data(children_all, print_stats=True, random_state=random_states[0])
# adults_balanced_df_1 = balance_adults_data_enrolled(children_balanced_df_1, adults_all, print_stats=True, random_state=random_states[0])


In [4]:
# Load the adult feature vectors. Each line of the list file is expected to be
# "<image path> <v1> <v2> ..." separated by whitespace.
max_feature_lines = 2000  # only this many leading lines of the file are used

with open(path_adults_feature_vector, 'r') as f:
    # Stream the file and stop at the cap instead of materialising every line
    # with readlines() just to keep a short prefix.
    lines = [line for _, line in zip(range(max_feature_lines), f)]


img_2_feats = {}
img_2_mag = {}
# Convert to dictionary as adaface - is done in img_2_feats
for line in lines:
    parts = line.split()
    if len(parts) < 2:
        # Blank line (or a path without values): previously this produced a
        # bogus "/" entry with an empty vector and broke the stacking below.
        continue
    imgname = parts[0]
    # Keep the path from its fifth "/"-separated component onward, re-rooted
    # with a leading "/".
    imgname = "/"+"/".join(imgname.split("/")[4:])
    feats = np.array([float(e) for e in parts[1:]])
    mag = np.linalg.norm(feats)
    img_2_feats[imgname] = feats/mag  # unit-length feature vector
    img_2_mag[imgname] = mag #magnitude of the feature vector
imgnames = list(img_2_mag.keys())
mags = [img_2_mag[imgname] for imgname in imgnames]
sort_idx = np.argsort(mags) #sorts the magnitude/quality of the images
# Iterate over the stored names rather than range(len(lines)): when a name
# repeats (or a line is skipped) the dicts hold fewer entries than there are
# lines, and positional indexing raised IndexError.
feats_a = np.array([img_2_feats[imgname] for imgname in imgnames]) #unsorted image quality
ids_a = np.array(imgnames)


In [5]:
# Pairwise dot products between all adult feature vectors (rows of feats_a).
sim_mat_a = feats_a @ feats_a.T

In [6]:
# Replace each adult image path by the id derived from its file name.
# NOTE(review): ids_a is rebound to its own converted value, so running this
# cell a second time applies convert_unique_ids to already-converted ids.
# Re-run the loading cell first (or restart and run all) before re-executing.
ids_a = convert_unique_ids(ids_a)

In [7]:
# factors_a: integer code per adult image; unique_ids: id -> code mapping.
# Note that unique_ids is rebound again by the children section further down.
factors_a, unique_ids = factorize_ids(ids_a)

In [8]:
# Work on a copy so the raw similarity matrix stays untouched.
sim_scores_a = sim_mat_a.copy()
# Flat array of all scores below 0.999 (presumably to drop the self-matches on
# the diagonal — TODO confirm; any other pair scoring >= 0.999 is dropped too).
plot_sims_a = sim_scores_a[np.less(sim_scores_a, 0.999)].ravel()

In [9]:

# Mated and non-mated ids
# Per-image metadata for both cohorts.
a_df = pd.read_csv('../../data/image_info_csvs/final_filtered_adults_df_BIBEL.csv')
# Fix: the children frame used to be read from the adults CSV (copy-paste),
# which made every "children" statistic a second copy of the adult data.
c_df = pd.read_csv('../../data/image_info_csvs/final_filtered_children_df_BIBEL.csv')

# OFIQ column used for the quality-based selection below.
OFIQ_component_1 = 'UnifiedQualityScore.scalar' #'HeadPoseYaw.scalar' #		'HeadPosePitch.scalar'	'HeadPoseRoll.scalar'
# OFIQ_component_2 = 'HeadPoseRoll.scalar'
# OFIQ_component_3 = 'UnifiedQualityScore.scalar'


# NOTE(review): the two selections are not symmetric — children keep rows
# BELOW their 75th percentile, adults keep rows ABOVE theirs — although both
# are named "poor". Left unchanged; confirm the intended direction.
poor_c_ofiq = c_df[c_df[OFIQ_component_1] < c_df[OFIQ_component_1].quantile(0.75)]
poor_a_ofiq = a_df[a_df[OFIQ_component_1] > a_df[OFIQ_component_1].quantile(0.75)]


In [10]:
# Restrict each cohort to the images picked by the OFIQ selection, matching
# rows on the image_name column.
subset_a_df = a_df.loc[a_df['image_name'].isin(poor_a_ofiq['image_name'])]
subset_c_df = c_df.loc[c_df['image_name'].isin(poor_c_ofiq['image_name'])]

# Row counts: adult subset, all adults, OFIQ-selected adults.
print(subset_a_df.shape[0], a_df.shape[0], poor_a_ofiq.shape[0])

6534 26263 6534


In [11]:
# Images per identity in the adult subset; the single resulting column is
# addressed by the tuple key ('identity_name', 'count').
a_mates = subset_a_df.groupby("identity_name").agg({'identity_name': ['count']})
# a_mates = a_df.groupby("identity_name").agg({'identity_name': ['count']})
# Identities with several images are "mated", those with exactly one "non-mated".
a_mated_ids = a_mates.loc[a_mates[('identity_name', 'count')].gt(1)].index
a_nonmated_ids = a_mates.loc[a_mates[('identity_name', 'count')].eq(1)].index


# Same split for the children subset.
c_mates = subset_c_df.groupby("identity_name").agg({'identity_name': ['count']})
# c_mates = c_df.groupby("identity_name").agg({'identity_name': ['count']})
c_mated_ids = c_mates.loc[c_mates[('identity_name', 'count')].gt(1)].index
c_nonmated_ids = c_mates.loc[c_mates[('identity_name', 'count')].eq(1)].index

In [None]:
# NOTE(review): scratch call. sim_scores, im_ids_c, ids and thold_c are only
# assigned in the following cell, so on a fresh kernel this cell fails when run
# in order, and its return value is not stored anywhere. The same call is
# repeated (and stored as fnir_c) in the next cell — consider removing this one.
compute_fnir(c_mated_ids, sim_scores, im_ids_c, ids, thold=thold_c)
# Alternative signature noted by the author (not grounded in visible code —
# confirm against Model_funcs):
# compute_fnir(enrolled_sim_mat, sim_mat, enrolled_ids, enrolled_num_id, ids, thold=0.5):


In [None]:
# children
# NOTE(review): ids_c, plot_sims_c and sim_scores_c are not defined in any
# visible cell — confirm they are produced elsewhere before running this.

# Id per child image, then an integer code per id.
ids = convert_unique_ids(ids_c)
factors_c, unique_ids = factorize_ids(ids)


## Threshold set based on studying the similarity scores
thold_c = np.percentile(plot_sims_c, 99)
im_ids_c = np.array(factors_c)

# Generic aliases pointing at the cohort currently being evaluated.
mated_df, non_mated_df = c_mated_ids, c_nonmated_ids
sim_scores, im_ids = sim_scores_c, im_ids_c

fnir_c = compute_fnir(mated_df, sim_scores, im_ids, ids, thold=thold_c)
fpir_c = compute_fpir(non_mated_df, sim_scores, im_ids, ids, thold=thold_c)


# adults
# Same procedure: 99th percentile of the plotted scores as threshold.
thold_a = np.percentile(plot_sims_a, 99)
im_ids_a = np.array(factors_a)

# Re-point the aliases at the adult data.
mated_df, non_mated_df = a_mated_ids, a_nonmated_ids
sim_scores, im_ids, ids = sim_scores_a, im_ids_a, ids_a

fnir_a = compute_fnir(mated_df, sim_scores, im_ids, ids, thold=thold_a)
fpir_a = compute_fpir(non_mated_df, sim_scores, im_ids, ids, thold=thold_a)
