In [22]:
import pandas as pd
import numpy as np

from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score, confusion_matrix, f1_score

import sys
import os 
from datetime import datetime
import random as rnd
import itertools as it
#--------------------------------------------------------------------------------------------------------------------------
from RECOGNIZER.recognizer import Recognizer
from skimage.io import imread
#--------------------------------------------------------------------------------------------------------------------------
import warnings
warnings.filterwarnings('ignore')

In [2]:
def get_basename(path):
    """Return the file name of `path` without its directories and final extension.

    Example: dataset/lfw_funneled/Abel_Pacheco/Abel_Pacheco_0001.JPG -> Abel_Pacheco_0001
    """
    file_name = os.path.basename(path)
    stem, _extension = os.path.splitext(file_name)
    return stem

# Building the DataFrame of unique picture paths

In [None]:
dataset_path = 'dataset/'

# Read the whole pairs file at once; it is split into lines and tab-separated fields below.
with open(f'{dataset_path}pairs.txt', 'r') as file:
    pairs = file.read()

# Line 0 is skipped and the following 300 lines are kept; each is expected to hold
# three tab-separated fields: name, first picture number, second picture number.
pair_rows = [line.split('\t') for line in pairs.split('\n')[1:301]]
df = pd.DataFrame(pair_rows, columns=['name', 'pic1', 'pic2'])

# Go from one row per pair to one row per (name, picture), without duplicates.
df = (df.melt(id_vars='name', value_vars=['pic1', 'pic2'], var_name='picNum', value_name='pic')
        .drop(columns='picNum')
        .sort_values(by=['name', 'pic'])
        .drop_duplicates()
        .reset_index(drop=True))

# Picture numbers are zero-padded to four digits to match the file names on disk.
df['pic'] = df['pic'].map(lambda pic_num: pic_num.zfill(4))
df['path_x1'] = dataset_path + df['name'] + '/' + df['name'] + '_' + df['pic'] + '.jpg'

df.to_csv('pics_df.csv')
df.head()

In [None]:
# Original picture paths, and the file names the up-scaled versions are expected to have.
pics_to_scale_path = df['path_x1'].values
pics_to_scale = [get_basename(pic_path) + '_result.jpg' for pic_path in pics_to_scale_path]
print(f'There are {len(pics_to_scale)} unique pics to be scaled')

# Make stable classification DF

In [None]:
# Fixed seed so that the sampled "different person" pairs, and therefore
# classification_df.csv and every metric computed from it, are the same on every run.
# NOTE(review): a CSV written before this seed was introduced holds a different random
# sample; re-running this cell replaces it.
PAIR_SAMPLING_SEED = 42

# Positive examples: every unordered pair of pictures that belong to the same person.
sim_people_pics = []
for name, name_df in df[['name', 'pic']].groupby('name'):
    for first_pic, second_pic in it.combinations(name_df['pic'].values, 2):
        sim_people_pics.append([name, first_pic, name, second_pic, 1])

# Negative examples: every unordered pair of pictures that belong to different people...
all_pics = df[['name', 'pic']].values.tolist()
non_sim_people_pics = [first + second + [0]
                       for first, second in it.combinations(all_pics, 2)
                       if first[0] != second[0]]
# ...down-sampled to the number of positive pairs so that both classes are balanced.
# A dedicated Random instance keeps the draw independent of the global `random` state.
non_sim_people_pics = rnd.Random(PAIR_SAMPLING_SEED).sample(non_sim_people_pics, len(sim_people_pics))

classification_df = pd.DataFrame(sim_people_pics+non_sim_people_pics, 
                                 columns=['name1', 'pic1', 'name2', 'pic2', 'similarity'])

classification_df.to_csv('classification_df.csv')
# Preview the first three (positive) and the last three (negative) rows.
classification_df.iloc[[0, 1, 2, -3, -2, -1], :]

# Add up-scaled picture paths to the DataFrames

In [3]:
# Up-scaling configuration: the model name and scale factors are used further down
# to build the paths of the up-scaled pictures.
model_name = 'ESRGAN'
scales = [4]

# Reload the frames saved by the preparation cells above.
classification_df = pd.read_csv('classification_df.csv', index_col=0)
pics_df = pd.read_csv('pics_df.csv', index_col=0)

# Preview the first three and the last three pairs.
classification_df.iloc[[0, 1, 2, -3, -2, -1]]

Unnamed: 0,name1,pic1,name2,pic2,similarity
0,Abel_Pacheco,1,Abel_Pacheco,4,1
1,Akhmed_Zakayev,1,Akhmed_Zakayev,2,1
2,Akhmed_Zakayev,1,Akhmed_Zakayev,3,1
1665,Jeremy_Greenstock,22,John_McCain,6,0
1666,Lisa_Gottsegen,2,Paul_Bremer,7,0
1667,Jacques_Rogge,8,Patty_Schnyder,1,0


In [4]:
def add_pathes(df, scales, model_name): # (df, [2, 3, 4], 'DCSCN')
    """Add one `path_x{scale}` column per scale, pointing at the up-scaled pictures.

    Each new path is `{model_name}/output_x{scale}/<basename of path_x1>_result.jpg`.
    `df` is modified in place and also returned.
    """
    for factor in scales:
        # Recomputed on every iteration on purpose: it always reflects the current `path_x1`.
        result_names = df['path_x1'].map(get_basename) + '_result.jpg'
        df[f'path_x{factor}'] = f'{model_name}/output_x{factor}/' + result_names
    return df

def get_pathes(classification_df, df):
    """Attach the picture paths of both members of every pair.

    `df` is left-joined twice on (`name`, `pic`): once against (`name1`, `pic1`) and
    once against (`name2`, `pic2`). The first join adds the columns of `df` unchanged;
    in the second join they collide with the incoming ones, so pandas appends `_1` to
    the first picture's columns and `_2` to the second picture's. The duplicated key
    columns are then dropped, leaving only the suffixed path columns.

    Returns a new DataFrame; `classification_df` itself is not modified.
    """
    merged = classification_df
    for side in (1, 2):
        merged = merged.merge(df,
                              how='left',
                              left_on=[f'name{side}', f'pic{side}'],
                              right_on=['name', 'pic'],
                              suffixes=('_1', '_2'))
    return merged.drop(columns=['name_1', 'pic_1', 'name_2', 'pic_2'])

In [5]:
# Add a `path_x{scale}` column for every configured scale; add_pathes modifies the
# frame it is given and returns it, so re-running this cell just rewrites those columns.
pics_df = add_pathes(pics_df, scales, model_name)
pics_df.head()

Unnamed: 0,name,pic,path_x1,path_x4
0,Abel_Pacheco,1,dataset/Abel_Pacheco/Abel_Pacheco_0001.jpg,ESRGAN/output_x4/Abel_Pacheco_0001_result.jpg
1,Abel_Pacheco,4,dataset/Abel_Pacheco/Abel_Pacheco_0004.jpg,ESRGAN/output_x4/Abel_Pacheco_0004_result.jpg
2,Akhmed_Zakayev,1,dataset/Akhmed_Zakayev/Akhmed_Zakayev_0001.jpg,ESRGAN/output_x4/Akhmed_Zakayev_0001_result.jpg
3,Akhmed_Zakayev,2,dataset/Akhmed_Zakayev/Akhmed_Zakayev_0002.jpg,ESRGAN/output_x4/Akhmed_Zakayev_0002_result.jpg
4,Akhmed_Zakayev,3,dataset/Akhmed_Zakayev/Akhmed_Zakayev_0003.jpg,ESRGAN/output_x4/Akhmed_Zakayev_0003_result.jpg


In [6]:
# Attach the original and up-scaled paths of both pictures of every pair.
# NOTE(review): this cell overwrites its own input, so it is not idempotent. Reload
# classification_df from the CSV before running it a second time.
classification_df = get_pathes(classification_df, pics_df)
classification_df.iloc[[0, 1, 2, -3, -2, -1], :]

Unnamed: 0,name1,pic1,name2,pic2,similarity,path_x1_1,path_x4_1,path_x1_2,path_x4_2
0,Abel_Pacheco,1,Abel_Pacheco,4,1,dataset/Abel_Pacheco/Abel_Pacheco_0001.jpg,ESRGAN/output_x4/Abel_Pacheco_0001_result.jpg,dataset/Abel_Pacheco/Abel_Pacheco_0004.jpg,ESRGAN/output_x4/Abel_Pacheco_0004_result.jpg
1,Akhmed_Zakayev,1,Akhmed_Zakayev,2,1,dataset/Akhmed_Zakayev/Akhmed_Zakayev_0001.jpg,ESRGAN/output_x4/Akhmed_Zakayev_0001_result.jpg,dataset/Akhmed_Zakayev/Akhmed_Zakayev_0002.jpg,ESRGAN/output_x4/Akhmed_Zakayev_0002_result.jpg
2,Akhmed_Zakayev,1,Akhmed_Zakayev,3,1,dataset/Akhmed_Zakayev/Akhmed_Zakayev_0001.jpg,ESRGAN/output_x4/Akhmed_Zakayev_0001_result.jpg,dataset/Akhmed_Zakayev/Akhmed_Zakayev_0003.jpg,ESRGAN/output_x4/Akhmed_Zakayev_0003_result.jpg
1665,Jeremy_Greenstock,22,John_McCain,6,0,dataset/Jeremy_Greenstock/Jeremy_Greenstock_00...,ESRGAN/output_x4/Jeremy_Greenstock_0022_result...,dataset/John_McCain/John_McCain_0006.jpg,ESRGAN/output_x4/John_McCain_0006_result.jpg
1666,Lisa_Gottsegen,2,Paul_Bremer,7,0,dataset/Lisa_Gottsegen/Lisa_Gottsegen_0002.jpg,ESRGAN/output_x4/Lisa_Gottsegen_0002_result.jpg,dataset/Paul_Bremer/Paul_Bremer_0007.jpg,ESRGAN/output_x4/Paul_Bremer_0007_result.jpg
1667,Jacques_Rogge,8,Patty_Schnyder,1,0,dataset/Jacques_Rogge/Jacques_Rogge_0008.jpg,ESRGAN/output_x4/Jacques_Rogge_0008_result.jpg,dataset/Patty_Schnyder/Patty_Schnyder_0001.jpg,ESRGAN/output_x4/Patty_Schnyder_0001_result.jpg


# Recognition

In [7]:
def recognize_pics(classification_df, scales, threshold=0.9, recognizer=None, read_image=None):
    """Predict, for every picture pair and every scale, whether both pictures match.

    For each `scale` the pictures referenced by the columns `path_x{scale}_1` and
    `path_x{scale}_2` are loaded and compared, and the 0/1 prediction is stored in a
    new column `rec_x{scale}`.

    Parameters
    ----------
    classification_df : pandas.DataFrame
        Must contain `path_x{scale}_1` and `path_x{scale}_2` for every requested scale.
        It is modified in place: the `rec_x{scale}` columns are added to this frame.
    scales : iterable of int
        Scale factors to evaluate.
    threshold : float, default 0.9
        A pair is labelled 1 when the recognizer's value is strictly below this
        threshold, otherwise 0.
        NOTE(review): "lower means same person" suggests a distance-like score;
        confirm against Recognizer.get_best_similarity.
    recognizer : object, optional
        Any object exposing `get_best_similarity(photo1, photo2)`. When omitted a new
        `Recognizer()` is created; pass an existing one to avoid reloading the model.
    read_image : callable, optional
        Function that turns a path into an image. Defaults to `skimage.io.imread`.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in as `classification_df`.
    """
    initial_time = datetime.now()
    print(f'Started at {initial_time}')

    # Resolve the defaults lazily so callers can inject substitutes.
    if recognizer is None:
        recognizer = Recognizer()
    if read_image is None:
        read_image = imread

    def recognize(first_path, second_path):
        similarity = recognizer.get_best_similarity(read_image(first_path), read_image(second_path))
        return 1 if similarity < threshold else 0

    for scale in scales:
        start = datetime.now()
        print(f'Scale x{scale} started at {start}')
        first_paths = classification_df[f'path_x{scale}_1']
        second_paths = classification_df[f'path_x{scale}_2']
        # Iterate over the two path columns directly: `Series.ix` no longer exists in
        # pandas >= 1.0, and `apply(axis=1)` returns a DataFrame for an empty frame.
        classification_df[f'rec_x{scale}'] = [recognize(first_path, second_path)
                                              for first_path, second_path in zip(first_paths, second_paths)]
        print(f'Scale x{scale} is done in {datetime.now() - start}')
    print(f'Ended in {datetime.now() - initial_time}')
    return classification_df

In [9]:
# Long-running step: the recorded output below shows roughly 59 minutes for the x4 scale.
# recognize_pics returns the frame it was given, with the `rec_x{scale}` columns added.
df_result = recognize_pics(classification_df, scales)

Started at 2019-05-05 00:21:36.678274
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from RECOGNIZER/facenet_models/facenet/model-20180408-102900.ckpt-90
Scale x4 started at 2019-05-05 00:22:00.850208
Scale x4 is done in 0:58:48.980167
Ended in 0:59:13.152101


In [29]:
def counts_pct(col):
    """Return the share of each distinct value of `col` in percent, rounded to two decimals."""
    shares = col.value_counts(normalize=True)
    return (shares * 100).round(2)

# Keep only the ground truth and one prediction column per scale, then compare
# the class balance of the labels with that of the predictions.
result_columns = ['similarity'] + [f'rec_x{scale}' for scale in scales]
df_result = df_result[result_columns]
df_result.apply(counts_pct)

Unnamed: 0,similarity,rec_x4
1,50.0,62.95
0,50.0,37.05


In [38]:
# Confusion matrix in percent of each true class (every row sums to 100).
# Rows are normalised by their own totals instead of a hard-coded sample count,
# so the result stays correct when the number of pairs per class changes.
cm_counts = confusion_matrix(df_result['similarity'], df_result['rec_x4'])
cm_counts / cm_counts.sum(axis=1, keepdims=True) * 100

array([[58.63309353, 41.36690647],
       [15.4676259 , 84.5323741 ]])

In [45]:
# Distribution of the x4 predictions within each true label, in percent
# (the shares inside every `similarity` group sum to 100).
df_result.groupby('similarity')['rec_x4'].value_counts(normalize=True) * 100

similarity  rec_x4
0           0         58.633094
            1         41.366906
1           1         84.532374
            0         15.467626
Name: rec_x4, dtype: float64

In [42]:
# Overall accuracy of the x4 predictions in percent, rounded to two decimals.
round(accuracy_score(df_result['similarity'], df_result['rec_x4']) * 100, 2)

71.58

In [46]:
# Copy the x4 predictions next to the picture paths before exporting.
# NOTE(review): recognize_pics already adds `rec_x4` to classification_df in place,
# so this looks redundant; confirm before removing.
classification_df['rec_x4'] = df_result['rec_x4']

In [47]:
# Name the export after the configured model instead of repeating the literal,
# so a run with another `model_name` does not overwrite the ESRGAN results.
classification_df.to_csv(f'classification_df_{model_name}.csv')