# Dataset Split

In [12]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import random

In [13]:
labels = np.array(list(zip(os.listdir('LFW'), range(len(os.listdir('LFW'))))))
labels_dict = {labels[i][0]: int(labels[i][1]) for i in range(len(labels))}
labels_dict_save = {int(labels[i][1]): labels[i][0] for i in range(len(labels))}

In [14]:
X = list()
y = list()
for name in labels[:, 0]:
    path = os.listdir(os.path.join('LFW', name))
    X.extend(path)
    for image in path:
        y.append(labels_dict[image.split('0')[0][: -1]])

In [15]:
joined_lists = list(zip(X, y))
random.shuffle(joined_lists) # Shuffle "joined_lists" in place
X_shuffle, y_shuffle = zip(*joined_lists) # Undo joining
X_shuffle = np.array(X_shuffle)
y_shuffle = np.array(y_shuffle)

In [16]:
X_shuffle

array(['Mario_Cipollini_0002.jpg', 'Nadia_Petrova_0005.jpg',
       'Nanni_Moretti_0001.jpg', ..., 'Ariel_Sharon_0050.jpg',
       'Jack_Straw_0010.jpg', 'Joaquim_Rodriguez_0001.jpg'], dtype='<U44')

In [17]:
y_shuffle

array([3525, 3992, 4006, ...,  370, 2275, 2632])

In [20]:
pd.DataFrame(data={"Name": labels_dict_save.keys(), "Label": labels_dict_save.values()}).to_csv(os.path.join("0002", "labels.csv"))

In [21]:
pd.DataFrame(data={"Name": X_shuffle, "Label": y_shuffle}).to_csv(os.path.join("0002", "data.csv"))

# Dataset Read

In [22]:
from matplotlib import image

In [24]:
data = pd.read_csv(os.path.join("0002", "data.csv"), index_col=0)

In [26]:
X = list()
for name in data["Name"]:
    directory = name.split("0")[0][: -1]
    X.append(image.imread(os.path.join("LFW", directory, name)))
X = np.array(X)

In [27]:
X.shape

(13233, 250, 250, 3)

In [28]:
y = np.array(data["Label"])

In [29]:
y.shape

(13233,)

# Resnet 50

In [30]:
import tensorflow as tf
from tensorflow.keras.applications.resnet50 import ResNet50

In [123]:
resnet_model = ResNet50(weights='imagenet', input_shape=(250, 250, 3), include_top=False, pooling='avg')

In [32]:
# resnet_model.save('models/', save_format='tf')

In [33]:
for layer in resnet_model.layers[:]:
    layer.trainable = False

In [34]:
resnet_model.summary()

Model: "resnet50"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 250, 250, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 256, 256, 3)  0           ['input_1[0][0]']                
                                                                                                  
 conv1_conv (Conv2D)            (None, 125, 125, 64  9472        ['conv1_pad[0][0]']              
                                )                                                                 
                                                                                           

                                                                                                  
 conv2_block3_1_relu (Activatio  (None, 63, 63, 64)  0           ['conv2_block3_1_bn[0][0]']      
 n)                                                                                               
                                                                                                  
 conv2_block3_2_conv (Conv2D)   (None, 63, 63, 64)   36928       ['conv2_block3_1_relu[0][0]']    
                                                                                                  
 conv2_block3_2_bn (BatchNormal  (None, 63, 63, 64)  256         ['conv2_block3_2_conv[0][0]']    
 ization)                                                                                         
                                                                                                  
 conv2_block3_2_relu (Activatio  (None, 63, 63, 64)  0           ['conv2_block3_2_bn[0][0]']      
 n)       

                                                                                                  
 conv3_block3_1_relu (Activatio  (None, 32, 32, 128)  0          ['conv3_block3_1_bn[0][0]']      
 n)                                                                                               
                                                                                                  
 conv3_block3_2_conv (Conv2D)   (None, 32, 32, 128)  147584      ['conv3_block3_1_relu[0][0]']    
                                                                                                  
 conv3_block3_2_bn (BatchNormal  (None, 32, 32, 128)  512        ['conv3_block3_2_conv[0][0]']    
 ization)                                                                                         
                                                                                                  
 conv3_block3_2_relu (Activatio  (None, 32, 32, 128)  0          ['conv3_block3_2_bn[0][0]']      
 n)       

                                                                                                  
 conv4_block2_1_bn (BatchNormal  (None, 16, 16, 256)  1024       ['conv4_block2_1_conv[0][0]']    
 ization)                                                                                         
                                                                                                  
 conv4_block2_1_relu (Activatio  (None, 16, 16, 256)  0          ['conv4_block2_1_bn[0][0]']      
 n)                                                                                               
                                                                                                  
 conv4_block2_2_conv (Conv2D)   (None, 16, 16, 256)  590080      ['conv4_block2_1_relu[0][0]']    
                                                                                                  
 conv4_block2_2_bn (BatchNormal  (None, 16, 16, 256)  1024       ['conv4_block2_2_conv[0][0]']    
 ization) 

 conv4_block5_1_conv (Conv2D)   (None, 16, 16, 256)  262400      ['conv4_block4_out[0][0]']       
                                                                                                  
 conv4_block5_1_bn (BatchNormal  (None, 16, 16, 256)  1024       ['conv4_block5_1_conv[0][0]']    
 ization)                                                                                         
                                                                                                  
 conv4_block5_1_relu (Activatio  (None, 16, 16, 256)  0          ['conv4_block5_1_bn[0][0]']      
 n)                                                                                               
                                                                                                  
 conv4_block5_2_conv (Conv2D)   (None, 16, 16, 256)  590080      ['conv4_block5_1_relu[0][0]']    
                                                                                                  
 conv4_blo

                                                                  'conv5_block1_3_bn[0][0]']      
                                                                                                  
 conv5_block1_out (Activation)  (None, 8, 8, 2048)   0           ['conv5_block1_add[0][0]']       
                                                                                                  
 conv5_block2_1_conv (Conv2D)   (None, 8, 8, 512)    1049088     ['conv5_block1_out[0][0]']       
                                                                                                  
 conv5_block2_1_bn (BatchNormal  (None, 8, 8, 512)   2048        ['conv5_block2_1_conv[0][0]']    
 ization)                                                                                         
                                                                                                  
 conv5_block2_1_relu (Activatio  (None, 8, 8, 512)   0           ['conv5_block2_1_bn[0][0]']      
 n)       

In [35]:
X_features = resnet_model.predict(X)

In [36]:
np.savetxt(os.path.join("0002", "X_features.txt"), X_features)

In [20]:
# X_features = np.loadtxt(os.path.join("0002", 'X_features.txt'))

# KNN

In [37]:
from sklearn.neighbors import KNeighborsClassifier

In [161]:
neigh = KNeighborsClassifier(n_neighbors=3)

In [162]:
neigh.fit(X_features, y)

KNeighborsClassifier(n_neighbors=3)

In [163]:
y_predictions = neigh.predict(X_features)

# Accuracy

In [164]:
labels = pd.read_csv(os.path.join("0002", "labels.csv"), index_col=0)
label_dict = labels.set_index('Name').to_dict()['Label']

In [165]:
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score

In [166]:
accuracy_score(y, y_predictions)

0.4236378750094461

In [169]:
lr_probs = neigh.predict_proba(X_features)

array([3525, 3992, 4006, ...,  370, 2275, 2632], dtype=int64)

In [170]:
lr_probs

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [174]:
roc_auc_score(y, lr_probs, multi_class='ovr')

0.9998693508001638

In [167]:
y

array([3525, 3992, 4006, ...,  370, 2275, 2632], dtype=int64)

In [168]:
y_predictions

array([1052, 2501,  911, ...,  370, 2275, 2632], dtype=int64)

In [120]:
roc_auc_ovr = dict()
for i in range(len(labels)):
    # Gets the class
    c = y[i]
    
    # Prepares an auxiliar dataframe to help with the plots
    df_aux = data.copy()
    df_aux.loc[data['Label'] == c, ['Label']] = 1
    df_aux.loc[data['Label'] != c, ['Label']] = 0
    df_aux['prob'] = lr_probs[:, i]
    df_aux = df_aux.reset_index(drop = True)
    
    # Calculates the ROC AUC OvR
    roc_auc_ovr[c] = roc_auc_score(df_aux['Label'], df_aux['prob'], average='macro')

In [121]:
np.mean(np.array(list(roc_auc_ovr.values())))

0.4999079057999185

# Return Neighors

In [124]:
data

Unnamed: 0,Name,Label
0,Mario_Cipollini_0002.jpg,3525
1,Nadia_Petrova_0005.jpg,3992
2,Nanni_Moretti_0001.jpg,4006
3,Tommy_Haas_0001.jpg,5401
4,Malcolm_Glazer_0001.jpg,3440
...,...,...
13228,Skip_Prosser_0001.jpg,5048
13229,Thierry_Mariani_0001.jpg,5319
13230,Ariel_Sharon_0050.jpg,370
13231,Jack_Straw_0010.jpg,2275


In [146]:
neighbours = neigh.kneighbors(X_features[0].reshape(1, -1))

In [152]:
data.iloc[neighbours[1][0], :]

Unnamed: 0,Name,Label
0,Mario_Cipollini_0002.jpg,3525
6814,Colleen_OClair_0001.jpg,1052
2056,George_W_Bush_0200.jpg,1870
