# Dataset Split

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import random

In [49]:
labels = np.array(list(zip(os.listdir('LFW'), range(len(os.listdir('LFW'))))))
labels_dict = {labels[i][0]: int(labels[i][1]) for i in range(len(labels))}

In [50]:
X = list()
y = list()
for name in labels[:, 0]:
    path = os.listdir(os.path.join('LFW', name))
    X.extend(path)
    for image in path:
        y.append(labels_dict[image.split('0')[0][: -1]])

In [52]:
joined_lists = list(zip(X, y))
random.shuffle(joined_lists) # Shuffle "joined_lists" in place
X_shuffle, y_shuffle = zip(*joined_lists) # Undo joining
X_shuffle = np.array(X_shuffle)
y_shuffle = np.array(y_shuffle)

In [53]:
split_index = int(len(X_shuffle) * 0.8)

In [54]:
X_train, X_test = X_shuffle[: split_index], X_shuffle[split_index: ]
y_train, y_test = y_shuffle[: split_index], y_shuffle[split_index: ]

In [55]:
pd.DataFrame(data={'Name': labels_dict.keys(), 'Label': labels_dict.values()}).to_csv('labels.csv')

In [56]:
pd.DataFrame(data={'Name': X_train, 'Label': y_train}).to_csv('train.csv')
pd.DataFrame(data={'Name': X_test, 'Label': y_test}).to_csv('test.csv')

# Dataset Read

In [2]:
from matplotlib import image

In [3]:
train = pd.read_csv('train.csv', index_col=0)
test = pd.read_csv('test.csv',  index_col=0)
labels = pd.read_csv('labels.csv', index_col=0)

In [4]:
X_train = list()
for name in train['Name']:
    directory = name.split('0')[0][: -1]
    X_train.append(image.imread(os.path.join('LFW', directory, name)))
X_train = np.array(X_train)

In [5]:
X_train.shape

(10586, 250, 250, 3)

In [6]:
X_test = list()
for name in test['Name']:
    directory = name.split('0')[0][: -1]
    X_test.append(image.imread(os.path.join('LFW', directory, name)))
X_test = np.array(X_test)

In [7]:
X_test.shape

(2647, 250, 250, 3)

In [8]:
y_train = np.array(train['Label'])
y_test = np.array(test['Label'])

In [9]:
y_train.shape

(10586,)

In [10]:
y_test.shape

(2647,)

# Resnet 50

In [11]:
import tensorflow as tf
from tensorflow.keras.applications.resnet50 import ResNet50

In [12]:
resnet_model = ResNet50(weights='imagenet', input_shape=(250, 250, 3), include_top=False, pooling='avg')

In [13]:
# resnet_model.save('models/', save_format='tf')

In [14]:
for layer in resnet_model.layers[:]:
    layer.trainable = False

In [15]:
resnet_model.summary()

Model: "resnet50"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 250, 250, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 256, 256, 3)  0           ['input_1[0][0]']                
                                                                                                  
 conv1_conv (Conv2D)            (None, 125, 125, 64  9472        ['conv1_pad[0][0]']              
                                )                                                                 
                                                                                           

                                                                                                  
 conv2_block3_1_relu (Activatio  (None, 63, 63, 64)  0           ['conv2_block3_1_bn[0][0]']      
 n)                                                                                               
                                                                                                  
 conv2_block3_2_conv (Conv2D)   (None, 63, 63, 64)   36928       ['conv2_block3_1_relu[0][0]']    
                                                                                                  
 conv2_block3_2_bn (BatchNormal  (None, 63, 63, 64)  256         ['conv2_block3_2_conv[0][0]']    
 ization)                                                                                         
                                                                                                  
 conv2_block3_2_relu (Activatio  (None, 63, 63, 64)  0           ['conv2_block3_2_bn[0][0]']      
 n)       

                                                                                                  
 conv3_block3_1_relu (Activatio  (None, 32, 32, 128)  0          ['conv3_block3_1_bn[0][0]']      
 n)                                                                                               
                                                                                                  
 conv3_block3_2_conv (Conv2D)   (None, 32, 32, 128)  147584      ['conv3_block3_1_relu[0][0]']    
                                                                                                  
 conv3_block3_2_bn (BatchNormal  (None, 32, 32, 128)  512        ['conv3_block3_2_conv[0][0]']    
 ization)                                                                                         
                                                                                                  
 conv3_block3_2_relu (Activatio  (None, 32, 32, 128)  0          ['conv3_block3_2_bn[0][0]']      
 n)       

                                                                                                  
 conv4_block2_1_bn (BatchNormal  (None, 16, 16, 256)  1024       ['conv4_block2_1_conv[0][0]']    
 ization)                                                                                         
                                                                                                  
 conv4_block2_1_relu (Activatio  (None, 16, 16, 256)  0          ['conv4_block2_1_bn[0][0]']      
 n)                                                                                               
                                                                                                  
 conv4_block2_2_conv (Conv2D)   (None, 16, 16, 256)  590080      ['conv4_block2_1_relu[0][0]']    
                                                                                                  
 conv4_block2_2_bn (BatchNormal  (None, 16, 16, 256)  1024       ['conv4_block2_2_conv[0][0]']    
 ization) 

 conv4_block5_1_conv (Conv2D)   (None, 16, 16, 256)  262400      ['conv4_block4_out[0][0]']       
                                                                                                  
 conv4_block5_1_bn (BatchNormal  (None, 16, 16, 256)  1024       ['conv4_block5_1_conv[0][0]']    
 ization)                                                                                         
                                                                                                  
 conv4_block5_1_relu (Activatio  (None, 16, 16, 256)  0          ['conv4_block5_1_bn[0][0]']      
 n)                                                                                               
                                                                                                  
 conv4_block5_2_conv (Conv2D)   (None, 16, 16, 256)  590080      ['conv4_block5_1_relu[0][0]']    
                                                                                                  
 conv4_blo

                                                                  'conv5_block1_3_bn[0][0]']      
                                                                                                  
 conv5_block1_out (Activation)  (None, 8, 8, 2048)   0           ['conv5_block1_add[0][0]']       
                                                                                                  
 conv5_block2_1_conv (Conv2D)   (None, 8, 8, 512)    1049088     ['conv5_block1_out[0][0]']       
                                                                                                  
 conv5_block2_1_bn (BatchNormal  (None, 8, 8, 512)   2048        ['conv5_block2_1_conv[0][0]']    
 ization)                                                                                         
                                                                                                  
 conv5_block2_1_relu (Activatio  (None, 8, 8, 512)   0           ['conv5_block2_1_bn[0][0]']      
 n)       

In [16]:
X_train_features = resnet_model.predict(X_train)
X_test_features = resnet_model.predict(X_test)

In [26]:
np.savetxt("X_train_features.txt", X_train_features)
np.savetxt("X_test_features.txt", X_test_features)

In [28]:
# X_train_features = np.loadtxt('X_train_features.txt')
# X_test_features = np.loadtxt('X_test_features.txt')

# KNN

In [33]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score

In [34]:
neigh = KNeighborsClassifier(n_neighbors=3)

In [36]:
neigh.fit(X_train_features, y_train)

KNeighborsClassifier(n_neighbors=3)

In [40]:
y_predictions = neigh.predict(X_test_features)

# Accuracy

In [41]:
accuracy_score(y_test, y_predictions)

0.08386853041178693

In [45]:
# ns_probs = [0 for _ in range(len(y_test))]
# lr_probs = neigh.predict_proba(X_test_features)

# ns_fpr, ns_tpr, _ = roc_curve(y_test, ns_probs)
# lr_fpr, lr_tpr, _ = roc_curve(y_test, lr_probs)

# plt.plot(ns_fpr, ns_tpr, linestyle='--', label='No Skill')
# plt.plot(lr_fpr, lr_tpr, marker='.', label='Logistic')
# # axis labels
# plt.xlabel('False Positive Rate')
# plt.ylabel('True Positive Rate')
# # show the legend
# plt.legend()
# # show the plot
# plt.show()

ValueError: multiclass format is not supported