# Dataset Split

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import random

In [2]:
# Discover the identities: every sub-directory of 'LFW' is treated as one person.
# The directory is listed once and sorted so that the name -> label mapping is
# reproducible; os.listdir() returns entries in arbitrary order.
person_names = sorted(
    entry for entry in os.listdir('LFW')
    # Plain files at the top level would make the per-person listing fail later.
    if os.path.isdir(os.path.join('LFW', entry))
)
# Later cells index `labels[:, 0]`, so keep the 2-D (name, label) layout even
# when the directory is empty. Both columns are stored as strings by NumPy,
# hence the int() cast when building the dictionary.
labels = np.array(
    [(person, index) for index, person in enumerate(person_names)], dtype=str
).reshape(-1, 2)
labels_dict = {person: int(index) for person, index in labels}

In [3]:
# Collect every image file name (X) together with the integer label of the
# person directory it was found in (y). X and y stay index-aligned.
X = list()
y = list()
for name in labels[:, 0]:
    # Sorted so the collected order does not depend on os.listdir()'s arbitrary order.
    image_files = sorted(os.listdir(os.path.join('LFW', name)))
    X.extend(image_files)
    # The label is taken from the directory being walked rather than parsed
    # back out of the file name: splitting the file name on '0' breaks for any
    # name containing a zero or an image index that does not start with '0'.
    y.extend([labels_dict[name]] * len(image_files))

In [4]:
# Shuffle file names and labels as pairs so every image keeps its own label.
# A dedicated, seeded generator makes the resulting train/test split
# reproducible across kernel restarts without touching the global random state.
SHUFFLE_SEED = 42
joined_lists = list(zip(X, y))
random.Random(SHUFFLE_SEED).shuffle(joined_lists)  # Shuffle "joined_lists" in place
# zip(*[]) yields nothing to unpack, so fall back to two empty sequences.
X_shuffle, y_shuffle = zip(*joined_lists) if joined_lists else ((), ())  # Undo joining
X_shuffle = np.array(X_shuffle)
y_shuffle = np.array(y_shuffle)

In [5]:
# Boundary between the two splits: the first 80% of the shuffled samples are
# used for training, the remainder for testing (int() truncates toward zero).
n_samples = len(X_shuffle)
split_index = int(n_samples * 0.8)

In [6]:
# Cut names and labels at the same index so the two arrays stay aligned.
train_part = slice(None, split_index)
test_part = slice(split_index, None)
X_train = X_shuffle[train_part]
X_test = X_shuffle[test_part]
y_train = y_shuffle[train_part]
y_test = y_shuffle[test_part]

In [7]:
# Persist the person-name -> integer-label mapping.
# The output directory is created first: to_csv() does not create missing
# parent directories and would otherwise fail on a fresh checkout.
os.makedirs("0001", exist_ok=True)
# Keys and values are materialised as lists so DataFrame construction does not
# depend on how pandas treats dict view objects.
pd.DataFrame(
    data={"Name": list(labels_dict.keys()), "Label": list(labels_dict.values())}
).to_csv(os.path.join("0001", "labels.csv"))

In [8]:
# Write one manifest per split: image file name plus its integer label.
# The default DataFrame index is written as the first CSV column.
train_manifest = pd.DataFrame(data={"Name": X_train, "Label": y_train})
train_manifest.to_csv(os.path.join("0001", "train.csv"))
test_manifest = pd.DataFrame(data={"Name": X_test, "Label": y_test})
test_manifest.to_csv(os.path.join("0001", "test.csv"))

# Dataset Read

In [9]:
from matplotlib import image

In [10]:
# Reload the manifests written by the "Dataset Split" section. The first CSV
# column holds the saved DataFrame index, so it is restored via index_col=0.
# Note: `labels` is rebound here from the NumPy array built earlier to a DataFrame.
split_dir = "0001"
train = pd.read_csv(os.path.join(split_dir, "train.csv"), index_col=0)
test = pd.read_csv(os.path.join(split_dir, "test.csv"), index_col=0)
labels = pd.read_csv(os.path.join(split_dir, "labels.csv"), index_col=0)

In [None]:
# Load every training image listed in the manifest into a single array.
# Assumes the standard LFW layout "LFW/<Person_Name>/<Person_Name>_<NNNN>.jpg":
# the person directory is everything before the last underscore of the file
# name. Splitting on the first "0" instead would pick the wrong directory for
# names containing a zero or image indices that do not start with "0".
# NOTE: X_train is rebound here from the file-name array to the pixel array.
X_train = np.array([
    image.imread(os.path.join("LFW", name.rsplit("_", 1)[0], name))
    for name in train["Name"]
])

In [None]:
# Sanity check on the loaded training images.
# Expected to be (n_train_images, 250, 250, 3) to match the ResNet50
# input_shape used further down — TODO confirm on the actual data.
X_train.shape

In [None]:
# Load every test image listed in the manifest into a single array.
# Assumes the standard LFW layout "LFW/<Person_Name>/<Person_Name>_<NNNN>.jpg":
# the person directory is everything before the last underscore of the file
# name. Splitting on the first "0" instead would pick the wrong directory for
# names containing a zero or image indices that do not start with "0".
# NOTE: X_test is rebound here from the file-name array to the pixel array.
X_test = np.array([
    image.imread(os.path.join("LFW", name.rsplit("_", 1)[0], name))
    for name in test["Name"]
])

In [None]:
# Sanity check on the loaded test images.
# Expected to be (n_test_images, 250, 250, 3) to match the ResNet50
# input_shape used further down — TODO confirm on the actual data.
X_test.shape

In [None]:
# Pull the integer class labels out of each manifest as NumPy arrays,
# in the same row order as the images loaded from the "Name" column.
y_train, y_test = (np.array(manifest["Label"]) for manifest in (train, test))

In [None]:
# Number of training labels; should equal the first dimension of X_train.
y_train.shape

In [None]:
# Number of test labels; should equal the first dimension of X_test.
y_test.shape

# Resnet 50

In [None]:
import tensorflow as tf
from tensorflow.keras.applications.resnet50 import ResNet50

In [None]:
# ImageNet-pretrained ResNet50 used purely as a fixed feature extractor:
# the classification head is dropped (include_top=False) and the final
# feature maps are global-average-pooled (pooling='avg'), giving one feature
# vector per 250x250 RGB input image.
resnet_model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(250, 250, 3),
    pooling='avg',
)

In [None]:
# resnet_model.save('models/', save_format='tf')

In [None]:
# Mark every layer as non-trainable: the network is only used for inference
# here, so its pretrained weights must stay untouched.
for frozen_layer in resnet_model.layers:
    frozen_layer.trainable = False

In [None]:
# Print the layer-by-layer architecture and parameter counts of the extractor.
resnet_model.summary()

In [None]:
def extract_features(model, images, batch_size=32):
    """Run `images` through `model` and return one feature vector per image.

    The ImageNet weights were trained on inputs transformed by
    `resnet50.preprocess_input`, so the same transform must be applied before
    inference; feeding raw pixel values yields degraded features.

    Preprocessing is done batch by batch so that the whole image set is never
    held in memory as float32 at once.

    Args:
        model: Keras model producing a 2-D (batch, features) output.
        images: array of images indexed along its first axis.
            Assumes RGB pixel values in the 0-255 range — TODO confirm
            (matplotlib returns 0-1 floats for PNG files).
        batch_size: number of images processed per forward pass.

    Returns:
        np.ndarray of shape (len(images), n_features).
    """
    preprocess = tf.keras.applications.resnet50.preprocess_input
    feature_batches = []
    for start in range(0, len(images), batch_size):
        # np.array(...) always copies, so the in-place preprocessing of float
        # inputs can never modify the caller's image array.
        batch = np.array(images[start:start + batch_size], dtype="float32")
        feature_batches.append(model.predict_on_batch(preprocess(batch)))
    if not feature_batches:
        # No images: return an empty, correctly shaped feature matrix.
        return np.empty((0, model.output_shape[-1]), dtype="float32")
    return np.concatenate(feature_batches, axis=0)


X_train_features = extract_features(resnet_model, X_train)
X_test_features = extract_features(resnet_model, X_test)

In [None]:
# Cache the extracted features as plain-text matrices (one row per image) so
# the expensive ResNet forward pass does not have to be repeated.
train_features_path = os.path.join("0001", "X_train_features.txt")
test_features_path = os.path.join("0001", "X_test_features.txt")
np.savetxt(train_features_path, X_train_features)
np.savetxt(test_features_path, X_test_features)

In [None]:
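# Optional: reload the cached features instead of recomputing them.
# X_train_features = np.loadtxt(os.path.join("0001", "X_train_features.txt"))
# X_test_features = np.loadtxt(os.path.join("0001", "X_test_features.txt"))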

# KNN

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score

In [None]:
# k-nearest-neighbours classifier over the ResNet feature vectors;
# each prediction is a vote among the 3 closest training samples.
N_NEIGHBORS = 3
neigh = KNeighborsClassifier(n_neighbors=N_NEIGHBORS)

In [None]:
# Fit the classifier on the training feature vectors and their identity labels.
neigh.fit(X_train_features, y_train)

In [None]:
# Predict an identity label for every test feature vector.
y_predictions = neigh.predict(X_test_features)

# Accuracy

In [None]:
# Fraction of test images whose predicted identity matches the true label.
accuracy_score(y_test, y_predictions)

In [None]:
# ns_probs = [0 for _ in range(len(y_test))]
# lr_probs = neigh.predict_proba(X_test_features)

# ns_fpr, ns_tpr, _ = roc_curve(y_test, ns_probs)
# lr_fpr, lr_tpr, _ = roc_curve(y_test, lr_probs)

# plt.plot(ns_fpr, ns_tpr, linestyle='--', label='No Skill')
# plt.plot(lr_fpr, lr_tpr, marker='.', label='Logistic')
# # axis labels
# plt.xlabel('False Positive Rate')
# plt.ylabel('True Positive Rate')
# # show the legend
# plt.legend()
# # show the plot
# plt.show()