# Implementation
InceptionV3 with $N=70$

In [1]:
# imports
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import os
import cv2

import keras
import tensorflow as tf

from keras.applications import InceptionV3

In [2]:
# file paths
path = os.getcwd()
path = path[:-4]
data_dir = path + "/data/lfw-deepfunneled"

In [3]:
# get list of LFW names to use: intersection of names in 'people.csv' with names in 'os.listdir(data_dir)'
# specify min # of images each LFW subject should have to be considered in experiment: N
# N yields k classes (LFW subjects)
people = pd.read_csv(path + "/data/people.csv")
names = [item for item in people["name"].tolist() if item in os.listdir(data_dir)] # intersection of names
people = people[people["name"].isin(names)] # filter people DataFrame to intersection

N = 70
people = people[people.images > N]

k = len(people)
print(f"k = {k} classes")

k = 7 classes


In [4]:
# model architecture: pre-trained InceptionV3 transfer learning to k subjects of LFW dataset
inceptionv3_transfer = InceptionV3(include_top=False, classes=k, pooling='avg', weights='imagenet')
model = keras.Sequential(layers=(inceptionv3_transfer, keras.layers.Dense(k, activation='softmax')))
model.compile(loss="categorical_crossentropy", optimizer=keras.optimizers.Adam(learning_rate=1e-4), metrics=['accuracy'])

## Data Preprocessing $\rightarrow$ $X_{tr}, y_{tr}, X_{val}, y_{val}$

In [5]:
# restructure DataFrame
people = people.rename(columns={'images': 'num_images'})
people = people.set_index('name')

# process 250x250 image data as 299x299 image elements under respective person
image_col = []
for person in people.iterrows():
    person = person[1]
    image_list = []
    for i in range(int(person["num_images"])):
        image_path = data_dir + '/' + person.name + '/' + person.name + '_' +  "{:04d}".format(i+1) + '.jpg'
        image = cv2.imread(image_path)
        image = cv2.resize(image, (299, 299))
        image_list.append(image)
    image_col.append(image_list)
people["images"] = image_col

# num_images == len(images) for each person
assert ([len(x) for x in people["images"]] == people["num_images"].values).all()

# construct dataset DataFrame (precursor to X, y) with each row sample as the individual images
dataset = [] # element: (image, person, train or test)
for person in people.iterrows():
    person = person[1]
    image_list = person["images"]
    num_images = len(image_list)
    num_test_images = int(np.floor(num_images * 0.2))
    test_i = np.random.choice(num_images, num_test_images)
    for i in range(num_images):
        image = image_list[i]
        element = [image, person.name, "test" if i in test_i else "train"]
        dataset.append(element)
dataset = pd.DataFrame(dataset, columns=["image", "person", "split"])

# one-hot encode 'person' (will be used as y)
dataset = pd.get_dummies(dataset, columns=['person'])

In [6]:
dataset.head()

Unnamed: 0,image,split,person_Ariel_Sharon,person_Colin_Powell,person_Donald_Rumsfeld,person_George_W_Bush,person_Gerhard_Schroeder,person_Hugo_Chavez,person_Tony_Blair
0,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...",train,False,False,False,False,False,True,False
1,"[[[0, 4, 0], [0, 4, 0], [0, 4, 0], [0, 4, 0], ...",train,False,False,False,False,False,True,False
2,"[[[0, 1, 0], [10, 12, 10], [5, 7, 6], [4, 5, 4...",train,False,False,False,False,False,True,False
3,"[[[0, 0, 4], [0, 0, 3], [0, 0, 2], [0, 0, 1], ...",train,False,False,False,False,False,True,False
4,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...",train,False,False,False,False,False,True,False


In [7]:
# specify X, y
X_train = dataset[dataset["split"] == "train"]["image"].tolist()
X_train = np.asarray(X_train).astype('float32')
X_test = dataset[dataset["split"] == "test"]["image"].tolist()
X_test = np.asarray(X_test).astype('float32')

y_train = dataset[dataset["split"] == "train"].drop(['image', 'split'], axis=1)
y_test = dataset[dataset["split"] == "test"].drop(['image', 'split'], axis=1)

## Model Training

In [8]:
model.fit(X_train, y_train, shuffle=True, epochs=10)

Epoch 1/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m213s[0m 6s/step - accuracy: 0.5724 - loss: 1.2069
Epoch 2/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m220s[0m 6s/step - accuracy: 0.9952 - loss: 0.0407
Epoch 3/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m221s[0m 6s/step - accuracy: 0.9981 - loss: 0.0134
Epoch 4/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m227s[0m 7s/step - accuracy: 0.9914 - loss: 0.0315
Epoch 5/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m231s[0m 7s/step - accuracy: 0.9901 - loss: 0.0410
Epoch 6/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m229s[0m 7s/step - accuracy: 0.9995 - loss: 0.0083
Epoch 7/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m227s[0m 7s/step - accuracy: 0.9937 - loss: 0.0186
Epoch 8/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m228s[0m 7s/step - accuracy: 0.9985 - loss: 0.0066
Epoch 9/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x178ed1810>

## Model Evaluation

In [9]:
loss, acc = model.evaluate(X_test, y_test)

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 1s/step - accuracy: 0.9958 - loss: 0.0282


In [12]:
print(f"Loss: {loss}\tAccuracy: {acc}")

Loss: 0.036618828773498535	Accuracy: 0.991304337978363
