# InceptionV3 Feature Extraction
Fill /data/features/ folder with spreadsheets of features.

In [1]:
# imports
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import os
import cv2

import keras
import tensorflow as tf

from keras.applications import InceptionV3
from arcface import ArcFace
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# file paths
path = os.getcwd()
path = path[:-4]
data_dir = path + "/data/lfw-deepfunneled"

In [3]:
matchPairsTrain, mismatchPairsTrain = pd.read_csv(path + "/data/matchpairsDevTrain.csv"), pd.read_csv(path + "/data/mismatchpairsDevTrain.csv")
matchPairsTest, mismatchPairsTest = pd.read_csv(path + "/data/matchpairsDevTest.csv"), pd.read_csv(path + "/data/mismatchpairsDevTest.csv")

In [4]:
matchPairsTrain

Unnamed: 0,name,imagenum1,imagenum2
0,Aaron_Peirsol,1,2
1,Aaron_Peirsol,3,4
2,Aaron_Sorkin,1,2
3,Abdel_Nasser_Assidi,1,2
4,Abdullah,1,3
...,...,...,...
1095,Zhu_Rongji,1,9
1096,Zhu_Rongji,4,7
1097,Zhu_Rongji,4,8
1098,Zhu_Rongji,5,6


In [5]:
mismatchPairsTrain

Unnamed: 0,name,imagenum1,name.1,imagenum2
0,AJ_Cook,1,Marsha_Thomason,1
1,Aaron_Sorkin,2,Frank_Solich,5
2,Abdel_Nasser_Assidi,2,Hilary_McKay,1
3,Abdoulaye_Wade,4,Linda_Dano,1
4,Abdul_Rahman,1,Magui_Serna,1
...,...,...,...,...
1095,Tom_Vilsack,1,Wayne_Ferreira,5
1096,Trisha_Meili,1,Vladimiro_Montesinos,3
1097,Ty_Votaw,1,Wayne_Allard,1
1098,Vytas_Danelius,1,Zaini_Abdullah,1


In [6]:
# verify all pairs are in 'os.listdir(data_dir)'
assert len(matchPairsTrain) == len([item for item in matchPairsTrain.name.tolist() if item in os.listdir(data_dir)])
assert len(matchPairsTest) == len([item for item in matchPairsTest.name.tolist() if item in os.listdir(data_dir)])
assert len(mismatchPairsTrain["name"])  == len([item for item in mismatchPairsTrain["name"].tolist() if item in os.listdir(data_dir)])
assert len(mismatchPairsTest["name"])  == len([item for item in mismatchPairsTest["name"].tolist() if item in os.listdir(data_dir)])
assert len(mismatchPairsTrain["name.1"])  == len([item for item in mismatchPairsTrain["name.1"].tolist() if item in os.listdir(data_dir)])
assert len(mismatchPairsTest["name.1"])  == len([item for item in mismatchPairsTest["name.1"].tolist() if item in os.listdir(data_dir)])

In [7]:
# model architecture: pre-trained InceptionV3 transfer learning
inceptionv3 = InceptionV3(include_top=True, weights='imagenet')

## Data Preprocessing $\rightarrow$ Pairs DataFrame w/ Image Data & InceptionV3 Feature Extraction

In [8]:
def getImg(name, num):
    imgPath = data_dir + '/' + name + '/' + name + '_' +  "{:04d}".format(num) + '.jpg'
    img = cv2.imread(imgPath)
    img = cv2.resize(img, (299, 299))
    return img

def getFeatures(x):
    return inceptionv3(tf.expand_dims(x, axis=0))[0].numpy()

def matchPairs(X):
    df = X.copy(deep=False)
    imageCol1, imageCol2 = [], []
    f1Col, f2Col = [], []
    for _ , person in df.iterrows():
        name = person["name"]
        img1, img2 = person.imagenum1, person.imagenum2
        img1, img2 = getImg(name, img1), getImg(name, img2)
        imageCol1.append(img1)
        imageCol2.append(img2)
        f1Col.append(getFeatures(img1))
        f2Col.append(getFeatures(img2))
    df["image1"] = imageCol1
    df["image2"] = imageCol2
    df["image1Features"] = f1Col
    df["image2Features"] = f2Col
    return df

def mismatchPairs(X):
    df = X.copy(deep=False)
    imageCol1, imageCol2 = [], []
    f1Col, f2Col = [], []
    for _ , person in df.iterrows():
        name, name1 = person["name"], person["name.1"]
        img1, img2 = person.imagenum1, person.imagenum2
        img1, img2 = getImg(name, img1), getImg(name1, img2)
        imageCol1.append(img1)
        imageCol2.append(img2)
        f1Col.append(getFeatures(img1))
        f2Col.append(getFeatures(img2))
    df["image1"] = imageCol1
    df["image2"] = imageCol2
    df["image1Features"] = f1Col
    df["image2Features"] = f2Col
    return df

In [9]:
matchPairsTrain = matchPairs(matchPairsTrain)
matchPairsTest = matchPairs(matchPairsTest)

In [10]:
matchPairsTest.tail()

Unnamed: 0,name,imagenum1,imagenum2,image1,image2,image1Features,image2Features
495,Winona_Ryder,6,15,"[[[8, 3, 0], [8, 3, 0], [7, 2, 0], [7, 2, 0], ...","[[[0, 0, 3], [0, 0, 3], [0, 0, 3], [0, 0, 3], ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
496,Winona_Ryder,19,21,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...","[[[0, 0, 6], [0, 0, 4], [0, 0, 3], [0, 0, 2], ...","[0.0, 0.0, 0.0, 0.0, 0.0, 1.2513033e-38, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
497,Yevgeny_Kafelnikov,3,4,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...","[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
498,Yoriko_Kawaguchi,3,10,"[[[247, 221, 205], [247, 221, 205], [247, 221,...","[[[0, 0, 4], [0, 0, 3], [0, 0, 1], [1, 0, 0], ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
499,Zoran_Djindjic,3,4,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...","[[[0, 0, 0], [0, 0, 0], [0, 0, 1], [0, 0, 1], ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.308...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [11]:
mismatchPairsTrain = mismatchPairs(mismatchPairsTrain)
mismatchPairsTest = mismatchPairs(mismatchPairsTest)

In [12]:
mismatchPairsTest.tail()

Unnamed: 0,name,imagenum1,name.1,imagenum2,image1,image2,image1Features,image2Features
495,Susan_Whelan,1,Wolfgang_Schneiderhan,1,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...","[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...","[0.0, 3.0844205e-32, 0.0, 0.0, 0.0, 3.3645422e...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
496,Takeo_Fukui,1,Will_Ofenheusle,1,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...","[[[2, 0, 0], [2, 0, 0], [2, 0, 0], [2, 0, 0], ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.741...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
497,Tamara_Mowry,1,Zach_Parise,1,"[[[0, 2, 0], [146, 157, 155], [196, 210, 208],...","[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...","[0.0, 0.0, 0.0, 0.0, 0.0, 4.28275e-34, 0.0, 0....","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
498,Tatiana_Kennedy_Schlossberg,1,Thomas_Watjen,1,"[[[0, 2, 0], [0, 1, 0], [1, 0, 0], [3, 0, 0], ...","[[[5, 0, 0], [5, 0, 0], [5, 0, 0], [5, 0, 0], ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
499,Todd_Petit,1,Vicente_Fernandez,3,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...","[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [20]:
matchPairsTrain.to_csv(path + "/data/features/matchPairsTrain.csv")
matchPairsTest.to_csv(path + "/data/features/matchPairsTest.csv")
mismatchPairsTrain.to_csv(path + "/data/features/mismatchPairsTrain.csv")
mismatchPairsTest.to_csv(path + "/data/features/mismatchPairsTest.csv")

## Cosine Similarity

In [16]:
def sim(x1, x2):
    return cosine_similarity(tf.reshape(x1, (1, -1)), tf.reshape(x2, (1, -1)))

In [17]:
sim(f1, f2)

array([[1.]], dtype=float32)