In [38]:
import os
import pickle
from tensorflow.keras.preprocessing import image
from keras_vggface.utils import preprocess_input
from keras_vggface.vggface import VGGFace
import numpy as np
from tqdm import tqdm

In [39]:
# os.listdir returns entries in arbitrary order and also lists plain files.
# Keep only sub-directories and sort them so that `actors` (and everything
# derived from it) has the same order on every run and operating system.
actors=sorted(
    entry for entry in os.listdir('celeb_dataset')
    if os.path.isdir(os.path.join('celeb_dataset',entry))
)

In [40]:
## Build the list of file paths for every actor/actress image present in the dataset.
filenames=[]
for actor in actors:
    actor_dir=os.path.join('celeb_dataset',actor)
    # A stray non-directory entry at this level would make os.listdir raise; skip it.
    if not os.path.isdir(actor_dir):
        continue
    # os.listdir order is arbitrary; sort so the position of each path is
    # reproducible (the embeddings are later stored in this same order).
    for file in sorted(os.listdir(actor_dir)):
        filenames.append(os.path.join(actor_dir,file))

print(len(filenames))

8541


In [41]:
## Pickle the list of file paths for later use.
# The context manager guarantees the file is flushed and closed even if
# pickling fails, instead of leaving the handle to the garbage collector.
with open('filenames.pkl','wb') as filenames_file:
    pickle.dump(filenames,filenames_file)

In [42]:
## Instantiate VGGFace with the 'resnet50' architecture.
## include_top=False leaves out the top (classification) layers and
## pooling='avg' average-pools the final feature maps into one vector.
## (The pretrained weights are presumably downloaded on first use.)
model=VGGFace(
    model='resnet50',
    include_top=False,
    input_shape=(224,224,3),
    pooling='avg',
)

In [43]:
## This function preprocesses an image and returns the facial features produced by the VGGFace model:
def feature_extractor(img_path,model,version=1):
    """Return the flattened model output (face embedding) for one image file.

    Parameters
    ----------
    img_path : str
        Path of the image to load. The image is resized to 224x224 on load.
    model
        Object exposing ``predict``; in this notebook, the VGGFace model.
    version : int, optional
        Forwarded to ``keras_vggface.utils.preprocess_input``. Defaults to 1,
        which is what the original call relied on implicitly, so existing
        embeddings stay comparable.
        NOTE(review): the keras-vggface README pairs version=1 with VGG16 and
        version=2 with RESNET50/SENET50 -- confirm, and if switching to 2,
        regenerate the stored embeddings and use the same value wherever
        query images are embedded.

    Returns
    -------
    numpy.ndarray
        1-D array of features (2048 values for the model configured in this
        notebook).
    """
    img=image.load_img(img_path,target_size=(224,224))
    img_toarray=image.img_to_array(img)
    # The model expects a batch dimension, so wrap the single image in one.
    expanded_img=np.expand_dims(img_toarray,axis=0)
    preprocessed_img=preprocess_input(expanded_img,version=version)

    # verbose=0 keeps Keras from printing a progress bar for every
    # single-image call when this runs inside a long loop.
    result=model.predict(preprocessed_img,verbose=0).flatten()

    return result

##Sample Input:
# Build the path with os.path.join so it is not tied to Windows '\\' separators.
len(feature_extractor(os.path.join('celeb_dataset','Amitabh_Bachchan','Amitabh_Bachchan.305.jpg'),model))

2048

In [44]:
## Extract one feature vector per image path in `filenames`, in the same order.
## (Long-running: already executed once.)
features=[]

for img_file in tqdm(filenames):
    embedding=feature_extractor(img_file,model)
    features.append(embedding)

  0%|          | 0/8541 [00:00<?, ?it/s]

100%|██████████| 8541/8541 [26:50<00:00,  5.30it/s]  


In [45]:
# Echo the extracted features (a list with one float32 array per image path).
# NOTE(review): this renders the whole list; a slice such as features[:3]
# would keep the notebook output short.
features

[array([0.       , 0.3215958, 0.       , ..., 5.9016385, 0.       ,
        1.5431414], dtype=float32),
 array([0.        , 0.        , 0.        , ..., 9.602612  , 0.09039889,
        2.4121993 ], dtype=float32),
 array([0.        , 0.48082742, 0.        , ..., 6.8811116 , 0.06310423,
        2.6463957 ], dtype=float32),
 array([0.0000000e+00, 0.0000000e+00, 0.0000000e+00, ..., 4.2845559e+00,
        2.8618404e-03, 6.4294398e-01], dtype=float32),
 array([0.       , 0.       , 0.3613013, ..., 2.962309 , 1.8906579,
        5.6339555], dtype=float32),
 array([0.        , 1.5675731 , 0.        , ..., 6.4235206 , 0.01378938,
        1.1813695 ], dtype=float32),
 array([0.       , 0.       , 0.       , ..., 5.3404703, 1.1721913,
        0.       ], dtype=float32),
 array([0.        , 0.42905435, 0.        , ..., 6.5473213 , 0.        ,
        4.300003  ], dtype=float32),
 array([0.       , 1.6851074, 0.       , ..., 4.4400277, 0.       ,
        0.7709058], dtype=float32),
 array([6.732712

In [46]:
# Persist the embeddings; entry i corresponds to filenames[i].
# The context manager ensures the file is flushed and closed deterministically.
with open('embedding.pkl','wb') as embedding_file:
    pickle.dump(features,embedding_file)