In [1]:
import dlib
from skimage import io
from scipy.spatial import distance
import cv2
import glob
import sys
import pandas as pd
from tqdm import tqdm
import numpy as np

In [2]:
sp = dlib.shape_predictor('models/shape_predictor_68_face_landmarks.dat')
facerec = dlib.face_recognition_model_v1('models/dlib_face_recognition_resnet_model_v1.dat')
detector = dlib.get_frontal_face_detector()

In [3]:
def get_descriptors(files):
    pictures = []
    descriptors = []
    for file in tqdm(files):
        img = cv2.imread(file)
        pictures.append(img)
        dets = detector(img, 1)
        for k, d in enumerate(dets):
            #print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
                #k, d.left(), d.top(), d.right(), d.bottom()))
            shape = sp(img, d)
            face_descriptor = facerec.compute_face_descriptor(img, shape)
            item = {
                'descriptor':np.array(face_descriptor),
                'file' : file,
            }
            descriptors.append(item)
    return descriptors

In [4]:
def get_df(descriptors, name):
    df =pd.DataFrame(columns = ['img_path', 'class', 'descriptor'])

    for item in tqdm(descriptors):
        class1 = name
        desc = item['descriptor']
        descriptor = ' '.join([str(n) for n in desc])
        row ={
            'img_path': item['file'],
            'class': class1,
            'descriptor': descriptor,
        }

        df = df.append(row, ignore_index=True)
    return df

In [5]:
files1 = glob.glob('Harrison_Ford\\*.PNG')+glob.glob('Harrison_Ford\\*.jpg')
files2 = glob.glob('Emma_Watson\\*.PNG') + glob.glob('Emma_Watson\\*.jpg')
files3 = glob.glob('Rupert_Grint\\*.PNG')+glob.glob('Rupert_Grint\\*.jpg')
files4 = glob.glob('Will_Smith\\*.PNG')+glob.glob('Will_Smith\\*.jpg')
files5 = glob.glob('Halle_Berry\\*.PNG')+glob.glob('Halle_Berry\\*.jpg')
files6 = glob.glob('Dan_Radcliffe\\*.PNG')+glob.glob('Dan_Radcliffe\\*.jpg')

In [6]:
HF = get_descriptors(files1)
df_HF = get_df(HF, 'Harrison_Ford')

100%|████████████████████████████████████████████████████████████████████████████████| 584/584 [43:31<00:00,  4.47s/it]
100%|███████████████████████████████████████████████████████████████████████████████| 542/542 [00:05<00:00, 100.81it/s]


In [7]:
df_HF.to_csv('Harrison_Ford.csv')
df_HF.to_excel('Harrison_Ford.xlsx')

In [8]:
EW = get_descriptors(files2)
df_EW = get_df(EW, 'Emma_Watson')

100%|████████████████████████████████████████████████████████████████████████████████| 344/344 [18:58<00:00,  3.31s/it]
100%|███████████████████████████████████████████████████████████████████████████████| 318/318 [00:02<00:00, 107.16it/s]


In [9]:
df_EW.to_csv('Emma_Watson.csv')
df_EW.to_excel('Emma_Watson.xlsx')

In [10]:
RG = get_descriptors(files3)
df_RG = get_df(RG, 'Rupert_Grint')

100%|██████████████████████████████████████████████████████████████████████████████| 837/837 [1:02:42<00:00,  4.49s/it]
100%|███████████████████████████████████████████████████████████████████████████████| 757/757 [00:07<00:00, 100.43it/s]


In [11]:
df_RG.to_csv('Rupert_Grint.csv')
df_RG.to_excel('Rupert_Grint.xlsx')

In [12]:
WS = get_descriptors(files4)
df_WS = get_df(WS, 'Will_Smith')

100%|████████████████████████████████████████████████████████████████████████████████| 728/728 [57:08<00:00,  4.71s/it]
100%|███████████████████████████████████████████████████████████████████████████████| 706/706 [00:06<00:00, 103.69it/s]


In [13]:
df_WS.to_csv('Will_Smith.csv')
df_WS.to_excel('Will_Smith.xlsx')

In [14]:
HB = get_descriptors(files5)
df_HB = get_df(HB, 'Halle_Berry')
df_HB.to_csv('Halle_Berry.csv')
df_HB.to_excel('Halle_Berry.xlsx')

100%|██████████████████████████████████████████████████████████████████████████████| 662/662 [1:00:46<00:00,  5.51s/it]
100%|███████████████████████████████████████████████████████████████████████████████| 590/590 [00:05<00:00, 102.06it/s]


In [15]:
DR = get_descriptors(files6)
df_DR = get_df(DR, 'Dan_Radcliffe')
df_DR.to_csv('Dan_Radcliffe.csv')
df_DR.to_excel('Dan_Radcliffe.xlsx')

100%|████████████████████████████████████████████████████████████████████████████████| 411/411 [26:04<00:00,  3.81s/it]
100%|███████████████████████████████████████████████████████████████████████████████| 393/393 [00:03<00:00, 105.80it/s]


In [4]:
df2 = pd.read_csv('Dan_Radcliffe.csv', index_col = 0)

In [5]:
df2

Unnamed: 0,img_path,class,descriptor
0,Dan_Radcliffe\10.PNG,Dan_Radcliffe,-0.01745985820889473 0.10399903357028961 0.059...
1,Dan_Radcliffe\100.PNG,Dan_Radcliffe,-0.10777680575847626 0.07684307545423508 0.040...
2,Dan_Radcliffe\101.PNG,Dan_Radcliffe,-0.10304909199476242 0.08664718270301819 0.038...
3,Dan_Radcliffe\102.PNG,Dan_Radcliffe,-0.06392291933298111 0.09306670725345612 0.059...
4,Dan_Radcliffe\104.PNG,Dan_Radcliffe,-0.10605598986148834 0.07336850464344025 -0.03...
...,...,...,...
388,Dan_Radcliffe\472.jpg,Dan_Radcliffe,-0.040038757026195526 0.1165609285235405 0.013...
389,Dan_Radcliffe\473.jpg,Dan_Radcliffe,-0.005341207142919302 0.19764338433742523 0.04...
390,Dan_Radcliffe\474.jpg,Dan_Radcliffe,-0.011882415041327477 0.16236011683940887 0.02...
391,Dan_Radcliffe\475.jpg,Dan_Radcliffe,0.0005403229733929038 0.10192063450813293 0.01...


In [None]:
#adding new images to existed dataframe

In [86]:
files = glob.glob('Rupert_Grint\\*.jpg')

In [87]:
old_df = pd.read_csv('Rupert_Grint.csv', index_col = 0)

In [88]:
new_files = []
for file in files:
    if file in old_df['img_path'].values:
        continue
    else:
        new_files.append(file)

In [89]:
new_files

['Rupert_Grint\\242.jpg',
 'Rupert_Grint\\243.jpg',
 'Rupert_Grint\\244.jpg',
 'Rupert_Grint\\245.jpg',
 'Rupert_Grint\\246.jpg',
 'Rupert_Grint\\247.jpg',
 'Rupert_Grint\\248.jpg',
 'Rupert_Grint\\249.jpg',
 'Rupert_Grint\\250.jpg',
 'Rupert_Grint\\251.jpg',
 'Rupert_Grint\\252.jpg',
 'Rupert_Grint\\253.jpg',
 'Rupert_Grint\\254.jpg',
 'Rupert_Grint\\255.jpg',
 'Rupert_Grint\\256.jpg',
 'Rupert_Grint\\257.jpg',
 'Rupert_Grint\\258.jpg',
 'Rupert_Grint\\259.jpg',
 'Rupert_Grint\\260.jpg',
 'Rupert_Grint\\261.jpg',
 'Rupert_Grint\\262.jpg',
 'Rupert_Grint\\263.jpg',
 'Rupert_Grint\\267.jpg',
 'Rupert_Grint\\268.jpg',
 'Rupert_Grint\\269.jpg',
 'Rupert_Grint\\270.jpg',
 'Rupert_Grint\\271.jpg',
 'Rupert_Grint\\272.jpg',
 'Rupert_Grint\\273.jpg',
 'Rupert_Grint\\274.jpg',
 'Rupert_Grint\\275.jpg',
 'Rupert_Grint\\276.jpg',
 'Rupert_Grint\\277.jpg',
 'Rupert_Grint\\278.jpg',
 'Rupert_Grint\\279.jpg',
 'Rupert_Grint\\280.jpg',
 'Rupert_Grint\\281.jpg',
 'Rupert_Grint\\282.jpg',
 'Rupert_Gri

In [90]:
new_pictures = []
new_descriptors = []

In [91]:
for file in tqdm(new_files):
    img = cv2.imread(file)
    new_pictures.append(img)
    dets = detector(img, 1)
    for k, d in enumerate(dets):
        shape = sp(img, d)
        face_descriptor = facerec.compute_face_descriptor(img, shape)
        item = {
            'descriptor':face_descriptor,
            'file' : file,
        }
        new_descriptors.append(item)

100%|████████████████████████████████████████████████████████████████████████████████| 595/595 [58:04<00:00,  5.86s/it]


In [92]:
new_df =pd.DataFrame(columns = ['img_path', 'class', 'descriptor'])

for item in tqdm(new_descriptors):
    class1 = 'Rupert_Grint'
    desc = item['descriptor']
    descriptor = ' '.join([str(n) for n in desc])
    row ={
        'img_path': item['file'],
        'class': class1,
        'descriptor': descriptor,
    }

    new_df = new_df.append(row, ignore_index=True)

100%|████████████████████████████████████████████████████████████████████████████████| 593/593 [00:06<00:00, 88.89it/s]


In [93]:
new_df

Unnamed: 0,img_path,class,descriptor
0,Rupert_Grint\242.jpg,Rupert_Grint,-0.05237310379743576 0.07127876579761505 0.059...
1,Rupert_Grint\243.jpg,Rupert_Grint,-0.04176531732082367 0.05777474120259285 0.005...
2,Rupert_Grint\244.jpg,Rupert_Grint,-0.06902678310871124 0.09710950404405594 0.056...
3,Rupert_Grint\245.jpg,Rupert_Grint,-0.05518348887562752 0.05615098029375076 0.057...
4,Rupert_Grint\246.jpg,Rupert_Grint,-0.028674466535449028 -0.012040446512401104 0....
...,...,...,...
588,Rupert_Grint\835.jpg,Rupert_Grint,-0.06006047502160072 0.0023433445021510124 0.0...
589,Rupert_Grint\836.jpg,Rupert_Grint,-0.04914705455303192 0.06058773770928383 0.053...
590,Rupert_Grint\837.jpg,Rupert_Grint,-0.04880763217806816 0.10131653398275375 0.001...
591,Rupert_Grint\838.jpg,Rupert_Grint,-0.07579626888036728 0.07690523564815521 0.019...


In [94]:
total_df = pd.concat([old_df, new_df])

In [95]:
total_df

Unnamed: 0,img_path,class,descriptor
0,Rupert_Grint\10.PNG,Rupert_Grint,-0.04188723862171173 0.08876699954271317 -0.00...
1,Rupert_Grint\100.PNG,Rupert_Grint,0.0034476229920983315 0.04783119261264801 0.05...
2,Rupert_Grint\101.PNG,Rupert_Grint,-0.0802157074213028 0.026016555726528168 0.021...
3,Rupert_Grint\102.PNG,Rupert_Grint,-0.10715584456920624 0.024501046165823936 0.01...
4,Rupert_Grint\103.PNG,Rupert_Grint,-0.0665271133184433 0.039051465690135956 0.011...
...,...,...,...
588,Rupert_Grint\835.jpg,Rupert_Grint,-0.06006047502160072 0.0023433445021510124 0.0...
589,Rupert_Grint\836.jpg,Rupert_Grint,-0.04914705455303192 0.06058773770928383 0.053...
590,Rupert_Grint\837.jpg,Rupert_Grint,-0.04880763217806816 0.10131653398275375 0.001...
591,Rupert_Grint\838.jpg,Rupert_Grint,-0.07579626888036728 0.07690523564815521 0.019...


In [101]:
total_df.to_csv('Rupert_Grint.csv')
total_df.to_excel('Rupert_Grint.xlsx')

In [104]:
def add_new_descriptors(files, old_df):
    new_files = []
    new_pictures = []
    new_descriptors = []
    for file in files:
        if file in old_df['img_path'].values:
            continue
        else:
            new_files.append(file)
    for file in tqdm(new_files):
        img = cv2.imread(file)
        new_pictures.append(img)
        dets = detector(img, 1)
        for k, d in enumerate(dets):
            shape = sp(img, d)
            face_descriptor = facerec.compute_face_descriptor(img, shape)
            item = {
            'descriptor':face_descriptor,
            'file' : file,
            }
        new_descriptors.append(item)
    return new_descriptors

In [105]:
def add_new_images(new_descriptors, name):
    new_df =pd.DataFrame(columns = ['img_path', 'class', 'descriptor'])
    for item in tqdm(new_descriptors):
        class1 = name
        desc = item['descriptor']
        descriptor = ' '.join([str(n) for n in desc])
        row ={
            'img_path': item['file'],
            'class': class1,
            'descriptor': descriptor,
        }

        new_df = new_df.append(row, ignore_index=True)
    return new_df

In [106]:
def make_and_save_total(old_df, new_df, name):
    total_df = pd.concat([old_df, new_df])
    total_df
    total_df.to_csv(f'{name}.csv')
    total_df.to_excel(f'{name}.xlsx')

In [107]:
old_df_ew = pd.read_csv('Emma_Watson.csv', index_col = 0)
files_ew = glob.glob('Emma_Watson\\*.jpg')
ew = add_new_descriptors(files_ew, old_df_ew)
ewdf = add_new_images(ew, 'Emma_Watson')
make_and_save_total(old_df_ew,ewdf, 'Emma_Watson' )

100%|████████████████████████████████████████████████████████████████████████████████| 130/130 [13:31<00:00,  6.24s/it]
100%|████████████████████████████████████████████████████████████████████████████████| 130/130 [00:01<00:00, 72.90it/s]


In [108]:
old_df_hb = pd.read_csv('Halle_Berry.csv', index_col = 0)
files_hb = glob.glob('Halle_Berry\\*.jpg')
hb = add_new_descriptors(files_hb, old_df_hb)
hbdf = add_new_images(hb, 'Halle_Berry')
make_and_save_total(old_df_hb,hbdf, 'Halle_Berry' )

 10%|████████                                                                         | 66/662 [06:35<59:30,  5.99s/it]


KeyboardInterrupt: 

In [None]:
old_df_ws = pd.read_csv('Will_Smith.csv', index_col = 0)
files_ws = glob.glob('Will_Smith\\*.jpg')
ws = add_new_descriptors(files_ws, old_df_ws)
wsdf = add_new_images(ws, 'Will_Smith')
make_and_save_total(old_df_ws,wsdf, 'Will_Smith' )