In [249]:
import numpy as np
import os
import conf as config
import cv2
from sklearn.model_selection import train_test_split
import random
from collections import Counter
from random import sample,seed
seed(10)

In [237]:
ANC_PATH = '/Users/julietastaryfurmanshalom/OneDrive/ITC/Face_Rec/data/anchor/'
POS_PATH = '/Users/julietastaryfurmanshalom/OneDrive/ITC/Face_Rec/data/positive/'
NEG_PATH = '/Users/julietastaryfurmanshalom/OneDrive/ITC/Face_Rec/data/negative/'
NEG_PATH_CLEANED = '/Users/julietastaryfurmanshalom/OneDrive/ITC/Face_Rec/data/negative_cleaned'

In [240]:
def rescale_resize(img):
    return cv2.resize(img, (105, 105)) / 255


def load_from_path(path, img_num):
    print(path)        
    dir_list = os.listdir(path)
    if 'negative_cleaned' in path:
        random.shuffle(dir_list)
    else:
        dir_list = sorted(dir_list)
    print(dir_list)
    imgs = []
    for i in range(img_num):
        img = cv2.imread(os.path.join(path, dir_list[i]))
        img = rescale_resize(img)
        imgs.append(img)
    return np.array(imgs)


def load_data(img_num=2221):
    """loads the data to three np.array (anchor, positive, negative)"""
    anchor = load_from_path(config.ANC_PATH, img_num)
    positive = load_from_path(config.POS_PATH, img_num)
    negative = load_from_path(NEG_PATH_CLEANED, img_num)

    # creating dataset
    X = np.concatenate([np.array([anchor, positive]), np.array([anchor, negative])], axis=1)
    y = np.concatenate([np.ones(img_num), np.zeros(img_num)], axis=0)

    return X, y


def train_test_img_split(imgs, labels, train_size=.8):
    ind = range(len(labels))
    train, test = train_test_split(ind, train_size=train_size)
    return imgs[:, train, :, :, :], imgs[:, test, :, :, :], labels[train], labels[test]

In [223]:
X.shape

(2, 4442, 105, 105, 3)

In [224]:
y.shape

(4442,)

In [None]:
# We create lists with all the people from LFW dataset

In [21]:
dir_list = os.listdir(config.NEG_PATH)

In [24]:
neg_list = sorted(dir_list)

In [227]:
# We get the names and we look for those people that appears more than 10 times

In [104]:
name = [person[:-9] for person in neg_list]

In [105]:
freq_person = Counter(name)

In [106]:
pairs = freq_person.items()
filtered_people = {key: value for key, value in pairs if value > 9}

In [None]:
# We save half of the pictures of those people that appears more than 10 times in the anchor folder 
#and the other half in the positive file

In [107]:
new_path_anchor = []
new_path_positive = []
for k,v in filtered_people.items():
    for i in range(1,v//2+1):
        new_path_anchor.append(k + '_' + '0'*(4-len(str(i))) + str(i) +'.jpg')
    for i in range(v//2+1,((v//2)*2)+1):
        new_path_positive.append(k + '_' + '0'*(4-len(str(i))) + str(i) +'.jpg')

In [108]:
len(new_path_anchor) == len(new_path_positive) 

True

In [None]:
# We changed the name of the pictures of ourselves so that it follows the same logic as in the lfw dataset.

In [149]:
path = '/Users/julietastaryfurmanshalom/OneDrive/ITC/Face_Rec/data_1/anchor'
name = 'Julieta_Staryfurman'

def changing_names(path,name):
    for count, f in enumerate(os.listdir(path)):
        f_name, f_ext = os.path.splitext(f)
        new_name = name + '_' + '0'*(4-len(str(count+1))) + str(count+1) + f_ext
        os.rename(path + '/' + f,path + '/' + new_name)

In [150]:
changing_names(path,name)

In [159]:
src_path = '/Users/julietastaryfurmanshalom/OneDrive/ITC/Face_Rec/data/negative/'
dest_path = '/Users/julietastaryfurmanshalom/OneDrive/ITC/Face_Rec/data/anchor/'

In [None]:
# We add the images to the folders

In [160]:
import glob
import shutil
import os

def add_to_folder(new_path, src_path, dest_path):
    for i in new_path:
        src_dir = src_path + i
        dst_dir = dest_path
        shutil.copy(src_dir, dst_dir)

In [161]:
add_to_folder(new_path_anchor, src_path, dest_path)

In [167]:
pos_images = os.listdir(config.POS_PATH)
anchor_images = os.listdir(config.ANC_PATH)

In [168]:
all_images = pos_images + anchor_images

In [176]:
src_path = '/Users/julietastaryfurmanshalom/OneDrive/ITC/Face_Rec/data/positive/'
dest_path = '/Users/julietastaryfurmanshalom/OneDrive/ITC/Face_Rec/data/negative/'

In [191]:
sample_all_images = sample(list(set(all_images)),len(pos_images))

In [185]:
add_to_folder(pos_images, src_path, dest_path)

In [190]:
src_path = '/Users/julietastaryfurmanshalom/OneDrive/ITC/Face_Rec/data/negative/'
dest_path = '/Users/julietastaryfurmanshalom/OneDrive/ITC/Face_Rec/data/negative_cleaned/'

In [194]:
add_to_folder(sample_all_images, src_path, dest_path)

In [228]:
# We create the dataset

In [241]:
X,y = load_data()

/Users/julietastaryfurmanshalom/OneDrive/ITC/Face_Rec/data/anchor
['Abdullah_Gul_0001.jpg', 'Abdullah_Gul_0002.jpg', 'Abdullah_Gul_0003.jpg', 'Abdullah_Gul_0004.jpg', 'Abdullah_Gul_0005.jpg', 'Abdullah_Gul_0006.jpg', 'Abdullah_Gul_0007.jpg', 'Abdullah_Gul_0008.jpg', 'Abdullah_Gul_0009.jpg', 'Adrien_Brody_0001.jpg', 'Adrien_Brody_0002.jpg', 'Adrien_Brody_0003.jpg', 'Adrien_Brody_0004.jpg', 'Adrien_Brody_0005.jpg', 'Adrien_Brody_0006.jpg', 'Alejandro_Toledo_0001.jpg', 'Alejandro_Toledo_0002.jpg', 'Alejandro_Toledo_0003.jpg', 'Alejandro_Toledo_0004.jpg', 'Alejandro_Toledo_0005.jpg', 'Alejandro_Toledo_0006.jpg', 'Alejandro_Toledo_0007.jpg', 'Alejandro_Toledo_0008.jpg', 'Alejandro_Toledo_0009.jpg', 'Alejandro_Toledo_0010.jpg', 'Alejandro_Toledo_0011.jpg', 'Alejandro_Toledo_0012.jpg', 'Alejandro_Toledo_0013.jpg', 'Alejandro_Toledo_0014.jpg', 'Alejandro_Toledo_0015.jpg', 'Alejandro_Toledo_0016.jpg', 'Alejandro_Toledo_0017.jpg', 'Alejandro_Toledo_0018.jpg', 'Alejandro_Toledo_0019.jpg', 'Alvaro

/Users/julietastaryfurmanshalom/OneDrive/ITC/Face_Rec/data/positive
['Abdullah_Gul_0010.jpg', 'Abdullah_Gul_0011.jpg', 'Abdullah_Gul_0012.jpg', 'Abdullah_Gul_0013.jpg', 'Abdullah_Gul_0014.jpg', 'Abdullah_Gul_0015.jpg', 'Abdullah_Gul_0016.jpg', 'Abdullah_Gul_0017.jpg', 'Abdullah_Gul_0018.jpg', 'Adrien_Brody_0007.jpg', 'Adrien_Brody_0008.jpg', 'Adrien_Brody_0009.jpg', 'Adrien_Brody_0010.jpg', 'Adrien_Brody_0011.jpg', 'Adrien_Brody_0012.jpg', 'Alejandro_Toledo_0020.jpg', 'Alejandro_Toledo_0021.jpg', 'Alejandro_Toledo_0022.jpg', 'Alejandro_Toledo_0023.jpg', 'Alejandro_Toledo_0024.jpg', 'Alejandro_Toledo_0025.jpg', 'Alejandro_Toledo_0026.jpg', 'Alejandro_Toledo_0027.jpg', 'Alejandro_Toledo_0028.jpg', 'Alejandro_Toledo_0029.jpg', 'Alejandro_Toledo_0030.jpg', 'Alejandro_Toledo_0031.jpg', 'Alejandro_Toledo_0032.jpg', 'Alejandro_Toledo_0033.jpg', 'Alejandro_Toledo_0034.jpg', 'Alejandro_Toledo_0035.jpg', 'Alejandro_Toledo_0036.jpg', 'Alejandro_Toledo_0037.jpg', 'Alejandro_Toledo_0038.jpg', 'Alva

/Users/julietastaryfurmanshalom/OneDrive/ITC/Face_Rec/data/negative_cleaned
['Eduardo_Duhalde_0001.jpg', 'Michael_Bloomberg_0020.jpg', 'James_Blake_0012.jpg', 'Tom_Ridge_0030.jpg', 'Hamid_Karzai_0008.jpg', 'Jennifer_Lopez_0004.jpg', 'George_W_Bush_0517.jpg', 'Donald_Rumsfeld_0067.jpg', 'Richard_Myers_0016.jpg', 'George_W_Bush_0129.jpg', 'Ann_Veneman_0008.jpg', 'Tony_Blair_0038.jpg', 'Kofi_Annan_0024.jpg', 'Serena_Williams_0020.jpg', 'Richard_Gere_0006.jpg', 'Mohammed_Al-Douri_0002.jpg', 'Trent_Lott_0015.jpg', 'Gloria_Macapagal_Arroyo_0042.jpg', 'Tiger_Woods_0002.jpg', 'George_W_Bush_0271.jpg', 'James_Blake_0010.jpg', 'Tony_Blair_0065.jpg', 'Laura_Bush_0021.jpg', 'Tony_Blair_0103.jpg', 'George_W_Bush_0510.jpg', 'Keanu_Reeves_0005.jpg', 'Gloria_Macapagal_Arroyo_0018.jpg', 'Julianne_Moore_0013.jpg', 'John_Kerry_0012.jpg', 'Jacques_Chirac_0037.jpg', 'George_W_Bush_0087.jpg', 'Roh_Moo-hyun_0016.jpg', 'Ariel_Sharon_0062.jpg', 'Ariel_Sharon_0076.jpg', 'Fidel_Castro_0004.jpg', 'Gray_Davis_0010

In [250]:
np.save('X_array', X)

In [251]:
np.save('y_array', y)

In [252]:
y

array([1., 1., 1., ..., 0., 0., 0.])

In [255]:
np.load('y_array.npy')

array([1., 1., 1., ..., 0., 0., 0.])

In [256]:
X

array([[[[[0.        , 0.        , 0.        ],
          [0.        , 0.        , 0.        ],
          [0.        , 0.        , 0.        ],
          ...,
          [0.        , 0.        , 0.        ],
          [0.        , 0.        , 0.        ],
          [0.        , 0.        , 0.        ]],

         [[0.        , 0.        , 0.        ],
          [0.        , 0.        , 0.        ],
          [0.        , 0.        , 0.        ],
          ...,
          [0.        , 0.        , 0.        ],
          [0.        , 0.        , 0.        ],
          [0.        , 0.        , 0.        ]],

         [[0.        , 0.        , 0.        ],
          [0.        , 0.        , 0.        ],
          [0.        , 0.        , 0.        ],
          ...,
          [0.        , 0.        , 0.        ],
          [0.        , 0.        , 0.        ],
          [0.        , 0.        , 0.        ]],

         ...,

         [[0.24313725, 0.21960784, 0.22352941],
          [0.21568627,

In [257]:
np.load('X_array.npy')

array([[[[[0.        , 0.        , 0.        ],
          [0.        , 0.        , 0.        ],
          [0.        , 0.        , 0.        ],
          ...,
          [0.        , 0.        , 0.        ],
          [0.        , 0.        , 0.        ],
          [0.        , 0.        , 0.        ]],

         [[0.        , 0.        , 0.        ],
          [0.        , 0.        , 0.        ],
          [0.        , 0.        , 0.        ],
          ...,
          [0.        , 0.        , 0.        ],
          [0.        , 0.        , 0.        ],
          [0.        , 0.        , 0.        ]],

         [[0.        , 0.        , 0.        ],
          [0.        , 0.        , 0.        ],
          [0.        , 0.        , 0.        ],
          ...,
          [0.        , 0.        , 0.        ],
          [0.        , 0.        , 0.        ],
          [0.        , 0.        , 0.        ]],

         ...,

         [[0.24313725, 0.21960784, 0.22352941],
          [0.21568627,