In [1]:


import random

from itertools import permutations, combinations

from os.path import join
from os import listdir



In [2]:
root_path = './master/biometrics-CMPE58Z/FaceRecognition/somedata/lfw'

In [3]:
def get_triplets(folder, ap_pair_per_person, n_per_ap_pair):
    """
        folder is dict: person_name:[<image_names>]
        ap_pair_per_person : number of anchor - positive pairs
        n_per_ap_pair : number of negatives per ap_pair_per_person
        returns ap_pair_per_person * n_per_ap_pair triplets if possible
        but there may not be enough, see the warning massegaes in the code
    """
    
    triplets = []
    
    for person in folder:
        perms = list(permutations(folder[person],2))
        if len(perms) < ap_pair_per_person:
            print('Warning, for ', person,  ' there are only', len(perms), 'permutations. Returning them all!')
            ap_pairs = perms
        else:
            ap_pairs = random.sample(perms, k=ap_pair_per_person)
        
        other_peoples_images = [value for name in folder for value in folder[name] if name!=person]
        for anchor, pos in ap_pairs:
            try:
                o_p_i_samples = random.sample(other_peoples_images,n_per_ap_pair)
            except ValueError as ve:
                wehave = len(other_peoples_images)
                print('Not enough images from other people, using', wehave, 'per ap_pair instead for', person)
                o_p_i_samples = other_peoples_images
                print(ve)
                
            for neg in o_p_i_samples:
                triplets.append((anchor,pos,neg))
    
    return triplets

In [4]:
def get_folder_as_dict(folder_path, lower_limit=2, upper_limit=1000,for_test=0.2):
    assert 2 <= lower_limit <= upper_limit
    folder_train = dict()
    folder_test = dict()
    
    subfolds = listdir(folder_path)
    limit = int(len(subfolds)*(1-for_test))
    
    for subfolder in subfolds[:limit]:
        images = listdir(join(folder_path,subfolder))
        if lower_limit <= len(images) <= upper_limit:
            folder_train[subfolder] = images
    
    for subfolder in subfolds[limit:]:
        images = listdir(join(folder_path,subfolder))
        if lower_limit <= len(images) <= upper_limit:
            folder_test[subfolder] = images

    

    return folder_train, folder_test    

In [5]:
folder_train, folder_test = get_folder_as_dict(root_path,lower_limit=2,upper_limit=1000)

print(len(folder_train))
print(len(folder_test))


1337
343


In [6]:
triplets = get_triplets(folder_train,10,10)
print(len(triplets))











73940


In [7]:
def evaluation_pairs(folder):
    genuines = []
    for person in folder:
        comb = list(combinations(folder[person],2))
        genuines.extend(s for s in random.sample(comb,k=min(2,len(comb))))

    
    imposters = set()
    for person in folder:
        for other_person in random.sample(folder.keys(),k=2):
            if other_person == person:
                continue
            p = random.sample(folder[person], k=1)[0]
            op = random.sample(folder[other_person],k=1)[0]
            imposters.add(tuple(sorted((p, op))))
            
    return genuines, sorted(imposters)
            
    

In [8]:
g, i  = evaluation_pairs(folder_test)
g_i = [g, i]

In [9]:
len(g), len(i)

(538, 683)

In [10]:
import json
with open('ll2_ul1000_triplets_10_10_alpha.json', 'w') as outfile:
    json.dump(triplets, outfile)
with open('g_i_538_684_alpha.json', 'w') as outfile:
    json.dump(g_i, outfile)