# Creating data from image folders.

In [1]:
import os, itertools, random, imageio, sklearn
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle

# Root folder of the face images. same_person_combinations() treats every
# sub-directory of it as one person and the files inside as that person's images.
faces_folder = "faces"
# Loaded images must be 2-D arrays of shape (image_size, image_size); each one
# is flattened to image_size * image_size values.
image_size = 128
# Presumably the number of classes (same person / different people), i.e. the
# width of a one-hot label row -- TODO confirm.
num_labels = 2
# Used to normalise pixel values: (value - pixel_depth / 2) / pixel_depth.
pixel_depth = 255.0

def get_subdirs(a_dir):
  """Return the names (not full paths) of the entries of `a_dir` that are directories."""
  subdir_names = []
  for entry in os.listdir(a_dir):
    if os.path.isdir(os.path.join(a_dir, entry)):
      subdir_names.append(entry)
  return subdir_names

def get_files(a_dir):
  """Return the paths (`a_dir` joined with the name) of the regular files in `a_dir`.

  Entries named ".DS_Store" are left out.
  """
  file_paths = []
  for entry in os.listdir(a_dir):
    if entry == ".DS_Store":
      continue
    candidate = os.path.join(a_dir, entry)
    if os.path.isfile(candidate):
      file_paths.append(candidate)
  return file_paths

def same_person_combinations():
  """Build every ordered pair of image files belonging to the same person.

  Each sub-directory of `faces_folder` is taken as one person. Returns a list
  with one element per person; that element is the list of all 2-permutations
  of the person's image paths, so both (a, b) and (b, a) are present.
  """
  return [
    list(itertools.permutations(get_files(faces_folder + "/" + person_name), 2))
    for person_name in get_subdirs(faces_folder)
  ]

def different_people_combinations(unique_people, cap_at, seed=None):
  """Return up to `cap_at` randomly chosen ordered pairs of distinct entries.

  Args:
    unique_people: sequence of items to pair up (one entry per person).
    cap_at: maximum number of pairs to return; fewer are returned when
      `unique_people` does not yield that many ordered pairs.
    seed: optional seed for a private random generator so the selection is
      reproducible. With the default (None) the module-level `random` state
      is used, exactly as before.

  Returns:
    A list of 2-tuples taken from the shuffled 2-permutations of
    `unique_people`.
  """
  # Named `pairs` rather than after the function itself, which the local
  # variable used to shadow inside this body.
  pairs = list(itertools.permutations(unique_people, 2))
  if seed is None:
    random.shuffle(pairs)
  else:
    random.Random(seed).shuffle(pairs)
  return pairs[:cap_at]

In [2]:
people_combinations = same_person_combinations()

# Total number of ordered same-person pairs, summed over every person.
same_person_count = sum(len(person) for person in people_combinations)

# Every row holds two flattened images side by side. Twice as many rows as
# there are same-person pairs are reserved, leaving room for an equal number
# of different-people pairs. The arrays are zero-filled rather than left
# uninitialised so that a row which is never written (because its pair was
# skipped) does not contain arbitrary memory.
dataset = np.zeros(shape=(same_person_count * 2, image_size * image_size * 2), dtype=np.float32)
labels = np.zeros(shape=(same_person_count * 2, 2), dtype=np.float32)

# For each person: the first image of the first pair that loaded successfully.
unique_people = []

expected_shape = (image_size, image_size)
i = 0  # index of the next free row in dataset / labels
for person in people_combinations:
  chose_one_from_this_person = False
  for img_1, img_2 in person:
    try:
      image_data_1 = (imageio.imread(img_1).astype(float) - pixel_depth / 2) / pixel_depth
      # Load the second image of the pair (img_1 used to be read twice here,
      # which made both halves of every row identical).
      image_data_2 = (imageio.imread(img_2).astype(float) - pixel_depth / 2) / pixel_depth
      for image_data in (image_data_1, image_data_2):
        if image_data.shape != expected_shape:
          # Not an IOError/ValueError, so this is not swallowed by the handler below.
          raise Exception('Unexpected image shape: %s' % str(image_data.shape))

      img_1_flattened = image_data_1.reshape(image_size * image_size)
      img_2_flattened = image_data_2.reshape(image_size * image_size)
      dataset[i] = np.concatenate((img_1_flattened, img_2_flattened), axis=0)
      labels[i] = np.array([1, 0]) # same person
      i += 1

      if not chose_one_from_this_person:
        unique_people.append(img_1)
        chose_one_from_this_person = True
    except (IOError, ValueError) as e:
      # Report which pair was dropped and why instead of a bare "skipping".
      print("skipping", img_1, img_2, ":", e)

print("Done same person")

Done same person


In [3]:
# get as many different people as the same person count
# The result is stored under its own name: assigning it to
# `different_people_combinations` replaced the helper function with a list,
# so this cell raised a TypeError when it was run a second time.
different_people_pairs = different_people_combinations(unique_people, same_person_count)

# Rows are written starting at `i`, which is not reset here and continues from
# where the same-person loop stopped. Re-run that cell before re-running this one.
expected_shape = (image_size, image_size)
for img_1, img_2 in different_people_pairs:
  try:
    image_data_1 = (imageio.imread(img_1).astype(float) - pixel_depth / 2) / pixel_depth
    # Load the other person's image (img_1 used to be read twice here, so the
    # "different people" rows contained the same image in both halves).
    image_data_2 = (imageio.imread(img_2).astype(float) - pixel_depth / 2) / pixel_depth
    for image_data in (image_data_1, image_data_2):
      if image_data.shape != expected_shape:
        # Not an IOError/ValueError, so this is not swallowed by the handler below.
        raise Exception('Unexpected image shape: %s' % str(image_data.shape))

    img_1_flattened = image_data_1.reshape(image_size * image_size)
    img_2_flattened = image_data_2.reshape(image_size * image_size)
    dataset[i] = np.concatenate((img_1_flattened, img_2_flattened), axis=0)
    labels[i] = np.array([0, 1]) # different person
    i += 1
  except (IOError, ValueError) as e:
    # Report which pair was dropped and why instead of a bare "skipping".
    print("skipping", img_1, img_2, ":", e)

# Keep only the rows that were actually filled. Rows stay unwritten when a pair
# is skipped or when fewer different-people pairs exist than were requested,
# and would otherwise end up in the data set as meaningless samples.
dataset = dataset[:i]
labels = labels[:i]

print("Done different people")

Done different people


In [4]:
# Reorder the rows of both arrays together so each sample keeps its own label.
shuffled_dataset, shuffled_labels = sklearn.utils.shuffle(dataset, labels)
dataset, labels = shuffled_dataset, shuffled_labels

# Show the first sample with its label, then the shape of each array.
print("data", dataset[0], "label", labels[0])
for array_name, array in (("dataset", dataset), ("labels", labels)):
  print(array_name, array.shape)

data [ 0.2254902   0.23333333  0.24509804 ... -0.26862746 -0.26862746
 -0.2647059 ] label [1. 0.]
dataset (25564, 32768)
labels (25564, 2)


In [6]:
train_size = 23014
valid_size = 1275
test_size = 1275

pickle_file = "faces.pickle"

# The test split below is "everything after train + valid", so test_size is
# only honoured when the three sizes add up to the number of rows. Say so
# when they do not, instead of silently producing a different split.
if train_size + valid_size + test_size != len(dataset):
  print("warning: split sizes add up to", train_size + valid_size + test_size,
        "but the dataset has", len(dataset), "rows")

try:
  valid_end = train_size + valid_size
  save = {
    'train_dataset': dataset[:train_size],
    'train_labels': labels[:train_size],
    'valid_dataset': dataset[train_size:valid_end],
    'valid_labels': labels[train_size:valid_end],
    'test_dataset': dataset[valid_end:],
    'test_labels': labels[valid_end:],
    }
  print("train", save["train_dataset"].shape, save["train_labels"].shape)
  print("valid", save["valid_dataset"].shape, save["valid_labels"].shape)
  print("test", save["test_dataset"].shape, save["test_labels"].shape)

  # Largest number of bytes handed to a single write() call; presumably a
  # workaround for platforms where writes of 2 GiB or more fail -- TODO confirm.
  # (An unused 2 GiB bytearray used to be allocated here as well; it was removed.)
  max_bytes = 2**31 - 1

  bytes_out = pickle.dumps(save)
  # Slicing a memoryview hands out each chunk without copying it.
  bytes_view = memoryview(bytes_out)
  with open(pickle_file, 'wb') as f_out:
    for idx in range(0, len(bytes_out), max_bytes):
      f_out.write(bytes_view[idx:idx+max_bytes])

except Exception as e:
  print('Unable to save data to', pickle_file, ':', e)
  raise

train (23014, 32768) (23014, 2)
valid (1275, 32768) (1275, 2)
test (1275, 32768) (1275, 2)
