In [None]:
from scipy.io import loadmat
import os
from datetime import datetime
import numpy as np
from tensorflow.keras.preprocessing import image
from tqdm import tqdm_notebook
import h5py
import time


import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
def calculate_age(taken, dob):
    """Estimate a person's age in the year a photo was taken.

    Args:
        taken: Year the photo was taken.
        dob: Date of birth as a day number. It is truncated to an int, shifted
            by 366 days and clamped to at least 1 before being handed to
            ``datetime.fromordinal``. (Presumably a MATLAB ``datenum``; the
            366-day shift matches the gap between the MATLAB and Python
            day-number origins -- confirm against the dataset description.)

    Returns:
        ``taken - birth_year`` for births in January-June, and one year less
        for births in July-December.
    """
    # Clamp to ordinal 1 so out-of-range birth dates do not make fromordinal raise.
    ordinal = max(int(dob) - 366, 1)
    birth = datetime.fromordinal(ordinal)

    age = taken - birth.year
    # Only the year of the photo is known: births in the second half of the
    # year are counted as not yet having had their birthday.
    if birth.month >= 7:
        age = age - 1
    return age

In [None]:
def load_data(data_dir, dataset):
    """Read image paths and ages from ``<data_dir>/<dataset>.mat``.

    The .mat file is expected to hold a struct stored under the key
    ``dataset`` with (at least) the fields ``full_path``, ``dob`` and
    ``photo_taken``.

    Args:
        data_dir: Directory that contains the metadata file.
        dataset: Base name of the .mat file, also used as the key of the
            struct inside it (e.g. ``"wiki"``).

    Returns:
        A tuple ``(images, age)`` of NumPy arrays: ``images`` holds the first
        element of every ``full_path`` entry (presumably the image path
        relative to ``data_dir`` -- confirm), and ``age`` holds the result of
        ``calculate_age`` for every ``(photo_taken, dob)`` pair.
    """
    meta = loadmat(os.path.join(data_dir, "{}.mat".format(dataset)))
    # All fields live in the single record at position [0, 0] of the struct.
    record = meta[dataset][0, 0]

    full_path = record["full_path"][0]
    dob = record["dob"][0]
    photo_taken = record["photo_taken"][0]

    age = np.array([calculate_age(taken, born) for taken, born in zip(photo_taken, dob)])

    # Every entry of full_path wraps the actual value in a one-element container.
    images = np.array([img_path[0] for img_path in tqdm_notebook(full_path)])

    return images, age

In [None]:
def load_images(data_dir, image_paths, image_shape):
    """Load images from disk into a single array with a leading batch axis.

    Args:
        data_dir: Directory the entries of ``image_paths`` are joined onto.
        image_paths: Sized iterable of image paths.
        image_shape: Passed to ``image.load_img`` as ``target_size``.

    Returns:
        An array with one row per successfully loaded image, in input order,
        or ``None`` when no image could be loaded.

    Note:
        Loading is best-effort: an image that fails to load is reported with
        ``print`` and skipped. The result can therefore have fewer rows than
        ``image_paths`` and is then no longer index-aligned with per-path
        labels; callers that pair the result with labels must check for this.
    """
    loaded = []
    num_images = len(image_paths)

    for i, image_path in tqdm_notebook(enumerate(image_paths), total=num_images, leave=False):
        try:
            # Load the image and convert the PIL object to a numpy ndarray
            pil_image = image.load_img(os.path.join(data_dir, image_path), target_size=image_shape)
            pixels = image.img_to_array(pil_image)

            # Keep rejecting an odd-shaped image on its own (as a failed
            # concatenate would) instead of failing the final stack.
            if loaded and pixels.shape != loaded[0].shape:
                raise ValueError("image shape {} does not match {}".format(pixels.shape, loaded[0].shape))

            loaded.append(pixels)
        except Exception as e:
            print("Error:", i, e)

    if not loaded:
        return None

    # Stack once at the end; concatenating inside the loop would copy the whole
    # accumulated array for every image.
    return np.stack(loaded, axis=0)

In [None]:
# Read the WIKI metadata. load_data returns two arrays; the first is bound to
# `images` and the second to `ages`.
images, ages = load_data(data_dir="data/wiki_crop", dataset="wiki")

In [None]:
# Optional cap on the number of samples to keep (useful for quick experiments).
# None keeps every sample. A negative value such as -1 must not be used to mean
# "all": [:-1] drops the last sample, and does so again each time the cell is re-run.
n = None
images = images[:n]
ages = ages[:n]
print(images.shape)
print(ages.shape)

In [None]:
# Target shape for every image; only the first two entries are handed to load_images.
image_shape = (64, 64, 3)
# The generator is only used for its standardize() step, configured with rescale=1/255.
data_generator = image.ImageDataGenerator(rescale=1. / 255)

# Time loading and standardizing together.
start = time.time()
loaded_images = load_images("data/wiki_crop", images, image_shape[:2])
loaded_images = data_generator.standardize(loaded_images)
end = time.time()
print(end - start)

In [None]:
# load_images skips files it cannot read, so the image array can end up shorter
# than the label array. Refuse to persist a dataset whose rows no longer line up.
if loaded_images.shape[0] != ages.shape[0]:
    raise ValueError(
        "{} images but {} ages: some images failed to load, labels would be misaligned".format(
            loaded_images.shape[0], ages.shape[0]))

# Store images (gzip, maximum compression level) and ages side by side.
with h5py.File("wiki_preprocesed.hdf5", "w") as f:
    f.create_dataset("images", data=loaded_images, compression="gzip", compression_opts=9)
    f.create_dataset("ages", data=ages)

In [None]:
# Time how long it takes to read both datasets back into memory.
start = time.time()
with h5py.File('wiki_preprocesed.hdf5', 'r') as f:
    # Materialize while the file is still open: 'ages' first, then 'images'.
    n_ages, n_images = (np.array(f.get(key)) for key in ('ages', 'images'))

end = time.time()
print(end - start)

In [None]:
# A notebook cell only echoes its last expression, so print both shapes explicitly.
print(n_ages.shape)
print(n_images.shape)

In [None]:
# Display one stored image. 62323 is a hard-coded row index, so n_images must
# contain more than 62323 rows for this to work.
plt.imshow(X=n_images[62323])

In [None]:
def image_chunk_generator(images, chunksize):
    """Yield consecutive slices of ``images`` holding at most ``chunksize`` items.

    Args:
        images: Sliceable object exposing its length as ``images.shape[0]``.
        chunksize: Step between the start indices of successive slices.

    Yields:
        ``images[start:start + chunksize]`` for ``start`` = 0, ``chunksize``,
        2 * ``chunksize``, ... while ``start`` is below ``images.shape[0]``;
        the last slice may be shorter.
    """
    total = images.shape[0]
    yield from (images[start:start + chunksize] for start in range(0, total, chunksize))

In [None]:
data_generator = image.ImageDataGenerator(rescale=1. / 255)
image_shape = (64, 64, 3)
images, ages = load_data("data/imdb_crop", "imdb")
num_imgs = images.shape[0]
# Aim for roughly 400 chunks across the whole dataset. Clamp to at least 1 so a
# dataset with fewer than 400 images does not yield a chunk size of 0, which
# range() inside image_chunk_generator rejects.
img_pr_process = max(1, num_imgs // 400)

# This cell only processes the second half of the dataset; keep the matching
# slice of labels next to the paths so both are written consistently below.
second_half_paths = images[num_imgs//2:]
second_half_ages = ages[num_imgs//2:]
chunk_generator = image_chunk_generator(second_half_paths, img_pr_process)
# Ceiling division: the actual number of chunks, so the progress bar total is
# right for any dataset size instead of a hard-coded count.
num_chunks = -(-second_half_paths.shape[0] // img_pr_process)

finnished_images = []
for image_chunk in tqdm_notebook(chunk_generator, total=num_chunks, desc="Image chunks"):
    loaded_images = load_images("data/imdb_crop", image_chunk, (image_shape[0], image_shape[1]))
    if loaded_images is None:
        # load_images returns None when no image of the chunk could be read.
        continue
    finnished_images.append(data_generator.standardize(loaded_images))

loaded_images = np.concatenate(finnished_images)

# load_images skips files it cannot read, so the image array can end up shorter
# than the label slice. Refuse to persist a dataset whose rows no longer line up.
if loaded_images.shape[0] != second_half_ages.shape[0]:
    raise ValueError(
        "{} images but {} ages: some images failed to load, labels would be misaligned".format(
            loaded_images.shape[0], second_half_ages.shape[0]))

with h5py.File("imdb_preprocesed_2.hdf5", "w") as f:
    f.create_dataset("images", data=loaded_images, compression="gzip", compression_opts=9)
    f.create_dataset("ages", data=second_half_ages)

print("done!")

In [None]:
# Merge the per-chunk arrays collected in finnished_images along the first axis.
# Note: this rebinds `images` to the merged result.
images = np.concatenate(finnished_images, axis=0)
# One shape per line: images first, then ages.
print(images.shape, ages.shape, sep="\n")

In [None]:
# loaded_images was built from images[num_imgs//2:] only, so store the matching
# slice of labels; writing the full `ages` array would pair images with the
# wrong ages.
with h5py.File("imdb_preprocesed.hdf5", "w") as f:
    f.create_dataset("images", data=loaded_images, compression="gzip", compression_opts=9)
    f.create_dataset("ages", data=ages[num_imgs//2:])

In [None]:
# Time how long it takes to read both datasets back into memory.
# NOTE(review): no cell visible here writes imdb_preprocesed_1.hdf5 --
# presumably it comes from an earlier run over the first half; confirm it exists.
start = time.time()
with h5py.File('imdb_preprocesed_1.hdf5', 'r') as f:
    # Materialize while the file is still open: 'ages' first, then 'images'.
    n_ages, n_images = (np.array(f.get(key)) for key in ('ages', 'images'))
end = time.time()
print(end - start)

In [None]:
# One shape per line: ages first, then images.
print(n_ages.shape, n_images.shape, sep="\n")