In [None]:
import os
from PIL import Image

import h5py
import numpy as np
import matplotlib.pyplot as plt

from db_helper import get_files_paths_recursive

In [None]:
# Source image folders keyed by category label; the label is later used as the
# HDF5 dataset name for that category.
categories_and_paths = {
    "fake": r"F:\master-thesis-databases\classification_db\fake",
    "real": r"F:\master-thesis-databases\classification_db\real",
}
# Folder that receives the generated train.h5 / val.h5 files.
result_dir = r"C:\Users\Marcin\Dysk Google\masterDB"

In [None]:
def load_dataset_h5(path, dataset_name):
    """Read one whole dataset from an HDF5 file into memory.

    Args:
        path: Path of the HDF5 file to open (read-only).
        dataset_name: Name of the dataset inside the file.

    Returns:
        The full content of the dataset, materialised with ``[:]`` so it
        remains usable after the file has been closed.
    """
    # The context manager closes the file on exit; no explicit close() needed.
    with h5py.File(path, "r") as hf:
        return hf[dataset_name][:]

In [None]:
def _write_batch_to_h5(batch, h5_path, dataset_name, already_written, mode):
    """Store one batch of images; return the new total number of stored images.

    The first batch (``already_written == 0``) creates the dataset using the
    caller-supplied file ``mode``; every later batch reopens the file in
    append mode, grows the dataset along axis 0 and fills the new rows.
    """
    batch_array = np.asarray(batch)
    total = already_written + len(batch_array)
    if already_written == 0:
        with h5py.File(h5_path, mode) as hf:
            # maxshape=None on axis 0 lets the dataset be resized later.
            hf.create_dataset(dataset_name,
                              data=batch_array,
                              maxshape=(None,) + batch_array.shape[1:],
                              chunks=True)
    else:
        with h5py.File(h5_path, "a") as hf:
            dataset = hf[dataset_name]
            dataset.resize(total, axis=0)
            dataset[already_written:] = batch_array
    return total


def convert_dataset_to_h5(images_paths, h5_path, dataset_name, img_in_cycle=10, mode='w'):
    """Load images from disk and store them as one resizable HDF5 dataset.

    Images are converted to ``uint8`` arrays and written in batches of
    ``img_in_cycle``. The file is reopened for each batch, so everything
    written so far is on disk if a later image fails to load. A final,
    partially filled batch is written as well, so no image is dropped when
    the number of images is not a multiple of ``img_in_cycle``.

    Args:
        images_paths: Sized sequence of image file paths.
        h5_path: Path of the HDF5 file to write.
        dataset_name: Name of the dataset created inside the file.
        img_in_cycle: Number of images buffered in memory between writes.
        mode: h5py file mode used when the dataset is first created
            (``'w'`` truncates the file, ``'a'`` keeps existing content).

    Raises:
        AssertionError: If ``images_paths`` is empty.
        ValueError: If ``img_in_cycle`` is smaller than 1.

    Note:
        Assumes all images share the same shape so a batch stacks into one
        regular array -- TODO confirm against the source data.
    """
    print(f"To {h5_path} for dataset:{dataset_name}, with flag: {mode}")
    # Still an AssertionError, as before; only truly empty input is rejected now.
    assert len(images_paths) > 0, "Database is empty, nothing to convert"
    if img_in_cycle < 1:
        raise ValueError(f"img_in_cycle must be >= 1, got {img_in_cycle}")

    batch = []
    written = 0
    for index, image_path in enumerate(images_paths):
        if index % 100 == 0:
            print(f"Image number: {index}")
        # The context manager releases the file handle once pixels are copied.
        with Image.open(image_path) as pil_image:
            batch.append(np.array(pil_image).astype(np.uint8))
        if len(batch) == img_in_cycle:
            written = _write_batch_to_h5(batch, h5_path, dataset_name, written, mode)
            batch.clear()

    # Flush the images left over after the last full cycle.
    if batch:
        _write_batch_to_h5(batch, h5_path, dataset_name, written, mode)

In [None]:
def make_dataset_for_keras_h5(categories, result_directory, split_factor=0.8):
    """Build ``train.h5`` and ``val.h5`` from per-category image folders.

    For every category the image paths are split at
    ``int(split_factor * count)``: the leading part goes to ``train.h5`` and
    the rest to ``val.h5``, each stored under a dataset named after the
    category. The first category is written with mode ``'w'`` (replacing any
    existing files); later categories are added with mode ``'a'``.

    Args:
        categories: Mapping of category name -> folder containing its images.
        result_directory: Folder that receives ``train.h5`` and ``val.h5``;
            created if it does not exist yet.
        split_factor: Fraction of each category assigned to the training file.

    Note:
        The split follows whatever order ``get_files_paths_recursive``
        returns; presumably a list, since it is sliced -- verify in db_helper.
    """
    print(f"Make dataset from: {categories} to {result_directory}")
    # h5py does not create missing parent folders; an empty string means the
    # current directory and needs no creation.
    if result_directory:
        os.makedirs(result_directory, exist_ok=True)
    train_path = os.path.join(result_directory, 'train.h5')
    val_path = os.path.join(result_directory, 'val.h5')

    flag = 'w'
    for name, path in categories.items():
        images_paths = get_files_paths_recursive(path)
        train_val_border = int(split_factor * len(images_paths))
        convert_dataset_to_h5(images_paths[:train_val_border], train_path, name, mode=flag)
        convert_dataset_to_h5(images_paths[train_val_border:], val_path, name, mode=flag)
        # After the first category both files exist and must only be extended.
        flag = 'a'


In [None]:
# Run the conversion for the configured categories with the default split factor.
make_dataset_for_keras_h5(
    categories=categories_and_paths,
    result_directory=result_dir,
)