# Build an HDF5 (.h5) file from our train/val/test dataset split

# Load Data

In [10]:
import glob
import h5py
import numpy as np
import cv2

# Discover the entries directly under each dataset split folder
# (train / val / test) and report how many were found per split.
train_dirs, val_dirs, test_dirs = (
    glob.glob(split_pattern)
    for split_pattern in (
        '../data/db_cropped_face/train/*',
        '../data/db_cropped_face/val/*',
        '../data/db_cropped_face/test/*',
    )
)

for split_dirs in (train_dirs, val_dirs, test_dirs):
    print(len(split_dirs))

def get_image_index_count(dirs):
    """Load every ``*.jpg`` under each directory and describe the grouping.

    The images of all directories are concatenated into one flat sequence,
    directory after directory. ``index`` and ``count`` describe where each
    directory's images sit inside that sequence.

    Args:
        dirs: Iterable of directory paths. Each directory is searched
            (non-recursively) for files matching ``*.jpg``.

    Returns:
        A tuple ``(images, index, count)`` of NumPy arrays:

        * ``images`` -- ``np.array`` built from the loaded images, in
          directory order and sorted by path within a directory.
        * ``index`` -- for each directory, the offset of its first image
          inside ``images``.
        * ``count`` -- for each directory, the number of images loaded.

    Raises:
        IOError: If ``cv2.imread`` cannot decode one of the matched files.
    """
    image_list = []
    count_list = []

    for dir_name in dirs:
        # glob returns matches in arbitrary order; sort so the image order
        # inside each directory is reproducible between runs.
        paths = sorted(glob.glob(dir_name + '/*.jpg'))

        for path in paths:
            image = cv2.imread(path)
            # imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than letting a None
            # end up inside the image array.
            if image is None:
                raise IOError('Could not read image: ' + path)
            image_list.append(image)

        count_list.append(len(paths))

    # Explicit integer dtype keeps index/count integral even for empty input.
    counts = np.array(count_list, dtype=int)
    # Start offset of each directory = number of images in all earlier
    # directories, i.e. the exclusive cumulative sum of the counts.
    index = np.cumsum(counts) - counts

    return np.array(image_list), index, counts

# Load each split into memory: the images plus, per directory, the start
# offset and the number of images.
train_image, train_index, train_count = get_image_index_count(train_dirs)
val_image, val_index, val_count = get_image_index_count(val_dirs)
test_image, test_index, test_count = get_image_index_count(test_dirs)

# Print the array shapes of every split as a quick sanity check.
for split_arrays in (
    (train_image, train_index, train_count),
    (val_image, val_index, val_count),
    (test_image, test_index, test_count),
):
    print(*(array.shape for array in split_arrays))

923
198
198
(19242, 120, 120, 3) (923,) (923,)
(2109, 120, 120, 3) (198,) (198,)
(2122, 120, 120, 3) (198,) (198,)


# Save data

In [3]:
# Write all three splits into a single HDF5 file. Opening with mode 'w'
# creates the file, truncating it if it already exists.
data_path = '../data/db_cropped_face/data.h5'

# (dataset name, in-memory array, on-disk dtype), listed in creation order.
dataset_specs = [
    ('train_images', train_image, np.uint8),
    ('val_images', val_image, np.uint8),
    ('test_images', test_image, np.uint8),
    ('train_index', train_index, np.uint32),
    ('val_index', val_index, np.uint32),
    ('test_index', test_index, np.uint32),
    ('train_count', train_count, np.uint32),
    ('val_count', val_count, np.uint32),
    ('test_count', test_count, np.uint32),
]

with h5py.File(data_path, mode='w') as data_file:
    # First allocate every dataset with the shape of its source array ...
    for ds_name, ds_array, ds_dtype in dataset_specs:
        data_file.create_dataset(ds_name, ds_array.shape, ds_dtype)

    # ... then copy the in-memory arrays into the allocated datasets.
    for ds_name, ds_array, ds_dtype in dataset_specs:
        data_file[ds_name][...] = ds_array

# Visualize data

In [9]:
import h5py
import numpy as np
import cv2

# Open the HDF5 file read-only for inspection; it is closed explicitly
# once the viewer calls are done.
path = '../data/db_cropped_face/data.h5'
file = h5py.File(path, mode='r')

def test_data(file, data_type):
    """Step through the images of one split in an OpenCV window.

    Reads ``<data_type>_images``, ``<data_type>_index`` and
    ``<data_type>_count`` from ``file`` and shows the images group by group,
    waiting for a key press after each image:

    * Enter (13) skips the rest of the current group,
    * Esc (27) stops the viewer,
    * any other key advances to the next image.

    Args:
        file: Mapping-like object (here an open ``h5py.File``) holding the
            three datasets named above.
        data_type: Prefix of the dataset names, e.g. ``'train'``.
    """
    esc_key = 27
    enter_key = 13

    images = file[data_type+'_images']
    index = file[data_type+'_index']
    count = file[data_type+'_count']

    try:
        for start, group_size in zip(index, count):
            # Reset per group: a group with zero images never reaches
            # waitKey, so without this the Esc check below would read an
            # unbound (or stale) key.
            key = None
            for offset in range(int(group_size)):
                cv2.imshow('Image', images[int(start) + offset])
                key = cv2.waitKey(0)
                if key in (esc_key, enter_key):
                    break
            if key == esc_key:
                break
    finally:
        # Always tear the window down, even if reading or display failed.
        cv2.destroyAllWindows()
    
# Browse each split in turn; close the HDF5 handle even if a viewer call
# raises, so the file is never left open.
try:
    test_data(file, 'train')
    test_data(file, 'test')
    test_data(file, 'val')
finally:
    file.close()