In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Loading .h5 file

import h5py
from pathlib import Path
hdf5_dir = Path("/StokesVertexBeam/Data/hdf5/")
hdf5_dir.mkdir(parents=True, exist_ok=True)

def read_many_hdf5(num_images):
    """ Reads the image stack and its labels from a combined HDF5 file.

        The file is looked up in ``hdf5_dir`` under the name
        ``{num_images}_From10000each_303_303_many.h5``.

        Parameters:
        ---------------
        num_images   number of images stored in the file; it is only used
                     to build the file name

        Returns:
        ----------
        images      content of the "/images" dataset as a uint8 array,
                    (N, 303, 303, 3) for the current data set
        labels      content of the "/meta" dataset as a uint8 array, (N, 1)
    """
    # Other inputs that were read with this function before:
    #   hdf5_dir / f"{num_images}_many.h5"
    #   hdf5_dir / f"experimental_{num_images}_many.h5"
    h5_path = hdf5_dir / f"{num_images}_From10000each_303_303_many.h5"

    # Open read-only (nothing is written back) and let the context manager
    # close the handle, so the file is not left open/locked by the kernel.
    with h5py.File(h5_path, "r") as file:
        # dataset[()] reads the whole dataset into memory; copy=False avoids
        # a second multi-GB copy when the data is already stored as uint8.
        images = file["/images"][()].astype("uint8", copy=False)
        labels = file["/meta"][()].astype("uint8", copy=False)

    return images, labels
    
# Load the full 15000-image stack together with its class labels.
corr_img, labels = read_many_hdf5(num_images=15000)

# Show both shapes as the cell output.
(corr_img.shape, labels.shape)

((15000, 303, 303, 3), (15000, 1))

In [5]:
# Data split in 3 parts - Train, Val, Test : 70%, 20%, 10%
import random

# Fixed seed: the same image always lands in the same split, so re-running the
# notebook reproduces the split instead of silently producing a different one.
SPLIT_SEED = 42

num_samples = corr_img.shape[0]
num_train = int(num_samples*.7)
num_val = int(num_samples*.2)

# Shuffle all sample indices once, then cut the permutation into three
# disjoint slices. The test split takes whatever is left (about 10%).
shuffled_idx = list(range(num_samples))
random.Random(SPLIT_SEED).shuffle(shuffled_idx)

train_sample = shuffled_idx[:num_train]
left_sample = shuffled_idx[num_train:]
val_sample = left_sample[:num_val]
test_sample = left_sample[num_val:]

# Every sample must be used exactly once across the three splits.
assert len(set(train_sample) | set(val_sample) | set(test_sample)) == num_samples
assert len(train_sample) + len(val_sample) + len(test_sample) == num_samples

train_img, train_label = corr_img[train_sample], labels[train_sample]
val_img, val_label = corr_img[val_sample], labels[val_sample]
test_img, test_label = corr_img[test_sample], labels[test_sample]

# experimental_img, experimental_label = corr_img, labels

# Free the full arrays; the three splits above are independent copies.
# NOTE: after this line the cell cannot be re-run without reloading the data.
del corr_img, labels

In [4]:
# plt.imshow(corr_img[0])

In [12]:
# class_dir = ["Class1_phi12_01/", "Class2_phi12_02/", "Class3_phi12_10/",
#             "Class4_phi12_12/", "Class5_phi12_23/", "Class6_phi23_01/",
#             "Class7_phi23_02/", "Class8_phi23_10/", "Class9_phi23_12/",
#             "Class10_phi23_23/", "Class11_phi31_01/", "Class12_phi31_02/",
#             "Class13_phi31_10/", "Class14_phi31_12/", "Class15_phi31_23/"]

In [7]:
from pathlib import Path

from PIL import Image

train_path = "/StokesVertexBeam/Data/DesignData/Design1_1000From10000/train/"
val_path = "/StokesVertexBeam/Data/DesignData/Design1_1000From10000/val/"
test_path = "/StokesVertexBeam/Data/DesignData/Design1_1000From10000/test/"


def save_split_images(images, split_labels, split_path):
    """Write every image of one split to <split_path>/Class<label>/<i>.jpg.

    Parameters:
    ---------------
    images         image array indexed along its first axis; each images[i]
                   is passed to PIL's Image.fromarray
    split_labels   label array; split_labels[i][0] is the class of images[i]
    split_path     root directory of the split (train / val / test)

    The class sub-directories are created on demand, so the export also
    works on a fresh directory tree.
    """
    split_dir = Path(split_path)
    ready_dirs = set()  # class directories already created in this call

    for i in range(images.shape[0]):
        class_dir = split_dir / ("Class" + str(split_labels[i][0]))
        if class_dir not in ready_dirs:
            # Image.save does not create missing parent directories.
            class_dir.mkdir(parents=True, exist_ok=True)
            ready_dirs.add(class_dir)

        # File name is the index of the image inside its own split.
        Image.fromarray(images[i]).save(class_dir / (str(i) + ".jpg"))


save_split_images(train_img, train_label, train_path)
save_split_images(val_img, val_label, val_path)
save_split_images(test_img, test_label, test_path)

# experimental_path = "/StokesVertexBeam/Data/NewCodeData/experimental/"

# from PIL import Image
# for i in range(experimental_img.shape[0]):
#     im = Image.fromarray(experimental_img[i])
#     image_path = experimental_path + "Class" + str(experimental_label[i][0]) + "/" + str(i) + ".jpg"
#     im.save(image_path)

In [None]:

# store_many_hdf5(corr_img, labels)