In [1]:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

In [2]:
# Loading .h5 file

import h5py
from pathlib import Path
# Directory holding the HDF5 datasets; it is created (with any missing
# parents) when absent so the helpers below can always open files in it.
hdf5_dir = Path("/StokesVertexBeam/Data/hdf5/")
if not hdf5_dir.is_dir():
    hdf5_dir.mkdir(exist_ok=True, parents=True)

def read_many_hdf5(num_images, which_type = "simulated"):
    """ Reads an image stack and its labels from an HDF5 file in `hdf5_dir`.
        Parameters:
        ---------------
        num_images   number of images stored in the file; it is only used
                     to build the file name
        which_type   "simulated" reads "<num_images>_303_303_many.h5";
                     any other value reads
                     "experimental_<num_images>_303_303_many.h5"

        Returns:
        ----------
        images      contents of the "/images" dataset as a uint8 array
                    (shape (N, 303, 303, 3) for the files used in this notebook)
        labels      contents of the "/meta" dataset as a uint8 array
                    (shape (N, 1) for the files used in this notebook)
    """
    if which_type == "simulated":
        file_name = f"{num_images}_303_303_many.h5"
    else:
        file_name = f"experimental_{num_images}_303_303_many.h5"

    # Read-only mode is enough here, and the context manager guarantees the
    # handle is released even if one of the datasets is missing.
    with h5py.File(hdf5_dir / file_name, "r") as file:
        images = np.array(file["/images"]).astype("uint8")
        labels = np.array(file["/meta"]).astype("uint8")

    return images, labels

def store_many_hdf5(images, labels):
    """ Stores an array of images and their labels to an HDF5 file in `hdf5_dir`.

        The file is named "<len(images)>_303_303_many.h5", which is the name
        read_many_hdf5(..., "simulated") looks for, and is overwritten if it
        already exists.

        Parameters:
        ---------------
        images       images array to be stored in the "images" dataset
        labels       labels array to be stored in the "meta" dataset
    """
    num_images = len(images)

    # "w" creates the file or truncates an existing one. The context manager
    # closes it even when a dataset cannot be written.
    with h5py.File(hdf5_dir / f"{num_images}_303_303_many.h5", "w") as file:
        # Both datasets are stored as unsigned 8-bit big-endian integers.
        file.create_dataset(
            "images", np.shape(images), h5py.h5t.STD_U8BE, data=images
        )
        file.create_dataset(
            "meta", np.shape(labels), h5py.h5t.STD_U8BE, data=labels
        )

In [3]:
# Load the simulated and the experimental image stacks with their labels.
img_sim, labels_sim = read_many_hdf5(num_images=15000, which_type="simulated")
img_exp, labels_exp = read_many_hdf5(num_images=300, which_type="experimental")

# Cell output: the shapes of everything that was just loaded.
(img_exp.shape, labels_exp.shape, img_sim.shape, labels_sim.shape)

((300, 303, 303, 3), (300, 1), (15000, 303, 303, 3), (15000, 1))

In [4]:
# Names of the 15 classes ("Class1" ... "Class15"); used as dictionary keys.
class_dir = ["Class" + str(number) for number in range(1, 16)]

# Per-class image stacks, filled in below.
sim_data_dict = {}
exp_data_dict = {}

# Created for per-class labels but intentionally left empty in this cell.
sim_labels_dict = {}
exp_labels_dict = {}

# The stacks are laid out class after class: consecutive runs of 1000
# simulated and 20 experimental images belong to the same class.
for i in range(15):
    sim_block = slice(i * 1000, (i + 1) * 1000)
    exp_block = slice(i * 20, (i + 1) * 20)

    sim_data_dict[class_dir[i]] = img_sim[sim_block]
    exp_data_dict[class_dir[i]] = img_exp[exp_block]

In [5]:
# Split every class into train / val / test sets and write them out as JPEGs.
#   simulated    (1000 per class): 897 train, 3 val, 100 test
#   experimental (  20 per class):   7 train, 3 val,  10 test
# Train and val mix simulated and experimental images; the two test sets are
# kept apart in "test_sim" and "test_exp".
import random

# Images drawn per class for train and val; whatever is left over after
# these two draws becomes the test set.
N_TRAIN_SIM, N_VAL_SIM = 897, 3
N_TRAIN_EXP, N_VAL_EXP = 7, 3

# Each selected experimental training image is repeated this many times
# (presumably to balance it against the much larger simulated set).
No_of_repition_exp_train_image = 18
data_dir = "/StokesVertexBeam/Data/DesignData/Design3_MixTrain/"

# Dedicated, seeded generators make the split reproducible on every run of
# this cell without touching the global `random` / `np.random` state.
SPLIT_SEED = 42
split_rng = random.Random(SPLIT_SEED)
shuffle_rng = np.random.default_rng(SPLIT_SEED)


def _split_indices(n_total, n_train, n_val, rng):
    """Return disjoint (train, val, test) index lists covering range(n_total)."""
    train = rng.sample(range(n_total), n_train)
    # Sorting gives the sampler a deterministic sequence to draw from.
    remaining = sorted(set(range(n_total)) - set(train))
    val = rng.sample(remaining, n_val)
    test = sorted(set(remaining) - set(val))
    return train, val, test


def _save_images(images, split_name, class_name):
    """Write images to <data_dir>/<split_name>/<class_name>/<index>.jpg."""
    out_dir = Path(data_dir) / split_name / class_name
    # The class folder may not exist yet on a fresh output tree.
    out_dir.mkdir(parents=True, exist_ok=True)
    for index, pixels in enumerate(images):
        Image.fromarray(pixels).save(str(out_dir / f"{index}.jpg"))


for class_name in class_dir:
    sim_images = sim_data_dict[class_name]
    exp_images = exp_data_dict[class_name]

    train_sample_sim, val_sample_sim, test_sample_sim = _split_indices(
        len(sim_images), N_TRAIN_SIM, N_VAL_SIM, split_rng
    )
    train_sample_exp, val_sample_exp, test_sample_exp = _split_indices(
        len(exp_images), N_TRAIN_EXP, N_VAL_EXP, split_rng
    )

    # Repeating the index list repeats the corresponding images in the
    # training set.
    train_sample_exp = train_sample_exp * No_of_repition_exp_train_image

    # Fancy indexing and concatenate both return new arrays, so shuffling in
    # place does not disturb the per-class source stacks.
    train_img = np.concatenate(
        (sim_images[train_sample_sim], exp_images[train_sample_exp]), 0
    )
    shuffle_rng.shuffle(train_img)

    val_img = np.concatenate(
        (sim_images[val_sample_sim], exp_images[val_sample_exp]), 0
    )
    shuffle_rng.shuffle(val_img)

    test_sim_img = sim_images[test_sample_sim]
    test_exp_img = exp_images[test_sample_exp]

    _save_images(train_img, "train", class_name)
    _save_images(val_img, "val", class_name)
    _save_images(test_sim_img, "test_sim", class_name)
    _save_images(test_exp_img, "test_exp", class_name)