In [26]:
import glob
import imageio.v2 as imageio
import numpy as np
import matplotlib.pyplot as plt
import random

In [62]:
# Converting Data into hdf5 format and loading from there. See: https://realpython.com/storing-images-in-python/
import h5py
from pathlib import Path
# Folder that receives the generated HDF5 archives.
hdf5_dir = Path("/StokesVertexBeam/Data/hdf5/")
# Create the folder (and any missing parents); do nothing if it already exists.
hdf5_dir.mkdir(exist_ok=True, parents=True)

def store_many_hdf5(images, labels, filename=None):
    """ Stores an array of images and its label array in a new HDF5 file.
        Parameters:
        ---------------
        images       images array, e.g. (N, H, W, 3), to be stored
        labels       labels array, (N, 1) to be stored
        filename     optional name of the file inside hdf5_dir; when omitted
                     the name "{N}_From10000each_many.h5" is used, where N is
                     len(images)

        Both arrays are written with the 8-bit unsigned big-endian HDF5 type
        (STD_U8BE) into the datasets "images" and "meta". An existing file
        with the same name is overwritten (mode "w").
    """
    num_images = len(images)

    # Naming schemes used so far; pass one via `filename` instead of editing
    # this function:
    #   f"{num_images}_From10000each_many.h5"        (default)
    #   f"{num_images}_303_303_many.h5"
    #   f"experimental_{num_images}_many.h5"
    #   f"experimental_{num_images}_303_303_many.h5"
    if filename is None:
        filename = f"{num_images}_From10000each_many.h5"

    # The context manager closes the file even if writing a dataset fails,
    # so a failed write does not leave the handle open.
    with h5py.File(hdf5_dir / filename, "w") as file:
        file.create_dataset(
            "images", np.shape(images), h5py.h5t.STD_U8BE, data=images
        )
        file.create_dataset(
            "meta", np.shape(labels), h5py.h5t.STD_U8BE, data=labels
        )

def read_many_hdf5(num_images, filename=None):
    """ Reads the image and label arrays back from an HDF5 file.
        Parameters:
        ---------------
        num_images   number of images; only used to build the default file name
        filename     optional name of the file inside hdf5_dir; when omitted
                     the name "simulated_{num_images}_many.h5" is used

        Returns:
        ----------
        images      contents of the "images" dataset as a uint8 array
        labels      contents of the "meta" dataset as a uint8 array

        NOTE(review): the default name differs from the default name written
        by store_many_hdf5 -- pass `filename` explicitly for a round trip.
    """
    if filename is None:
        filename = f"simulated_{num_images}_many.h5"

    # Read-only mode is enough here and cannot modify the archive; the
    # context manager closes the file once both datasets are in memory.
    with h5py.File(hdf5_dir / filename, "r") as file:
        images = np.array(file["/images"]).astype("uint8")
        labels = np.array(file["/meta"]).astype("uint8")

    return images, labels

In [72]:
# Load the png files

def _load_class_images(root_dir, class_dir, file_dir, no_of_classes,
                       crop_rows, crop_cols, samples_per_class=None):
    """ Reads, crops and labels the images of the first `no_of_classes` class folders.
        Shared implementation behind the public load_png_files* loaders.

        Parameters:
        ---------------
        root_dir            path prefix of every class folder (concatenated as-is)
        class_dir           sequence of class sub-folder names; entry i gets label i + 1
        file_dir            glob pattern appended to each folder, e.g. '*.png'
        no_of_classes       number of leading entries of class_dir to load
        crop_rows           number of leading pixel rows kept from each image
        crop_cols           number of leading pixel columns kept from each image
        samples_per_class   when not None, at most this many files are read per
                            class, chosen with random.shuffle

        Returns:
        ----------
        image_list      array of the cropped images, in class order along axis 0
        labels_list     labels array, (N, 1), label = class index + 1

        Raises:
        ----------
        ValueError      if no_of_classes is larger than len(class_dir)
    """
    if no_of_classes > len(class_dir):
        raise ValueError(
            f"no_of_classes={no_of_classes} but only {len(class_dir)} class folders were given"
        )

    images, labels = [], []
    for class_index in range(no_of_classes):
        # Sorted so the result does not depend on the order in which the
        # filesystem happens to list the files.
        filenames = sorted(glob.glob(root_dir + class_dir[class_index] + file_dir))
        found = len(filenames)
        if samples_per_class is not None:
            random.shuffle(filenames)
            filenames = filenames[:samples_per_class]

        for filename in filenames:
            # Keep the top-left crop and only the first three channels.
            images.append(imageio.imread(filename)[:crop_rows, :crop_cols, :3])

        # A folder without matching files simply contributes no images/labels.
        labels.extend([class_index + 1] * len(filenames))
        print(f"class {class_index + 1}: loaded {len(filenames)} of {found} files")

    # Converting once at the end avoids re-copying the accumulated array
    # for every class.
    image_list = np.asarray(images)
    labels_list = np.asarray(labels).reshape(len(labels), 1)
    return image_list, labels_list


def load_png_files(root_dir, class_dir, file_dir, no_of_classes):
    """ Loads every matching image of each class folder, cropped to 101 x 707 x 3.
        Parameters:
        ---------------
        root_dir        path prefix of every class folder
        class_dir       sequence of class sub-folder names; entry i gets label i + 1
        file_dir        glob pattern appended to each folder, e.g. '*.png'
        no_of_classes   number of leading entries of class_dir to load

        Returns:
        ----------
        image_list      images array, (N, 101, 707, 3) for images at least that large
        labels_list     labels array, (N, 1), values 1..no_of_classes
    """
    return _load_class_images(
        root_dir, class_dir, file_dir, no_of_classes, crop_rows=101, crop_cols=707
    )


# Load the image files, cropped to 303 x 303

def load_png_files_303x303(root_dir, class_dir, file_dir, no_of_classes):
    """ Loads every matching image of each class folder, cropped to 303 x 303 x 3.
        Parameters:
        ---------------
        root_dir        path prefix of every class folder
        class_dir       sequence of class sub-folder names; entry i gets label i + 1
        file_dir        glob pattern appended to each folder, e.g. '*.png'
        no_of_classes   number of leading entries of class_dir to load

        Returns:
        ----------
        image_list      images array, (N, 303, 303, 3) for images at least that large
        labels_list     labels array, (N, 1), values 1..no_of_classes
    """
    return _load_class_images(
        root_dir, class_dir, file_dir, no_of_classes, crop_rows=303, crop_cols=303
    )


def load_png_files_from_10000each(root_dir, class_dir, file_dir, no_of_classes,
                                  samples_per_class=1000):
    """ Loads a random subset of each class folder, cropped to 101 x 707 x 3.
        Parameters:
        ---------------
        root_dir            path prefix of every class folder
        class_dir           sequence of class sub-folder names; entry i gets label i + 1
        file_dir            glob pattern appended to each folder, e.g. '*.png'
        no_of_classes       number of leading entries of class_dir to load
        samples_per_class   maximum number of randomly chosen files per class
                            (default 1000); seed the `random` module beforehand
                            for a repeatable selection

        Returns:
        ----------
        image_list      images array, (N, 101, 707, 3) for images at least that large
        labels_list     labels array, (N, 1), values 1..no_of_classes
    """
    return _load_class_images(
        root_dir, class_dir, file_dir, no_of_classes,
        crop_rows=101, crop_cols=707, samples_per_class=samples_per_class,
    )

In [60]:
# Get the png files

# Data path

# root_dir = '/StokesVertexBeam/Data/1000eachClass/'
# class_dir = ["Revised_Sim_phi_12_01/", "Revised_Sim_phi_12_02/", "Revised_Sim_phi_12_10/",
#             "Revised_Sim_phi_12_12/", "Revised_Sim_phi_12_23/", "Revised_Sim_phi_23_01/",
#             "Revised_Sim_phi_23_02/", "Revised_Sim_phi_23_10/", "Revised_Sim_phi_23_12/",
#             "Revised_Sim_phi_23_23/", "Revised_Sim_phi_31_01/", "Revised_Sim_phi_31_02/",
#             "Revised_Sim_phi_31_10/", "Revised_Sim_phi_31_12/", "Revised_Sim_phi_31_23/"]

root_dir = "/StokesVertexBeam/Data/RawData/10000Each/"
class_dir = ["Class1_phi12_01/", "Class2_phi12_02/", "Class3_phi12_10/",
            "Class4_phi12_12/", "Class5_phi12_23/", "Class6_phi23_01/",
            "Class7_phi23_02/", "Class8_phi23_10/", "Class9_phi23_12/",
            "Class10_phi23_23/", "Class11_phi31_01/", "Class12_phi31_02/",
            "Class13_phi31_10/", "Class14_phi31_12/", "Class15_phi31_23/"]

# root_dir = "/StokesVertexBeam/Data/Transformed303x303/Experimental/"
# root_dir = "/StokesVertexBeam/Data/Transformed303x303/Simulated/"
# class_dir = ["Class1/", "Class2/", "Class3/",
#             "Class4/", "Class5/", "Class6/",
#             "Class7/", "Class8/", "Class9/",
#             "Class10/", "Class11/", "Class12/",
#             "Class13/", "Class14/", "Class15/"]

# Derived from class_dir so the class count cannot drift from the folder list.
no_of_classes = len(class_dir)

# Glob pattern for the image files inside each class folder
# (switch to '*.png' for the PNG data sets).
file_dir = '*.jpg'

# image_list, labels_list = load_png_files_303x303(root_dir, class_dir, file_dir, no_of_classes)
# image_list, labels_list = load_png_files(root_dir, class_dir, file_dir, no_of_classes)
# image_list, labels_list = load_png_files_from_10000each(root_dir, class_dir, file_dir, no_of_classes)

In [63]:
# # # Working with hdf5 files

# Store the array of images and labels into hdf5 file
# store_many_hdf5(image_list, labels_list)

# # # Load the images and labels as array from hdf5 file
# # # dfo