## Build a .h5 dataset from pregenerated data
The data include 3 categories; each has 100 samples for training, 20 for testing, and 20 for validation.

In [1]:
import numpy as np
import h5py
import scipy.io
import os
from scipy.io import loadmat, savemat
import matplotlib.pyplot as plt

In [4]:
# Pack the per-category .mat samples into a single HDF5 file, one group per
# sample, named data_group_<k> with k = length*category_index + sample_index.
dataset_name = 'training_data.h5'
file_list = ['stub', 'step', 'radial']
length = 100      # number of samples read from each category folder
n_points = 1000   # leading rows kept from every S-parameter array

# Open the file once, in 'w' mode. The original reopened it in 'a' mode for
# every sample, so re-running the cell raised ValueError in create_group
# because the groups already existed. Truncating makes the cell idempotent
# and avoids 300 open/close cycles.
with h5py.File(dataset_name, 'w') as hdf:
    for idx, file in enumerate(file_list):
        for i in range(length):
            # Source files are 1-based: ./<category>_data/data1.mat, data2.mat, ...
            file_name = './' + file + '_data/data' + str(i+1)+'.mat'
            data_temp = loadmat(file_name)

            group = hdf.create_group(f'data_group_{length*idx+i}')
            group.create_dataset('pattern', data=data_temp['eps_pattern'])
            # loadmat returns 2-D arrays; [0,0] takes the first element as a scalar.
            group.create_dataset('sub_thickness', data=data_temp['hp_pattern'][0,0])
            # NOTE(review): the 'C' datasets come from keys ending in '3'
            # ('S11C3'/'S21C3') while the 'D' ones do not -- confirm intended.
            group.create_dataset('S11C', data=data_temp['S11C3'][:n_points])
            group.create_dataset('S21C', data=data_temp['S21C3'][:n_points])
            group.create_dataset('S11D', data=data_temp['S11D'][:n_points])
            group.create_dataset('S21D', data=data_temp['S21D'][:n_points])

In [21]:
import torch
from torch.utils.data import Dataset
import h5py
import numpy as np

class H5Dataset(Dataset):
    """Map-style dataset over an HDF5 file holding one group per sample.

    Each group is expected to provide the datasets 'pattern',
    'sub_thickness', 'S21C' and 'S21D'. Indexing returns a
    ``(CNN_in, LSTM_in, HybridNN_out)`` tuple of float32 tensors.

    Args:
        h5_filename: Path of the HDF5 file to read. It is opened read-only.
    """

    def __init__(self, h5_filename):
        self.h5_filename = h5_filename
        self.h5_file = h5py.File(h5_filename, 'r')
        # Group names are snapshotted once; an integer index maps to groups[idx].
        self.groups = list(self.h5_file.keys())

    def __len__(self):
        """Return the number of sample groups in the file."""
        return len(self.groups)

    def log_normalize(self, x):
        """Convert magnitudes to dB and rescale so -50 dB -> 0 and 0 dB -> 1.

        Computes ``(20*log10(|x|) + 50) / 50``. A zero entry in ``x`` yields
        ``-inf`` because no floor is applied.
        """
        return (20*np.log10(np.abs(x))+50)/50

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(CNN_in, LSTM_in, HybridNN_out)``.

        Returns:
            CNN_in: tensor of shape (2, *pattern.shape); channel 0 is the
                pattern divided by 10, channel 1 is a constant plane holding
                the substrate thickness multiplied by 10.
            LSTM_in: tensor of shape (10, 18) built from the first 180
                entries of 'S21C' after ``log_normalize``.
            HybridNN_out: tensor of shape (10, 13) built from the first 130
                entries of 'S21D' after ``log_normalize``.
        """
        # Reopen transparently if the handle was closed (e.g. after close()).
        if not self.h5_file.id:
            self.h5_file = h5py.File(self.h5_filename, 'r')

        group = self.h5_file[self.groups[idx]]

        pattern = group['pattern'][:]/10
        sub_thickness = group['sub_thickness'][()]*10
        # Broadcast the scalar thickness to a plane shaped like the pattern.
        # The original multiplied by np.zeros_like(pattern), which made this
        # channel identically zero and discarded the thickness.
        sub_info = sub_thickness*np.ones_like(pattern)
        CNN_in = np.stack([pattern, sub_info], axis = 0)
        CNN_in = torch.tensor(CNN_in, dtype=torch.float32)

        S21C = self.log_normalize(group['S21C'][:180]).reshape((10,18))
        S21D = self.log_normalize(group['S21D'][:130]).reshape((10,13))

        LSTM_in = torch.tensor(S21C, dtype=torch.float32)
        HybridNN_out = torch.tensor(S21D, dtype=torch.float32)

        return CNN_in, LSTM_in, HybridNN_out

    def close(self):
        """Close the underlying HDF5 file handle."""
        self.h5_file.close()



In [22]:
from torch.utils.data import DataLoader

# Smoke test: build the dataset, pull one shuffled batch, and print the shape
# of each tensor the model will consume.
dataset = H5Dataset('training_data.h5')

# try/finally guarantees the HDF5 handle is released even if batching fails;
# the original skipped close() whenever the loop raised.
try:
    # Create DataLoader
    data_loader = DataLoader(dataset, batch_size=10, shuffle=True)

    # Use DataLoader
    for CNN_in, LSTM_in, HybridNN_out in data_loader:
        print("CNN input:", CNN_in.shape)
        print("LSTM input:", LSTM_in.shape)
        print("Hybrid output:", HybridNN_out.shape)
        break  # only for the first batch
finally:
    dataset.close()


CNN input: torch.Size([10, 2, 100, 100])
LSTM input: torch.Size([10, 10, 18])
Hybrid output: torch.Size([10, 10, 13])
