In [None]:
import os
import h5py
import numpy as np

# Directory whose entries are each opened as an HDF5 file containing a
# "data" dataset.
dataset_root = "../train_case"
# os.listdir returns entries in arbitrary order; sort so the concatenation
# order (and anything built from dataset_files later) is reproducible.
dataset_files = sorted(os.listdir(dataset_root))

if not dataset_files:
    # Fail here with a clear message instead of a NameError further down.
    raise FileNotFoundError(f"no dataset files found in {dataset_root!r}")

# Read every file once and concatenate a single time at the end; calling
# np.concatenate inside the loop re-copies the accumulated array on every
# iteration.
case_arrays = []
for dataset_file in dataset_files:
    with h5py.File(f"{dataset_root}/{dataset_file}","r") as hdf_file:
        case_arrays.append(hdf_file["data"][:])

all_data = np.concatenate(case_arrays,axis=0)

# Statistics along axis 0 of the stacked rows from all files; used for
# z-score normalization.
means = np.mean(all_data,axis=0)
stds = np.std(all_data,axis=0)

In [None]:
from typing import Tuple

def generate_dataset(file_path:str,input_len:int,label_len:int,
                     stride: int,
                     means=means,stds=stds,
                     input_dim=12,
                     label_dim=7) -> Tuple[np.ndarray, np.ndarray]:
    '''
    Build sliding-window (input, label) pairs from one HDF5 file.

    The first label_dim columns of the "data" dataset are z-scored with
    means/stds. Each window starts at i*stride; its input is the next
    input_len rows and its label is the step-to-step difference
    x[t+1] - x[t] of the normalized first label_dim columns over the next
    label_len steps.

    inputs:
        file_path: path of h5py file
        input_len: number of rows in each input window
        label_len: number of difference steps in each label window
        stride: offset in rows between consecutive window starts (>= 1)
        means, stds: per-column statistics used for the z-score; the
            defaults are bound to the module-level arrays at the time this
            function is defined
        input_dim: number of columns of each input window
        label_dim: number of leading columns that are normalized and
            turned into labels
    outputs:
        inputs: N x input_len x input_dim(12)
        labels: N x label_len x label_dim(7)
        N is 0 when the file is too short to hold a single window.
    raises:
        ValueError: if stride is smaller than 1
    '''

    if stride < 1:
        raise ValueError(f"stride must be >= 1, got {stride}")

    with h5py.File(file_path,"r") as hdf_file:
        data = hdf_file["data"][:]

    # Normalizing into an integer array would truncate the z-scores, so
    # promote non-floating data first; floating data keeps its dtype.
    if not np.issubdtype(data.dtype, np.floating):
        data = data.astype(np.float64)

    # z-score
    # NOTE(review): the normalized column count follows label_dim (7 by
    # default, matching the previous hard-coded value) - confirm that the
    # normalized columns are always exactly the label columns.
    data_normalized = np.copy(data)
    data_normalized[:,:label_dim] = (data[:,:label_dim] - means[:label_dim])/stds[:label_dim]

    # generate slice
    # A window needs input_len rows for the input and label_len + 1 rows for
    # the label differences, so its footprint is the larger of the two.
    window_len = max(input_len, label_len + 1)
    data_len = data_normalized.shape[0]
    if data_len < window_len:
        dataset_len = 0
    else:
        # Valid starts are 0, stride, ... up to data_len - window_len inclusive.
        dataset_len = (data_len - window_len)//stride + 1

    inputs = np.empty((dataset_len,input_len,input_dim))
    labels = np.empty((dataset_len,label_len,label_dim))

    # deltas[t] = x[t+1] - x[t]; computed once instead of once per window.
    deltas = np.diff(data_normalized[:,:label_dim],axis=0)

    for i in range(dataset_len):
        start_index = i*stride
        inputs[i,:,:] = data_normalized[start_index:start_index+input_len,:]
        labels[i,:,:] = deltas[start_index:start_index+label_len,:]

    return inputs,labels

In [20]:
# Build the windows for every file, then concatenate once; concatenating
# inside the loop re-copies the growing arrays on every iteration.
input_chunks = []
label_chunks = []
for dataset_file in dataset_files:
    file_path = f"{dataset_root}/{dataset_file}"
    inputs, labels = generate_dataset(file_path=file_path,
                                      input_len=300,
                                      label_len=600,
                                      stride=1)
    input_chunks.append(inputs)
    label_chunks.append(labels)

# np.concatenate raises ValueError if dataset_files was empty.
all_inputs = np.concatenate(input_chunks,axis=0)
all_labels = np.concatenate(label_chunks,axis=0)

# Store the normalization statistics next to the samples so a reader of the
# file has the values that were used for the z-score.
with h5py.File("../cooling_data.h5","w") as hdf_file:
    hdf_file.create_dataset("inputs",data=all_inputs)
    hdf_file.create_dataset("labels",data=all_labels)
    hdf_file.create_dataset("means",data=means)
    hdf_file.create_dataset("stds",data=stds)