In [11]:
from scipy.signal import decimate
import h5py
import numpy as np
import os

In [12]:
def log(message: tuple, verbose_true: int, verbose_min):
    """Print the entries of `message` on one line if verbosity allows it.

    Nothing is printed unless `verbose_true` is at least `verbose_min`.
    Each entry is followed by a single space and the line ends with a newline.
    """
    if not verbose_true >= verbose_min:
        return
    # Build the whole line first: every entry carries its own trailing space.
    print(''.join(str(entry) + ' ' for entry in message))

In [13]:
def load_data(filename):
    """Open an HDF5 file read-only and return the contents of its first entry.

    "First" means the first key yielded when iterating over the file's keys.
    The entry is read in full (``[()]``) before the file is closed.
    """
    with h5py.File(filename, 'r') as h5_file:
        first_key = next(iter(h5_file.keys()))
        contents = h5_file[first_key][()]
    return contents

def downsample(data, factor):
    """Reduce the sampling rate of `data` by `factor` along axis 1.

    Delegates to ``scipy.signal.decimate`` with its default filter settings.
    """
    reduced = decimate(x=data, q=factor, axis=1)
    return reduced

In [14]:
def find_means_and_stds(data) -> tuple[np.ndarray, np.ndarray]:
    """Compute the mean and standard deviation of `data` along axis 1.

    The reduced axis is kept with length 1 in both results, so they
    broadcast against `data` without reshaping.
    """
    reduce_opts = {"axis": 1, "keepdims": True}
    return np.mean(data, **reduce_opts), np.std(data, **reduce_opts)

def z_score_normalize(data, means, stds):
    """Z-score normalize `data` using the supplied means and stds.

    Computes ``(data - means) / stds`` with NumPy broadcasting. Entries of
    `stds` that are exactly zero (e.g. a constant row) are treated as 1, so
    the corresponding values are only centred instead of becoming NaN/inf
    through a division by zero.

    Parameters
    ----------
    data : array-like
        Values to normalize.
    means : array-like
        Means to subtract; must broadcast against `data`.
    stds : array-like
        Standard deviations to divide by; must broadcast against `data`.
        Not modified.

    Returns
    -------
    The normalized values.
    """
    stds = np.asarray(stds)
    # Dividing by 1 where the spread is zero leaves just `data - means` there.
    safe_stds = np.where(stds == 0, 1.0, stds)
    return (data - means) / safe_stds

In [15]:
def downsample_and_concat(input_dir, output_dir, factor, verbose):
    """
    Downsample every ``.h5`` file in `input_dir`, save each result, and
    return all downsampled arrays joined along axis 1.

    Each downsampled array is written to ``<output_dir>/ds_<file>`` in a
    dataset named after the input file without its extension. Files are
    processed in sorted name order so the column order of the returned array
    is reproducible (``os.listdir`` itself returns names in arbitrary order).

    Parameters
    ----------
    input_dir : str
        Directory scanned (non-recursively) for files ending in ``.h5``.
    output_dir : str
        Directory for the downsampled files; created if it does not exist.
    factor : int
        Downsampling factor passed to `downsample`.
    verbose : int
        Verbosity passed to `log`: at >= 1 the running concatenated shape is
        printed per file, at >= 2 each file's own downsampled shape as well.

    Returns
    -------
    The concatenation (axis 1) of all downsampled arrays, or None when
    `input_dir` contains no ``.h5`` file.
    """
    os.makedirs(output_dir, exist_ok=True)
    chunks = []
    total_length = 0  # running size of axis 1 over the files processed so far

    for file in sorted(os.listdir(input_dir)):
        if not file.endswith('.h5'):
            continue
        path = os.path.join(input_dir, file)
        data = downsample(load_data(path), factor)
        log((file, np.shape(data)), verbose, 2)

        output_path = os.path.join(output_dir, f"ds_{file}")
        with h5py.File(output_path, 'w') as f:
            f.create_dataset(file.rsplit('.', 1)[0], data=data)

        # Collect the pieces and join them once at the end; re-concatenating
        # the growing array for every file would copy it over and over.
        chunks.append(data)
        shape = np.shape(data)
        total_length += shape[1]
        running_shape = shape[:1] + (total_length,) + shape[2:]
        log((file, running_shape), verbose, 1)

    if not chunks:
        return None
    # np.concatenate exists in every NumPy release; np.concat needs NumPy >= 2.0.
    return np.concatenate(chunks, axis=1)

In [22]:
def normalize_and_save(input_dir, output_dir, means, stds, verbose):
    """Write a z-score-normalized copy of every ``.h5`` file in `input_dir`.

    Each file is loaded with `load_data`, normalized with `means` and `stds`,
    and saved as ``<output_dir>/norm_<file>`` in a dataset named after the
    input file without its extension. `output_dir` is created if missing.
    `verbose` is accepted but not used by this function.
    """
    os.makedirs(output_dir, exist_ok=True)

    h5_names = [name for name in os.listdir(input_dir) if name.endswith('.h5')]
    for name in h5_names:
        source = os.path.join(input_dir, name)
        normalized = z_score_normalize(load_data(source), means, stds)

        dataset_name = name.rpartition('.')[0]
        target = os.path.join(output_dir, f"norm_{name}")
        with h5py.File(target, 'w') as out_file:
            out_file.create_dataset(dataset_name, data=normalized)

In [17]:
# Verbosity level handed to the pipeline functions. `log` prints a message
# only when this value is >= the message's level (levels 1 and 2 are used
# in this notebook), so 0 keeps the run silent.
VERBOSE = 0  # constant for debugging, higher number means more printing

In [18]:
# Downsample every training file by a factor of 10, save the results, and
# keep the concatenated downsampled data for computing statistics.
data_train = downsample_and_concat(
    input_dir='Intra/train',
    output_dir='Intra/train_ds',
    factor=10,
    verbose=VERBOSE,
)

In [19]:
# Statistics of the concatenated, downsampled training data (along axis 1).
means, stds = find_means_and_stds(data=data_train)

In [23]:
# Normalize the downsampled training files with the training statistics.
normalize_and_save(
    input_dir='Intra/train_ds',
    output_dir='Intra/train_ds_norm',
    means=means,
    stds=stds,
    verbose=VERBOSE,
)