In [None]:
# Mount Google Drive into the Colab runtime at /content/drive; the data paths
# used by the example call further down live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import glob
import h5py
import numpy as np
from scipy.signal import decimate, resample

In [None]:
def downsample_then_upsample_hdf5(in_filepath, out_filepath, factor=2):
    """
    Downsamples the EMG data in 'emg2qwerty/timeseries' by a given factor,
    then upsamples it back to the original number of samples, and writes the
    result to a new HDF5 file.

    The structured array is assumed to have fields:
      - 'emg_right' with shape (16,)
      - 'emg_left'  with shape (16,)
      - 'time'      as a single float

    Only 'emg_right' and 'emg_left' are replaced by their processed versions.
    'time' and any other field of the structured array are carried over
    unchanged. The attributes of the 'emg2qwerty' group, every other member of
    that group, and every other top-level key of the input file are copied to
    the output file as they are.

    Args:
        in_filepath (str): Path to the original HDF5 file.
        out_filepath (str): Path to the output HDF5 file. Must differ from
            in_filepath.
        factor (int): Downsampling factor (>= 1).

    Raises:
        ValueError: If factor is smaller than 1, or if in_filepath and
            out_filepath resolve to the same path.
    """
    if factor < 1:
        raise ValueError(f"factor must be a positive integer, got {factor!r}")

    # The input stays open for reading while the output is created in 'w'
    # (truncate) mode, so the two paths have to name different files. Checking
    # here fails before any data is loaded or filtered.
    if os.path.abspath(in_filepath) == os.path.abspath(out_filepath):
        raise ValueError(
            f"in_filepath and out_filepath must be different files: {in_filepath}"
        )

    with h5py.File(in_filepath, 'r') as f_in:

        orig_grp = f_in["emg2qwerty"]
        arr = orig_grp["timeseries"][:]

        orig_N = arr.shape[0]

        # Start from a copy so that every field that is not explicitly
        # overwritten below (e.g. 'time') keeps its original values instead of
        # being left as uninitialised memory.
        new_arr = arr.copy()

        for field in ("emg_right", "emg_left"):
            # Low-pass filter and keep every `factor`-th sample along the time
            # axis; zero_phase=True avoids shifting the signal in time.
            decimated = decimate(arr[field], q=factor, axis=0, zero_phase=True)
            # Bring the decimated signal back to the original sample count so
            # the output lines up with the untouched 'time' field.
            new_arr[field] = resample(decimated, orig_N, axis=0)

        with h5py.File(out_filepath, 'w') as f_out:
            new_grp = f_out.create_group("emg2qwerty")
            for attr in orig_grp.attrs:
                new_grp.attrs[attr] = orig_grp.attrs[attr]

            # Copy every sibling of 'timeseries' (such as 'metadata') verbatim;
            # 'timeseries' itself is written from the processed array.
            for name in orig_grp:
                if name != "timeseries":
                    orig_grp.copy(name, new_grp)
            new_grp.create_dataset("timeseries", data=new_arr)

            # Anything stored next to the 'emg2qwerty' group is preserved too.
            for key in f_in.keys():
                if key != "emg2qwerty":
                    f_in.copy(key, f_out)

    print(f"Downsampling then upsampling complete. File saved as: {out_filepath}")

In [None]:
def batch_downsample_then_upsample(input_folder, output_folder, factor=2):
    """
    Processes all HDF5 files in input_folder, applies downsampling then upsampling,
    and saves each processed file to output_folder with "_ds" appended to the filename.
    It skips processing for the two specified validation/testing files.

    When input_folder and output_folder are the same directory, files whose
    name (without extension) already ends in "_ds" are treated as results of an
    earlier run and are skipped, so running the batch again does not create
    "_ds_ds" copies of them.

    Args:
        input_folder (str): Folder containing the original HDF5 files.
        output_folder (str): Folder to save the processed HDF5 files. Created
            if it does not exist yet.
        factor (int): Downsampling factor.
    """
    os.makedirs(output_folder, exist_ok=True)

    # Files to skip (validation/testing files)
    skip_files = {
        "2021-06-04-1622862148-keystrokes-dca-study@1-0efbe614-9ae6-4131-9192-4398359b4f5f.hdf5",
        "2021-06-02-1622682789-keystrokes-dca-study@1-0efbe614-9ae6-4131-9192-4398359b4f5f.hdf5"
    }

    # Sorted so the processing order does not depend on directory listing order.
    hdf5_files = sorted(glob.glob(os.path.join(input_folder, "*.hdf5")))
    if not hdf5_files:
        print("No HDF5 files found in", input_folder)
        return

    # Outputs land next to the inputs when both folders are the same, which
    # means the glob above can also return files written by a previous run.
    in_place = os.path.abspath(input_folder) == os.path.abspath(output_folder)

    for in_filepath in hdf5_files:
        filename = os.path.basename(in_filepath)
        if filename in skip_files:
            print(f"Skipping validation/testing file: {filename}")
            continue

        stem, ext = os.path.splitext(filename)
        if in_place and stem.endswith("_ds"):
            print(f"Skipping already processed file: {filename}")
            continue

        # Append '_ds' before the extension
        out_filepath = os.path.join(output_folder, stem + "_ds" + ext)

        downsample_then_upsample_hdf5(in_filepath, out_filepath, factor=factor)

In [None]:
# Example usage in a Jupyter Notebook cell.
# NOTE: both variables below point at the same Drive folder, so every "_ds"
# output file is written next to the original file it was produced from.
input_folder = "/content/drive/MyDrive/C147_down_up/data"    # Update this with the path to your original data
output_folder = "/content/drive/MyDrive/C147_down_up/data"   # Update this with the path for saving processed files
batch_downsample_then_upsample(input_folder, output_folder, factor=2)

Downsampling then upsampling complete. File saved as: /content/drive/MyDrive/C147_down_up/data/2021-07-21-1626916256-keystrokes-dca-study@1-0efbe614-9ae6-4131-9192-4398359b4f5f_ds.hdf5
Downsampling then upsampling complete. File saved as: /content/drive/MyDrive/C147_down_up/data/2021-07-21-1626915176-keystrokes-dca-study@1-0efbe614-9ae6-4131-9192-4398359b4f5f_ds.hdf5
Downsampling then upsampling complete. File saved as: /content/drive/MyDrive/C147_down_up/data/2021-07-21-1626917264-keystrokes-dca-study@1-0efbe614-9ae6-4131-9192-4398359b4f5f_ds.hdf5
Downsampling then upsampling complete. File saved as: /content/drive/MyDrive/C147_down_up/data/2021-06-02-1622679967-keystrokes-dca-study@1-0efbe614-9ae6-4131-9192-4398359b4f5f_ds.hdf5
Downsampling then upsampling complete. File saved as: /content/drive/MyDrive/C147_down_up/data/2021-06-02-1622681518-keystrokes-dca-study@1-0efbe614-9ae6-4131-9192-4398359b4f5f_ds.hdf5
Skipping validation/testing file: 2021-06-02-1622682789-keystrokes-dca-stud