In [1]:
import numpy as np
from scipy.signal import resample
import matplotlib.pyplot as plt
import h5py

In [2]:
def save_h5(filepath, dataset_name, data):
    """
    Save an array to an .h5 file as a single gzip-compressed dataset.

    The file is opened in 'w' mode, so an existing file at ``filepath`` is
    truncated and replaced.

    Parameters:
    - filepath (str): Path to the .h5 file.
    - dataset_name (str): Name of the dataset to create inside the file.
    - data (numpy.ndarray): Data to save.
    """
    with h5py.File(filepath, 'w') as h5file:
        # Honour the caller-supplied name. The dataset used to be written
        # under the fixed key 'data' no matter what `dataset_name` was, so the
        # message printed below could name a dataset that was never created.
        h5file.create_dataset(dataset_name, data=data, compression='gzip', compression_opts=9)
    print(f"Data saved to {filepath} under dataset '{dataset_name}'")

In [3]:
# Read the datasets and combine them
def combine_channels(h5_filepath):
    """
    Read every top-level entry of an .h5 file and stack them along a new axis 1.

    Entries are visited in natural (numeric-aware) name order, so
    ``channel_2`` comes before ``channel_10``. The shape and dtype of the
    first entry determine the layout of the result.

    Assumes every top-level key is a 2-D dataset with the same shape as the
    first one; a mismatching entry fails on assignment.

    Parameters:
    - h5_filepath (str): Path to the .h5 file to read.

    Returns:
    - numpy.ndarray: Array of shape (num_seq, num_channels, samples) where
      ``[:, i, :]`` holds the i-th dataset.

    Raises:
    - ValueError: If the file contains no entries.
    """
    import re  # local import: only needed for the natural-sort key

    def _natural_key(name):
        # re.split with a capturing group alternates text / digit runs, so odd
        # positions are always digit runs and keys compare like-with-like.
        parts = re.split(r'([0-9]+)', name)
        return [int(part) if idx % 2 else part for idx, part in enumerate(parts)]

    with h5py.File(h5_filepath, 'r') as h5file:
        # Plain sorted() is lexicographic and would place 'channel_10' before
        # 'channel_2', silently scrambling the channel axis.
        datasets = sorted(h5file.keys(), key=_natural_key)
        if not datasets:
            raise ValueError(f"No datasets found in {h5_filepath}")
        num_channels = len(datasets)

        # The first channel fixes the per-channel shape and the output dtype
        first_channel = h5file[datasets[0]]
        new_num_seq, new_samples = first_channel.shape

        # Preallocate once; each channel is copied straight into its slot
        combined_data = np.empty((new_num_seq, num_channels, new_samples), dtype=first_channel.dtype)

        for i, dataset_name in enumerate(datasets):
            print(f"Loading {dataset_name}...")
            combined_data[:, i, :] = h5file[dataset_name][:]

    return combined_data

In [3]:
# def read_h5(filepath, dataset_name):
#     with h5py.File(filepath, 'r') as h5file:
#         data = h5file[dataset_name][:]
#     return data

In [5]:
# Source .h5 file: one dataset per channel
data_64hz_path = r'D:\Ali_Thesis\synthetic_data_generation\Data\Process_canada_data\P13_5_sec_sequences.h5'

# Stack the per-channel datasets into a single 3-D array and report its shape
X64 = combine_channels(h5_filepath=data_64hz_path)
combined_shape = X64.shape
print("Combined data shape:", combined_shape)

Loading channel_0...
Loading channel_1...
Loading channel_2...
Loading channel_3...
Loading channel_4...
Loading channel_5...
Loading channel_6...
Loading channel_7...
Combined data shape: (417840, 8, 320)


In [None]:
# # Open the .h5 file and inspect its contents
# with h5py.File(data_64hz_path, 'r') as h5file:
#     # List all datasets in the file
#     datasets = list(h5file.keys())
#     print("Datasets in the file:", datasets)

#     # Check the shape and dtype of each channel
#     for dataset_name in datasets:
#         data = h5file[dataset_name]
#         print(f"Dataset '{dataset_name}' shape: {data.shape}, dtype: {data.dtype}")
#         print(data[0])


Datasets in the file: ['channel_0', 'channel_1', 'channel_2', 'channel_3', 'channel_4', 'channel_5', 'channel_6', 'channel_7']
Dataset 'channel_0' shape: (417840, 320), dtype: float64
[3.5165000e+13 3.5165016e+13 3.5165031e+13 3.5165047e+13 3.5165063e+13
 3.5165078e+13 3.5165094e+13 3.5165109e+13 3.5165125e+13 3.5165141e+13
 3.5165156e+13 3.5165172e+13 3.5165187e+13 3.5165203e+13 3.5165219e+13
 3.5165234e+13 3.5165250e+13 3.5165266e+13 3.5165281e+13 3.5165297e+13
 3.5165313e+13 3.5165328e+13 3.5165344e+13 3.5165359e+13 3.5165375e+13
 3.5165391e+13 3.5165406e+13 3.5165422e+13 3.5165438e+13 3.5165453e+13
 3.5165469e+13 3.5165484e+13 3.5165500e+13 3.5165516e+13 3.5165531e+13
 3.5165547e+13 3.5165562e+13 3.5165578e+13 3.5165594e+13 3.5165609e+13
 3.5165625e+13 3.5165641e+13 3.5165656e+13 3.5165672e+13 3.5165688e+13
 3.5165703e+13 3.5165719e+13 3.5165734e+13 3.5165750e+13 3.5165766e+13
 3.5165781e+13 3.5165797e+13 3.5165812e+13 3.5165828e+13 3.5165844e+13
 3.5165859e+13 3.5165875e+13 3.5165

In [17]:
# with h5py.File(data_64hz_path, 'r') as h5file:
#     channel_0_data = h5file['channel_0'][:]
#     print("Channel 0 data shape:", channel_0_data.shape)
#     # print("First segment of Channel 0:", channel_0_data[0])
#     start_t = np.array(channel_0_data[0], 'datetime64[ns]')
#     end_t = np.array(channel_0_data[1], 'datetime64[ns]')
#     print(start_t[0])
#     print(start_t[319])
#     print(end_t[0])
#     print(end_t[319])


Channel 0 data shape: (417840, 320)
1970-01-01T09:46:05.000000000
1970-01-01T09:46:09.984000000
1970-01-01T09:46:10.000000000
1970-01-01T09:46:14.984000000


In [6]:
# Every channel of the first window at sample index 47
X64[0, :, 47]

array([ 3.51657340e+13,  1.47366726e-01, -5.00429487e-01,  1.56169145e-01,
        8.27696470e-01,  1.43000000e+00,  3.30649179e+01,  0.00000000e+00])

In [7]:
# Keep channels 2, 3 and 4 only (basic slicing, so this is a view into X64).
# NOTE(review): presumably the three accelerometer axes — confirm against the data layout.
X64_acc = X64[:, 2:5, :]
(X64_acc.shape, X64_acc[0, :, 47])

((417840, 3, 320), array([-0.50042949,  0.15616915,  0.82769647]))

In [8]:
# Source and destination sampling rates, in Hz
original_sample_rate, target_sample_rate = 64, 30

# Axis sizes of the array that is about to be resampled
windows, channels, data_points = X64_acc.shape
print(windows, channels, data_points)

# Samples per window after the rate change; int() drops any fractional part
scaled_points = (data_points * target_sample_rate) / original_sample_rate
new_data_points = int(scaled_points)
print(new_data_points)

417840 3 320
150


In [9]:
# Preallocate the output buffer: same windows/channels, shortened time axis
resampled_shape = (windows, channels, new_data_points)
resampled_data = np.zeros(resampled_shape)
print(resampled_data.shape)

(417840, 3, 150)


In [10]:
# Resample every window and channel along the time axis (the last axis).
# scipy.signal.resample is vectorised over the axes it does not resample, so
# a whole batch of windows is processed per call instead of one Python-level
# call per (window, channel) pair. Working in batches keeps the temporary FFT
# buffers small compared with transforming the full array at once.
# NOTE(review): resample() is Fourier-based and treats each window as
# periodic, which can produce ringing near window edges; consider
# scipy.signal.resample_poly if that matters for downstream use.
batch_size = 4096
for start in range(0, windows, batch_size):
    stop = min(start + batch_size, windows)
    resampled_data[start:stop] = resample(X64_acc[start:stop], new_data_points, axis=-1)

# Resampled data now has shape (windows, channels, new_data_points)
print(resampled_data.shape)

(417840, 3, 150)


In [None]:
# # Define the original and target sample sizes
# original_length = 320
# target_length = 150

# # Compute fractional indices for the target length
# fractional_indices = np.linspace(0, original_length - 1, target_length)

# # Round the indices to the nearest integers
# indices_to_keep = np.round(fractional_indices).astype(int)
# indices_to_keep

array([  0,   2,   4,   6,   9,  11,  13,  15,  17,  19,  21,  24,  26,
        28,  30,  32,  34,  36,  39,  41,  43,  45,  47,  49,  51,  54,
        56,  58,  60,  62,  64,  66,  69,  71,  73,  75,  77,  79,  81,
        83,  86,  88,  90,  92,  94,  96,  98, 101, 103, 105, 107, 109,
       111, 113, 116, 118, 120, 122, 124, 126, 128, 131, 133, 135, 137,
       139, 141, 143, 146, 148, 150, 152, 154, 156, 158, 161, 163, 165,
       167, 169, 171, 173, 176, 178, 180, 182, 184, 186, 188, 191, 193,
       195, 197, 199, 201, 203, 206, 208, 210, 212, 214, 216, 218, 221,
       223, 225, 227, 229, 231, 233, 236, 238, 240, 242, 244, 246, 248,
       250, 253, 255, 257, 259, 261, 263, 265, 268, 270, 272, 274, 276,
       278, 280, 283, 285, 287, 289, 291, 293, 295, 298, 300, 302, 304,
       306, 308, 310, 313, 315, 317, 319])

In [None]:
# # Subsample the data using these indices
# X30 = X64[:, :, indices_to_keep]

# print("Original shape:", X64.shape)
# print("Downsampled shape:", X30.shape)

Original shape: (417840, 8, 320)
Downsampled shape: (417840, 8, 150)


In [None]:
# X30[0,5,:]

array([ 13.63,  27.98,  38.07,  44.92,  53.  ,  55.87,  55.76,  52.25,
        45.31,  37.56,  32.07,  27.34,  25.04,  23.09,  20.59,  18.1 ,
        17.52,  18.43,  16.82,  12.69,   7.66,   3.71,   1.43,  -2.03,
        -9.4 , -23.88, -32.36, -39.19, -43.12, -43.84, -43.34, -43.37,
       -44.38, -45.57, -46.57, -44.29, -35.4 , -21.47,  -7.86,   2.76,
        15.  ,  21.22,  25.83,  28.57,  28.46,  26.36,  25.33,  27.4 ,
        26.77,  21.61,  13.04,   3.36,  -5.95, -12.92, -14.64, -13.48,
       -14.94, -17.35, -17.16, -15.4 , -16.1 , -24.78, -34.63, -46.59,
       -57.78, -66.03, -70.41, -63.19, -15.65,  18.  ,  29.59,  24.8 ,
        17.46,  10.  ,   4.31,  12.39,  22.14,  24.94,  23.46,  24.26,
        27.99,  32.75,  39.34,  39.06,  33.3 ,  26.52,  23.19,  22.05,
        20.1 ,  16.44,  11.62,   0.99, -14.39, -28.62, -31.3 , -13.85,
        31.44,  48.76,  47.7 ,  30.42,   3.87, -16.36, -18.41,  -2.82,
         4.26,   6.25,   0.76, -15.03, -34.52, -44.94, -36.24, -24.51,
      

In [11]:
# All channels and samples of the first resampled window
resampled_data[0]

array([[-0.5020571 , -0.49900004, -0.50070229, -0.49946417, -0.50041967,
        -0.49967205, -0.5002488 , -0.499825  , -0.50010009, -0.49996656,
        -0.49995924, -0.50012314, -0.49978011, -0.50034175, -0.49948966,
        -0.50077759, -0.49869015, -0.50309197, -0.51390049, -0.49775895,
        -0.50122576, -0.49916226, -0.50062818, -0.49950613, -0.50039753,
        -0.49967973, -0.50027097, -0.49978087, -0.50016701, -0.49987827,
        -0.50006614, -0.5000171 , -0.49982383, -0.50061343, -0.49703134,
        -0.48071347, -0.49139242, -0.50159061, -0.49913016, -0.50075367,
        -0.49907797, -0.5016764 , -0.48731916, -0.4833831 , -0.48482223,
        -0.49909511, -0.50045683, -0.49976811, -0.4999715 , -0.50061386,
        -0.49623385, -0.4802237 , -0.49277483, -0.50133907, -0.49936095,
        -0.5005003 , -0.4994037 , -0.50114909, -0.48659834, -0.4833576 ,
        -0.50003231, -0.4997993 , -0.50021169, -0.49980058, -0.50018553,
        -0.49982653, -0.50017627, -0.49983128, -0.5

In [12]:
# Destination .h5 file for the resampled array
data_30hz_path = r'D:\Ali_Thesis\synthetic_data_generation\Data\Process_canada_data\P13_5_sec_30hz_acc_sequences.h5'

# Write the resampled array out through save_h5
save_h5(filepath=data_30hz_path, dataset_name='data', data=resampled_data)

Data saved to D:\Ali_Thesis\synthetic_data_generation\Data\Process_canada_data\P13_5_sec_30hz_acc_sequences.h5 under dataset 'data'
