In [38]:
import h5py
import matplotlib.pyplot as plt
from pprint import pprint
import pandas as pd

In [39]:
# Notebook-wide configuration.
SAMPLE_RATE = 25_000  # samples per second (Hz)
DT = 1 / SAMPLE_RATE  # seconds between two consecutive samples
# NOTE(review): absolute path to an external volume; the notebook only runs
# where this drive is mounted at the same location.
FILE_PATH = "/Volumes/WD Passport/DataScienceProject/220113_S1_m.h5"
PRIMARY_COLOUR = "black"  # presumably the default plot colour; not used in the cells shown here
T = 4 * SAMPLE_RATE  # samples per extracted clip: 4 s at SAMPLE_RATE = 100000

Split the recording into small slices so that it is easier to store and work with (e.g. for testing).

Each slice contains 100,000 data points at a sample rate of 25,000 Hz, resulting in 4-second-long clips.

In [40]:
# Start time, in seconds, of each region of interest that gets sliced out of
# the recording. Keys label the resulting clips.
slicing_start_timestamps = dict(
    spontaneous=500,
    grp=2000,
    ttx=4500,
)

# the data is stored in hdf5 format
def print_structure(name: str, obj: object) -> None:
    """Print the name of one visited HDF5 item.

    Used as the callback passed to ``visititems`` when listing the file
    layout.

    Args:
        name: Name of the visited item, printed as-is.
        obj: The visited object itself; accepted because the callback is
            called with two arguments, but not used.

    Returns:
        None.
    """
    print(name)
    return None

def extract_clip(dataset, start_seconds, n_samples, sample_rate):
    """Read a fixed-length clip from a 2-D dataset, keeping every row.

    Args:
        dataset: Object that supports ``dataset[:, start:stop]`` indexing and
            whose result exposes a ``shape`` attribute.
        start_seconds: Clip start in seconds; converted to a sample index by
            multiplying by ``sample_rate``.
        n_samples: Number of samples to read along the second axis.
        sample_rate: Samples per second.

    Returns:
        The sliced data, with exactly ``n_samples`` entries on the second axis.

    Raises:
        ValueError: If the slice came back with a different number of samples,
            e.g. because the window runs past the end of the recording.
    """
    # round() keeps integer inputs unchanged and guards against float error
    # when a start time is given as a fractional number of seconds.
    first = round(start_seconds * sample_rate)
    clip = dataset[:, first : first + n_samples]

    # NOTE(review): an out-of-range slice is expected to come back shortened
    # rather than raise - confirm against the h5py docs. Without this check a
    # short or empty clip would be written to disk unnoticed.
    if clip.shape[1] != n_samples:
        raise ValueError(
            f"clip starting at {start_seconds} s has {clip.shape[1]} samples, "
            f"expected {n_samples}"
        )
    return clip


with h5py.File(FILE_PATH, "r") as f:

    # view the hdf5 file structure
    f.visititems(print_structure)

    # get the channel data
    channel_data = f['Data/Recording_0/AnalogStream/Stream_0/ChannelData']

    # cut one T-sample clip per area of interest, inside the `with` block
    # while the file is still open
    clips = {
        label: extract_clip(channel_data, start_seconds, T, SAMPLE_RATE)
        for label, start_seconds in slicing_start_timestamps.items()
    }

# keep the individual names that the following cells rely on
spontaneous_data = clips["spontaneous"]
grp_data = clips["grp"]
ttx_data = clips["ttx"]

Data
Data/Recording_0
Data/Recording_0/AnalogStream
Data/Recording_0/AnalogStream/Stream_0
Data/Recording_0/AnalogStream/Stream_0/ChannelData
Data/Recording_0/AnalogStream/Stream_0/ChannelDataTimeStamps
Data/Recording_0/AnalogStream/Stream_0/InfoChannel


In [42]:
# Wrap each clip in a DataFrame. The transpose puts the sliced (sample) axis
# down the rows, so every row of the original array becomes one column
# (presumably one column per channel - TODO confirm).
spontaneous_df = pd.DataFrame(data=spontaneous_data.transpose())
grp_df = pd.DataFrame(data=grp_data.transpose())
ttx_df = pd.DataFrame(data=ttx_data.transpose())

In [44]:
spontaneous_df.to_csv('../data/spontaneous_data.csv', index=False)
grp_df.to_csv('../data/grp_data.csv', index=False)
ttx_df.to_csv('../data/ttx_data.csv', index=False)