## Getting Started with Multi-modal Dyskinesia data

Loading DataFrames containing pre-processed ephys-data aligned with pre-processed accelerometer-data

In [1]:
# Import public packages and functions
import os
import pandas as pd
import numpy as np
import sys
import json
from pathlib import Path

### 1A) Define file location and data details

Keep files in the given naming structure and subject folders

FOLDER
- sub-XXX
    - XXX_mergedData_v4.0_lfp_left.P
    - XXX_mergedData_v4.0_lfp_right.P
    - XXX_mergedData_v4.0_acc_left.P
    - etc

In [2]:
# Subject code and the two data sources loaded in this notebook
SUB = '016'
DAT_SOURCE_1 = 'acc_right'
DAT_SOURCE_2 = 'ecog_right'

# Absolute path of the directory the notebook is executed from
PATH_CURR = os.path.abspath(os.curdir)
# Data repository: the parent directory of PATH_CURR, where the datasets are situated
PATH = os.fspath(Path(PATH_CURR).parent)



### 1B) Load required scripts

- `utils_fileManagement.py` must be present, unmodified, in PATH/code/utils/
- `__init__.py` needs to be present

In [3]:
# Make PATH/code the working directory so that the `utils` package can be imported
os.chdir(Path(PATH) / 'code')

from utils.utils_fileManagement import load_class_pickle
from utils.utils_fileManagement import mergedData

In [4]:
def check__and_load_file(PATH, SUB, DAT_SOURCE):
    """Validate the inputs and load one merged-data pickle of a subject.

    The file is expected at
    ``PATH/data/sub-{SUB}/{SUB}_mergedData_v4.0_{DAT_SOURCE}.P``.

    Parameters
    ----------
    PATH : str
        Repository root; must exist and is expected to contain ``data/``.
    SUB : str
        Subject code used in the folder and file name (e.g. ``'016'``).
    DAT_SOURCE : str
        One of ``'lfp_left'``, ``'lfp_right'``, ``'ecog_left'``,
        ``'ecog_right'``, ``'acc_left'``, ``'acc_right'``.

    Returns
    -------
    The object returned by ``load_class_pickle`` for the requested file.

    Raises
    ------
    AssertionError
        If PATH does not exist, DAT_SOURCE is not one of the allowed
        values, or the expected file is missing (this includes the case
        where the subject folder itself does not exist).
    """
    allowed_sources = (
        'lfp_left', 'lfp_right', 'ecog_left',
        'ecog_right', 'acc_left', 'acc_right',
    )

    assert os.path.exists(PATH), f'PATH doesnot exist ({PATH})'
    assert DAT_SOURCE in allowed_sources, f'incorrect DAT_SOURCE ({DAT_SOURCE})'

    folder = os.path.join(PATH, 'data', f'sub-{SUB}')
    fname = f'{SUB}_mergedData_v4.0_{DAT_SOURCE}.P'
    fpath = os.path.join(folder, fname)

    # isfile() rather than `fname in os.listdir(folder)`: listdir raises a
    # bare FileNotFoundError when the subject folder is missing, which hid
    # the descriptive message below; isfile() also rejects a directory that
    # happens to carry the expected name.
    assert os.path.isfile(fpath), (
        f'FILE {fname} not in {folder}'
    )

    # NOTE(review): load_class_pickle presumably unpickles the file, which can
    # execute arbitrary code -- only load files from a trusted source.
    return load_class_pickle(fpath)

### 1C) Load Data

Pickle structure

dat
- 'sub': str  # subject code (all starting with 0 incl ECoG, starting with 1 excl ECoG)
- 'data_version': str  # always v4.0, final version containing bandpass 2 - 98 Hz and notch filter 50 Hz
- 'data': np.ndarray  # array in long format, sampled at frequency dat.fs
- 'colnames': list  # colnames correspond with the columns in the array
- 'times': np.ndarray  # times/dopa_times are aligned between subjects across data sources
- 'fs': int  # downsampled sampling freq (differs between ephys and acc)

In [5]:
# Load the accelerometer recording selected by DAT_SOURCE_1
dat_acc = check__and_load_file(PATH=PATH, SUB=SUB, DAT_SOURCE=DAT_SOURCE_1)

... pickle loading: C:\Users\a.kaymak\Desktop\Papers\2025 Parkinson Dyskinesia LFP-ECG\files\data\sub-016\016_mergedData_v4.0_acc_right.P


In [6]:
# Load the ECoG recording selected by DAT_SOURCE_2
dat_ecog = check__and_load_file(PATH=PATH, SUB=SUB, DAT_SOURCE=DAT_SOURCE_2)

... pickle loading: C:\Users\a.kaymak\Desktop\Papers\2025 Parkinson Dyskinesia LFP-ECG\files\data\sub-016\016_mergedData_v4.0_ecog_right.P


In [7]:
# Shape of the accelerometer array: long format, columns correspond to dat_acc.colnames
dat_acc.data.shape

(1875040, 10)

In [8]:
# Shape of the ECoG array: long format, columns correspond to dat_ecog.colnames
dat_ecog.data.shape

(7294976, 11)

In [7]:
# Column names of the accelerometer array, in the same order as the columns
# of dat_acc.data (`dat` was never defined in this notebook; use dat_acc).
dat_acc.colnames

['dopa_time',
 'ACC_L_X',
 'ACC_L_Y',
 'ACC_L_Z',
 'task',
 'left_tap',
 'right_tap',
 'left_move',
 'right_move',
 'no_move']

In [10]:
# Display the loaded accelerometer container with all its fields
# (`dat` was never defined in this notebook; use dat_acc).
dat_acc

mergedData(sub='016', data_version='v4.0', data=array([[-1300.0, -2.6937041650979007e-09, -3.4340199412578524e-09, ...,
        0.0, 0.0, True],
       [-1299.998046875, 2.0728217908944372e-09, 8.840608877314612e-10,
        ..., 0.0, 0.0, True],
       [-1299.99609375, -2.9149476979894637e-09, -6.0424217066724235e-09,
        ..., 0.0, 0.0, True],
       ...,
       [3735.421875, -7.1174364689064416e-09, -8.278413974739822e-08,
        ..., 0.0, 0.0, True],
       [3735.423828125, -2.5287205372818662e-08, -1.806309345184209e-08,
        ..., 0.0, 0.0, True],
       [3735.42578125, -1.9514632709694573e-08, -1.100571890517367e-08,
        ..., 0.0, 0.0, True]], dtype=object), colnames=['dopa_time', 'ACC_L_X', 'ACC_L_Y', 'ACC_L_Z', 'task', 'left_tap', 'right_tap', 'left_move', 'right_move', 'no_move'], times=array([-1300.0, -1299.998046875, -1299.99609375, ..., 3735.421875,
       3735.423828125, 3735.42578125], dtype=object), fs=512)

In [11]:
# Element-wise check that the `times` field equals the 'dopa_time' column of
# the data array (`dat` was never defined in this notebook; use dat_acc).
dat_acc.times == dat_acc.data[:, dat_acc.colnames.index('dopa_time')]

array([ True,  True,  True, ...,  True,  True,  True])

In [None]:
# Select the 'dopa_time' column from the accelerometer array by column name
# (`dat` was never defined in this notebook; use dat_acc).
dat_acc.data[:, dat_acc.colnames.index('dopa_time')]