In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os 

from scipy.signal import find_peaks

# PyTorch
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

In [2]:

# Base path prefix for every dataset file read in this notebook. The trailing
# slash is required: file paths are built by direct f-string concatenation
# (e.g. f'{data_directory}patient_info.csv'), not with os.path.join.
data_directory = 'Data/hyperaktiv_with_controls/hyperaktiv_with_controls/'

In [6]:
# Cohort-level metadata plus the activity and heart-rate recordings of one
# example patient (ID 7), used to inspect the raw file layout.
demographic_data = pd.read_csv(f'{data_directory}patient_info.csv', sep=';')
patient_activity = pd.read_csv(f'{data_directory}activity_data/patient_activity_7.csv', sep=';')
# The HRV files are ';'-delimited like the others (the per-patient feature loop
# reads them with sep=';' and accesses an 'HRV' column). With the default ','
# separator each row is parsed as a single column.
patient_hr = pd.read_csv(f'{data_directory}hrv_data/patient_hr_7.csv', sep=';')

In [7]:
# Print the shapes of the loaded frames. The per-patient files loaded above are
# patient_activity_7.csv and patient_hr_7.csv, so the labels refer to patient 7.
print("demographic data overall shape:", demographic_data.shape)
print("patient 7 activity data shape:", patient_activity.shape)
print("patient 7 hrv data shape:", patient_hr.shape)


demographic data overall shape: (134, 33)
patient 1 activity data shape: (11475, 2)
patient 1 hrv data shape: (92721, 1)


In [None]:

# Feature extraction function
def extract_features(df, value_column, is_activity):
    """Summarise one signal column as a flat dictionary of scalar features.

    Args:
        df: DataFrame holding the recording.
        value_column: Name of the column in ``df`` to summarise.
        is_activity: When true the feature names end in ``_acc``; otherwise
            they end in ``_hrv``.

    Returns:
        dict mapping ``<stat>_<suffix>`` to a scalar, in this order: mean,
        std, median, min, max, iqr (75th minus 25th percentile), autocorr
        (lag 1) and num_peaks (number of indices returned by
        ``scipy.signal.find_peaks`` with its default arguments).
    """
    suffix = 'acc' if is_activity else 'hrv'
    signal = df[value_column]

    summary = {
        'mean': signal.mean(),
        'std': signal.std(),
        'median': signal.median(),
        'min': signal.min(),
        'max': signal.max(),
        'iqr': signal.quantile(0.75) - signal.quantile(0.25),
        'autocorr': signal.autocorr(lag=1),
    }

    # Only the peak positions are needed; the properties dict is discarded.
    peak_positions, _ = find_peaks(signal)
    summary['num_peaks'] = len(peak_positions)

    # Attach the signal-type suffix to every statistic name.
    return {f'{stat}_{suffix}': value for stat, value in summary.items()}

In [None]:
# Build one feature record per patient listed in the demographic table.
# Processing is best-effort: a patient whose files cannot be read or processed
# is reported and skipped, so the resulting table may have fewer rows than
# demographic_data.
all_patient_features = []

for patient_id in demographic_data['ID']:
    try:
        # Load this patient's activity and heart-rate recordings.
        patient_activity = pd.read_csv(f'{data_directory}activity_data/patient_activity_{patient_id}.csv', sep=';')
        patient_hrv = pd.read_csv(f'{data_directory}hrv_data/patient_hr_{patient_id}.csv', sep=';')

        # Summarise each signal; the flag selects the '_acc' / '_hrv' key suffix.
        activity_features = extract_features(patient_activity, 'ACTIVITY', is_activity=True)
        hrv_features = extract_features(patient_hrv, 'HRV', is_activity=False)

        # Record layout: ID, activity features, HRV features, then the ADHD label.
        patient_features = {'ID': patient_id}
        patient_features.update(activity_features)
        patient_features.update(hrv_features)
        # First demographic row matching this ID supplies the label.
        patient_features['ADHD'] = demographic_data.loc[demographic_data['ID'] == patient_id, 'ADHD'].iloc[0]

        all_patient_features.append(patient_features)
    except Exception as err:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit still stop the loop, and report the
        # cause so skipped patients can be diagnosed.
        print(f'Error processing patient {patient_id}: {type(err).__name__}: {err}')

# Convert the list of feature dictionaries to a DataFrame (one row per patient).
features_df = pd.DataFrame(all_patient_features)

In [29]:
# Display the per-patient feature table as the cell's output.
features_df

Unnamed: 0,ID,mean_acc,std_acc,median_acc,min_acc,max_acc,iqr_acc,autocorr_acc,num_peaks_acc,mean_hrv,std_hrv,median_hrv,min_hrv,max_hrv,iqr_hrv,autocorr_hrv,num_peaks_hrv,ADHD
0,1,50.200324,108.179926,0.0,0,1110,48.0,0.733732,1401,707.652854,139.830794,701.17,323.24,2697.27,118.17,0.854030,26228,1
1,3,501.205767,637.850867,267.0,0,6110,745.0,0.743513,2612,759.529730,158.316375,740.23,191.41,2469.73,213.87,0.668906,23651,1
2,5,60.107012,91.807369,17.0,0,1076,86.0,0.580125,1957,1003.211186,486.852776,825.20,188.48,3062.50,308.60,0.868125,17716,1
3,7,55.891678,101.724606,0.0,0,1300,79.0,0.726426,2037,828.531332,188.033565,833.98,192.38,3033.20,196.29,0.757883,23479,0
4,9,68.533853,110.460080,9.0,0,1042,102.0,0.741379,2719,1002.283732,584.068574,813.48,166.02,3078.13,825.19,0.836427,15804,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64,97,177.378974,300.160375,18.0,0,3022,242.0,0.727082,2277,976.823104,269.875858,933.59,197.27,3115.23,250.00,0.917586,23397,1
65,98,156.031199,293.309376,9.0,0,4129,202.0,0.727919,1961,673.331962,193.650122,656.25,177.73,5000.00,96.68,0.722989,24721,1
66,101,244.928216,387.405119,59.0,0,5249,360.0,0.706902,2081,832.504639,228.985068,804.69,223.63,2687.50,277.34,0.823121,24144,0
67,104,355.351822,438.648252,168.0,0,2845,586.0,0.752820,2458,722.369675,151.624460,679.69,246.09,2528.32,194.34,0.959325,24072,0


In [30]:
# Write the feature table to 'temporal_features.csv' (a relative path, so it is
# created in the current working directory). index=False leaves the DataFrame
# row index out of the file.
features_df.to_csv('temporal_features.csv', index=False)