# BEAT-PD Challenge

## Team JHU-CLSP 

- Marie-Philippe Gill
- Nanxin Chen
- Saurabhchand Bhati
- Sonal Joshi
- Laureano Moro-Velazquez

Team page : https://www.synapse.org/#!Team:3404266

## Useful Links

Challenge website : https://www.synapse.org/#!Synapse:syn20825169/wiki/596118

Data information : https://www.synapse.org/#!Synapse:syn20825169/wiki/600405


In [15]:
# Import required libraries

%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from IPython.display import HTML, display

# Imports for the high pass signal
from scipy.signal import butter, freqz, lfilter

# KFold
from sklearn.model_selection import KFold

# Import required modules
from sklearn.preprocessing import StandardScaler

import os.path

# To write WAV File
from scipy.io.wavfile import write

# To make derivative work on multiple CPUs
from concurrent.futures import ProcessPoolExecutor
from functools import partial

import sys



%load_ext autoreload
%autoreload 2

# import transform_data
from transform_data import *
from create_graphs import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Extract initial data 

In [16]:
# Data paths
# Root folder that holds the downloaded BEAT-PD archives.
# Keep the trailing "/": the extraction commands in the next cell build their
# paths as {data_dir}<name>, with no separator added in between.
data_dir = "<your-path-to-data>" 

In [None]:
!cd {data_dir}
!pwd
!tar -xvf {data_dir}cis-pd.data_labels.tar.bz2 --directory {data_dir}; mv {data_dir}data_labels {data_dir}cis-pd.data_labels
!tar -xvf {data_dir}real-pd.data_labels.tar.bz2 --directory {data_dir}; mv {data_dir}data_labels {data_dir}real-pd.data_labels
!tar -xvf {data_dir}real-pd.training_data_updated.tar.bz2 --directory {data_dir}; mv {data_dir}training_data/ {data_dir}real-pd.training_data; 
!tar -xvf {data_dir}cis-pd.training_data.tar.bz2 --directory {data_dir}; mv {data_dir}training_data/ {data_dir}cis-pd.training_data; 
!tar -xvf {data_dir}cis-pd.ancillary_data.tar.bz2 --directory {data_dir}; mv {data_dir}ancillary_data/ {data_dir}cis-pd.ancillary_data;
!tar -xvf {data_dir}real-pd.ancillary_data_updated.tar.bz2 --directory {data_dir}; mv {data_dir}ancillary_data {data_dir}real-pd.ancillary_data;
!tar -xvf {data_dir}cis-pd.testing_data.tar.bz2 --directory {data_dir}; mv {data_dir}testing_data/ {data_dir}cis-pd.testing_data/;
!tar -xvf {data_dir}real-pd.testing_data_updated.tar.bz2 --directory {data_dir}; mv {data_dir}testing_data/ {data_dir}real-pd.testing_data/;
!mv {data_dir}cis-pd.CIS-PD_Test_Data_IDs.csv {data_dir}CIS-PD_Test_Data_IDs_Labels.csv; mv {data_dir}CIS-PD_Test_Data_IDs_Labels.csv {data_dir}cis-pd.data_labels/;
!mv {data_dir}real-pd.REAL-PD_Test_Data_IDs.csv {data_dir}REAL-PD_Test_Data_IDs_Labels.csv; mv {data_dir}REAL-PD_Test_Data_IDs_Labels.csv {data_dir}real-pd.data_labels/


In [None]:
# !rm {data_dir}cis-pd.data_labels.tar.bz2
# !rm {data_dir}real-pd.data_labels.tar.bz2
# !rm {data_dir}real-pd.training_data_updated.tar.bz2;
# !rm {data_dir}cis-pd.training_data.tar.bz2;
# !rm {data_dir}cis-pd.ancillary_data.tar.bz2;
# !rm {data_dir}real-pd.ancillary_data_updated.tar.bz2;
# !rm {data_dir}cis-pd.testing_data.tar.bz2;
# !rm {data_dir}real-pd.testing_data_updated.tar.bz2;

# CIS-PD Database

### CIS-PD: Create High Pass Data

For the high pass filter, three parameters can be tuned, but we used these:

- `order` = 10
- `fs` = 50.0  # sample rate, Hz
- `cutoff` = 0.5  # desired cutoff frequency of the filter, Hz

In [11]:
# Output folders written by this cell, one per CIS-PD subset:
#   cis-pd.training_data.high_pass/
#   cis-pd.ancillary_data.high_pass/
#   cis-pd.testing_data.high_pass/

data_type = "cis"

# Repeat the same steps for the training, ancillary and testing subsets
for data_subset in ('training_data', 'ancillary_data', 'testing_data'):
    # Destination folder for the high-pass-filtered data of this subset
    high_pass_path = "{}/cis-pd.{}.high_pass/".format(data_dir, data_subset)

    # Input folder and label table for this subset
    path_train_data, df_train_label = define_data_type(data_type, data_dir, data_subset)

    high_pass_filter(df_train_label, high_pass_path, path_train_data, data_type)

path_train_data :  /export/b19/mpgill/BeatPD_data/cis-pd.training_data/
Working on  cc7b822c-e310-46f0-a8ea-98c95fdb67a1
The high pass folder was created :  /export/b19/mpgill/BeatPD_data//cis-pd.training_data.high_pass/
Working on  5163afe8-a6b0-4ea4-b2ba-9b4501dd5912
Working on  5cf68c8e-0b7a-4b73-ad4f-015c7a20fb5a
Working on  fb188ae2-2173-4137-9236-19a137a402c2
Working on  19a3e9ea-fce1-40b7-9457-2618970beb7b
Working on  e2973da8-1250-4a7c-98d5-b165570a8aeb
Working on  8548d34c-4771-4ca4-bee4-d47bde435bdc
Working on  0c579a72-bac5-46a2-8671-1a50620723bf
Working on  bb59d008-25fe-43cc-bf05-6bd6b874eea3
Working on  4a1ca52c-2895-4094-bade-246fd474762f
Working on  f53cfd9b-8c52-4d22-a35c-504542170ed3
Working on  dc90dc36-b4e5-43ec-b3e8-47c39c763c71
Working on  e31db4f8-f9a5-4273-a874-4bdbc6fcae2c
Working on  d1a9294c-05ad-4eac-9915-7052c2ad98a3
Working on  cc0d147f-94ea-4637-91d7-d4ceceaf1728
Working on  20f1dbcd-0954-4bfd-ad92-9bac1b15beb0
Working on  c05991ea-ed30-45ee-96a2-8a44d6ac

Working on  2ece7820-359e-4e81-a624-9e8dace9a584
Working on  f7b180d9-77ff-40c8-b718-28b12670c247
Working on  f946ec6a-74d3-4297-a3a4-b165f2d72f13
Working on  5f2d1332-846c-48f6-a6ab-188d3737dcef
Working on  622a3d54-f542-4f3f-8a31-df0bbfc929ad
Working on  3d0f965c-9d72-43d1-9369-1ea3acf963cc
Working on  4553b646-092b-4d8b-aee4-c4a074059541
Working on  a235ba27-a9a5-4d9a-931c-554950de9105
Working on  b0c7435a-5963-4ad4-a5e4-b0e17d5223cf
Working on  fb394080-2cba-454c-ad45-120fed0086e6
Working on  13239e7a-1c09-4bfe-a041-d739da36dff1
Working on  1e5738bf-e918-4025-9d84-8df71e8d4810
Working on  aee216fc-15d6-42ee-8e91-c8db3b16f0f6
Working on  b467614e-a297-4780-aec5-003fa5e8416f
Working on  b6f6ea11-7815-4c26-b8b0-1ce444327b5a
Working on  c722ebb1-d798-41ae-adeb-b4a57bd41172
Working on  0159954c-1bd2-47a6-9e6f-94f8d179b712
Working on  6867bf10-f489-4c7f-abd1-06a47dd2cdd9
Working on  3b0ee95f-d87b-4699-b34f-74e7dc20ab01
Working on  02389c9e-8821-4c54-a1a7-912f0b66dd95
Working on  6fc11b99

Working on  05a3b9dd-bbe4-4c4d-8571-1c692640aead
Working on  7ec2bf09-a4a9-45e6-b2e3-296327177a7b
Working on  b91a582c-d54d-4975-b2bd-15f2c194a4b8
Working on  ee9656df-4384-48de-8832-9dec7a255f8d
Working on  38392387-d09a-40a4-afa7-9f3654066f28
Working on  2a96609a-48dc-4d3d-918b-da84d3d166ca
Working on  eb63512f-b1df-4c35-bfad-417f7c230f86
Working on  4679db13-6ca0-4a40-971b-06d537cb65cf
Working on  6f48272b-3843-49bc-a41e-13fc6a4b2aed
Working on  a62d64ec-d1da-40bf-a86f-d7cd15f1a7dd
Working on  a2b0d14d-2f55-4e66-a187-37e2ef15f116
Working on  b5c4f79b-409a-455d-b9ad-0999274ca110
Working on  9873444a-c5be-4975-9ba6-4f0e209b82bc
Working on  300ca6f5-70d1-4928-8330-a9a86c3681f5
Working on  2da6f0c2-274a-4bfe-b386-5ca925000351
Working on  ced67e0d-1743-406f-9f5d-10aeb9c1f1fc
Working on  45419562-4d6f-44fe-ab6c-be92d90b1d4a
Working on  95051425-36e8-46c6-ad8a-2e91a6c71ffd
Working on  7d00f2f8-d21e-4985-b992-b3400f71b294
Working on  6b2d9f57-dd46-42ba-8365-f50d5cc9db62
Working on  b2ebe3ce

Working on  b8a95161-27bd-45b4-be65-b8158eff0062
Working on  c1125327-7855-4338-83dd-5c8d7ae0acde
Working on  a47a5857-8bac-4511-8557-5e1d72299cef
Working on  60fb3090-06d2-4a1d-a5a7-8c099a55d8f2
Working on  93fe6ada-fffa-4725-a2a1-f30041e9e99d
Working on  c9538c25-9630-44c9-ad62-2a39c63ef6fd
Working on  f2cb291d-8c3e-4b76-9adc-6b68252b425d
Working on  c305fe91-7bd7-418e-90b9-c58f67519d8b
Working on  60dc2ed3-5a45-4377-9bda-d2642e7acc6f
Working on  b30d2a16-b07e-4a0c-bddc-44456f2f040e
Working on  f14af6fa-d803-4b95-9ef3-17812b4cd730
Working on  57872a43-a942-4dba-b422-945343dd61bb
Working on  aaa9c1fa-536c-4829-84da-98da71efa0e6
Working on  9c25b03f-24b4-47bc-804d-126e148db07d
Working on  35a4e02b-05a4-4537-8375-d5ce70f93329
Working on  d16bace0-da6f-4593-9f16-0bc40e7c286d
Working on  b9236b01-f4f7-40a3-b54b-884a4b58c7cc
Working on  7cfd4140-fd42-49cc-a5d6-5c8e8fc8ba0a
Working on  93ec3e66-b237-480b-a698-c85156d38724
Working on  ecca1868-87a1-4ee0-ba0a-acb919d47efe
Working on  d483d93d

Working on  db2e053a-0fb8-4206-891a-6f079fb14e3a
Working on  f5871446-90f6-416d-ab41-3fe178b200b9
Working on  252e7ed9-59c8-4e53-bd42-dbe33dce7513
Working on  9d09e15a-315a-47b1-a853-43807a2dc63c
Working on  4b5a9a01-b30a-4d42-8af2-48a75ffb1c96
Working on  44486b20-fb08-4987-b0a4-d8d18121eade
Working on  fdedc1d7-cbdb-4cf4-aa5f-c224b1495236
Working on  922b3bf8-47a3-4fe9-b4f8-b18827993d82
Working on  5fbc4925-9d81-4873-be04-8bae8f631555
Working on  5a222ab4-a001-44c3-8ce3-7a97da8c9989
Working on  c9ef5b41-6659-4015-8a9b-20b5ef5ccf7a
Working on  fa1419ce-c48d-432e-90ad-913944f2bfb5
Working on  3766b3c0-1091-40f4-af91-bb5d3f64fbe6
Working on  9f6b5745-ab24-4652-9bed-f802a4b13df2
Working on  ff1fa00c-8af0-49be-9195-e86da7f26d86
Working on  1ab44d60-4c2d-487e-a76e-fdca168133ac
Working on  18061b76-7ad8-43cc-ad72-6fd94bb423c2
Working on  ff4bb4df-b798-4ff1-947c-0131b5c925b6
Working on  cf81a880-b965-4797-8996-f32554166ee2
Working on  d3d14fda-81b2-49a8-a3f5-72966be2065a
Working on  3b529cab

Working on  428f179f-6a1a-4d78-9e78-8b04fc14515f
Working on  800726b8-4db1-4fa5-b787-cf94a5c077d7
Working on  583606b3-7d94-467a-a806-258134f8743e
Working on  0972be68-49d5-4154-a294-56abfa0ec447
Working on  43c49b7e-fef5-4896-aca8-cdb7460ec7cf
Working on  96f5866f-a5ff-477a-814c-f688f410b036
Working on  cf58f3cc-ae3c-4f3a-b21c-f5cc3dba300f
Working on  23bfab44-78ec-4ca3-a879-4355d5ad8f86
Working on  a83c3212-2481-4fcb-8cc4-df1b7700b6a1
Working on  9f73ebfa-c8bb-4b9f-a5a5-f19c4cfe8fc3
Working on  ac859251-7091-4176-955e-78a032c937e4
Working on  49b642a0-5c9d-4225-aef8-ebfc3fa871c2
Working on  4e5a09f8-9f27-4e96-81fe-d6a615d4f3b2
Working on  65d97101-73af-44ed-b130-2727722e58c6
Working on  696d57a6-167f-4933-afae-1670238b4065
Working on  9ead992d-a438-4833-a743-a489b97d2da9
Working on  3b56e771-a528-497f-b544-f84306872e68
Working on  bb7abe3c-93e6-45a4-a808-63162bfadd0e
Working on  0f0afa74-8241-4cf0-bf24-b5027fc1df67
Working on  2782debe-e64e-4392-9842-ecfe2d08ffe1
Working on  132b73a8

Working on  4576dcf8-b444-4bbd-9157-8b23db3baa5b
Working on  a2d2a7f9-0955-4575-8696-0de072057906
Working on  3b243715-d79f-4461-90cc-481218b443c8
Working on  757536d4-5ec6-4e09-ba4e-b319169ea923
Working on  6d109c96-6f92-4521-806c-7f2dd88364e6
Working on  17aaea59-7172-49e9-bff3-e07d2108baf4
Working on  44b1cb54-f555-4480-ab41-3e7662a5f223
Working on  6c8e1d93-24e3-453d-8bb9-449925c5f5bf
Working on  f4409271-bf88-4cfb-a37a-00a2e3587822
Working on  a969532e-714c-41f2-83e8-41583e41e491
Working on  34dacc92-8fc9-45a1-bc99-117de653292d
Working on  7558283d-b11c-48ca-9b83-90ec3d536ed9
Working on  9f61501d-d94b-4526-8352-188d09665e4f
Working on  144d2c4e-b894-4511-86f4-34c373780574
Working on  ee60b3d9-545d-4f65-8811-345824a361c7
Working on  5667bc64-5d56-4b74-a20a-c8e5c6aaf2e6
Working on  9025187d-1cb4-449c-be06-0e2726e17d07
Working on  689d21a2-176f-4a28-b51e-c8bbcb3b98f3
Working on  8e09d113-3149-47a3-b0ef-b8a785168e1d
Working on  af65e35a-24b2-4f0f-a902-e824b5f0422f
Working on  1fa86b72

Working on  b8ed54ce-9fca-4d3c-8ca6-5fb666d824d4
Working on  9e841c49-141f-40bf-b14f-6a3379565f24
Working on  67d01d60-e2e9-460f-9e4c-d6877c9ee7c0
Working on  63505e9e-c015-4638-af05-75af3af4105d
Working on  3271a740-d67e-4709-94ad-ad8d14fd8547
Working on  1b1c49a9-fba2-4dcb-9f20-5c3711a4c182
Working on  417f2e72-2364-4066-994f-914f7478edbd
Working on  35f92561-618f-471b-ab6b-f5b5b3994f6f
Working on  5cae48e7-7a55-4028-a10d-98ed71fb29b3
Working on  3d146ca0-50d0-471d-824d-f292d10bf678
Working on  6afab4b2-3080-4301-891d-8ae5afbab21c
Working on  9f0d3904-5399-4af6-8d59-f9371a5c9b23
Working on  18184ef0-3c71-4aff-9663-d5076c8192d7
Working on  c5eaf3f3-eb1a-4b5d-ab66-79dbd67eb2db
Working on  d9960aac-7f77-488b-a27c-74fa86dac220
Working on  788aa60a-f33f-46fc-bafb-c27b78205f2e
Working on  bc339c17-0f1a-4e79-b68e-1b6884a32814
Working on  0e9689d7-4344-4f17-8b84-42f4b53d7ea1
Working on  df8f969b-5f9b-49e3-9ef8-883776e3bbf1
Working on  e32bbd29-6933-4bdd-9d01-d84ced190a64
Working on  e6434482

Working on  f59d2b7c-c6d2-4726-a244-f33a15c8f2d5
Working on  0f5bc0ae-118b-4338-afd1-653fc5bbd3a3
Working on  ae5c4a71-dd03-4c8e-b905-357b48d14f1e
Working on  222fec69-7615-4d08-bad0-d47751014f14
Working on  edbbd001-ae29-4208-a7f0-cf088ac7db94
Working on  08381404-b1b3-44f7-8e26-40a2183cfd28
Working on  ceb52190-d81b-44d4-94cc-5823a918edfa
Working on  858b51e0-1b31-4a93-8b8d-79d02b3606b7
Working on  738e62aa-6552-4f8c-bee0-72fb96b7581e
Working on  bbadb13c-ea0e-44e8-8908-cb1a18eebf40
Working on  f28c38d1-cedf-4bd3-b488-bca6a64bb512
Working on  50cd6ad2-9998-4dd7-83e7-ddb7da528256
Working on  7027ed4d-9c95-4bf5-9aeb-d8f69a9821cf
Working on  88662a33-82ec-40a4-a2fc-332cff45c5af
Working on  83fd5aa0-e585-4200-b90c-60bb443cb2c4
Working on  585f9628-039a-47af-adbc-d722b717a60a
Working on  c1dda61b-9287-4e41-9742-fea19250bf2e
Working on  005685b2-3de4-469b-b673-8f937ee8196e
Working on  3e06f01c-a386-40c0-b452-50921b8f4e8a
Working on  1c3b72c7-24d5-427c-9400-be1554688a4a
Working on  f6fde688

Working on  40f713fe-0ac9-4cd0-809c-0a8024955a2a
Working on  4913062f-b696-40c6-a0f2-87efe1dd4810
Working on  3ed5ad09-579c-48bb-b9ec-b14f44fa47f0
Working on  3c79c86e-c792-46c0-98d0-55f3753017fc
Working on  b9ada852-4ff5-4ee1-8529-96a79ced0610
Working on  5f8103c5-120d-4736-8959-f368d74acb65
Working on  abd3be20-bd13-49be-bd47-87821955bfb6
Working on  88185aa3-3a84-40e4-a780-40a4cef198b4
Working on  a38bf8c5-617d-4512-8c9e-70bc9195cc06
Working on  b1b084cd-88ab-485d-af7e-e5a6e2c73c03
Working on  1f57b4bc-bcd5-4d65-8a2c-7acfd94afaa2
Working on  3cd93936-b55b-4b4c-9ea6-083db0729b20
Working on  31956275-5c34-47ec-ba83-d5d107211af6
Working on  7b2698a6-f3a5-4345-9630-25d377d58992
Working on  22ba7c99-9274-41fd-9170-4364e00973a0
Working on  8390b805-6091-4f12-8d03-a853c3405ddb
Working on  7b9f5ce7-a152-4809-8930-c6bfaff0da8a
Working on  dd261c90-aab6-40c3-9da7-1b8b320c7f7a
Working on  fb86493b-1893-4be4-8707-1e97d56bb049
Working on  dd140d80-bcab-458b-bd0e-82cd584d0f28
Working on  e0441156

Working on  231d76f1-ced5-4e44-9dd7-5e50c110dabe
Working on  cc169337-66c7-46d3-baf3-309be443d6a9
Working on  bffbc14f-f554-4c4b-9c6d-2634da5ed12f
Working on  4fce6c8b-e4a7-4dbc-8e26-75b759f0d51d
Working on  678a7bb1-1fd5-4d13-8139-dd60b19bcbfb
Working on  250b8a7f-b1a4-4c9a-be79-a8c8beab749b
Working on  471297c5-054e-4ff2-ac05-19753428a609
Working on  c068ccf1-e513-4277-897c-4b1835245963
Working on  e0ab2320-5a20-4e04-b57c-f9d1c6cf1e2b
Working on  5674a8be-ebc0-4759-85d9-f7fcd603538b
Working on  62cd05cc-35d3-459b-b7ed-df89ee789e67
Working on  270e838a-3df7-46b0-9fe8-e2a46c1a2a37
Working on  5783b4ea-308a-4340-bd3d-2276e4916805
Working on  a4228918-a3a8-4ecd-92a5-3a4916dc3556
Working on  fb0f0e1b-d3d8-44e3-9fc4-3da3e78cbc81
Working on  3f0adcee-5e29-4eb1-8fc5-b721bce160c8
Working on  fe7d95fb-fca9-4944-b64f-a5cd171401f7
Working on  345810ba-7b25-4783-8633-521bac4da97e
Working on  8338ec01-467c-48fc-85bf-c1f912b51076
Working on  bbecc570-e0ef-45ae-b0ee-c0664c3cced0
Working on  8990585f

Working on  92263d96-7feb-4487-aada-901642773e7d
Working on  4bb4ed8a-c0f9-45c4-bc78-9760930caa46
Working on  690459b2-b589-4a6e-8b43-2448ba34dff7
Working on  c7f714db-859a-4c48-ad02-cc80acf55eb1
Working on  33100219-8c5d-4995-bdd1-fcd5182862a2
Working on  640ce323-6459-42d4-86a2-cc252416b9f5
Working on  28d9106f-f0ae-4a5f-9737-f20c2c97e02a
Working on  70c8fe97-b9d4-4fd8-8d90-54a86def4bb5
Working on  b84d7888-3e95-40ff-8546-b18e131f854c
Working on  ab618d1f-eb27-442b-a3b7-e438fde70db1
Working on  25e8bca2-051f-4216-826b-810bbddfdb2e
Working on  7f7bb7b9-8656-40dd-94f9-9d546ac75722
Working on  c29c2d91-c294-4655-a2a7-d4c1f456c3a2
Working on  a97693fd-7024-4e68-b00a-3700306c7bc8
path_train_data :  /export/b19/mpgill/BeatPD_data/cis-pd.ancillary_data/
Working on  10005cf2-85cf-47ad-8441-f15fb1509f69
The high pass folder was created :  /export/b19/mpgill/BeatPD_data//cis-pd.ancillary_data.high_pass/
Working on  f25cfad6-1835-4f78-9192-d28581ffa4d8
Working on  5cf1a9ff-b13c-457b-a661-ccd617

Working on  dfd75e7a-0be7-431d-beea-392833c806dc
Working on  79b1badc-2d5d-4fc5-98cb-8295febf9b22
Working on  94a52eff-6d96-4d57-926b-622afbebcdda
Working on  b701fa5b-ecba-4ec1-a912-341ce985b200
Working on  79607fe9-4bd6-4479-bbfd-2d6447caf3a2
Working on  416e9ece-0966-4ba4-9ce8-996101f7621d
Working on  ee22a53f-f84b-43e4-a0c6-92dde0e5c81f
Working on  dbe48222-3701-4217-9f73-c5cac11cedbe
Working on  6147ff24-e3ce-42f2-b1be-54a878f18caa
Working on  579adf7a-8366-4891-bbf1-7d6c4d4d0c5e
Working on  bb2c1a12-4135-48e6-be82-8b220b766ada
Working on  122b70a3-7449-46a1-9d42-7cd82551d11a
Working on  aa612858-4e9b-4f5f-98c4-289914efd3a3
Working on  9b9ad9d1-4286-4349-bffe-fbcd2f9651fd
Working on  5956ed5a-b310-47a3-9799-3a5f475bb1c0
Working on  5a67b031-e3a7-4d0b-92e5-748bd0c89330
Working on  c4fff7ba-27fa-47a9-81e8-15393197eb61
Working on  1ac99d8a-b77a-4ac1-9292-cade8f2f13d3
Working on  13aeee8f-d413-4f13-87aa-e69c55b86e3f
Working on  cd5b4e50-f2ab-42b2-b713-32550ef5a921
Working on  fbfac8ca

Working on  9d509a8b-1c7f-4db6-b379-f4126ae229ec
Working on  a7fb8876-faa5-42bd-b1ec-210a710a664a
Working on  94c4086e-0757-4f20-9c53-f0ecaf9844f8
Working on  3ce594c1-d224-4eb7-935d-4fa407ba5127
Working on  0a8684b3-ae58-4a85-b064-6be9eeb410f9
Working on  d5160a7a-1121-4615-a4da-c3a44f952807
Working on  132b839a-5383-446f-8120-a439e18cae47
Working on  310a15c9-1863-4524-904e-2dc730d58329
Working on  db80e345-ad48-43d6-b7a9-4d2518e9831d
Working on  a1612a58-a5e1-43bb-8f31-0cbd97a5d806
Working on  5d0ea615-e03f-4d1c-9772-e7e83e5f40ef
Working on  63c0aeb7-0d28-4dd4-b80b-58e158b99016
Working on  edcb47d7-33d9-4fe3-be2d-9afd08b90fc5
Working on  4c698281-9b7e-4e6d-85ce-3f2d4f534f13
Working on  a3e66b20-f79e-44e3-bef8-c0e6b0f57960
Working on  ce025971-1458-4b94-83c3-45f050b21448
Working on  69fdc5d8-1e8f-4a58-8408-ffd2e71bbf95
Working on  67223861-117f-4c7f-9288-5222d57d0d07
Working on  b0fd1f8e-368c-42ab-a6e1-1580257679da
Working on  f03b9f53-9132-480c-8c36-29b2db6d29fa
Working on  3a8fc6b2

Working on  38516741-f356-4912-8bfc-c8af2ff4889c
Working on  b2f697c8-7c9e-48a2-a6ac-4a4e4cf21286
Working on  79245cb6-80f1-4503-a9f3-4a6d8046c498
Working on  b455e3ab-cbd9-4282-b464-10b5664636a6
Working on  e14037e2-aabb-4e06-a065-ed5648decaae
Working on  bd97529c-932c-43e3-bead-874907102176
Working on  5dd7f0e8-e06a-4aaa-b18d-44e01390731e
Working on  a9108b24-3601-4cc6-8b31-cb7e644e5fbf
Working on  b42aface-e443-486a-8fbd-99308083d9ed
Working on  d7df7471-2cbd-462e-b642-c00e0505a604
Working on  39f230e3-1afc-44c3-82eb-0f14f4ace351
Working on  d2c15ffc-499b-457d-8689-d7e4eaa93d59
Working on  70de5779-e5ce-4056-953c-bcd7cd2b5c95
Working on  a3832cfa-0cc9-40ba-ab33-ba0afd18aa4e
Working on  9e4592bc-4cce-4fa1-b7c5-dd40d77d66ee
Working on  1a276bf5-db32-457f-ac8e-a343b2b6cc1b
Working on  38185b7b-24bd-4c88-94a9-8744df26aa70
Working on  bd12c63a-61e2-4868-8bc5-d2288ab1afe7
Working on  9c6906ba-381d-46a4-afd2-52ec66bdd3ae
Working on  71c889a1-56f6-4924-bdb1-8dfa987f785e
Working on  853289c4

Working on  7ec8f788-b8df-431b-8c2e-520e558b0cf8
Working on  1999c2d0-4b75-4825-b93a-ddaca83f7902
Working on  ebc94d5e-559b-4dd4-b73c-426eafc4fb32
Working on  e19233ad-5568-43b2-8a3f-d989dd2bea52
Working on  5d9d2edc-ace3-4a12-8758-e62f365a07bd
Working on  8523eebb-cf24-49ac-a2d1-2e8a1529673f
Working on  7fd0b9f0-8adc-43d4-8c58-daa54898794c
Working on  cee2fb65-aaac-4767-86e1-66424c087e58
Working on  e45aab0d-88b9-4ea7-b981-026d18e10892
Working on  ca058351-4892-4ef7-9ac1-f9f849b2622b
Working on  18514552-b180-48c1-b179-dfb81621acab
Working on  1af545b3-9673-40e7-bf8e-9b798f11bc8d
Working on  39910578-e327-486d-8d18-22d11d09e402
Working on  2f3ddc79-1cc3-4cb1-86c7-d1cc164cc9aa
Working on  772d218a-bfc0-4de5-848e-903b5c267728
Working on  75ac223a-480d-4160-8555-b9ee5113e9e4
Working on  0c4d4991-c1a0-4bd0-8cc7-c88af43bc471
Working on  bbee12eb-294a-451d-84d1-7e6f4227280b
Working on  70b99897-a95d-4d66-b651-c76aeebfcb48
Working on  fa3cb32a-5efb-4860-b676-564b0b3b30a8
Working on  e088de9e

Working on  b2d17e6e-5fd0-4797-abf9-c3dfa18a2dd5
Working on  18ff6a3f-ee6d-4753-8a5f-a7c11a4abe29
Working on  433d1066-4e27-4434-a50b-c3d6b1a0e7fb
Working on  bd3f0deb-cabc-4cf0-919f-aead8aa7c172
Working on  f2505f3d-956a-4cd2-a13b-1b254eb09147
Working on  56b0a8ca-6032-46db-a0ec-195161a67a5e
Working on  2753487a-f031-4cde-935a-ec4f685feb74
Working on  06be279e-ffce-46bc-bc9e-03e98f78234a
Working on  7649b5bd-f2e3-4409-bf88-5ae039cad726
Working on  b74eb6eb-baf2-4b95-9e25-653a6cda7c41
Working on  63c32aab-f52f-4c55-b18a-984bdd8b404b
Working on  dccbefcf-c299-4b4d-8384-9b8261f6ab2a
Working on  e9d80a3c-b703-400e-8ba9-efd17272ffb1
Working on  65279a78-a666-4efa-9912-d7b86b1b8631
Working on  944ab715-bcf7-4e4c-98bd-290b000bc6b9
Working on  8de202a1-c98d-4699-bc4f-e524c48f2209
Working on  d14a73c3-27a2-4e18-b25f-f5496ecda784
Working on  67769eae-310d-48a1-9d5c-1842c62eb1af
Working on  62daf92a-cd99-42b1-83e8-d2a00266d299
Working on  e6d49982-d354-484d-9a27-10e67af295e7
Working on  643c299a

In [None]:
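# Use the following code if one high pass file is empty because of a bug.
# NOTE: set `data_subset` and `high_pass_path` to the subset that contains the
# measurement first; after the loop above they still hold the values of the
# last iteration ('testing_data').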

# data_type = "cis"
# path_train_data, df_train_label = define_data_type(data_type, data_dir, data_subset)

# list_measurement_id = ["dc90dc36-b4e5-43ec-b3e8-47c39c763c71"]

# # Filter df_train_label according to the measurement_id we are most interested in
# df_train_label = interesting_patients(df_train_label=df_train_label, list_measurement_id=list_measurement_id)

# high_pass_filter(df_train_label, high_pass_path, path_train_data, data_type)

### CIS-PD: Create Masks for inactivity removal 

For the masks, two parameters can be tuned:

- `energy_threshold` : what percentage of the max energy do we consider as inactivity? The current masks generated have used the threshold of 5%

- `duration_threshold` : how long do we want to have inactivity before we remove it? For example, 3000 samples x 0.02 s per sample (50 Hz) = 1 min of inactivity minimum before those candidates are considered inactivity and will be removed.

In [13]:
# Output folders written by this cell, one per CIS-PD subset:
#   cis-pd.training_data.high_pass_mask/
#   cis-pd.ancillary_data.high_pass_mask/
#   cis-pd.testing_data.high_pass_mask/

data_type = "cis"

# Repeat the same steps for the training, ancillary and testing subsets
for data_subset in ('training_data', 'ancillary_data', 'testing_data'):
    # Input folder and label table for this subset
    path_train_data, df_train_label = define_data_type(data_type, data_dir, data_subset)

    # Destination folder for the masks of this subset
    mask_dir = "{}/cis-pd.{}.high_pass_mask/".format(data_dir, data_subset)

    # Same thresholds for every subset
    remove_inactivity_highpass(
        df_train_label, path_train_data, data_type,
        energy_threshold=5,
        duration_threshold=3000,
        plot_frequency_response=False,
        mask_path=mask_dir,
    )

path_train_data :  /export/b19/mpgill/BeatPD_data/cis-pd.training_data/
Working on  cc7b822c-e310-46f0-a8ea-98c95fdb67a1
The mask path folder was created :  /export/b19/mpgill/BeatPD_data//cis-pd.training_data.high_pass_mask/
Working on  5163afe8-a6b0-4ea4-b2ba-9b4501dd5912
Working on  5cf68c8e-0b7a-4b73-ad4f-015c7a20fb5a
Working on  fb188ae2-2173-4137-9236-19a137a402c2
Working on  19a3e9ea-fce1-40b7-9457-2618970beb7b
Working on  e2973da8-1250-4a7c-98d5-b165570a8aeb
Working on  8548d34c-4771-4ca4-bee4-d47bde435bdc
Working on  0c579a72-bac5-46a2-8671-1a50620723bf
Working on  bb59d008-25fe-43cc-bf05-6bd6b874eea3
Working on  4a1ca52c-2895-4094-bade-246fd474762f
Working on  f53cfd9b-8c52-4d22-a35c-504542170ed3
Working on  dc90dc36-b4e5-43ec-b3e8-47c39c763c71
Working on  e31db4f8-f9a5-4273-a874-4bdbc6fcae2c
Working on  d1a9294c-05ad-4eac-9915-7052c2ad98a3
Working on  cc0d147f-94ea-4637-91d7-d4ceceaf1728
Working on  20f1dbcd-0954-4bfd-ad92-9bac1b15beb0
Working on  c05991ea-ed30-45ee-96a2-8a4

Working on  f7b180d9-77ff-40c8-b718-28b12670c247
Working on  f946ec6a-74d3-4297-a3a4-b165f2d72f13
Working on  5f2d1332-846c-48f6-a6ab-188d3737dcef
Working on  622a3d54-f542-4f3f-8a31-df0bbfc929ad
Working on  3d0f965c-9d72-43d1-9369-1ea3acf963cc
Working on  4553b646-092b-4d8b-aee4-c4a074059541
Working on  a235ba27-a9a5-4d9a-931c-554950de9105
Working on  b0c7435a-5963-4ad4-a5e4-b0e17d5223cf
Working on  fb394080-2cba-454c-ad45-120fed0086e6
Working on  13239e7a-1c09-4bfe-a041-d739da36dff1
Working on  1e5738bf-e918-4025-9d84-8df71e8d4810
Working on  aee216fc-15d6-42ee-8e91-c8db3b16f0f6
Working on  b467614e-a297-4780-aec5-003fa5e8416f
Working on  b6f6ea11-7815-4c26-b8b0-1ce444327b5a
Working on  c722ebb1-d798-41ae-adeb-b4a57bd41172
Working on  0159954c-1bd2-47a6-9e6f-94f8d179b712
Working on  6867bf10-f489-4c7f-abd1-06a47dd2cdd9
Working on  3b0ee95f-d87b-4699-b34f-74e7dc20ab01
Working on  02389c9e-8821-4c54-a1a7-912f0b66dd95
Working on  6fc11b99-5d79-4c68-a400-4327721b1025
Working on  9ac46426

Working on  b91a582c-d54d-4975-b2bd-15f2c194a4b8
Working on  ee9656df-4384-48de-8832-9dec7a255f8d
Working on  38392387-d09a-40a4-afa7-9f3654066f28
Working on  2a96609a-48dc-4d3d-918b-da84d3d166ca
Working on  eb63512f-b1df-4c35-bfad-417f7c230f86
Working on  4679db13-6ca0-4a40-971b-06d537cb65cf
Working on  6f48272b-3843-49bc-a41e-13fc6a4b2aed
Working on  a62d64ec-d1da-40bf-a86f-d7cd15f1a7dd
Working on  a2b0d14d-2f55-4e66-a187-37e2ef15f116
Working on  b5c4f79b-409a-455d-b9ad-0999274ca110
Working on  9873444a-c5be-4975-9ba6-4f0e209b82bc
Working on  300ca6f5-70d1-4928-8330-a9a86c3681f5
Working on  2da6f0c2-274a-4bfe-b386-5ca925000351
Working on  ced67e0d-1743-406f-9f5d-10aeb9c1f1fc
Working on  45419562-4d6f-44fe-ab6c-be92d90b1d4a
Working on  95051425-36e8-46c6-ad8a-2e91a6c71ffd
Working on  7d00f2f8-d21e-4985-b992-b3400f71b294
Working on  6b2d9f57-dd46-42ba-8365-f50d5cc9db62
Working on  b2ebe3ce-3d5f-45a0-80c3-4cb5d4291d5d
Working on  3774a40c-4544-45f0-8878-fcf7b1c724ae
Working on  8a7a1b7d

Working on  a47a5857-8bac-4511-8557-5e1d72299cef
Working on  60fb3090-06d2-4a1d-a5a7-8c099a55d8f2
Working on  93fe6ada-fffa-4725-a2a1-f30041e9e99d
Working on  c9538c25-9630-44c9-ad62-2a39c63ef6fd
Working on  f2cb291d-8c3e-4b76-9adc-6b68252b425d
Working on  c305fe91-7bd7-418e-90b9-c58f67519d8b
Working on  60dc2ed3-5a45-4377-9bda-d2642e7acc6f
Working on  b30d2a16-b07e-4a0c-bddc-44456f2f040e
Working on  f14af6fa-d803-4b95-9ef3-17812b4cd730
Working on  57872a43-a942-4dba-b422-945343dd61bb
Working on  aaa9c1fa-536c-4829-84da-98da71efa0e6
Working on  9c25b03f-24b4-47bc-804d-126e148db07d
Working on  35a4e02b-05a4-4537-8375-d5ce70f93329
Working on  d16bace0-da6f-4593-9f16-0bc40e7c286d
Working on  b9236b01-f4f7-40a3-b54b-884a4b58c7cc
Working on  7cfd4140-fd42-49cc-a5d6-5c8e8fc8ba0a
Working on  93ec3e66-b237-480b-a698-c85156d38724
Working on  ecca1868-87a1-4ee0-ba0a-acb919d47efe
Working on  d483d93d-a4fd-428b-8e8a-d7078ca5804a
Working on  1c3b43f8-a065-4961-a697-135a5c4d4eba
Working on  fea345ae

Working on  9d09e15a-315a-47b1-a853-43807a2dc63c
Working on  4b5a9a01-b30a-4d42-8af2-48a75ffb1c96
Working on  44486b20-fb08-4987-b0a4-d8d18121eade
Working on  fdedc1d7-cbdb-4cf4-aa5f-c224b1495236
Working on  922b3bf8-47a3-4fe9-b4f8-b18827993d82
Working on  5fbc4925-9d81-4873-be04-8bae8f631555
Working on  5a222ab4-a001-44c3-8ce3-7a97da8c9989
Working on  c9ef5b41-6659-4015-8a9b-20b5ef5ccf7a
Working on  fa1419ce-c48d-432e-90ad-913944f2bfb5
Working on  3766b3c0-1091-40f4-af91-bb5d3f64fbe6
Working on  9f6b5745-ab24-4652-9bed-f802a4b13df2
Working on  ff1fa00c-8af0-49be-9195-e86da7f26d86
Working on  1ab44d60-4c2d-487e-a76e-fdca168133ac
Working on  18061b76-7ad8-43cc-ad72-6fd94bb423c2
Working on  ff4bb4df-b798-4ff1-947c-0131b5c925b6
Working on  cf81a880-b965-4797-8996-f32554166ee2
Working on  d3d14fda-81b2-49a8-a3f5-72966be2065a
Working on  3b529cab-e514-42a1-bf89-da7342307839
Working on  daf6924f-5294-4988-99a6-a2fc7bc5bd22
Working on  21a995e5-78c8-4c6e-805c-1f7690be586a
Working on  8358b98e

Working on  0972be68-49d5-4154-a294-56abfa0ec447
Working on  43c49b7e-fef5-4896-aca8-cdb7460ec7cf
Working on  96f5866f-a5ff-477a-814c-f688f410b036
Working on  cf58f3cc-ae3c-4f3a-b21c-f5cc3dba300f
Working on  23bfab44-78ec-4ca3-a879-4355d5ad8f86
Working on  a83c3212-2481-4fcb-8cc4-df1b7700b6a1
Working on  9f73ebfa-c8bb-4b9f-a5a5-f19c4cfe8fc3
Working on  ac859251-7091-4176-955e-78a032c937e4
Working on  49b642a0-5c9d-4225-aef8-ebfc3fa871c2
Working on  4e5a09f8-9f27-4e96-81fe-d6a615d4f3b2
Working on  65d97101-73af-44ed-b130-2727722e58c6
Working on  696d57a6-167f-4933-afae-1670238b4065
Working on  9ead992d-a438-4833-a743-a489b97d2da9
Working on  3b56e771-a528-497f-b544-f84306872e68
Working on  bb7abe3c-93e6-45a4-a808-63162bfadd0e
Working on  0f0afa74-8241-4cf0-bf24-b5027fc1df67
Working on  2782debe-e64e-4392-9842-ecfe2d08ffe1
Working on  132b73a8-82ef-4556-aa62-a42bee096f3d
Working on  b542a21f-9f8d-44f7-b62f-5b4abfe4c389
Working on  1236bcd3-37a0-4d32-beb9-6ce42ff1d878
Working on  717eb309

Working on  6d109c96-6f92-4521-806c-7f2dd88364e6
Working on  17aaea59-7172-49e9-bff3-e07d2108baf4
Working on  44b1cb54-f555-4480-ab41-3e7662a5f223
Working on  6c8e1d93-24e3-453d-8bb9-449925c5f5bf
Working on  f4409271-bf88-4cfb-a37a-00a2e3587822
Working on  a969532e-714c-41f2-83e8-41583e41e491
Working on  34dacc92-8fc9-45a1-bc99-117de653292d
Working on  7558283d-b11c-48ca-9b83-90ec3d536ed9
Working on  9f61501d-d94b-4526-8352-188d09665e4f
Working on  144d2c4e-b894-4511-86f4-34c373780574
Working on  ee60b3d9-545d-4f65-8811-345824a361c7
Working on  5667bc64-5d56-4b74-a20a-c8e5c6aaf2e6
Working on  9025187d-1cb4-449c-be06-0e2726e17d07
Working on  689d21a2-176f-4a28-b51e-c8bbcb3b98f3
Working on  8e09d113-3149-47a3-b0ef-b8a785168e1d
Working on  af65e35a-24b2-4f0f-a902-e824b5f0422f
Working on  1fa86b72-2bb0-4b00-9dab-8ad5f9c49517
Working on  95e4feed-0121-4e17-aa38-62bff20aa44d
Working on  1f670779-a541-4e83-8fd2-268fdfb6606b
Working on  a9bf65d0-082c-40c8-b46d-4c94072babce
Working on  571452ff

Working on  1b1c49a9-fba2-4dcb-9f20-5c3711a4c182
Working on  417f2e72-2364-4066-994f-914f7478edbd
Working on  35f92561-618f-471b-ab6b-f5b5b3994f6f
Working on  5cae48e7-7a55-4028-a10d-98ed71fb29b3
Working on  3d146ca0-50d0-471d-824d-f292d10bf678
Working on  6afab4b2-3080-4301-891d-8ae5afbab21c
Working on  9f0d3904-5399-4af6-8d59-f9371a5c9b23
Working on  18184ef0-3c71-4aff-9663-d5076c8192d7
Working on  c5eaf3f3-eb1a-4b5d-ab66-79dbd67eb2db
Working on  d9960aac-7f77-488b-a27c-74fa86dac220
Working on  788aa60a-f33f-46fc-bafb-c27b78205f2e
Working on  bc339c17-0f1a-4e79-b68e-1b6884a32814
Working on  0e9689d7-4344-4f17-8b84-42f4b53d7ea1
Working on  df8f969b-5f9b-49e3-9ef8-883776e3bbf1
Working on  e32bbd29-6933-4bdd-9d01-d84ced190a64
Working on  e6434482-4e09-4021-a256-1fe7aa0768c9
Working on  93eac078-c686-4532-89e7-b0cd17eae12e
Working on  4ef75ae1-08dc-4d63-bae5-880e0341b86f
Working on  096e8557-3c9f-4406-8afc-afc174f6b6e6
Working on  2bf17d8f-53c0-4d61-957d-2495f26228a5
Working on  2a84b61b

Working on  ceb52190-d81b-44d4-94cc-5823a918edfa
Working on  858b51e0-1b31-4a93-8b8d-79d02b3606b7
Working on  738e62aa-6552-4f8c-bee0-72fb96b7581e
Working on  bbadb13c-ea0e-44e8-8908-cb1a18eebf40
Working on  f28c38d1-cedf-4bd3-b488-bca6a64bb512
Working on  50cd6ad2-9998-4dd7-83e7-ddb7da528256
Working on  7027ed4d-9c95-4bf5-9aeb-d8f69a9821cf
Working on  88662a33-82ec-40a4-a2fc-332cff45c5af
Working on  83fd5aa0-e585-4200-b90c-60bb443cb2c4
Working on  585f9628-039a-47af-adbc-d722b717a60a
Working on  c1dda61b-9287-4e41-9742-fea19250bf2e
Working on  005685b2-3de4-469b-b673-8f937ee8196e
Working on  3e06f01c-a386-40c0-b452-50921b8f4e8a
Working on  1c3b72c7-24d5-427c-9400-be1554688a4a
Working on  f6fde688-844b-4c4c-8c5b-2c1709e265bd
Working on  d7f747bf-1abd-4149-abb1-429bc99d8de7
Working on  43c6a806-e912-43f7-aa5b-1091d398a836
Working on  2c6285cf-8c39-431a-9db3-522f2c74fb4b
Working on  c3bc0e8b-af62-4408-8e9b-f11b7e5f7672
Working on  12f25bd0-efbc-4c38-9d72-62092f6a1e2d
Working on  3adb3a14

Working on  88185aa3-3a84-40e4-a780-40a4cef198b4
Working on  a38bf8c5-617d-4512-8c9e-70bc9195cc06
Working on  b1b084cd-88ab-485d-af7e-e5a6e2c73c03
Working on  1f57b4bc-bcd5-4d65-8a2c-7acfd94afaa2
Working on  3cd93936-b55b-4b4c-9ea6-083db0729b20
Working on  31956275-5c34-47ec-ba83-d5d107211af6
Working on  7b2698a6-f3a5-4345-9630-25d377d58992
Working on  22ba7c99-9274-41fd-9170-4364e00973a0
Working on  8390b805-6091-4f12-8d03-a853c3405ddb
Working on  7b9f5ce7-a152-4809-8930-c6bfaff0da8a
Working on  dd261c90-aab6-40c3-9da7-1b8b320c7f7a
Working on  fb86493b-1893-4be4-8707-1e97d56bb049
Working on  dd140d80-bcab-458b-bd0e-82cd584d0f28
Working on  e0441156-c4b8-467c-8f4f-3b532d594d8f
Working on  e021c06a-263d-403b-ba89-71e1c345d042
Working on  bead5c67-03dc-4bf6-8710-6130d64a1608
Working on  f66cb02b-88b6-4dcd-bfda-391d506c69d8
Working on  161c5ebc-8830-480c-a846-7213190f019a
Working on  c46cfa20-9994-4dda-b258-17f1e0fd8fe3
Working on  b0b10be2-e918-41c2-b031-b47ba56a2762
Working on  e3c3fb59

Working on  c068ccf1-e513-4277-897c-4b1835245963
Working on  e0ab2320-5a20-4e04-b57c-f9d1c6cf1e2b
Working on  5674a8be-ebc0-4759-85d9-f7fcd603538b
Working on  62cd05cc-35d3-459b-b7ed-df89ee789e67
Working on  270e838a-3df7-46b0-9fe8-e2a46c1a2a37
Working on  5783b4ea-308a-4340-bd3d-2276e4916805
Working on  a4228918-a3a8-4ecd-92a5-3a4916dc3556
Working on  fb0f0e1b-d3d8-44e3-9fc4-3da3e78cbc81
Working on  3f0adcee-5e29-4eb1-8fc5-b721bce160c8
Working on  fe7d95fb-fca9-4944-b64f-a5cd171401f7
Working on  345810ba-7b25-4783-8633-521bac4da97e
Working on  8338ec01-467c-48fc-85bf-c1f912b51076
Working on  bbecc570-e0ef-45ae-b0ee-c0664c3cced0
Working on  8990585f-cc41-4072-9269-0b06b6b775cf
Working on  908f2ad3-8151-4533-91c8-f9580274e466
Working on  e8aa1efe-2b82-47d2-9069-2fa5013afbad
Working on  90a06412-d1fb-4505-b529-bebd822f40b3
Working on  6fe82fd1-7452-4f65-a12d-892b6b5e89a9
Working on  ab81f947-74b5-4739-966c-80a5ed013e0d
Working on  f2be9894-4b4e-410b-874f-4be1c28ff313
Working on  7b25f580

Working on  ab618d1f-eb27-442b-a3b7-e438fde70db1
Working on  25e8bca2-051f-4216-826b-810bbddfdb2e
Working on  7f7bb7b9-8656-40dd-94f9-9d546ac75722
Working on  c29c2d91-c294-4655-a2a7-d4c1f456c3a2
Working on  a97693fd-7024-4e68-b00a-3700306c7bc8
path_train_data :  /export/b19/mpgill/BeatPD_data/cis-pd.ancillary_data/
Working on  10005cf2-85cf-47ad-8441-f15fb1509f69
The mask path folder was created :  /export/b19/mpgill/BeatPD_data//cis-pd.ancillary_data.high_pass_mask/
Working on  f25cfad6-1835-4f78-9192-d28581ffa4d8
Working on  5cf1a9ff-b13c-457b-a661-ccd617842027
Working on  f55eac42-c808-447b-9b9b-02c10cee579f
Working on  541740be-1158-4169-becb-5ca4926ecd0c
Working on  78428c75-bb3f-48c9-87f1-fe7e9d9f864f
Working on  e9fa3c88-3eba-45a6-93c7-3ef4e5c8f846
Working on  bdda2ebb-1b42-4394-94bd-6994e81c7a09
Working on  415651db-922d-4077-8136-030919aadcd4
Working on  f0719d3c-5203-4ed6-b1b9-448485382ae9
Working on  f8dae5a0-5efa-4ce3-9697-85caafa33acb
Working on  6e5ea446-6c44-4a9d-8ced-c

Working on  579adf7a-8366-4891-bbf1-7d6c4d4d0c5e
Working on  bb2c1a12-4135-48e6-be82-8b220b766ada
Working on  122b70a3-7449-46a1-9d42-7cd82551d11a
Working on  aa612858-4e9b-4f5f-98c4-289914efd3a3
Working on  9b9ad9d1-4286-4349-bffe-fbcd2f9651fd
Working on  5956ed5a-b310-47a3-9799-3a5f475bb1c0
Working on  5a67b031-e3a7-4d0b-92e5-748bd0c89330
Working on  c4fff7ba-27fa-47a9-81e8-15393197eb61
Working on  1ac99d8a-b77a-4ac1-9292-cade8f2f13d3
Working on  13aeee8f-d413-4f13-87aa-e69c55b86e3f
Working on  cd5b4e50-f2ab-42b2-b713-32550ef5a921
Working on  fbfac8ca-2208-4325-9fef-c910fdb4af69
Working on  e10e93d5-d13f-42d9-b552-d5699b159213
Working on  075c4acf-5681-4fa5-9fb4-458d93ae924c
Working on  d6f96c9d-05a2-4590-8c2a-9faf4f4e5f43
Working on  9ea2e6ba-8bda-4154-85e1-a2b99973ae0e
Working on  24557a7b-a693-45c2-9fbe-8c18a02fe8bb
Working on  a6824efb-5d9c-4a0b-b081-4cdde6be23ac
Working on  f2db25a4-88b8-4cd9-ae14-0e1e9396d039
Working on  04dee214-b21b-42a8-b1fa-508c45e4582a
Working on  2a2bc8ca

Working on  a1612a58-a5e1-43bb-8f31-0cbd97a5d806
Working on  5d0ea615-e03f-4d1c-9772-e7e83e5f40ef
Working on  63c0aeb7-0d28-4dd4-b80b-58e158b99016
Working on  edcb47d7-33d9-4fe3-be2d-9afd08b90fc5
Working on  4c698281-9b7e-4e6d-85ce-3f2d4f534f13
Working on  a3e66b20-f79e-44e3-bef8-c0e6b0f57960
Working on  ce025971-1458-4b94-83c3-45f050b21448
Working on  69fdc5d8-1e8f-4a58-8408-ffd2e71bbf95
Working on  67223861-117f-4c7f-9288-5222d57d0d07
Working on  b0fd1f8e-368c-42ab-a6e1-1580257679da
Working on  f03b9f53-9132-480c-8c36-29b2db6d29fa
Working on  3a8fc6b2-335a-44ce-863e-e3218c1a655e
Working on  2aa830f1-84b7-4c8e-a5df-4f97704bd745
Working on  a648d949-7347-478b-9a4c-fc23f3d4d17a
Working on  5baad59e-ba93-4a28-a39c-2e12e2ccfc5e
Working on  acae590a-5c75-4dfa-97ac-8c7dcd949590
Working on  00d5a06e-1719-4d7d-bcbc-c63991a5b1cd
Working on  1205a4e8-1ca0-437f-9cb7-df04e9b18b69
Working on  1cd368b3-5052-4a29-9a1f-630c97522a32
Working on  6deebc3c-d168-4e30-9474-b1cdba6ea826
Working on  a4172c4b

Working on  39f230e3-1afc-44c3-82eb-0f14f4ace351
Working on  d2c15ffc-499b-457d-8689-d7e4eaa93d59
Working on  70de5779-e5ce-4056-953c-bcd7cd2b5c95
Working on  a3832cfa-0cc9-40ba-ab33-ba0afd18aa4e
Working on  9e4592bc-4cce-4fa1-b7c5-dd40d77d66ee
Working on  1a276bf5-db32-457f-ac8e-a343b2b6cc1b
Working on  38185b7b-24bd-4c88-94a9-8744df26aa70
Working on  bd12c63a-61e2-4868-8bc5-d2288ab1afe7
Working on  9c6906ba-381d-46a4-afd2-52ec66bdd3ae
Working on  71c889a1-56f6-4924-bdb1-8dfa987f785e
Working on  853289c4-c0fc-449c-a8f4-3640ea137a61
Working on  10d6259a-10d6-4146-a7a2-55bb498bb28b
Working on  a6b53b97-cfeb-48bd-af3a-0aa645b68f3b
Working on  66699874-6f3c-479f-b10a-efe64c580ed8
Working on  3bbce5cb-b339-40ec-8101-d0d44e9fff22
Working on  3093d067-d59c-4ba0-9b69-12290e693f46
Working on  ae3fef33-c656-4214-bcc2-27dc251870ed
Working on  82ded7de-c120-43af-8379-0d05aff836d4
Working on  da395648-0452-423c-9009-4a8f3feebb46
Working on  dbf25d01-dbc6-499c-b7e5-f0f31a62e56d
Working on  bdcb3256

Working on  18514552-b180-48c1-b179-dfb81621acab
Working on  1af545b3-9673-40e7-bf8e-9b798f11bc8d
Working on  39910578-e327-486d-8d18-22d11d09e402
Working on  2f3ddc79-1cc3-4cb1-86c7-d1cc164cc9aa
Working on  772d218a-bfc0-4de5-848e-903b5c267728
Working on  75ac223a-480d-4160-8555-b9ee5113e9e4
Working on  0c4d4991-c1a0-4bd0-8cc7-c88af43bc471
Working on  bbee12eb-294a-451d-84d1-7e6f4227280b
Working on  70b99897-a95d-4d66-b651-c76aeebfcb48
Working on  fa3cb32a-5efb-4860-b676-564b0b3b30a8
Working on  e088de9e-769e-4820-b86c-91f2b99d61a4
Working on  6626fbc8-c875-4df8-b92f-e06a2223e236
Working on  cf1f3a28-d711-407c-bd52-ce34bef4a9b5
Working on  bf7585f9-fd2c-48df-8e30-aad8eba7c282
Working on  6b6ae726-bd67-4f87-8526-000c03c1285b
Working on  cbdb3aff-d2ca-4b9e-94c1-0faa0005671e
Working on  9df1706f-4e3a-47bb-a408-718d6ca749bb
Working on  4d135e04-29bf-476e-aa5b-a7d60ba4df14
Working on  30fd6182-6369-4c08-981c-cf217959488f
Working on  4078789d-a318-4b94-bec6-b2f19e617d3b
Working on  0e16cf6d

Working on  dccbefcf-c299-4b4d-8384-9b8261f6ab2a
Working on  e9d80a3c-b703-400e-8ba9-efd17272ffb1
Working on  65279a78-a666-4efa-9912-d7b86b1b8631
Working on  944ab715-bcf7-4e4c-98bd-290b000bc6b9
Working on  8de202a1-c98d-4699-bc4f-e524c48f2209
Working on  d14a73c3-27a2-4e18-b25f-f5496ecda784
Working on  67769eae-310d-48a1-9d5c-1842c62eb1af
Working on  62daf92a-cd99-42b1-83e8-d2a00266d299
Working on  e6d49982-d354-484d-9a27-10e67af295e7
Working on  643c299a-6ae2-4361-b48c-1e3700d6a52c
Working on  7e2ca6f0-26f1-442d-9006-3b84fa5c86d3
Working on  bdd5e022-74fe-49c1-81e1-ccb05339a899
Working on  7fd4268f-79e8-48fb-97f2-46653eb6954e
Working on  ddcc42ba-780c-4063-945d-d2926733cc86
Working on  0ddae13f-3e4e-4dd1-8e09-4891b79921a2
Working on  845c70a3-c541-4522-93a8-8d992d514b0d
Working on  b761d0c2-7b65-4ec4-bb65-c3745cef5018
Working on  cb0f8c5f-9a8d-4159-add4-0b06f953212f
Working on  15fa874e-5393-4da8-888a-4b50da210ccd
Working on  20c8d23e-fbda-46a8-adcc-2d132c7ef73e
Working on  358ff1c5

### Facultative : Create Combhpfnoinact

In [1]:
def save_removenoinact(measurement_id, path_train_data, mask_path, path_removeinact):
    """
    Remove inactivity from one measurement and save the result as a CSV file.

    The masked data is obtained from apply_mask and written to
    path_removeinact + measurement_id + ".csv" (without the index column).

    Keyword arguments:
    - measurement_id: identifier of the measurement; also used as the name of
      the output file (without the ".csv" extension)
    - path_train_data: location of the input data, forwarded to apply_mask
    - mask_path: location of the masks, forwarded to apply_mask
    - path_removeinact: output folder. It is concatenated directly with the
      file name, so it must end with a path separator. Created if missing.
    """
    # apply_mask is given the data location and the measurement id and returns
    # the data that gets saved, so no separate read of the CSV is needed here.
    df_train_data = apply_mask(path_train_data, measurement_id, mask_path)

    # Create the output folder if needed. This function is mapped over a
    # process pool, so another worker may create the folder between an
    # existence check and the makedirs call; attempting the creation and
    # tolerating FileExistsError avoids that race.
    try:
        os.makedirs(path_removeinact)
    except FileExistsError:
        pass
    else:
        print('The folder was created : ', path_removeinact)

    # Save to the output folder
    df_train_data.to_csv(path_removeinact + measurement_id + ".csv", index=False)

In [None]:
# This will create the following folder under data_dir:
# cis-pd.training_data.combhpfnoinact/

# The input, mask and output folders used below all belong to the CIS-PD
# training data, so the labels are requested for that database and subset
# explicitly instead of relying on variables left over from another cell.
path_train_data, df_train_label = define_data_type("cis",
                                                   data_dir,
                                                   "training_data")

# Folders are built from data_dir so the cell runs on any machine.
# os.path.join(..., "") appends the trailing separator that
# save_removenoinact needs, whether or not data_dir already ends with one.
do_work = partial(
        save_removenoinact,
        path_train_data=os.path.join(data_dir, "cis-pd.training_data.high_pass", ""),
        mask_path=os.path.join(data_dir, "cis-pd.training_data.high_pass_mask", ""),
        path_removeinact=os.path.join(data_dir, "cis-pd.training_data.combhpfnoinact", "")
    )

# Number of worker processes used simultaneously
num_jobs = 8
with ProcessPoolExecutor(num_jobs) as ex:
    results = list(ex.map(do_work, df_train_label['measurement_id']))

### Facultative : Create Orig No Inact

In [None]:
# This will create the following folder under data_dir:
# cis-pd.training_data.orignoinact/

# The input, mask and output folders used below all belong to the CIS-PD
# training data, so the labels are requested for that database and subset
# explicitly instead of relying on variables left over from another cell.
path_train_data, df_train_label = define_data_type("cis",
                                                   data_dir,
                                                   "training_data")

# Folders are built from data_dir so the cell runs on any machine.
# os.path.join(..., "") appends the trailing separator that
# save_removenoinact needs, whether or not data_dir already ends with one.
do_work = partial(
        save_removenoinact,
        path_train_data=os.path.join(data_dir, "cis-pd.training_data", ""),
        mask_path=os.path.join(data_dir, "cis-pd.training_data.high_pass_mask", ""),
        path_removeinact=os.path.join(data_dir, "cis-pd.training_data.orignoinact", "")
    )

# Number of worker processes used simultaneously
num_jobs = 8
with ProcessPoolExecutor(num_jobs) as ex:
    results = list(ex.map(do_work, df_train_label['measurement_id']))

### Facultative : CIS-PD: Create first derivative

This step is optional: we experimented with the first derivative, but never ended up using it.

In [None]:
# Output folders produced by this cell:
#   cis-pd.training_data.derivative_original_data/
#   cis-pd.ancillary_data.derivative_original_data/
#   cis-pd.testing_data.derivative_original_data/

data_type = "cis"

# Number of CPUs to use simultaneously; edit this value to suit your machine
num_jobs = 6

# Process the training, ancillary and testing subsets one after the other
for data_subset in ("training_data", "ancillary_data", "testing_data"):
    path_train_data, df_train_label = define_data_type(
        data_type, data_dir, data_subset
    )

    # Everything except the measurement id is fixed for a given subset
    do_work = partial(
        get_first_derivative,
        path_train_data=path_train_data,
        derivative_path=f"{data_dir}cis-pd.{data_subset}.derivative_original_data/",
        padding=True,
        mask_path=f"{data_dir}cis-pd.{data_subset}.high_pass_mask/",
    )

    # One task per measurement id, spread over the worker processes
    with ProcessPoolExecutor(num_jobs) as ex:
        results = list(ex.map(do_work, df_train_label['measurement_id']))

path_train_data :  /export/b19/mpgill/BeatPD_data/cis-pd.training_data/


# REAL-PD Database

### REAL-PD: Create High-Pass data 

In [None]:
# Output folders produced by this cell (each with one sub-folder per subtype):
#   real-pd.training_data.high_pass/
#   real-pd.ancillary_data.high_pass/
#   real-pd.testing_data.high_pass/

data_type = "real"

for data_subset in ("training_data", "ancillary_data", "testing_data"):
    for data_real_subtype in ("smartphone_accelerometer",
                              "smartwatch_accelerometer",
                              "smartwatch_gyroscope"):
        # Labels and input folder for this subset / subtype combination
        path_train_data, df_train_label = define_data_type(
            data_type, data_dir, data_subset, data_real_subtype
        )

        # Destination of the filtered files for this combination
        high_pass_path = f"{data_dir}/real-pd.{data_subset}.high_pass/{data_real_subtype}/"

        high_pass_filter(df_train_label, high_pass_path, path_train_data, data_type)

### REAL-PD: Create Masks for inactivity removal for all subtypes

In [None]:
# Output folders produced by this cell (each with one sub-folder per subtype):
#   real-pd.training_data.high_pass_mask/
#   real-pd.ancillary_data.high_pass_mask/
#   real-pd.testing_data.high_pass_mask/

data_type = "real"

for data_subset in ("training_data", "ancillary_data", "testing_data"):
    for data_real_subtype in ("smartphone_accelerometer",
                              "smartwatch_accelerometer",
                              "smartwatch_gyroscope"):
        # Labels and input folder for this subset / subtype combination
        path_train_data, df_train_label = define_data_type(
            data_type, data_dir, data_subset, data_real_subtype
        )

        # Plots are disabled; the masks go to a per-subtype folder
        remove_inactivity_highpass(
            df_train_label,
            path_train_data=path_train_data,
            data_type=data_type,
            energy_threshold=5,
            duration_threshold=3000,
            plot_frequency_response=False,
            plot_accelerometer_after_removal=False,
            mask_path=f"{data_dir}/real-pd.{data_subset}.high_pass_mask/{data_real_subtype}/",
        )

### Facultative : REAL-PD: Create first derivative for all subtypes 

This step is optional: we experimented with the first derivative, but never ended up using it.

In [None]:
# This will create the following folders under data_dir, each with one
# sub-folder per sensor subtype:
# real-pd.training_data.derivative_original_data/<subtype>/
# real-pd.ancillary_data.derivative_original_data/<subtype>/
# real-pd.testing_data.derivative_original_data/<subtype>/

# Set explicitly, like the other REAL-PD cells, so that this cell does not
# depend on which cell was executed last.
data_type = "real"

# Number of CPUs to use simultaneously
num_jobs = 8

for data_real_subtype in ['smartphone_accelerometer','smartwatch_accelerometer','smartwatch_gyroscope']:
    for data_subset in ["training_data", "ancillary_data", "testing_data"]:
        path_train_data, df_train_label = define_data_type(data_type,
                                                           data_dir,
                                                           data_subset,
                                                           data_real_subtype)

        # Drop the labels whose measurement file is missing for this subtype.
        # Only the existence of the file matters here, so the CSV is not
        # parsed. The index is captured once by the for statement, so
        # rebinding df_train_label inside the loop is safe.
        for idx in df_train_label.index:
            measurement_id = df_train_label["measurement_id"][idx]
            if not os.path.isfile(path_train_data + measurement_id + ".csv"):
                print('Removing ' + measurement_id +
                      ' as it doesn\'t exist for ' +
                      data_real_subtype)
                df_train_label = df_train_label.drop(idx)

        # Number of measurements left for this subtype / subset
        print(len(df_train_label))

        # The derivative folder lives under data_dir, next to the mask folder
        do_work = partial(
            get_first_derivative,
            path_train_data=path_train_data,
            derivative_path=data_dir+"/real-pd."+data_subset+".derivative_original_data/"+data_real_subtype+"/",
            padding=True,
            mask_path=data_dir+"/real-pd."+data_subset+".high_pass_mask/"+data_real_subtype+"/"
        )

        with ProcessPoolExecutor(num_jobs) as ex:
            results = list(ex.map(do_work, df_train_label['measurement_id']))

# KFolds for CIS-PD and REAL-PD

KFolds are created with StratifiedKFold. They are balanced on only the on/off labels as this problem has a multilabel output. 

### Create the K-Fold files for the CIS database 

In [None]:
# Output folders produced by this cell:
#   cis-pd.training_data.k_fold_v3
#   cis-pd.ancillary_data.k_fold_v3

# Two databases exist; this cell handles the CIS-PD one
data_type = "cis"

# FIXME: get_k_fold could be renamed to reflect that it creates and saves the
# folds; nothing it returns is used here
for data_subset in ("training_data", "ancillary_data"):
    path_train_data, df_train_label = define_data_type(
        data_type, data_dir, data_subset
    )

    # One group per subject
    df_train_label_subject_id = df_train_label.groupby(by="subject_id")

    # Build the folds of every subject in turn
    for subject_id, value in df_train_label_subject_id:
        list_df_train_label, list_df_test_label = get_k_fold(
            data_type=data_type,
            data_dir=data_dir,
            data_subset=data_subset,
            df_train_label=df_train_label,
            subject_id=subject_id,
            n_splits=5,
        )

### Create the K-Fold Files for the REAL database

In [None]:
# Output folders produced by this cell:
#   real-pd.training_data.k_fold_v2
#   real-pd.ancillary_data.k_fold_v2

data_type = "real"

for data_subset in ("training_data", "ancillary_data"):
    for data_real_subtype in ("smartphone_accelerometer",
                              "smartwatch_accelerometer",
                              "smartwatch_gyroscope"):

        # Labels and input folder for this subset / subtype combination
        path_train_data, df_train_label = define_data_type(
            data_type, data_dir, data_subset, data_real_subtype
        )

        # One group per subject
        df_train_label_subject_id = df_train_label.groupby(by="subject_id")

        # Build the folds of every subject in turn
        for subject_id, value in df_train_label_subject_id:
            list_df_train_label, list_df_test_label = get_k_fold(
                data_type=data_type,
                data_dir=data_dir,
                data_subset=data_subset,
                data_real_subtype=data_real_subtype,
                df_train_label=df_train_label,
                subject_id=subject_id,
                n_splits=5,
            )

# Data Preparation for MFCC Embeddings

### CIS-PD: Create WAV files

#### CIS-PD: Write Wav Files - Training Data - Original

In [None]:
# Output folders produced by this cell (one per axis):
#   cis-pd.training_data.wav_X
#   cis-pd.training_data.wav_Y
#   cis-pd.training_data.wav_Z
# Wav files of the training data; the inactivity is NOT removed.

data_subset = 'training_data'

# Same call for each axis, in X, Y, Z order
for wav_axis in ("X", "Y", "Z"):
    create_cis_wav_files(data_subset, data_dir, sAxis=wav_axis, data_type="cis", bMask=False)

#### CIS-PD: Write Wav Files - Original Training Data - High Pass + Inactivity Removed 

In [None]:
# Output folders produced by this cell (one per axis):
#   cis-pd.training_data.high_pass_mask.wav_X
#   cis-pd.training_data.high_pass_mask.wav_Y
#   cis-pd.training_data.high_pass_mask.wav_Z
# Wav files of the original training data where inactivity is removed.

data_subset = 'training_data'

# Same call for each axis, in X, Y, Z order
for wav_axis in ("X", "Y", "Z"):
    create_cis_wav_files(data_subset, data_dir, sAxis=wav_axis, data_type="cis", bMask=True)

#### CIS-PD: Write Wav Files - Training Data - High Pass Filter Applied

In [None]:
# Output folders produced by this cell (one per axis):
#   cis-pd.training_data.high_pass.wav_X
#   cis-pd.training_data.high_pass.wav_Y
#   cis-pd.training_data.high_pass.wav_Z
# Wav files of the high-pass filtered data; inactivity is not removed.

data_subset = 'training_data.high_pass'

# Same call for each axis, in X, Y, Z order
for wav_axis in ("X", "Y", "Z"):
    create_cis_wav_files(data_subset, data_dir, sAxis=wav_axis, data_type="cis", bMask=False)

#### CIS-PD: Write Wav Files - Ancillary Data - Original

In [None]:
# Output folders produced by this cell (one per axis):
#   cis-pd.ancillary_data.wav_X
#   cis-pd.ancillary_data.wav_Y
#   cis-pd.ancillary_data.wav_Z
# Wav files of the original ancillary data; inactivity is NOT removed.

data_subset = 'ancillary_data'

# Same call for each axis, in X, Y, Z order
for wav_axis in ("X", "Y", "Z"):
    create_cis_wav_files(data_subset, data_dir, sAxis=wav_axis, data_type="cis", bMask=False)

#### CIS-PD: Write Wav Files - Ancillary Data - High Pass data + Inactivity Removed 

In [None]:
# Output folders produced by this cell (one per axis):
#   cis-pd.ancillary_data.high_pass_mask.wav_X
#   cis-pd.ancillary_data.high_pass_mask.wav_Y
#   cis-pd.ancillary_data.high_pass_mask.wav_Z
# Wav files of the original ancillary data where inactivity is removed.

data_subset = 'ancillary_data'

# Same call for each axis, in X, Y, Z order
for wav_axis in ("X", "Y", "Z"):
    create_cis_wav_files(data_subset, data_dir, sAxis=wav_axis, data_type="cis", bMask=True)

#### CIS-PD: Write Wav Files - Ancillary Data - High Pass Filter Applied

In [None]:
# Output folders produced by this cell (one per axis):
#   cis-pd.ancillary_data.high_pass.wav_X
#   cis-pd.ancillary_data.high_pass.wav_Y
#   cis-pd.ancillary_data.high_pass.wav_Z
# Wav files of the high-pass filtered data; inactivity is not removed.

data_subset = 'ancillary_data.high_pass'

# Same call for each axis, in X, Y, Z order
for wav_axis in ("X", "Y", "Z"):
    create_cis_wav_files(data_subset, data_dir, sAxis=wav_axis, data_type="cis", bMask=False)

### REAL-PD: Create WAV files for all subtypes

#### REAL-PD: Write Wav Files - Training Data - Original Data

In [None]:
# Output folders produced by this cell (one per axis):
#   real-pd.training_data.wav_X
#   real-pd.training_data.wav_Y
#   real-pd.training_data.wav_Z
# Wav files of the training data; the inactivity is NOT removed.

data_subset = "training_data"

# Same call for each axis, in X, Y, Z order
for wav_axis in ("X", "Y", "Z"):
    create_real_wav_files(data_subset, data_dir=data_dir, sAxis=wav_axis, data_type="real", bMask=False)

#### REAL-PD: Write Wav Files - Training Data - High Pass Data + Inactivity Removed

In [None]:
# Output folders produced by this cell (one per axis):
#   real-pd.training_data.high_pass_mask.wav_X
#   real-pd.training_data.high_pass_mask.wav_Y
#   real-pd.training_data.high_pass_mask.wav_Z
# Wav files of the original training data where inactivity is removed.

data_subset = "training_data"

# Same call for each axis, in X, Y, Z order
for wav_axis in ("X", "Y", "Z"):
    create_real_wav_files(data_subset, data_dir=data_dir, sAxis=wav_axis, data_type="real", bMask=True)

#### REAL-PD: Write Wav Files - Ancillary Data - Original Data

In [None]:
# Output folders produced by this cell (one per axis):
#   real-pd.ancillary_data.wav_X
#   real-pd.ancillary_data.wav_Y
#   real-pd.ancillary_data.wav_Z
# Wav files of the original ancillary data; inactivity is NOT removed.

data_subset = "ancillary_data"

# Same call for each axis, in X, Y, Z order
for wav_axis in ("X", "Y", "Z"):
    create_real_wav_files(data_subset, data_dir=data_dir, sAxis=wav_axis, data_type="real", bMask=False)

#### REAL-PD: Write Wav Files - Ancillary Data - High Pass Data + Inactivity Removed 

In [None]:
# Output folders produced by this cell (one per axis):
#   real-pd.ancillary_data.high_pass_mask.wav_X
#   real-pd.ancillary_data.high_pass_mask.wav_Y
#   real-pd.ancillary_data.high_pass_mask.wav_Z
# Wav files of the original ancillary data where inactivity is removed.

data_subset = "ancillary_data"

# Same call for each axis, in X, Y, Z order
for wav_axis in ("X", "Y", "Z"):
    create_real_wav_files(data_subset, data_dir=data_dir, sAxis=wav_axis, data_type="real", bMask=True)