# Data Augmentation 

The only goal of this notebook is to visualize what happens when we do data augmentation on the signals


After doing this data augmentation, I will need to extract features again with tsfresh... Hopefully it will work, but I think it was taking 2 days to extract.

* [Linear Combination](#Linear-Combination-on-the-signal-level)
* [Rotation](#Rotation)
* [Resampling - Stretching / Shrinking](#Stretching-/-Shrinking)
* [Add Noise](#Generate-Noise-Data-Augmentation)
* [Making a more balanced dataset](#Making-more-balanced-dataset)

## Imports

In [None]:
# Import required libraries

%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from IPython.display import HTML, display

# Imports for the high pass signal
from scipy.signal import butter, freqz, lfilter

# KFold
from sklearn.model_selection import KFold

# Import required modules
from sklearn.preprocessing import StandardScaler

import os.path

# To write WAV File
from scipy.io.wavfile import write

# To make derivative work on multiple CPUs
from concurrent.futures import ProcessPoolExecutor
from functools import partial

import sys

# Confusion matrix
from sklearn import metrics

# 3d
from mpl_toolkits.mplot3d import Axes3D
import statsmodels.api as sm

%load_ext autoreload
%autoreload 2

import scipy.signal as sg 

from create_graphs import *
from transform_data import *

### Paths

In [4]:
# Database identifier; passed to define_data_type and compared against "real"
# further down to pick the name of the time column.
data_type = "cis"
# Root folder of the BeatPD data, passed to define_data_type.
data_dir = "/home/sjoshi/codes/python/BeatPD/data/BeatPD/"
# Data subset selector, passed to define_data_type.
training_or_ancillary='training_data'
# Passed to plot_accelerometer as `path_accelerometer_plots`.
path_save_accelerometer_plots = "/export/b19/mpgill/BeatPD_data_aug_plots/"

# Linear Combination on the signal level

This linear combination code is not completed because it is actually done at the feature level, and not at the signal level like the code here. It was started, but not completed. 

In [None]:
path_train_data, df_train_label = define_data_type(data_type,
                                                   data_dir,
                                                   training_or_ancillary)


all_subjects = df_train_label['subject_id'].unique()

columns_to_exclude = ['measurement_id','subject_id', 'Timestamp']
lambda_value = 0.5
linear_combination_path = "/export/fs02/mpgill/BeatPD/cis-pd.training_data.combhpfnoinact.lamb_"+str(lambda_value)

# This signal-level experiment was never finished (the combination is done at
# the feature level instead). With preview_only=True the cell prints the first
# combined pair and stops without writing anything, which is what the previous
# `raise KeyboardInterrupt` achieved. Set it to False to write every pair.
preview_only = True


def _subject_pair_combinations():
    """Yield (measurement_id_1, measurement_id_2, combined_frame) for every
    ordered pair of distinct measurements belonging to the same subject.

    combined_frame = lambda_value * signal_1 + (1 - lambda_value) * signal_2,
    computed on all columns except `columns_to_exclude`.
    """
    for subject in all_subjects:
        df_train_label_subject = df_train_label.loc[df_train_label['subject_id'] == subject]

        for index, measurement1 in df_train_label_subject.iterrows():
            # First label column holds the file name (positional access, as before)
            measurement_id_1 = measurement1.iloc[0]
            df_train_data_1 = pd.read_csv(path_train_data + measurement_id_1 + ".csv")
            # Scalar multiplication is vectorized; no row-wise apply needed
            modDfObj1 = df_train_data_1[df_train_data_1.columns.difference(columns_to_exclude)] * lambda_value

            for index2, measurement2 in df_train_label_subject.iterrows():
                # Skip the self-pair before paying for the second CSV read
                if index == index2:
                    continue

                measurement_id_2 = measurement2.iloc[0]
                df_train_data_2 = pd.read_csv(path_train_data + measurement_id_2 + ".csv")
                modDfObj2 = df_train_data_2[df_train_data_2.columns.difference(columns_to_exclude)] * (1 - lambda_value)

                print(len(df_train_data_1))
                print(len(df_train_data_2))

                # maybe i should add timestamp back
                # The addition aligns on the row index, so rows that exist in
                # only one of the two recordings come out as NaN.
                combined = pd.DataFrame(modDfObj1.add(modDfObj2), columns=modDfObj1.columns)
                yield measurement_id_1, measurement_id_2, combined


for measurement_id_1, measurement_id_2, df_data_aug in _subject_pair_combinations():
    print()
    print(df_data_aug)

    if preview_only:
        break

    # If the linear combination folder doesn't exist, we need to create it
    if not os.path.exists(linear_combination_path):
        os.makedirs(linear_combination_path)
        print('The linear combination folder was created : ', linear_combination_path)

    # Save to a folder, one file per ordered pair
    df_data_aug.to_csv(
        os.path.join(linear_combination_path, measurement_id_1 + "_" + measurement_id_2 + ".csv"),
        index=False
    )


print(df_train_label)

## Rotation

In [84]:
from scipy.spatial.transform import Rotation as R

def rotate_signal(measurement_id, path_train_data, params, rotation_path, mask_path=None, verbose=0):
    """
    Rotate the three accelerometer axes of one measurement and save the result as CSV.

    The file `path_train_data + measurement_id + '.csv'` is read, its last three
    columns are rotated, and the result is written to
    `rotation_path + measurement_id + '_ang_<angle>.csv'` with the columns
    Timestamp, X, Y, Z. The same angle (in degrees) is applied around x, y and z.

    measurement_id: specific measurement_id to apply rotation to
    path_train_data: path to the training data to use
    params: dict of options. The three flags are 'True'/'False' strings (booleans
            are accepted too) and a missing flag counts as 'False'. They are
            checked in this order and only the first enabled one is applied:
            add_bounds_rotation: one angle drawn from [-rot_ang, -(rot_ang-5)[
                                 or [rot_ang-5, rot_ang[
            add_rotation: one angle drawn from [-rot_ang, rot_ang[ for the whole signal
            add_jump_rotation: a new angle for each consecutive segment whose
                               length is drawn from [min_len, max_len[
            rot_ang (default 45), min_len (default 1000), max_len (default 10000)
    rotation_path: folder where the rotated signal is saved (created if missing)
    mask_path: path to the mask if we want to remove inactivity
    verbose: can be 0 or 1, level of details to log (1 plots before/after)

    Raises ValueError when none of the three rotation flags is enabled.
    """
    def is_enabled(flag_name):
        # Callers pass 'True'/'False' strings and often omit the flags they do not use
        return str(params.get(flag_name, 'False')) == 'True'

    def rotation_matrix(angle):
        r = R.from_euler('xyz', [angle]*3, degrees=True)
        # as_dcm() was renamed to as_matrix() in SciPy 1.4 and later removed
        return r.as_matrix() if hasattr(r, 'as_matrix') else r.as_dcm()

    add_rotation = is_enabled("add_rotation")
    add_jump_rotation = is_enabled("add_jump_rotation")
    # bounds rotation is if we do rotation between ]0,-5[ , and [0, 5[
    add_bounds_rotation = is_enabled("add_bounds_rotation")

    if not (add_bounds_rotation or add_rotation or add_jump_rotation):
        # Without a rotation there is no angle to put in the output file name
        raise ValueError(
            "rotate_signal needs one of 'add_bounds_rotation', 'add_rotation' "
            "or 'add_jump_rotation' set to 'True' in params"
        )

    min_len = params.get('min_len',1000)
    max_len = params.get('max_len',10000)
    rot_ang = params.get('rot_ang',45)

    temp_train_X_orig = pd.read_csv(path_train_data + measurement_id + '.csv')

    if verbose == 1:
        temp_train_X_orig.plot(x="Timestamp", legend=True, subplots=True, title="Before")

    if mask_path is not None:
        print('Removing Inactivity')
        masked = apply_mask(path_train_data,
                            measurement_id,
                            mask_path)
        # Column 0 is kept as the time axis of the rows that survive the mask,
        # so timestamps stay aligned with the (shorter) rotated signal.
        # NOTE(review): assumes apply_mask returns time in column 0 followed by X, Y, Z - confirm.
        timestamps = masked.iloc[:, 0]
        temp_train_X = np.array(masked.iloc[:, 1:], dtype=float)
    else:
        timestamps = temp_train_X_orig.Timestamp
        # Copy so the in-place jump rotation never writes into the loaded frame
        temp_train_X = np.array(temp_train_X_orig.iloc[:, -3:], dtype=float)

    sig_len = temp_train_X.shape[0]
    if add_bounds_rotation:
        print('Adding bounds rotation between [', -rot_ang, ', ', -(rot_ang-5), '[ and [', rot_ang-5, ', ', rot_ang, '[')
        rot1 = np.random.randint(-rot_ang,-(rot_ang-5),size=1)[0]
        rot2 = np.random.randint(rot_ang-5,rot_ang,size=1)[0]
        print('Two suggested angles between both bounds are : ', rot1, ' and ', rot2)
        # Choose a rotation angle between the two intervals
        rot = np.random.choice([rot1, rot2])
        print('BOUND ROTATION ENDED UP BEING : ', rot)
        temp_train_X = np.dot(temp_train_X, rotation_matrix(rot))
    elif add_rotation:
        print('Adding rotation')
        print('rot_ang : ', rot_ang)
        rot = np.random.randint(-rot_ang,rot_ang,size=1)[0]
        print('ROTATION IS : ', rot)
        temp_train_X = np.dot(temp_train_X, rotation_matrix(rot))
    else:
        print('Adding Jump Rotation')
        # Reported in the file name; for jump rotation it ends up being the
        # angle of the last segment (0 if the signal is empty).
        rot = 0
        s_ind = 0
        while (s_ind < sig_len):
            # We rotate at random increments
            jump = np.random.randint(min_len,max_len,1)[0]

            rot = np.random.randint(-rot_ang,rot_ang,size=1)[0]
            temp_train_X[s_ind:s_ind+jump,:] = np.dot(temp_train_X[s_ind:s_ind+jump,:], rotation_matrix(rot))
            s_ind = s_ind + jump

    # Rebuild both parts on a fresh 0..n-1 index so concat pairs them row by row
    temp_train_X = pd.concat([pd.Series(np.asarray(timestamps), name="Timestamp"),
                              pd.DataFrame(temp_train_X, columns=["X","Y","Z"])], axis=1)

    if verbose == 1:
        pd.DataFrame(temp_train_X).plot(x="Timestamp", legend=True, subplots=True, title="After rotation")

    # If the rotation folder doesn't exist, we need to create it
    if not os.path.exists(rotation_path):
        os.makedirs(rotation_path)
        print('The rotate folder was created : ', rotation_path)

    # Save to a folder
    temp_train_X.to_csv(
        rotation_path + measurement_id + "_ang_"+str(rot)+".csv",
        index=False
    )


### Launch Rotation for Original Data

You will have to change the value of `rotation_path` to choose the folder where to save the new data! 

In [None]:
# Measurements where at least one of the three symptom labels is 2 or higher
severe_symptom_rows = (
    df_train_label[["dyskinesia", "on_off", "tremor"]].ge(2.0).any(axis=1)
)
df_train_label[severe_symptom_rows]

In [None]:
path_train_data, df_train_label = define_data_type(
    data_type, data_dir, training_or_ancillary
)

# To rotate only a few recordings, filter the labels first:
# list_measurement_id = ["3cf49c01-0499-4bad-9167-67691711204a"]
# df_train_label = interesting_patients(df_train_label=df_train_label,
#                                       list_measurement_id=list_measurement_id)

angle=45
num_jobs = 8

# Single random rotation in [-angle, angle[ applied to the whole signal
plain_rotation_params = dict(
    add_bounds_rotation='False',
    rot_ang=angle,
    add_rotation='True',
    add_jump_rotation='False',
)

# Everything except the measurement id is fixed, so the pool can map over ids
do_work = partial(
    rotate_signal,
    path_train_data="/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.combhpfnoinact/",
    params=plain_rotation_params,
    rotation_path="/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.combhpfnoinact.rotate_thesis/",
    # mask_path="/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.high_pass_mask/"
    mask_path=None,
    verbose=0,
)

with ProcessPoolExecutor(max_workers=num_jobs) as ex:
    results = list(ex.map(do_work, df_train_label['measurement_id']))

### Rotations with bounds 

In [None]:
# Produce a single rotated file: one measurement, one repetition, one angle

path_train_data, df_train_label = define_data_type(
    data_type, data_dir, training_or_ancillary
)

list_measurement_id = ["3cf49c01-0499-4bad-9167-67691711204a"]

df_train_label = interesting_patients(
    df_train_label=df_train_label,
    list_measurement_id=list_measurement_id,
)

for repetition_no in [1]:
    for angle in [30]:
        # !mkdir /export/fs02/mpgill/BeatPD/cis-pd.training_data.combhpfnoinact.rotate_bound_{angle}_{repetition_no}/
        # combhpfnoinact
        bounds_rotation_params = dict(
            add_bounds_rotation='True',
            rot_ang=angle,
            add_rotation='False',
            add_jump_rotation='False',
        )
        do_work = partial(
            rotate_signal,
            path_train_data="/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.combhpfnoinact/",
            params=bounds_rotation_params,
            rotation_path="/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.combhpfnoinact.rotate_thesis/",
            # mask_path="/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.high_pass_mask/"
            mask_path=None,
            verbose=0,
        )

        num_jobs = 8
        with ProcessPoolExecutor(max_workers=num_jobs) as ex:
            results = list(ex.map(do_work, df_train_label['measurement_id']))

In [None]:
path_train_data, df_train_label = define_data_type(data_type,
                                                   data_dir,
                                                   training_or_ancillary)


# One output folder per (angle, repetition): each repetition draws new random
# angles, so repeating the sweep yields additional augmented copies.
for repetition_no in [2, 3, 4, 5]:
    for angle in [5, 10, 15, 20, 25, 30, 35, 40, 45]:
        bound_rotation_path = "/export/fs02/mpgill/BeatPD/cis-pd.training_data.combhpfnoinact.rotate_bound_"+str(angle)+"_"+str(repetition_no)+"/"
        # Replaces `!mkdir`, which fails when the folder already exists
        os.makedirs(bound_rotation_path, exist_ok=True)
        # combhpfnoinact
        do_work = partial(
                rotate_signal,
                path_train_data="/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.combhpfnoinact/",
                # All three flags are given: rotate_signal indexes each of them in params
                params={'add_bounds_rotation': 'True', 'rot_ang': angle, 'add_rotation': 'False', 'add_jump_rotation': 'False'},
                rotation_path=bound_rotation_path,
                mask_path=None,#"/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.high_pass_mask/",
                verbose=0,
            )

        num_jobs = 8
        with ProcessPoolExecutor(num_jobs) as ex:
            results = list(ex.map(do_work, df_train_label['measurement_id']))

In [None]:
# For every (repetition, angle) folder produced by the bounds-rotation sweep:
# symlink it into the BeatPD data directory, then run the project's
# create_scp_files.sh script on the corresponding dataset name.
for repetition_no in [2, 3, 4, 5]:
    for angle in [5, 10, 15, 20, 25, 30, 35, 40, 45]:
        !ln -s /export/fs02/mpgill/BeatPD/cis-pd.training_data.combhpfnoinact.rotate_bound_{angle}_{repetition_no}/ /home/sjoshi/codes/python/BeatPD/data/BeatPD/
        !./tsfresh/submit/create_scp_files.sh combhpfnoinact.rotate_bound_{angle}_{repetition_no}

### Launch Rotation for Comb HPF Noinact

In [None]:
path_train_data, df_train_label = define_data_type(data_type,
                                                   data_dir,
                                                   training_or_ancillary)

# Original
# All three flags are given: rotate_signal indexes each of them in params.
# NOTE(review): the output folder is called "rotate_5" but no 'rot_ang' is
# passed, so rotate_signal falls back to its default of 45 - confirm intent.
do_work = partial(
        rotate_signal,
        path_train_data="/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.combhpfnoinact/",
        params={'add_rotation': 'True', 'add_bounds_rotation': 'False', 'add_jump_rotation': 'False'},
        rotation_path="/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.combhpfnoinact.rotate_5/",
        mask_path=None,#"/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.high_pass_mask/",
        verbose=0,
    )

num_jobs = 8
with ProcessPoolExecutor(num_jobs) as ex:
    results = list(ex.map(do_work, df_train_label['measurement_id']))

### Making more balanced dataset 

In [None]:

path_train_data, df_train_label = define_data_type(data_type,
                                                   data_dir,
                                                   training_or_ancillary)

num_jobs = 8

# All three flags are given: rotate_signal indexes each of them in params
balance_rotation_params = {'add_rotation': 'True', 'add_bounds_rotation': 'False', 'add_jump_rotation': 'False'}


def _labels_at_or_above(threshold):
    """Rows where dyskinesia, on_off or tremor is >= threshold and on_off is not 0."""
    any_label_high = ((df_train_label.dyskinesia >= threshold)
                      | (df_train_label.on_off >= threshold)
                      | (df_train_label.tremor >= threshold))
    return df_train_label[any_label_high & (df_train_label.on_off != 0)]


def _rotate_into(df_subset, rotation_path):
    """Write one randomly rotated copy of every measurement of df_subset into rotation_path."""
    # combhpfnoinact
    do_work = partial(
            rotate_signal,
            path_train_data="/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.combhpfnoinact/",
            params=balance_rotation_params,
            rotation_path=rotation_path,
            mask_path=None,#"/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.high_pass_mask/",
            verbose=0,
        )
    with ProcessPoolExecutor(num_jobs) as ex:
        return list(ex.map(do_work, df_subset['measurement_id']))


df_train_label_1 = _labels_at_or_above(1.0)
results = _rotate_into(df_train_label_1,
                       "/export/fs02/mpgill/BeatPD/cis-pd.training_data.combhpfnoinact.rotate_balance_1/")

print('NEXT FOLDER')

df_train_label_3 = _labels_at_or_above(3.0)
results = _rotate_into(df_train_label_3,
                       "/export/fs02/mpgill/BeatPD/cis-pd.training_data.combhpfnoinact.rotate_balance_3/")

print('LAST FOLDER')
# NOTE(review): this filter is identical to the one used for rotate_balance_3
# (threshold 3.0). If rotate_balance_4 was meant to hold only label-4
# measurements the threshold should be 4.0 - left unchanged, confirm intent.
df_train_label_4 = _labels_at_or_above(3.0)
results = _rotate_into(df_train_label_4,
                       "/export/fs02/mpgill/BeatPD/cis-pd.training_data.combhpfnoinact.rotate_balance_4/")

## Visualization of measurements before and after rotation

In [None]:
# Reload the labels, then narrow them down to the measurement(s) to inspect.
path_train_data, df_train_label = define_data_type(data_type,
                                                   data_dir,
                                                   training_or_ancillary)

# Only the last assignment of list_measurement_id takes effect; the two
# earlier ones are overwritten before being used.
list_measurement_id = ["5cf68c8e-0b7a-4b73-ad4f-015c7a20fb5a"]

list_measurement_id = [
    "2d852742-10a9-4c56-9f38-779f2cd66879",
    "4fc3c295-857f-4920-8fa5-f21bfdc7ab4f",
    "db2e053a-0fb8-4206-891a-6f079fb14e3a",
]

list_measurement_id = ["db2e053a-0fb8-4206-891a-6f079fb14e3a"]

df_train_label = interesting_patients(df_train_label=df_train_label,
                                      list_measurement_id=list_measurement_id)

# NOTE(review): measurement_id, params, rotation_path and factor are not
# defined in this cell, so this call only works if they are left over from
# other cells. It looks like a pasted function signature - confirm the
# intended arguments against load_data's definition.
load_data(measurement_id, path_train_data, params, rotation_path, factor, mask_path=None, verbose=0)
    
# df_train_data = load_data_all(df_train_label, params={'add_rotation': 'True', 'my_data_path': path_train_data})

## Stretching / Shrinking 

### "Manual" low pass filter

In [None]:
def butter_lowpass(cutOff, fs, order=5):
    """
    Design a digital Butterworth low-pass filter.

    Keyword arguments:
    - cutOff: cutoff frequency, in the same unit as fs
    - fs: sampling frequency
    - order: order of the filter (default 5)

    Returns the (b, a) numerator / denominator coefficients from scipy's butter.
    """
    nyquist = 0.5 * fs
    # butter expects the cutoff as a fraction of the Nyquist frequency (between 0 and 1)
    return butter(order, cutOff / nyquist, btype='low', analog=False)

def butter_lowpass_filter(data, cutOff, fs, order=4):
    """
    Low-pass filter `data` with a Butterworth filter designed by butter_lowpass.

    The filter is applied in a single forward pass with scipy's lfilter.
    Returns the filtered signal.
    """
    numerator, denominator = butter_lowpass(cutOff, fs, order=order)
    return lfilter(numerator, denominator, data)

# FIXME: Not sure how to choose between filtfilt and lfilter.
# def butter_lowpass_filter(data, cutoff, fs, order):
#     normal_cutoff = cutoff / nyq
#     # Get the filter coefficients 
#     b, a = butter(order, normal_cutoff, btype='low', analog=False)
#     y = filtfilt(b, a, data)
#     return y

def apply_lowpass_filter(df_train_data, factor=0.9, fs=50.0, order=10):
    """
    Low-pass filter the three accelerometer axes before shrinking a signal.

    Keyword arguments:
    - df_train_data: DataFrame whose last three columns are the X, Y, Z axes
    - factor: shrinking factor the signal is about to be resampled with; the
              cutoff is placed at factor * (fs / 2). Must be strictly between
              0 and 1. Default 0.9, the value that used to be hard-coded.
    - fs: sample rate in Hz (default 50.0)
    - order: order of the Butterworth filter (default 10)

    Returns the tuple (X_filtered_data, Y_filtered_data, Z_filtered_data).
    Raises ValueError if factor is not strictly between 0 and 1.
    """
    if not 0 < factor < 1:
        # A cutoff at or above the Nyquist frequency is not a valid digital filter
        raise ValueError("factor must be strictly between 0 and 1, got {0}".format(factor))

    # Desired cutoff frequency of the filter, Hz (22.5 Hz with the defaults)
    cutoff = (fs / 2) * factor

    # Filter the data
    # X = [:,-3], Y = [:,-2], Z = [:,-1]
    X_filtered_data, Y_filtered_data, Z_filtered_data = (
        butter_lowpass_filter(df_train_data.iloc[:, axis], cutoff, fs, order)
        for axis in (-3, -2, -1)
    )

    return X_filtered_data, Y_filtered_data, Z_filtered_data

In [None]:
def resample_signal(measurement_id, path_train_data, resample_path, factor, mask_path=None, verbose=0):
    """
    Function to stretch or shrink a signal and save the result as CSV.

    The file `path_train_data + measurement_id + '.csv'` is read, its last three
    columns are resampled to int(len * factor) samples, and the result is
    written to `resample_path + measurement_id + '.csv'`. The time column keeps
    the original 0.02 s step: it is truncated when the signal gets shorter and
    extended past the last timestamp when it gets longer.

    Keyword arguments:
    - measurement_id: name (without extension) of the recording to resample
    - path_train_data: path to the data to load. Can provide original data or high_pass data
    - resample_path: folder where the resampled recording is saved (created if missing)
    - factor: use 0.90 to reduce the length of the recording of 10%. Use 1.10 to make the recording 10% longer.
    - mask_path: if provided, inactivity will be removed.
    - verbose: default 0. If 1 (and mask_path is provided), the signal is plotted before inactivity removal.
    """
    sample_period = 0.02  # seconds between two samples (50 Hz)

    def time_column_of(frame):
        # The two databases do not use the same name for the time axis
        return "Timestamp" if "Timestamp" in frame.columns else "t"

    print("Reading " + path_train_data + measurement_id + ".csv")
    df_train_data = pd.read_csv(path_train_data + measurement_id + ".csv")

    if (verbose == 1) and (mask_path is not None):
        df_train_data.plot(x=time_column_of(df_train_data), legend=True, subplots=True,
                           title="Before inactivity removal {0}".format(measurement_id))
        plt.show()

    if mask_path is not None:
        df_train_data = apply_mask(path_train_data, measurement_id, mask_path)

    if factor < 1:
        # Shrinking: low-pass first.
        # NOTE(review): the cutoff used by apply_lowpass_filter corresponds to a
        # factor of 0.9 regardless of the factor requested here.
        X_filtered_data, Y_filtered_data, Z_filtered_data = apply_lowpass_filter(df_train_data)
    else:
        X_filtered_data = df_train_data.iloc[:,-3]
        Y_filtered_data = df_train_data.iloc[:,-2]
        Z_filtered_data = df_train_data.iloc[:,-1]

    new_length = int(len(df_train_data) * factor)
    X_filtered_data = sg.resample(X_filtered_data, new_length)
    Y_filtered_data = sg.resample(Y_filtered_data, new_length)
    Z_filtered_data = sg.resample(Z_filtered_data, new_length)

    # Set the time axis
    x_axis_data_type = time_column_of(df_train_data)
    time = df_train_data[x_axis_data_type].reset_index(drop=True)
    missing_samples = new_length - len(time)
    if missing_samples <= 0:
        time = time.iloc[:new_length]
    else:
        # Continue the time axis one sample period after the last timestamp.
        # Integer multiples of the period give exactly `missing_samples` values
        # and never repeat the last existing timestamp.
        appended_time = time.iloc[-1] + sample_period * np.arange(1, missing_samples + 1)
        time = pd.concat([time, pd.Series(appended_time)], ignore_index=True)

    # Merge the dataframes together
    df_low_pass =  pd.DataFrame(np.vstack([time,
                                            X_filtered_data,
                                            Y_filtered_data,
                                            Z_filtered_data]).T,columns= [x_axis_data_type, "X", "Y", "Z"])

    # If the resample folder doesn't exist, we need to create it
    if not os.path.exists(resample_path):
        os.makedirs(resample_path)
        print('The resample folder was created : ', resample_path)

    # Save to a folder
    df_low_pass.to_csv(
        resample_path + measurement_id + ".csv",
        index=False
    )

### Generate the HPF + Inactivity Removed data augmentation: shrinking with a 0.9 factor

In [None]:
# path_train_data, df_train_label = define_data_type(data_type,
#                                                    data_dir,
#                                                    training_or_ancillary)

# path_train_data = "/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.high_pass/"
# mask_path = "/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.high_pass_mask/"
# factor = 0.9
# number = 2
# resample_path = "/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.high_pass_mask.resample_{0}_{1}/".format(factor, number)

# resample_signal(df_train_label, path_train_data, resample_path, factor, mask_path, verbose=0)

In [None]:
path_train_data, df_train_label = define_data_type(data_type,
                                                   data_dir,
                                                   training_or_ancillary)

print(df_train_label['measurement_id'])
path_train_data = "/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.high_pass/"
mask_path = "/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.high_pass_mask/"
# Single source of truth for the factor: the folder used to be named with 1.1
# while the data was resampled with 1.15.
factor = 1.15
number = 1
resample_path = "/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.combhpfnoinact.resample_{0}_{1}/".format(factor, number)

# High-pass data + inactivity mask, stretched by `factor`
do_work = partial(
        resample_signal,
        path_train_data=path_train_data,
        resample_path=resample_path,
        factor=factor,
        mask_path=mask_path,
        verbose=0,
    )

num_jobs = 8
with ProcessPoolExecutor(num_jobs) as ex:
    results = list(ex.map(do_work, df_train_label['measurement_id']))

In [None]:
path_train_data, df_train_label = define_data_type(
    data_type, data_dir, training_or_ancillary
)

factor = 0.95
number = 1
num_jobs = 8

# High-pass data + inactivity mask, shrunk by `factor`
do_work = partial(
    resample_signal,
    path_train_data="/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.high_pass/",
    resample_path="/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.combhpfnoinact.resample_{0}_{1}/".format(factor, number),
    factor=factor,
    mask_path="/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.high_pass_mask/",
    verbose=0,
)

with ProcessPoolExecutor(max_workers=num_jobs) as ex:
    results = list(ex.map(do_work, df_train_label['measurement_id']))

In [None]:
path_train_data, df_train_label = define_data_type(
    data_type, data_dir, training_or_ancillary
)

factor = 1.05
number = 1
num_jobs = 8

# High-pass data + inactivity mask, stretched by `factor`
do_work = partial(
    resample_signal,
    path_train_data="/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.high_pass/",
    resample_path="/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.combhpfnoinact.resample_{0}_{1}/".format(factor, number),
    factor=factor,
    mask_path="/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.high_pass_mask/",
    verbose=0,
)

with ProcessPoolExecutor(max_workers=num_jobs) as ex:
    results = list(ex.map(do_work, df_train_label['measurement_id']))

In [None]:
# Sanity check: count the regular files in the original folder and in each
# generated folder (lines of `ls -l` that start with '-').
!ls -l '/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data/' | egrep -c '^-'
!ls -l '/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.high_pass/' | egrep -c '^-'
!ls -l '/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.combhpfnoinact.resample_0.85_1/' | egrep -c '^-'
!ls -l '/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.combhpfnoinact.resample_1.15_1/' | egrep -c '^-'
!ls -l '/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.combhpfnoinact.resample_0.95_1/' | egrep -c '^-'
!ls -l '/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.combhpfnoinact.resample_1.05_1/' | egrep -c '^-'


!ls -l '/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.combhpfnoinact.noise_mu_0_sig_0.1/' | egrep -c '^-'


In [None]:
path_train_data, df_train_label = define_data_type(
    data_type, data_dir, training_or_ancillary
)

shrink_factor = 0.9
num_jobs = 8

# Original data, no inactivity removal, shrunk by 10%
do_work = partial(
    resample_signal,
    path_train_data="/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data/",
    resample_path="/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.resample_{0}/".format(shrink_factor),
    factor=shrink_factor,
    mask_path=None,
    verbose=0,
)

with ProcessPoolExecutor(max_workers=num_jobs) as ex:
    results = list(ex.map(do_work, df_train_label['measurement_id']))

In [None]:
path_train_data, df_train_label = define_data_type(
    data_type, data_dir, training_or_ancillary
)

stretch_factor = 1.1
num_jobs = 8

# Original data, no inactivity removal, stretched by 10%
do_work = partial(
    resample_signal,
    path_train_data="/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data/",
    resample_path="/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.resample_{0}/".format(stretch_factor),
    factor=stretch_factor,
    mask_path=None,
    verbose=0,
)

with ProcessPoolExecutor(max_workers=num_jobs) as ex:
    results = list(ex.map(do_work, df_train_label['measurement_id']))

### Playing around with stretching/shrinking on specific measurements 

In [None]:
## Import up sound alert dependencies
from IPython.display import Audio, display

def allDone():
    """Display an auto-playing audio clip, used as a sound alert at the end of a cell."""
    done_sound = Audio(
        url='https://sound.peal.io/ps/audios/000/000/537/original/woo_vu_luvub_dub_dub.wav',
        autoplay=True,
    )
    display(done_sound)



### Time domain to frequency + Plot

Source: https://www.oreilly.com/library/view/elegant-scipy/9781491922927/ch04.html

In [None]:
from scipy import fftpack

# Spectrum of the X axis (third column from the end) of one recording
df_train_data = pd.read_csv(path_train_data + "cc7b822c-e310-46f0-a8ea-98c95fdb67a1" + ".csv")
x = df_train_data.iloc[:,-3]
f_s = 50  # sampling rate, Hz
X = fftpack.fft(x)
print(x)

# fftfreq returns cycles per sample; scaling by the sampling rate gives Hz
freqs = fftpack.fftfreq(len(x)) * f_s
print(len(freqs))

fig, ax = plt.subplots()
ax.stem(freqs, np.abs(X))
ax.set_xlabel('Frequency in Hertz [Hz]')
ax.set_ylabel('Frequency Domain (Spectrum) Magnitude')
ax.set_xlim(-f_s / 2, f_s / 2)
ax.set_ylim(-5, 110)

allDone()

### Stretching Visualization

In [None]:
path_train_data, df_train_label = define_data_type(
    data_type, data_dir, training_or_ancillary
)

list_measurement_id = ["5cf68c8e-0b7a-4b73-ad4f-015c7a20fb5a"]

# Other recordings that were looked at:
# list_measurement_id = [
#     "2d852742-10a9-4c56-9f38-779f2cd66879",
#     "4fc3c295-857f-4920-8fa5-f21bfdc7ab4f",
#     "db2e053a-0fb8-4206-891a-6f079fb14e3a",
# ]

# Keep only the labels of the measurement_id we are most interested in
df_train_label = interesting_patients(
    df_train_label=df_train_label,
    list_measurement_id=list_measurement_id,
)

plot_accelerometer(
    df_train_label=df_train_label,
    data_type=data_type,
    path_train_data=path_train_data,
    path_accelerometer_plots=path_save_accelerometer_plots,
)

for idx in df_train_label.index:
    #y = librosa.core.resample(x, 4000, 16000)
    csv_name = df_train_label["measurement_id"][idx] + ".csv"
    df_train_data = pd.read_csv(path_train_data + csv_name)

    display(df_train_data)

    # Seconds -> datetimes, so that pandas can resample on the time axis
    df_train_data["Timestamp"] = pd.to_datetime(df_train_data["Timestamp"], unit='s')
    display(df_train_data)
    df_train_data = df_train_data.set_index("Timestamp", drop=False)
    print(type(df_train_data["Timestamp"]))

    # Average all samples falling in the same second
    #df_train_data_minute = df_train_data.resample('S').sum()
    df_train_data_minute = df_train_data.resample('S').mean()
    display(df_train_data_minute)

    # Replace the datetime index by a plain 0..n-1 counter for plotting
    n_seconds = len(df_train_data_minute)
    df_train_data_minute.index = np.arange(0, n_seconds)
    df_train_data_minute.plot(subplots=True)#(x="Timestamp", legend=True, subplots=True, title="Noise added")
    plt.show()
    

In [None]:
# Display the last two columns of df_train_data_minute (the per-second means
# built in the stretching visualization cell).
df_train_data_minute.iloc[:,-2:]

In [None]:
# Re-index the per-second means with a plain 0..n-1 counter and plot them on a
# single figure. The length is taken from the frame itself: the previous
# hard-coded 1200 raised a length-mismatch error for any other recording length.
n_rows = len(df_train_data_minute)
df = pd.DataFrame(np.arange(0, n_rows))
df_train_data_minute.index = np.arange(0, n_rows)
df_train_data_minute.plot()#(x="Timestamp", legend=True, subplots=True, title="Noise added")
plt.show()

## Generate Noise Data Augmentation

* Does adding noise also act as an offset removal? 
  * I guess it adds a noise centered at 0 so it kind of does 
  
We also have two new hyperparameters here:
* `mu` (mean): centre of the distribution
* `sigma` (standard deviation): spread, or width, of the distribution


Sources:
* https://stackoverflow.com/questions/46093073/adding-gaussian-noise-to-a-dataset-of-floating-points-and-save-it-python 

In [None]:
def add_noise(df_train_label, noise_path, path_train_data, data_type, mask_path=None, mu=0, sigma=0.1,
              random_state=None):
    """
    Write a noisy copy of every recording listed in df_train_label.

    For each measurement_id, Gaussian noise is added to the last three columns
    of the recording. The first column is kept unchanged and the result is
    saved as <noise_path>/<measurement_id>.csv (without the DataFrame index).

    Arguments:
    - df_train_label: DataFrame with a "measurement_id" column; one file is
      written per entry
    - noise_path: Output folder, created if it does not exist yet
    - path_train_data: Folder containing the input <measurement_id>.csv files
    - data_type: Not used by this function; kept so existing calls keep working

    Keyword Arguments:
    - mask_path: Provide if you want to remove inactivity. When given, the
      recording is obtained from apply_mask(path_train_data, measurement_id,
      mask_path) instead of being read directly from the CSV file
    - mu: Mean of the Gaussian noise
    - sigma: Standard deviation of the Gaussian noise
    - random_state: Optional seed. When given, the noise comes from a private
      np.random.RandomState(random_state) so the output is reproducible; when
      None, NumPy's global random state is used, exactly as before
    """
    # A private generator is only created on request, so existing
    # np.random.seed(...) workflows keep producing the same numbers.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    # If the noise folder doesn't exist, we need to create it.
    # Done once up front instead of being re-checked for every file.
    if not os.path.exists(noise_path):
        os.makedirs(noise_path, exist_ok=True)
        print('The noise folder was created : ', noise_path)

    for measurement_id in df_train_label["measurement_id"]:
        print(measurement_id)

        if mask_path is not None:
            # apply_mask returns the recording to use, so the raw CSV is not
            # read here (its content used to be loaded and then thrown away).
            df_train_data = apply_mask(path_train_data, measurement_id, mask_path)
        else:
            df_train_data = pd.read_csv(os.path.join(path_train_data, measurement_id + ".csv"))

        # creating a noise with the same dimension as the three signal columns
        df_axes = df_train_data.iloc[:, -3:]
        noise = rng.normal(mu, sigma, df_axes.shape)

        # First column stays as it is; only the last three columns get noise
        df_signal = pd.concat([df_train_data.iloc[:, 0], df_axes + noise], axis=1)

        # Save to the noise folder
        df_signal.to_csv(os.path.join(noise_path, measurement_id + ".csv"), index=False)

### Create noise augmented csv files (on original data)

In [None]:
# Labels and folder of the recordings selected by data_type / training_or_ancillary.
path_train_data, df_train_label = define_data_type(data_type, data_dir, training_or_ancillary)

data_type = "cis"
noise_path = "/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.noise_mu_0_sig_0.1/"

# No mu / sigma given: add_noise falls back to its defaults.
add_noise(
    df_train_label=df_train_label,
    noise_path=noise_path,
    path_train_data=path_train_data,
    data_type=data_type,
)

### Create noise augmented csv (on high pass filtered + inactivity removed data)

### On original data

In [None]:
# One noisy copy of the data set per sigma (alternative set: [0.1, 0.2, 0.3, 0.4, 0.5]).
for sigma in (0.01, 0.001):
    path_train_data, df_train_label = define_data_type(data_type, data_dir, training_or_ancillary)

    # Point explicitly at the cis-pd.training_data folder, i.e. the original
    # recordings announced by the section heading.
    path_train_data = '/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data/'
    noise_path = "/export/fs02/mpgill/BeatPD/cis-pd.training_data.noise_mu_0_sig_{0}_1/".format(sigma)
    data_type = "cis"

    add_noise(
        df_train_label=df_train_label,
        noise_path=noise_path,
        path_train_data=path_train_data,
        data_type=data_type,
        mu=0,
        sigma=sigma,
    )

In [None]:
# Symlink the noise folders stored under /export/fs02 into the BeatPD data directory.
# NOTE(review): these links cover sigma 0.1 to 0.5, while the generation loop in the
# previous cell currently iterates over 0.01 and 0.001; confirm the folders exist.
!ln -s /export/fs02/mpgill/BeatPD/cis-pd.training_data.noise_mu_0_sig_0.1_1/ /home/sjoshi/codes/python/BeatPD/data/BeatPD/
!ln -s /export/fs02/mpgill/BeatPD/cis-pd.training_data.noise_mu_0_sig_0.2_1/ /home/sjoshi/codes/python/BeatPD/data/BeatPD/
!ln -s /export/fs02/mpgill/BeatPD/cis-pd.training_data.noise_mu_0_sig_0.3_1/ /home/sjoshi/codes/python/BeatPD/data/BeatPD/
!ln -s /export/fs02/mpgill/BeatPD/cis-pd.training_data.noise_mu_0_sig_0.4_1/ /home/sjoshi/codes/python/BeatPD/data/BeatPD/
!ln -s /export/fs02/mpgill/BeatPD/cis-pd.training_data.noise_mu_0_sig_0.5_1/ /home/sjoshi/codes/python/BeatPD/data/BeatPD/
# Run create_scp_files.sh once per noise folder suffix.
# Presumably this prepares the file lists for the tsfresh extraction; confirm in the script.
!./tsfresh/submit/create_scp_files.sh noise_mu_0_sig_0.1_1
!./tsfresh/submit/create_scp_files.sh noise_mu_0_sig_0.2_1
!./tsfresh/submit/create_scp_files.sh noise_mu_0_sig_0.3_1
!./tsfresh/submit/create_scp_files.sh noise_mu_0_sig_0.4_1
!./tsfresh/submit/create_scp_files.sh noise_mu_0_sig_0.5_1

### On Combhpfnoinact data 

In [None]:
# Symlink the combhpfnoinact noise folders (sigma 0.4 and 0.5) into the BeatPD data directory.
!ln -s /export/fs02/mpgill/BeatPD/cis-pd.training_data.combhpfnoinact.noise_mu_0_sig_0.4_1/ /home/sjoshi/codes/python/BeatPD/data/BeatPD/
!ln -s /export/fs02/mpgill/BeatPD/cis-pd.training_data.combhpfnoinact.noise_mu_0_sig_0.5_1/ /home/sjoshi/codes/python/BeatPD/data/BeatPD/


In [None]:
# One noisy copy per sigma (alternative set: [0.4, 0.5]; 0.2 and 0.3 were noted as missing).
for sigma in (0.01, 0.001):
    path_train_data, df_train_label = define_data_type(data_type, data_dir, training_or_ancillary)

    # Override the input folder: start from the high-pass filtered recordings
    # and hand add_noise the mask folder so that inactivity is removed.
    path_train_data = '/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.high_pass/'
    mask_path = '/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.high_pass_mask/'
    noise_path = "/export/fs02/mpgill/BeatPD/cis-pd.training_data.combhpfnoinact.noise_mu_0_sig_{0}_1/".format(sigma)
    data_type = "cis"

    add_noise(
        df_train_label=df_train_label,
        noise_path=noise_path,
        path_train_data=path_train_data,
        data_type=data_type,
        mask_path=mask_path,
        mu=0,
        sigma=sigma,
    )

In [None]:
# Symlink the combhpfnoinact noise folders for sigma 0.01 and 0.001 into the BeatPD data directory.
!ln -s /export/fs02/mpgill/BeatPD/cis-pd.training_data.combhpfnoinact.noise_mu_0_sig_0.01_1/ /home/sjoshi/codes/python/BeatPD/data/BeatPD/
!ln -s /export/fs02/mpgill/BeatPD/cis-pd.training_data.combhpfnoinact.noise_mu_0_sig_0.001_1/ /home/sjoshi/codes/python/BeatPD/data/BeatPD/


#### Out of curiosity, what happens if mu=0.1, sigma=0.1? That's a lot more noise 

In [None]:
path_train_data, df_train_label = define_data_type(data_type,
                                                       data_dir,
                                                       training_or_ancillary)

# Redefine manually the path because we want the data where high pass was already applied
path_train_data = '/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.high_pass/'
mask_path = '/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.high_pass_mask/'
noise_path = "/export/fs02/mpgill/BeatPD/cis-pd.training_data.combhpfnoinact.noise_mu_0.1_sig_0.1_1/".format(sigma)
data_type = "cis"

add_noise(df_train_label, noise_path, path_train_data, data_type, mask_path, mu=0.1, sigma=0.1)

!ln -s /export/fs02/mpgill/BeatPD/cis-pd.training_data.combhpfnoinact.noise_mu_0.1_sig_0.1_1/ /home/sjoshi/codes/python/BeatPD/data/BeatPD/

#### Repeat 5 times mu = 0, sigma = 0.1

In [None]:
# Five independent noisy copies; the repetition number ends up in the folder name.
for no in range(1, 6):
    path_train_data, df_train_label = define_data_type(data_type, data_dir, training_or_ancillary)

    # Override the input folder: high-pass filtered recordings plus the mask folder.
    path_train_data = '/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.high_pass/'
    mask_path = '/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.high_pass_mask/'
    noise_path = "/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.combhpfnoinact.noise_mu_0_sig_0.1_{0}/".format(no)
    data_type = "cis"

    # mu and sigma are left at add_noise's defaults (mu=0, sigma=0.1).
    add_noise(
        df_train_label=df_train_label,
        noise_path=noise_path,
        path_train_data=path_train_data,
        data_type=data_type,
        mask_path=mask_path,
    )

### Added noise visualization

In [None]:
# Folders of noise copies #5 and #2 and of the recordings without added noise.
noise_path_5 = "/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.combhpfnoinact.noise_mu_0_sig_0.1_5/"
noise_path_2 = "/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.combhpfnoinact.noise_mu_0_sig_0.1_2/"
path_train_comb = "/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.combhpfnoinact/"

# The same measurement is read from each of the three folders.
measurement_file = "2d852742-10a9-4c56-9f38-779f2cd66879" + ".csv"
df_train_data, df_train_data_5, df_train_data_2 = (
    pd.read_csv(folder + measurement_file)
    for folder in (path_train_comb, noise_path_5, noise_path_2)
)

In [None]:
# Summary statistics of the measurement read from path_train_comb (no added-noise folder).
df_train_data.describe()

In [None]:
# Summary statistics of the same measurement read from noise copy #5.
df_train_data_5.describe()

In [None]:
# Show the samples of noise copy #5.
display(df_train_data_5)

In [None]:
# Show the samples of noise copy #2, to compare with copy #5 above.
display(df_train_data_2)

In [None]:
import re


def _path_value(pattern, path, what):
    """Return the first match of `pattern` in `path`; raise ValueError naming `what` if there is none."""
    found = re.search(pattern, path)
    if found is None:
        raise ValueError("Could not find " + what + " in path: " + path)
    return found.group(0)


def _data_aug_title(path, aug_type, title):
    """Build the figure title for `aug_type`, reading the parameters from the folder name in `path`."""
    # Raw strings: '\d' inside a normal string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    if aug_type == "noise":
        mu = _path_value(r'(?<=mu_)[\d+.]+', path, "mu_<value>")
        sigma = _path_value(r'(?<=sig_)[\d+.]+', path, "sig_<value>")
        return "Noise added. Mean="+mu+", sigma = "+sigma
    if aug_type == "rotation":
        return title
    if aug_type == "resampling":
        factor = _path_value(r'(?<=resample_)[\d+.]+', path, "resample_<value>")
        return "Resampling with factor "+factor
    raise ValueError("Unknown aug_type " + repr(aug_type)
                     + "; expected 'noise', 'rotation' or 'resampling'")


def plot_data_aug(df_train_label, path, aug_type, title=None, path_accelerometer_plots=None):
    """
    Plot every augmented recording listed in df_train_label.

    For each measurement_id, <path>/<measurement_id>.csv is read, its
    describe() table is displayed, and the columns are plotted against
    "Timestamp" as subplots.

    Arguments:
    - df_train_label: DataFrame with a "measurement_id" column
    - path: Folder of the augmented CSV files. For "noise" the values after
      "mu_" and "sig_" are read from this path, for "resampling" the value
      after "resample_"
    - aug_type: "noise", "rotation" or "resampling"

    Keyword Arguments:
    - title: Figure title, only used when aug_type is "rotation"
    - path_accelerometer_plots: If provided, each figure is also saved there as
      .png and .pdf, named after the title and the measurement_id

    Raises:
    - ValueError: unknown aug_type, or the expected parameter is not in path
    """
    # The title only depends on the arguments, so it is built (and validated)
    # once, before any file is read.
    plot_title = _data_aug_title(path, aug_type, title)

    file_prefix = None
    if path_accelerometer_plots is not None:
        # A rotation plot may have no title; fall back to the augmentation
        # type so that saving does not fail on None.
        label = plot_title if plot_title is not None else aug_type
        file_prefix = label.replace(" ", "_").replace(",","").replace(".","").replace("=","_")

    for measurement_id in df_train_label["measurement_id"]:
        df_train_data = pd.read_csv(os.path.join(path, measurement_id + ".csv"))
        display(df_train_data.describe())
        df_train_data.plot(x="Timestamp", legend=True, subplots=True, title=plot_title)
        # Save plotted graph with the title and the measurement_id as name of the file
        if path_accelerometer_plots is not None:
            figure_base = os.path.join(path_accelerometer_plots, file_prefix + "_" + measurement_id)
            plt.savefig(figure_base + ".png")
            plt.savefig(figure_base + ".pdf")
        plt.show()

In [None]:
# Reload the full label table before filtering it down to a few measurements.
path_train_data, df_train_label = define_data_type(data_type,
                                                   data_dir,
                                                   training_or_ancillary)

# NOTE: list_measurement_id is assigned four times in a row; only the LAST
# assignment is used by the filter below. The earlier lists are presumably kept
# as alternative selections to switch between by hand.
list_measurement_id = ["5cf68c8e-0b7a-4b73-ad4f-015c7a20fb5a"]


list_measurement_id = [
    "2d852742-10a9-4c56-9f38-779f2cd66879",
    "4fc3c295-857f-4920-8fa5-f21bfdc7ab4f",
    "db2e053a-0fb8-4206-891a-6f079fb14e3a",
]

list_measurement_id = [
    "5cf68c8e-0b7a-4b73-ad4f-015c7a20fb5a",
    "cc7b822c-e310-46f0-a8ea-98c95fdb67a1",
    "5163afe8-a6b0-4ea4-b2ba-9b4501dd5912",
    "db2e053a-0fb8-4206-891a-6f079fb14e3a",
    "2d852742-10a9-4c56-9f38-779f2cd66879",
    "2e3a4c9c-ff01-4a28-bfcf-ce9b7633a39d",  # no inactivity should be removed
    "3cf49c01-0499-4bad-9167-67691711204a",  # no inactivity should be removed; NOT THERE??
    "3d0f965c-9d72-43d1-9369-1ea3acf963cc",  # NOT THERE ???
    "4b269cc2-8f0c-4816-adbf-10c0069b8833",
    "4bc51b90-bfce-4231-85e1-5de3b4bc0745",
    "4fc3c295-857f-4920-8fa5-f21bfdc7ab4f",
]

# Effective selection: this single measurement.
list_measurement_id = ["3cf49c01-0499-4bad-9167-67691711204a"]

# Filter df_train_label according to the measurement_id we are most interested in
df_train_label = interesting_patients(df_train_label=df_train_label,
                                      list_measurement_id=list_measurement_id)

# Override the folder returned by define_data_type: plot the combhpfnoinact recordings.
path_train_data = '/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.combhpfnoinact/'

plot_accelerometer(df_train_label=df_train_label,
                   data_type=data_type,
                   path_train_data=path_train_data,
                   path_accelerometer_plots="/export/b19/mpgill/BeatPD_data_aug_plots/")

# Root folder used by the (commented-out) plot_data_aug calls below.
data_path="/home/sjoshi/codes/python/BeatPD/data/BeatPD/"

# Noise
# plot_data_aug(df_train_label, data_path+"cis-pd.training_data.combhpfnoinact.noise_mu_0_sig_0.1/", "noise",
#               path_accelerometer_plots="/export/b19/mpgill/BeatPD_data_aug_plots/noise/")
# plot_data_aug(df_train_label, data_path+"cis-pd.training_data.combhpfnoinact.noise_mu_0_sig_0.2_1/", "noise",
#              path_accelerometer_plots="/export/b19/mpgill/BeatPD_data_aug_plots/noise/")
# plot_data_aug(df_train_label, data_path+"cis-pd.training_data.combhpfnoinact.noise_mu_0_sig_0.3_1/", "noise",
#              path_accelerometer_plots="/export/b19/mpgill/BeatPD_data_aug_plots/noise/")
# plot_data_aug(df_train_label, data_path+"cis-pd.training_data.combhpfnoinact.noise_mu_0_sig_0.4_1/", "noise",
#              path_accelerometer_plots="/export/b19/mpgill/BeatPD_data_aug_plots/noise/")
# plot_data_aug(df_train_label, data_path+"cis-pd.training_data.combhpfnoinact.noise_mu_0_sig_0.5_1/", "noise",
#              path_accelerometer_plots="/export/b19/mpgill/BeatPD_data_aug_plots/noise/")

# # Rotation 
# plot_data_aug(df_train_label, data_path+"cis-pd.training_data.combhpfnoinact.rotate_1/", "rotation", "Rotation 1 [-45,45[",
#              path_accelerometer_plots="/export/b19/mpgill/BeatPD_data_aug_plots/rotation/")
# plot_data_aug(df_train_label, data_path+"cis-pd.training_data.combhpfnoinact.rotate_2/", "rotation", "Rotation 2 [-45,45[",
#              path_accelerometer_plots="/export/b19/mpgill/BeatPD_data_aug_plots/rotation/")
# plot_data_aug(df_train_label, data_path+"cis-pd.training_data.combhpfnoinact.rotate_3/", "rotation", "Rotation 3 [-45,45[",
#              path_accelerometer_plots="/export/b19/mpgill/BeatPD_data_aug_plots/rotation/")
# plot_data_aug(df_train_label, data_path+"cis-pd.training_data.combhpfnoinact.rotate_bound_30/", "rotation", "Rotation Bounds [-30, -25[, [25, 30[",
#              path_accelerometer_plots="/export/b19/mpgill/BeatPD_data_aug_plots/rotation/")

# # Resample 
# plot_data_aug(df_train_label, data_path+"cis-pd.training_data.combhpfnoinact.resample_0.85_1/", "resampling",
#              path_accelerometer_plots="/export/b19/mpgill/BeatPD_data_aug_plots/resampling/")
# plot_data_aug(df_train_label, data_path+"cis-pd.training_data.combhpfnoinact.resample_0.9/", "resampling",
#              path_accelerometer_plots="/export/b19/mpgill/BeatPD_data_aug_plots/resampling/")
# plot_data_aug(df_train_label, data_path+"cis-pd.training_data.combhpfnoinact.resample_0.95_1/", "resampling",
#              path_accelerometer_plots="/export/b19/mpgill/BeatPD_data_aug_plots/resampling/")
# plot_data_aug(df_train_label, data_path+"cis-pd.training_data.combhpfnoinact.resample_1.05_1/", "resampling",
#              path_accelerometer_plots="/export/b19/mpgill/BeatPD_data_aug_plots/resampling/")
# plot_data_aug(df_train_label, data_path+"cis-pd.training_data.combhpfnoinact.resample_1.1/", "resampling",
#              path_accelerometer_plots="/export/b19/mpgill/BeatPD_data_aug_plots/resampling/")
# plot_data_aug(df_train_label, data_path+"cis-pd.training_data.combhpfnoinact.resample_1.15_1/", "resampling",
#              path_accelerometer_plots="/export/b19/mpgill/BeatPD_data_aug_plots/resampling/")

# Drafts

### This measurement didn't have any high pass for some reason 

In [None]:
# Re-run the high-pass step for the one measurement named in the heading above.
data_type = "cis"
path_train_data, df_train_label = define_data_type(data_type, data_dir, 'training_data')

list_measurement_id = ["dc90dc36-b4e5-43ec-b3e8-47c39c763c71"]
high_pass_path = '/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.high_pass/'

# Filter df_train_label according to the measurement_id we are most interested in
df_train_label = interesting_patients(df_train_label=df_train_label, list_measurement_id=list_measurement_id)
print(df_train_label)

# high_pass_filter comes from transform_data; presumably it writes the filtered
# recording into high_pass_path (TODO confirm against its definition).
high_pass_filter(df_train_label, high_pass_path, path_train_data, data_type)

### Example on how to do rotation on only one measurement 

In [None]:
# print(df_train_label['measurement_id'])
# path_train_data = "/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.high_pass/"
# mask_path = "/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.high_pass_mask/"
# factor = 1.1
# resample_path = "/home/sjoshi/codes/python/BeatPD/data/BeatPD/cis-pd.training_data.high_pass_mask.resample_{0}/".format(factor)


# rotate_signal(measurement_id, path_train_data, params, rotation_path, factor, mask_path=None, verbose=0):


### Using decimate of scipy

In [None]:


# Reload the labels and keep a single measurement for the experiment.
path_train_data, df_train_label = define_data_type(data_type, data_dir, training_or_ancillary)

list_measurement_id = ["5cf68c8e-0b7a-4b73-ad4f-015c7a20fb5a"]
df_train_label = interesting_patients(
    df_train_label=df_train_label,
    list_measurement_id=list_measurement_id,
)

for idx in df_train_label.index:
    csv_file = path_train_data + df_train_label["measurement_id"][idx] + ".csv"
    df_train_data = pd.read_csv(csv_file)

    # Third column from the end of the recording
    x = df_train_data.iloc[:, -3]
    print(df_train_data.shape)

    # Downsample that column by a factor of 2 with scipy's zero-phase decimation
    z = sg.decimate(x, 2, zero_phase=True)
    print(z)