In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import os

import xgboost
from sklearn import metrics

from scipy.signal import argrelmax, argrelmin

import pickle

## Dataset pre-processing

### Misc Data

In [2]:
# Per-recording metadata for both datasets, read in the same order as before.
defog_meta, tdcsfog_meta = (
    pd.read_csv(csv_path)
    for csv_path in ('./defog_metadata.csv', './tdcsfog_metadata.csv')
)
# Keep a single row (the first one encountered) per Subject.
sub_meta = pd.read_csv('./subjects.csv').drop_duplicates(subset=['Subject'])

In [3]:
# Show the metadata row of one example defog recording.
defog_meta[defog_meta['Id'].eq('02ab235146')]

Unnamed: 0,Id,Subject,Visit,Medication
0,02ab235146,ab54e1,2,on


In [4]:
# Show the subject-level row for the subject of that defog recording.
sub_meta[sub_meta['Subject'].eq('ab54e1')]

Unnamed: 0,Subject,Visit,Age,Sex,YearsSinceDx,UPDRSIII_On,UPDRSIII_Off,NFOGQ
93,ab54e1,1.0,79,F,8.0,52.0,55.0,28


In [5]:
# Show the metadata row of one example tdcsfog recording.
tdcsfog_meta[tdcsfog_meta['Id'].eq('003f117e14')]

Unnamed: 0,Id,Subject,Visit,Test,Medication
0,003f117e14,13abfd,3,2,on


In [6]:
# Show the subject-level row for the subject of that tdcsfog recording.
sub_meta[sub_meta['Subject'].eq('13abfd')]

Unnamed: 0,Subject,Visit,Age,Sex,YearsSinceDx,UPDRSIII_On,UPDRSIII_Off,NFOGQ
10,13abfd,,68,F,9.0,17.0,15.0,15


In [7]:
def extract_45(single_waveform, sample_rate=100):
    """Extract nine time- and frequency-domain features from one waveform window.

    The trailing number in each ``# name N`` comment below is the feature
    number the value is labelled with elsewhere in this notebook
    (0, 1, 2 and 39-44); only these nine values are produced.

    Parameters
    ----------
    single_waveform : 1-D sequence of numbers
        The window to describe.
    sample_rate : number, default 100
        Multiplier applied to the normalised FFT bin frequencies, so the
        returned frequencies are expressed in the units of ``sample_rate``.

    Returns
    -------
    list
        ``[x, y, z, f_base, sp_mag_base, f_2, sp_mag_2, f_3, sp_mag_3]`` where
        x / y are the values at the first two relative maxima, z the value at
        the first relative minimum, and the remaining pairs are the frequency
        and length-normalised magnitude of the first three relative maxima of
        the zero-padded magnitude spectrum (in bin order, not sorted by height).

    Raises
    ------
    IndexError
        If the window has fewer than two relative maxima, no relative minimum,
        or its spectrum has fewer than three relative maxima.
    """
    def _next_pow2(x):
        # Smallest power of two that is >= x (for x >= 1).
        return 1 << (x - 1).bit_length()

    waveform = np.asarray(single_waveform)
    n_samples = len(waveform)

    # Strict local extrema of the raw signal.
    maxima_index = argrelmax(waveform)[0]
    minima_index = argrelmin(waveform)[0]

    # Magnitude spectrum, zero-padded to 16x the next power of two so the
    # spectral peaks are located on a finer frequency grid.
    sp_mag = np.abs(np.fft.fft(waveform, n=_next_pow2(n_samples) * 16))
    freqs = np.fft.fftfreq(len(sp_mag))
    sp_mag_maxima_index = argrelmax(sp_mag)[0]

    features = [
        waveform[maxima_index[0]],  # x 0
        waveform[maxima_index[1]],  # y 1
        waveform[minima_index[0]],  # z 2
    ]
    # f_base 39, sp_mag_base 40, f_2 41, sp_mag_2 42, f_3 43, sp_mag_3 44
    for rank in range(3):
        # Indexing (rather than slicing) keeps the IndexError when a peak is missing.
        peak = sp_mag_maxima_index[rank]
        features.append(freqs[peak] * sample_rate)
        features.append(sp_mag[peak] / n_samples)
    return features


In [8]:
def calFeatMat(sig,window=400,sr=100):
    """Build a per-sample feature matrix from sliding windows over ``sig``.

    For each window ``sig[i:i + window]`` the nine ``extract_45`` features are
    stored in row ``i + window`` (the sample just after the window). Rows for
    which no features could be computed are filled from the nearest later
    computed row, and any rows left after the last computed row are filled
    from the nearest earlier one. If nothing could be computed the matrix
    stays all zeros.

    Parameters
    ----------
    sig : 1-D array-like
        Signal samples.
    window : int, default 400
        Window length in samples.
    sr : number, default 100
        Forwarded to ``extract_45`` as ``sample_rate``.

    Returns
    -------
    numpy.ndarray of shape ``(len(sig), 9)``
    """
    n_samples = len(sig)
    featmat = np.zeros((n_samples, 9))
    featflag = np.zeros(n_samples, dtype=np.bool_)

    # Only iterate over windows that are full length and whose target row
    # exists; iterating further used to compute features on truncated windows
    # and then discard them through an out-of-bounds write.
    # NOTE(review): the first window starts at index `window`, so rows
    # [0, 2*window) are never computed directly and are back-filled below --
    # confirm this start offset is intended.
    for start in range(window, n_samples - window):
        stop = start + window
        try:
            featmat[stop] = extract_45(sig[start:stop], sample_rate=sr)
        except Exception:
            # Best effort: a window without enough extrema yields no features.
            # (KeyboardInterrupt / SystemExit are deliberately not swallowed.)
            continue
        featflag[stop] = True

    # Back-fill: walk from the end so each missing row takes the next computed row.
    fill_row = None
    for i in range(n_samples - 1, -1, -1):
        if featflag[i]:
            fill_row = featmat[i]
        elif fill_row is not None:
            featmat[i] = fill_row
            featflag[i] = True

    # Forward-fill whatever is still missing (rows after the last computed one).
    fill_row = None
    for i in range(n_samples):
        if featflag[i]:
            fill_row = featmat[i]
        elif fill_row is not None:
            featmat[i] = fill_row
            featflag[i] = True

    return featmat

def accFeatMats(pdat,sr=100):
    """Compute the window features of the ``AccML`` channel of one recording.

    Parameters
    ----------
    pdat : pandas.DataFrame
        Recording with an ``'AccML'`` column.
    sr : int, default 100
        Sampling rate of the recording. It is used both as the window length
        in samples and as the sample rate for the spectral features.

    Returns
    -------
    pandas.DataFrame
        One row per sample, columns ``'0','1','2','39','40','41','42','43','44'``.
    """
    clms = ['0','1','2','39','40','41','42','43','44']

    # `sr` used to be passed positionally, which bound it to calFeatMat's
    # `window` parameter and left the sample rate at its default of 100.
    # Keep the window length that call produced (sr samples) and also forward
    # the real sample rate.
    accML = calFeatMat(pdat['AccML'].to_numpy(), window=sr, sr=sr)

    return pd.DataFrame(accML,columns=clms)

## Train dataset

In [9]:
# Column layout used to seed the per-subject train / validation frames.
split_cols = [
    # raw signal and event labels
    'Time', 'AccV', 'AccML', 'AccAP',
    'StartHesitation', 'Turn', 'Walking',
    # recording / subject metadata
    'Subject', 'Medication', 'Age', 'Sex', 'YearsSinceDx', 'NFOGQ',
    # window features
    '0', '1', '2', '39', '40', '41', '42', '43', '44',
]
# Feature columns, in the order they are handed to the models.
mod_cols = [
    'AccAP', '0', '2', '40', '1', 'Age', '39', '42', '44',
    'YearsSinceDx', 'NFOGQ', 'Medication', 'AccV', 'AccML', '41', '43',
]

### defog

In [10]:
# Load every defog training recording, attach its window features and tag
# each row with the recording ID (file name without extension).
drloc = './train/defog/'
defog_frames = []
# sorted() makes the row order independent of the filesystem's listing order.
for i in sorted(os.listdir(drloc)):
    fnm = os.path.join(drloc, i)
    pdat = pd.read_csv(fnm)
    feats = accFeatMats(pdat,100)
    pdat = pd.concat([pdat,feats],axis=1)
    pdat['ID'] = i.split('.')[0]
    defog_frames.append(pdat)
# Concatenate once instead of re-copying the growing frame on every file.
dataset_defog = pd.concat(defog_frames) if defog_frames else pd.DataFrame()

In [11]:
# Keep only samples flagged both Valid and Task, then drop the two flag
# columns and renumber the rows.
is_valid = (dataset_defog['Valid'] == True).to_numpy()
is_task = (dataset_defog['Task'] == True).to_numpy()
dataset_defog_f = (
    dataset_defog[is_valid & is_task]
    .drop(columns=['Valid', 'Task'])
    .reset_index(drop=True)
)

In [12]:
# Attach recording metadata, then subject metadata, and encode categoricals.
dataset_defog_sub = (
    dataset_defog_f
    .merge(defog_meta, left_on='ID', right_on='Id')
    .drop(columns=['Visit'])
    .merge(sub_meta, left_on='Subject', right_on='Subject', how='left')
    .drop(columns=['ID', 'Id', 'Visit', 'UPDRSIII_On', 'UPDRSIII_Off'])
    # Sex: male -> -1, female -> 1
    .replace('M', -1)
    .replace('F', 1)
    # Medication: on -> 1, off -> -1
    .replace('on', 1)
    .replace('off', -1)
    # Discard rows with any missing value
    .dropna()
)

# To persist the combined table:
# dataset_defog_sub.to_csv('combined_defog_m.csv',index=False)

In [13]:
# Subject-wise 80/20 split so no subject appears in both train and validation.
# A fixed random_state makes the split (and the reported accuracy) reproducible.
idsl = pd.DataFrame(dataset_defog_sub['Subject'].unique())
ids_train = idsl.sample(frac = 0.8, random_state=42)
ids_valid = idsl.drop(ids_train.index)

# Select each side with one boolean mask instead of growing a frame
# subject by subject.
train_defog = (
    dataset_defog_sub.loc[dataset_defog_sub['Subject'].isin(ids_train[0])]
    .drop(['Subject'],axis=1)
    .reset_index(drop=True)
    .astype('float64')
)
X_train_defog = train_defog[mod_cols]
y_train_defog = train_defog[['StartHesitation', 'Turn', 'Walking']]

valid_defog = (
    dataset_defog_sub.loc[dataset_defog_sub['Subject'].isin(ids_valid[0])]
    .drop(['Subject'],axis=1)
    .reset_index(drop=True)
    .astype('float64')
)
X_valid_defog = valid_defog[mod_cols]
y_valid_defog = valid_defog[['StartHesitation', 'Turn', 'Walking']]

# Train + validation together, used to refit the final model.
X_defog = pd.concat([X_train_defog,X_valid_defog])
y_defog = pd.concat([y_train_defog,y_valid_defog])

### tdcsfog

In [15]:
# Load every tdcsfog training recording, attach its window features and tag
# each row with the recording ID (file name without extension).
drloc = './train/tdcsfog/'
tdcsfog_frames = []
# sorted() makes the row order independent of the filesystem's listing order.
for i in sorted(os.listdir(drloc)):
    fnm = os.path.join(drloc, i)
    pdat = pd.read_csv(fnm)
    feats = accFeatMats(pdat,128)
    pdat = pd.concat([pdat,feats],axis=1)
    pdat['ID'] = i.split('.')[0]
    tdcsfog_frames.append(pdat)
# Concatenate once instead of re-copying the growing frame on every file.
dataset_tdcsfog = pd.concat(tdcsfog_frames) if tdcsfog_frames else pd.DataFrame()

In [16]:
# Renumber rows 0..n-1; the per-file indices repeat after concatenation.
dataset_tdcsfog_f = dataset_tdcsfog.copy()
dataset_tdcsfog_f.index = pd.RangeIndex(len(dataset_tdcsfog_f))

In [17]:
# Attach recording metadata, then subject metadata, and encode categoricals.
dataset_tdcsfog_sub = (
    dataset_tdcsfog_f
    .merge(tdcsfog_meta, left_on='ID', right_on='Id')
    .drop(columns=['Visit'])
    .merge(sub_meta, left_on='Subject', right_on='Subject', how='left')
    .drop(columns=['ID', 'Id', 'Visit', 'Test', 'UPDRSIII_On', 'UPDRSIII_Off'])
    # Sex: male -> -1, female -> 1
    .replace('M', -1)
    .replace('F', 1)
    # Medication: on -> 1, off -> -1
    .replace('on', 1)
    .replace('off', -1)
    # Discard rows with any missing value
    .dropna()
)

# To persist the combined table:
# dataset_tdcsfog_sub.to_csv('combined_tdcsfog_m.csv',index=False)

In [18]:
# Subject-wise 80/20 split so no subject appears in both train and validation.
# A fixed random_state makes the split (and the reported accuracy) reproducible.
idsl = pd.DataFrame(dataset_tdcsfog_sub['Subject'].unique())
ids_train = idsl.sample(frac = 0.8, random_state=42)
ids_valid = idsl.drop(ids_train.index)

# Select each side with one boolean mask instead of growing a frame
# subject by subject.
train_tdcsfog = (
    dataset_tdcsfog_sub.loc[dataset_tdcsfog_sub['Subject'].isin(ids_train[0])]
    .drop(['Subject'],axis=1)
    .reset_index(drop=True)
    .astype('float64')
)
X_train_tdcsfog = train_tdcsfog[mod_cols]
y_train_tdcsfog = train_tdcsfog[['StartHesitation', 'Turn', 'Walking']]

valid_tdcsfog = (
    dataset_tdcsfog_sub.loc[dataset_tdcsfog_sub['Subject'].isin(ids_valid[0])]
    .drop(['Subject'],axis=1)
    .reset_index(drop=True)
    .astype('float64')
)
X_valid_tdcsfog = valid_tdcsfog[mod_cols]
y_valid_tdcsfog = valid_tdcsfog[['StartHesitation', 'Turn', 'Walking']]

# Train + validation together, used to refit the final model.
X_tdcsfog = pd.concat([X_train_tdcsfog,X_valid_tdcsfog])
y_tdcsfog = pd.concat([y_train_tdcsfog,y_valid_tdcsfog])

## Test data

### defog

In [None]:
# Load every defog test recording, attach its window features and tag each
# row with the recording ID (file name without extension).
drloc = './test/defog/'
defog_test_frames = []
# sorted() makes the row order independent of the filesystem's listing order.
for i in sorted(os.listdir(drloc)):
    fnm = os.path.join(drloc, i)
    pdat = pd.read_csv(fnm)
    feats = accFeatMats(pdat,100)
    pdat = pd.concat([pdat,feats],axis=1)
    pdat['ID'] = i.split('.')[0]
    defog_test_frames.append(pdat)
# Concatenate once instead of re-copying the growing frame on every file.
dataset_defog_test = pd.concat(defog_test_frames) if defog_test_frames else pd.DataFrame()

In [None]:
# Renumber rows 0..n-1; the per-file indices repeat after concatenation.
dataset_defog_test_f = dataset_defog_test.copy()
dataset_defog_test_f.index = pd.RangeIndex(len(dataset_defog_test_f))

In [None]:
# Attach recording metadata, then subject metadata, and encode categoricals.
# 'ID' and 'Subject' are kept here; they are dropped when the feature frame
# is built and 'ID' is needed to label the predictions.
dataset_defog_sub_test = (
    dataset_defog_test_f
    .merge(defog_meta, left_on='ID', right_on='Id')
    .drop(columns=['Visit'])
    .merge(sub_meta, left_on='Subject', right_on='Subject', how='left')
    .drop(columns=['Id', 'Visit', 'UPDRSIII_On', 'UPDRSIII_Off'])
    # Sex: male -> -1, female -> 1
    .replace('M', -1)
    .replace('F', 1)
    # Medication: on -> 1, off -> -1
    .replace('on', 1)
    .replace('off', -1)
)

# To persist the combined table:
# dataset_defog_sub_test.to_csv('test_defog_m.csv',index=False)

In [None]:
# Numeric-only view of the defog test rows.
test_defog = (
    dataset_defog_sub_test
    .drop(columns=['Subject', 'ID'])
    .reset_index(drop=True)
    .astype('float64')
)

# Keep the model's feature columns and discard rows with any missing feature.
X_test_defog = test_defog[mod_cols].dropna()

X_test_defog

### tdcsfog

In [None]:
# Load every tdcsfog test recording, attach its window features and tag each
# row with the recording ID (file name without extension).
drloc = './test/tdcsfog/'
tdcsfog_test_frames = []
# sorted() makes the row order independent of the filesystem's listing order.
for i in sorted(os.listdir(drloc)):
    fnm = os.path.join(drloc, i)
    pdat = pd.read_csv(fnm)
    # Use 128, the value the tdcsfog training recordings are processed with;
    # this cell used to pass 100, so test features were computed differently
    # from the features the model was trained on.
    feats = accFeatMats(pdat,128)
    pdat = pd.concat([pdat,feats],axis=1)
    pdat['ID'] = i.split('.')[0]
    tdcsfog_test_frames.append(pdat)
# Concatenate once instead of re-copying the growing frame on every file.
dataset_tdcsfog_test = pd.concat(tdcsfog_test_frames) if tdcsfog_test_frames else pd.DataFrame()

In [None]:
# Renumber rows 0..n-1; the per-file indices repeat after concatenation.
dataset_tdcsfog_test_f = dataset_tdcsfog_test.copy()
dataset_tdcsfog_test_f.index = pd.RangeIndex(len(dataset_tdcsfog_test_f))

In [None]:
# Attach recording metadata, then subject metadata, and encode categoricals.
# 'ID' and 'Subject' are kept here; they are dropped when the feature frame
# is built and 'ID' is needed to label the predictions.
dataset_tdcsfog_sub_test = (
    dataset_tdcsfog_test_f
    .merge(tdcsfog_meta, left_on='ID', right_on='Id')
    .drop(columns=['Visit'])
    .merge(sub_meta, left_on='Subject', right_on='Subject', how='left')
    .drop(columns=['Id', 'Visit', 'UPDRSIII_On', 'UPDRSIII_Off'])
    # Sex: male -> -1, female -> 1
    .replace('M', -1)
    .replace('F', 1)
    # Medication: on -> 1, off -> -1
    .replace('on', 1)
    .replace('off', -1)
)

# To persist the combined table:
# dataset_tdcsfog_sub_test.to_csv('test_tdcsfog_m.csv',index=False)

In [None]:
# Numeric-only view of the tdcsfog test rows.
test_tdcsfog = (
    dataset_tdcsfog_sub_test
    .drop(columns=['Subject', 'ID'])
    .reset_index(drop=True)
    .astype('float64')
)

# Keep the model's feature columns and discard rows with any missing feature.
X_test_tdcsfog = test_tdcsfog[mod_cols].dropna()

X_test_tdcsfog

## Model

#### XGBoost defog

In [None]:
# Fit the defog classifier on the training subjects only (used for validation).
defog_train_features = X_train_defog.loc[:, mod_cols]
xg_defog = xgboost.XGBClassifier(n_jobs=20)
xg_defog.fit(defog_train_features, y_train_defog)

In [None]:
# Predicted labels: one 0/1 column each for StartHesitation, Turn, Walking.
y_pred_defog = np.asarray(xg_defog.predict(X_valid_defog[mod_cols]), dtype=float)
# Prepend a "no event" column (1 - sum of the three event columns) so every
# sample maps to exactly one class index: 0 = no event, 1..3 = the events.
y_pred_defog = np.hstack((1 - y_pred_defog.sum(axis=1, keepdims=True), y_pred_defog))
y_pred_val = np.argmax(y_pred_defog,axis=1)

# Apply the same 4-column encoding to the ground truth. The class index used
# to be taken from the raw 3-column labels, which shifted every true class by
# one relative to the predictions and made the accuracy meaningless.
y_valid_defog_np = np.asarray(y_valid_defog, dtype=float)
y_valid_defog_np = np.hstack((1 - y_valid_defog_np.sum(axis=1, keepdims=True), y_valid_defog_np))
y_pred_defog_np = y_valid_defog_np  # legacy name for the same matrix, kept for compatibility
y_valid_val = np.argmax(y_valid_defog_np,axis=1)

print('Accuracy: ',metrics.accuracy_score(y_valid_val,y_pred_val))

In [None]:
# Refit on train + validation subjects; this model produces the test predictions.
defog_all_features = X_defog.loc[:, mod_cols]
xg_defog_all = xgboost.XGBClassifier(n_jobs=20)
xg_defog_all.fit(defog_all_features, y_defog)

#### XGBoost tdcsfog

In [None]:
# Fit the tdcsfog classifier on the training subjects only (used for validation).
tdcsfog_train_features = X_train_tdcsfog.loc[:, mod_cols]
xg_tdcsfog = xgboost.XGBClassifier(n_jobs=20)
xg_tdcsfog.fit(tdcsfog_train_features, y_train_tdcsfog)

In [None]:
# Predicted labels: one 0/1 column each for StartHesitation, Turn, Walking.
y_pred_tdcsfog = np.asarray(xg_tdcsfog.predict(X_valid_tdcsfog[mod_cols]), dtype=float)
# Prepend a "no event" column (1 - sum of the three event columns) so every
# sample maps to exactly one class index: 0 = no event, 1..3 = the events.
y_pred_tdcsfog = np.hstack((1 - y_pred_tdcsfog.sum(axis=1, keepdims=True), y_pred_tdcsfog))
y_pred_val = np.argmax(y_pred_tdcsfog,axis=1)

# Apply the same 4-column encoding to the ground truth. The class index used
# to be taken from the raw 3-column labels, which shifted every true class by
# one relative to the predictions and made the accuracy meaningless.
y_valid_tdcsfog_np = np.asarray(y_valid_tdcsfog, dtype=float)
y_valid_tdcsfog_np = np.hstack((1 - y_valid_tdcsfog_np.sum(axis=1, keepdims=True), y_valid_tdcsfog_np))
y_pred_tdcsfog_np = y_valid_tdcsfog_np  # legacy name for the same matrix, kept for compatibility
y_valid_val = np.argmax(y_valid_tdcsfog_np,axis=1)

print('Accuracy: ',metrics.accuracy_score(y_valid_val,y_pred_val))

In [None]:
# Refit on train + validation subjects; this model produces the test predictions.
tdcsfog_all_features = X_tdcsfog.loc[:, mod_cols]
xg_tdcsfog_all = xgboost.XGBClassifier(n_jobs=20)
xg_tdcsfog_all.fit(tdcsfog_all_features, y_tdcsfog)

## Prediction

In [None]:
y_pred_defog_test = xg_defog_all.predict(X_test_defog[mod_cols])

# X_test_defog had rows with missing features dropped, so its index no longer
# runs 0..n-1. Give the predictions that same index; otherwise the Id
# assignment below aligns by position against the undropped frame and labels
# predictions with the wrong recording/time.
y_pred_defog_test = pd.DataFrame(
    y_pred_defog_test,
    columns=['StartHesitation', 'Turn', 'Walking'],
    index=X_test_defog.index,
)

# Id = "<recording ID>_<Time>", taken from exactly the rows that were predicted.
kept_rows = dataset_defog_sub_test.loc[X_test_defog.index]
y_pred_defog_test['Id'] = kept_rows['ID'].astype('str')+'_'+kept_rows['Time'].astype('str')

y_pred_defog_test = y_pred_defog_test[['Id','StartHesitation', 'Turn', 'Walking']].reset_index(drop=True)

y_pred_defog_test

In [None]:
y_pred_tdcsfog_test = xg_tdcsfog_all.predict(X_test_tdcsfog[mod_cols])

# X_test_tdcsfog had rows with missing features dropped, so its index no
# longer runs 0..n-1. Give the predictions that same index; otherwise the Id
# assignment below aligns by position against the undropped frame and labels
# predictions with the wrong recording/time.
y_pred_tdcsfog_test = pd.DataFrame(
    y_pred_tdcsfog_test,
    columns=['StartHesitation', 'Turn', 'Walking'],
    index=X_test_tdcsfog.index,
)

# Id = "<recording ID>_<Time>", taken from exactly the rows that were predicted.
kept_rows = dataset_tdcsfog_sub_test.loc[X_test_tdcsfog.index]
y_pred_tdcsfog_test['Id'] = kept_rows['ID'].astype('str')+'_'+kept_rows['Time'].astype('str')

y_pred_tdcsfog_test = y_pred_tdcsfog_test[['Id','StartHesitation', 'Turn', 'Walking']].reset_index(drop=True)

y_pred_tdcsfog_test

In [None]:
# Stack the defog and tdcsfog predictions and write the submission file.
submission_parts = [y_pred_defog_test, y_pred_tdcsfog_test]
y_pred_test = pd.concat(submission_parts)
y_pred_test.to_csv('submission.csv', index=False)
y_pred_test