# SWELL Analysis

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn as sk
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.model_selection import cross_val_score, ShuffleSplit, train_test_split
from sklearn.decomposition import TruncatedSVD
import seaborn as sns

In [2]:
dfTrainFull = pd.read_csv("data/final-SWELL/train.csv")
dfTestFull = pd.read_csv("data/final-SWELL/test.csv")
dfFull = pd.concat([dfTrainFull, dfTestFull])
dfFull.shape

(410322, 36)

In [3]:
dfTestFull.head()

Unnamed: 0,MEAN_RR,MEDIAN_RR,SDRR,RMSSD,SDSD,SDRR_RMSSD,HR,pNN25,pNN50,SD1,...,HF,HF_PCT,HF_NU,TP,LF_HF,HF_LF,sampen,higuci,datasetId,condition
0,721.901897,727.26728,74.722315,12.361264,12.361069,6.044877,84.121868,4.933333,0.0,8.743513,...,66.617057,3.921868,9.760289,1698.60539,9.245599,0.10816,2.097342,1.243696,2,no stress
1,843.538633,844.40793,58.499429,19.29888,19.298795,3.031234,71.478642,21.0,0.2,13.650863,...,26.500086,1.123416,1.663151,2358.884694,59.126832,0.016913,2.217275,1.250056,2,time pressure
2,958.523868,966.671125,132.84911,21.342715,21.342653,6.224565,63.874293,24.133333,1.8,15.096571,...,16.024935,0.370208,0.766416,4328.633724,129.477524,0.007723,2.217136,1.144943,2,no stress
3,824.838669,842.485905,117.822094,11.771814,11.771248,10.00883,74.330531,4.733333,0.533333,8.326307,...,17.58147,0.615932,3.358652,2854.449091,28.773854,0.034754,2.106863,1.142355,2,no stress
4,756.707933,747.94162,143.968457,13.357748,13.356388,10.777899,82.092049,5.933333,0.666667,9.447545,...,35.199054,0.662879,6.292253,5310.027472,14.892559,0.067148,1.912191,1.128098,2,interruption


# Regular Split

In [4]:
testLabels = dfTestFull['condition']
trainLabels = dfTrainFull['condition']
dfTrain = dfTrainFull.drop(['condition', 'datasetId'],axis=1)
dfTest = dfTestFull.drop(['condition', 'datasetId'], axis=1)

In [5]:
rfModel1 = RandomForestClassifier(n_estimators=10, random_state=0, verbose=1)

rfModel1.fit(dfTrain, trainLabels)

preds = rfModel1.predict(dfTest)

score = sk.metrics.accuracy_score(testLabels, preds)
score

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   35.5s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s finished


1.0

In [6]:
scores = []
for column in dfTrain:
    train = dfTrain[column].to_numpy()
    test = dfTest[column].to_numpy()
    rfModel1.fit(train.reshape(-1,1), trainLabels)
    pred = rfModel1.predict(test.reshape(-1,1))
    scores.append(sk.metrics.accuracy_score(testLabels, pred))
    
scores

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   12.7s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    8.3s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   13.7s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_j

[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   12.4s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.2s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   12.8s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.2s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   13.2s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.2s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   12.7s finished
[Parallel(n_jobs=1)]: Us

[0.536592498720542,
 0.9024443740404064,
 0.49745326931981576,
 0.4834888991787098,
 0.4843906124338947,
 0.4649428508761241,
 0.5370067994053567,
 0.5933273219116321,
 0.5738308190968245,
 0.48407379426315406,
 0.49803816440425996,
 0.45007676747983333,
 0.44530012429020543,
 0.5371530231764677,
 0.7929958813637804,
 0.5377622888894305,
 0.5736602246971949,
 0.5743182316671946,
 0.4682572563546414,
 0.45007676747983333,
 0.44530012429020543,
 0.4424243901250213,
 0.4458850193746497,
 0.4553408232398314,
 0.44327736212316915,
 0.4503692150220554,
 0.4515633758194624,
 0.464747885847976,
 0.44846830599761167,
 0.45811907489094145,
 0.4487607535398338,
 0.4529037603879804,
 0.48490239563278337,
 0.4452757536616869]

In [11]:
print(max(scores), min(scores))

0.9024443740404064 0.4424243901250213


# No Interruptions

In [None]:
testFullNI = dfTestFull[dfTestFull['condition'] != 'interruption']
trainFullNI = dfTrainFull[dfTrainFull['condition'] != 'interruption']

testLabelsNI = testFullNI['condition']
trainLabelsNI = trainFullNI['condition']
dfTrainNI = trainFullNI.drop(['condition', 'datasetId'],axis=1)
dfTestNI = testFullNI.drop(['condition', 'datasetId'], axis=1)

In [None]:
rfModelNI = RandomForestClassifier(n_estimators=10, random_state=0, verbose=1)

rfModelNI.fit(dfTrainNI, trainLabelsNI)

predsNI = rfModelNI.predict(dfTestNI)

scoreNI = sk.metrics.accuracy_score(testLabelsNI, predsNI)
scoreNI

# Cross Validation

In [None]:
labels = dfFull['condition']

dfData = dfFull.drop(['condition', 'datasetId'], axis=1)
dfData.head()

In [None]:
cvf = ShuffleSplit(n_splits=5, test_size=0.3, random_state=0)
rfModel = RandomForestClassifier(n_estimators=10, random_state=0, verbose=1)
score = np.mean(cross_val_score(rfModel, dfData, labels, cv=cvf))
score

In [None]:
trainedRf = rfModel.fit(dfData, labels)

In [None]:
trainedRf.feature_importances_

In [None]:
svd2 = TruncatedSVD(n_components=2)
svd2Data = svd2.fit_transform(dfData)

In [None]:
plt.scatter(svd2Data[:,0],svd2Data[:,1])

In [None]:
fig_dims = (8, 8)
fig, ax = plt.subplots(figsize=fig_dims)
sns.scatterplot(x=svd2Data[:,0],
                y=svd2Data[:,1],
                ax=ax,
                data=pd.concat([labels, dfData], axis=1),
                hue='condition',
                )

# Developing HRV