In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.dpi'] = 70 #display 70 dpi in Jupyter Notebook, may consider100 dpi 
plt.rcParams['savefig.dpi'] = 300 #define 300 dpi for saving figures

import seaborn as sns
## here are some settings 
sns.set_style('whitegrid')
sns.set(rc={"figure.dpi":70, 'savefig.dpi':300}) #defining dpi setting
sns.set_context('notebook')
sns.set_style("ticks")
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('retina')

In [None]:
# Tells matplotlib to display images inline instead of a new window
%matplotlib inline

import pandas as pd
import numpy as np

from tqdm import tqdm
import os
import re
import csv
import statistics

import matplotlib.pyplot as plt
import seaborn as sns

from time import time
import timeit #imports timeit module

In [None]:
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.preprocessing import PolynomialFeatures

import sklearn.linear_model as skl_lm
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LogisticRegressionCV
from sklearn import neighbors
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis 
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.tree import DecisionTreeRegressor 
from sklearn.tree import DecisionTreeClassifier 
from sklearn.tree import export_graphviz
from sklearn.svm import SVC

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import train_test_split

from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import mean_squared_error

from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier

In [None]:
# Read the record IDs: the first space-delimited field of every row in the
# subject list. These IDs are later used to locate one feature CSV per record.
records = os.path.normpath('mit-bih-dataframes/subject_list.csv')
# newline='' is the mode the csv module documents for files passed to csv.reader.
with open(records, newline='') as rfile:
    recordreader = csv.reader(rfile, delimiter=' ', quotechar='|')
    # csv.reader yields an empty list for a blank line; skipping those rows keeps
    # a stray or trailing blank line from raising IndexError on row[0].
    rlist = [row[0] for row in recordreader if row]

In [None]:
# Build a mapping from record ID to that record's feature table.
# Each table is read from 'mit-bih-time-features/<record>.csv' with the first
# column as the index and 'subjectID' forced to string.
feature_dfs = {
    record: pd.read_csv(
        os.path.normpath('mit-bih-time-features/'+record+'.csv'),
        index_col=0,
        dtype={'subjectID': str},
    )
    for record in tqdm(rlist)
}

In [None]:
# Summary table of model results: every key maps to its own list, and each
# evaluation cell appends exactly one value per key (one entry per model).
#   'Accuracy'       - mean of the per-record accuracies from the
#                      leave-one-record-out loops (not a 5-fold CV).
#   'Standard Error' - the evaluation cells append np.std of those per-record
#                      accuracies to this key.
statallfeatures_dict = {
    metric_name: []
    for metric_name in (
        'Run Time',
        'Accuracy',
        'Standard Error',
        'Sensitivity',
        'Specificity',
        'Precision',
        'F1_Score',
    )
}

In [None]:
# Leave-one-record-out evaluation of a depth-limited decision tree: every record
# in feature_dfs is held out once while the tree is fit on all other records.
start_time = timeit.default_timer()

# Feature columns used for both the training and the held-out selection.
feature_cols = ['StoS', 'StoR', 'StoL', 'RtoS', 'RtoR', 'RtoL', 'LtoS', 'LtoR', 'LtoL', 'std', 'cov', 'range', 'rrInt_var', 'rmean_var', 'rmssd', 'mad', 'iqr']

# random_state fixed (same seed as the other models in this notebook) so that
# tie-breaking between equally good splits is reproducible across runs.
decisionTree = DecisionTreeClassifier(max_depth=6, random_state=2) #criterion='entropy'

acc_score = []  # accuracy on each held-out record
Truth = []      # true labels pooled over all held-out records
Output = []     # predicted labels, aligned with Truth

for record in feature_dfs:
    # [1:] drops the first row of every record's table (reason not visible here).
    test_df = feature_dfs[record][1:]
    train_df = pd.concat([value[1:] for key, value in feature_dfs.items() if key != record])

    X_train = train_df[feature_cols]
    y_train = train_df['mappedLabel']

    X_test = test_df[feature_cols]
    y_test = test_df['mappedLabel']

    # fit() retrains the estimator from scratch on each iteration.
    decisionTree.fit(X_train, y_train)
    pred_values = decisionTree.predict(X_test)

    acc = accuracy_score(y_test, pred_values)
    acc_score.append(acc)

    Truth.extend(y_test.values)
    Output.extend(pred_values)

elapsed = timeit.default_timer() - start_time

mean_acc = np.mean(acc_score)
std_acc = np.std(acc_score)

print("---Run time is %s seconds ---" % elapsed)
print()
print('Accuracy of each fold: \n {}'.format(acc_score))
print("Avg accuracy: {}".format(mean_acc))
print('Std of accuracy : \n{}'.format(std_acc))
print("confusion matrix: ")

cm = confusion_matrix(Truth, Output)
print(cm)
print("classification report: ")
print(classification_report(Truth, Output))

# confusion_matrix orders classes by sorted label value; row/column 0 is treated
# as the positive class below (assumed to be 'Afib' - confirm against the report).
tp, fn = cm[0][0], cm[0][1]
fp, tn = cm[1][0], cm[1][1]
sensitivity = tp/(tp+fn)
specificity = tn/(fp+tn)
precision = tp/(tp+fp)
f1_score = (2*precision*sensitivity)/(precision+sensitivity)

print(sensitivity)
print(specificity)
print(precision)
print(f1_score)

statallfeatures_dict['Run Time'].append(elapsed)
statallfeatures_dict['Accuracy'].append(mean_acc)
# NOTE(review): this is the standard deviation of the per-record accuracies,
# not a standard error (std / sqrt(n)); value left unchanged.
statallfeatures_dict['Standard Error'].append(std_acc)
statallfeatures_dict['Sensitivity'].append(sensitivity)
statallfeatures_dict['Specificity'].append(specificity)
statallfeatures_dict['Precision'].append(precision)
statallfeatures_dict['F1_Score'].append(f1_score)

In [None]:
# Leave-one-record-out evaluation of bagged trees: a random forest that may
# consider every feature at each split. Each record in feature_dfs is held out
# once while the model is fit on all other records.
start_time = timeit.default_timer()

# Feature columns used for both the training and the held-out selection.
feature_cols = ['StoS', 'StoR', 'StoL', 'RtoS', 'RtoR', 'RtoL', 'LtoS', 'LtoR', 'LtoL', 'std', 'cov', 'range', 'rrInt_var', 'rmean_var', 'rmssd', 'mad', 'iqr']

# max_features equals the number of feature columns (17), derived from the list
# so the two cannot drift apart if the feature set changes.
bagging = RandomForestClassifier(max_features = len(feature_cols), random_state = 2)

acc_score = []  # accuracy on each held-out record
Truth = []      # true labels pooled over all held-out records
Output = []     # predicted labels, aligned with Truth

for record in tqdm(feature_dfs):
    # [1:] drops the first row of every record's table (reason not visible here).
    test_df = feature_dfs[record][1:]
    train_df = pd.concat([value[1:] for key, value in feature_dfs.items() if key != record])

    X_train = train_df[feature_cols]
    y_train = train_df['mappedLabel']

    X_test = test_df[feature_cols]
    y_test = test_df['mappedLabel']

    # fit() retrains the estimator from scratch on each iteration.
    bagging.fit(X_train, y_train)
    pred_values = bagging.predict(X_test)

    acc = accuracy_score(y_test, pred_values)
    acc_score.append(acc)

    Truth.extend(y_test.values)
    Output.extend(pred_values)

elapsed = timeit.default_timer() - start_time

mean_acc = np.mean(acc_score)
std_acc = np.std(acc_score)

print("---Run time is %s seconds ---" % elapsed)
print()
print('Accuracy of each fold: \n {}'.format(acc_score))
print("Avg accuracy: {}".format(mean_acc))
print('Std of accuracy : \n{}'.format(std_acc))
print("confusion matrix: ")

cm = confusion_matrix(Truth, Output)
print(cm)
print("classification report: ")
print(classification_report(Truth, Output))

# confusion_matrix orders classes by sorted label value; row/column 0 is treated
# as the positive class below (assumed to be 'Afib' - confirm against the report).
tp, fn = cm[0][0], cm[0][1]
fp, tn = cm[1][0], cm[1][1]
sensitivity = tp/(tp+fn)
specificity = tn/(fp+tn)
precision = tp/(tp+fp)
f1_score = (2*precision*sensitivity)/(precision+sensitivity)

print(sensitivity)
print(specificity)
print(precision)
print(f1_score)

statallfeatures_dict['Run Time'].append(elapsed)
statallfeatures_dict['Accuracy'].append(mean_acc)
# NOTE(review): this is the standard deviation of the per-record accuracies,
# not a standard error (std / sqrt(n)); value left unchanged.
statallfeatures_dict['Standard Error'].append(std_acc)
statallfeatures_dict['Sensitivity'].append(sensitivity)
statallfeatures_dict['Specificity'].append(specificity)
statallfeatures_dict['Precision'].append(precision)
statallfeatures_dict['F1_Score'].append(f1_score)

In [14]:
# Leave-one-record-out evaluation of a random forest limited to 4 candidate
# features per split. Each record in feature_dfs is held out once while the
# model is fit on all other records.
start_time = timeit.default_timer()

# Feature columns used for both the training and the held-out selection.
feature_cols = ['StoS', 'StoR', 'StoL', 'RtoS', 'RtoR', 'RtoL', 'LtoS', 'LtoR', 'LtoL', 'std', 'cov', 'range', 'rrInt_var', 'rmean_var', 'rmssd', 'mad', 'iqr']

randomForest = RandomForestClassifier(max_features = 4, random_state = 2)

acc_score = []  # accuracy on each held-out record
Truth = []      # true labels pooled over all held-out records
Output = []     # predicted labels, aligned with Truth

for record in feature_dfs:
    # [1:] drops the first row of every record's table (reason not visible here).
    test_df = feature_dfs[record][1:]
    train_df = pd.concat([value[1:] for key, value in feature_dfs.items() if key != record])

    X_train = train_df[feature_cols]
    y_train = train_df['mappedLabel']

    X_test = test_df[feature_cols]
    y_test = test_df['mappedLabel']

    # fit() retrains the estimator from scratch on each iteration.
    randomForest.fit(X_train, y_train)
    pred_values = randomForest.predict(X_test)

    acc = accuracy_score(y_test, pred_values)
    acc_score.append(acc)

    Truth.extend(y_test.values)
    Output.extend(pred_values)

elapsed = timeit.default_timer() - start_time

mean_acc = np.mean(acc_score)
std_acc = np.std(acc_score)

print("---Run time is %s seconds ---" % elapsed)
print()
print('Accuracy of each fold: \n {}'.format(acc_score))
print("Avg accuracy: {}".format(mean_acc))
print('Std of accuracy : \n{}'.format(std_acc))
print("confusion matrix: ")

cm = confusion_matrix(Truth, Output)
print(cm)
print("classification report: ")
print(classification_report(Truth, Output))

# confusion_matrix orders classes by sorted label value; row/column 0 is treated
# as the positive class below. The printed classification report lists 'Afib'
# first, so 'Afib' is the positive class here.
tp, fn = cm[0][0], cm[0][1]
fp, tn = cm[1][0], cm[1][1]
sensitivity = tp/(tp+fn)
specificity = tn/(fp+tn)
precision = tp/(tp+fp)
f1_score = (2*precision*sensitivity)/(precision+sensitivity)

print(sensitivity)
print(specificity)
print(precision)
print(f1_score)

statallfeatures_dict['Run Time'].append(elapsed)
statallfeatures_dict['Accuracy'].append(mean_acc)
# NOTE(review): this is the standard deviation of the per-record accuracies,
# not a standard error (std / sqrt(n)); value left unchanged.
statallfeatures_dict['Standard Error'].append(std_acc)
statallfeatures_dict['Sensitivity'].append(sensitivity)
statallfeatures_dict['Specificity'].append(specificity)
statallfeatures_dict['Precision'].append(precision)
statallfeatures_dict['F1_Score'].append(f1_score)

---Run time is 1810.5572048799995 seconds ---

Accuracy of each fold: 
 [0.9172932330827067, 0.9244733521017376, 0.9941176470588236, 0.949606962380685, 0.9958547921115438, 0.9571776155717762, 0.900022416498543, 0.9942707480766083, 0.9453887884267631, 0.9286563614744352, 0.9739395921927816, 0.9896319336443753, 0.8690631808278867, 0.9148023291449586, 0.6349127182044888, 0.9887964148527529, 0.9872537659327926, 0.9737682165163081, 0.7262218616913405, 0.9746300211416491, 0.995608211622919, 0.9708547266686801, 0.997274379164143]
Avg accuracy: 0.9349399681908129
Std of accuracy : 
0.08688937665362446
confusion matrix: 
[[78156  6516]
 [ 6901 95984]]
classification report: 
              precision    recall  f1-score   support

        Afib       0.92      0.92      0.92     84672
    Non-Afib       0.94      0.93      0.93    102885

    accuracy                           0.93    187557
   macro avg       0.93      0.93      0.93    187557
weighted avg       0.93      0.93      0.93    187557

In [None]:
# Leave-one-record-out evaluation of AdaBoost (500 estimators, learning rate 0.1):
# each record in feature_dfs is held out once while the model is fit on all
# other records.
start_time = timeit.default_timer()

# Feature columns used for both the training and the held-out selection.
feature_cols = ['StoS', 'StoR', 'StoL', 'RtoS', 'RtoR', 'RtoL', 'LtoS', 'LtoR', 'LtoL', 'std', 'cov', 'range', 'rrInt_var', 'rmean_var', 'rmssd', 'mad', 'iqr']

# The explicit algorithm="SAMME.R" argument is omitted: that option is deprecated
# in scikit-learn 1.4 and rejected from 1.6 on. Releases where SAMME.R is still
# the default keep using it; newer releases fall back to SAMME, so scores may
# differ slightly between library versions.
adaBoost = AdaBoostClassifier(n_estimators=500, learning_rate = 0.1, random_state=2)

acc_score = []  # accuracy on each held-out record
Truth = []      # true labels pooled over all held-out records
Output = []     # predicted labels, aligned with Truth

for record in feature_dfs:
    # [1:] drops the first row of every record's table (reason not visible here).
    test_df = feature_dfs[record][1:]
    train_df = pd.concat([value[1:] for key, value in feature_dfs.items() if key != record])

    X_train = train_df[feature_cols]
    y_train = train_df['mappedLabel']

    X_test = test_df[feature_cols]
    y_test = test_df['mappedLabel']

    # fit() retrains the estimator from scratch on each iteration.
    adaBoost.fit(X_train, y_train)
    pred_values = adaBoost.predict(X_test)

    acc = accuracy_score(y_test, pred_values)
    acc_score.append(acc)

    Truth.extend(y_test.values)
    Output.extend(pred_values)

elapsed = timeit.default_timer() - start_time

mean_acc = np.mean(acc_score)
std_acc = np.std(acc_score)

print("---Run time is %s seconds ---" % elapsed)
print()
print('Accuracy of each fold: \n {}'.format(acc_score))
print("Avg accuracy: {}".format(mean_acc))
print('Std of accuracy : \n{}'.format(std_acc))
print("confusion matrix: ")

cm = confusion_matrix(Truth, Output)
print(cm)
print("classification report: ")
print(classification_report(Truth, Output))

# confusion_matrix orders classes by sorted label value; row/column 0 is treated
# as the positive class below (assumed to be 'Afib' - confirm against the report).
tp, fn = cm[0][0], cm[0][1]
fp, tn = cm[1][0], cm[1][1]
sensitivity = tp/(tp+fn)
specificity = tn/(fp+tn)
precision = tp/(tp+fp)
f1_score = (2*precision*sensitivity)/(precision+sensitivity)

print(sensitivity)
print(specificity)
print(precision)
print(f1_score)

statallfeatures_dict['Run Time'].append(elapsed)
statallfeatures_dict['Accuracy'].append(mean_acc)
# NOTE(review): this is the standard deviation of the per-record accuracies,
# not a standard error (std / sqrt(n)); value left unchanged.
statallfeatures_dict['Standard Error'].append(std_acc)
statallfeatures_dict['Sensitivity'].append(sensitivity)
statallfeatures_dict['Specificity'].append(specificity)
statallfeatures_dict['Precision'].append(precision)
statallfeatures_dict['F1_Score'].append(f1_score)

In [None]:
# Leave-one-record-out evaluation of gradient boosting (500 depth-4 trees,
# learning rate 0.1): each record in feature_dfs is held out once while the
# model is fit on all other records.
start_time = timeit.default_timer()

# Feature columns used for both the training and the held-out selection.
feature_cols = ['StoS', 'StoR', 'StoL', 'RtoS', 'RtoR', 'RtoL', 'LtoS', 'LtoR', 'LtoL', 'std', 'cov', 'range', 'rrInt_var', 'rmean_var', 'rmssd', 'mad', 'iqr']

gradientBoost = GradientBoostingClassifier(n_estimators = 500, 
                                           learning_rate = 0.1, 
                                           max_depth = 4, 
                                           random_state = 2)

acc_score = []  # accuracy on each held-out record
Truth = []      # true labels pooled over all held-out records
Output = []     # predicted labels, aligned with Truth

for record in feature_dfs:
    # [1:] drops the first row of every record's table (reason not visible here).
    test_df = feature_dfs[record][1:]
    train_df = pd.concat([value[1:] for key, value in feature_dfs.items() if key != record])

    X_train = train_df[feature_cols]
    y_train = train_df['mappedLabel']

    X_test = test_df[feature_cols]
    y_test = test_df['mappedLabel']

    # fit() retrains the estimator from scratch on each iteration.
    gradientBoost.fit(X_train, y_train)
    pred_values = gradientBoost.predict(X_test)

    acc = accuracy_score(y_test, pred_values)
    acc_score.append(acc)

    Truth.extend(y_test.values)
    Output.extend(pred_values)

elapsed = timeit.default_timer() - start_time

mean_acc = np.mean(acc_score)
std_acc = np.std(acc_score)

print("---Run time is %s seconds ---" % elapsed)
print()
print('Accuracy of each fold: \n {}'.format(acc_score))
print("Avg accuracy: {}".format(mean_acc))
print('Std of accuracy : \n{}'.format(std_acc))
print("confusion matrix: ")

cm = confusion_matrix(Truth, Output)
print(cm)
print("classification report: ")
print(classification_report(Truth, Output))

# confusion_matrix orders classes by sorted label value; row/column 0 is treated
# as the positive class below (assumed to be 'Afib' - confirm against the report).
tp, fn = cm[0][0], cm[0][1]
fp, tn = cm[1][0], cm[1][1]
sensitivity = tp/(tp+fn)
specificity = tn/(fp+tn)
precision = tp/(tp+fp)
f1_score = (2*precision*sensitivity)/(precision+sensitivity)

print(sensitivity)
print(specificity)
print(precision)
print(f1_score)

statallfeatures_dict['Run Time'].append(elapsed)
statallfeatures_dict['Accuracy'].append(mean_acc)
# NOTE(review): this is the standard deviation of the per-record accuracies,
# not a standard error (std / sqrt(n)); value left unchanged.
statallfeatures_dict['Standard Error'].append(std_acc)
statallfeatures_dict['Sensitivity'].append(sensitivity)
statallfeatures_dict['Specificity'].append(specificity)
statallfeatures_dict['Precision'].append(precision)
statallfeatures_dict['F1_Score'].append(f1_score)