# Import libraries

In [1]:
import numpy as np
import pandas as pd
import sklearn
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import roc_auc_score, auc, precision_recall_curve, accuracy_score
from joblib import dump, load

In [3]:
ptbdb_normal = pd.read_csv("ptbdb_normal.csv", header=None)
ptbdb_abnormal = pd.read_csv("ptbdb_abnormal.csv", header=None)
ptbdb = pd.concat([ptbdb_normal, ptbdb_abnormal])

mitbih_train = pd.read_csv("mitbih_train.csv", header=None)
mitbih_train = mitbih_train.sample(frac=1)
mitbih_test = pd.read_csv("mitbih_test.csv", header=None)


ptbdb_categories=2
ptbdb_train, ptbdb_test = train_test_split(ptbdb, test_size=0.2, random_state=42, stratify=ptbdb[187])
ptbdb_train_Y = np.array(ptbdb_train[187].values).astype(np.int8)
ptbdb_train_X = np.array(ptbdb_train[list(range(187))].values)[..., np.newaxis].squeeze()
ptbdb_validation_x, ptbdb_train_x, ptbdb_validation_y, ptbdb_train_y = train_test_split(ptbdb_train_X, ptbdb_train_Y, test_size=0.33, random_state=42)
ptbdb_test_Y = np.array(ptbdb_test[187].values).astype(np.int8)
ptbdb_test_X = np.array(ptbdb_test[list(range(187))].values)[..., np.newaxis].squeeze()
ptbdb_beats=range(len(ptbdb_train_x))
ptbdb_color=['green','red']
ptbdb_label=["Normal beat", "Abnormal beat"]

mitbih_categories=5
mitbih_train_Y = np.array(mitbih_train[187].values).astype(np.int8)
mitbih_train_X = np.array(mitbih_train[list(range(187))].values)[..., np.newaxis].squeeze()
mitbih_validation_x, mitbih_train_x, mitbih_validation_y, mitbih_train_y = train_test_split(mitbih_train_X, mitbih_train_Y, test_size=0.33, random_state=42)
mitbih_beats=range(len(mitbih_train_x))
mitbih_color=['green','red','black','blue','grey']
mitbih_label=["Normal", "Supraventricular", "Premature","Fusion", "Unclassifiable"]

for category in range(ptbdb_categories):
    for beat in ptbdb_beats:
        if ptbdb_train_y[beat]==category:
            plt.plot(range(len(ptbdb_train_x[beat])),ptbdb_train_x[beat],color=ptbdb_color[category],label=ptbdb_label[category])
            break
plt.legend()
plt.xlabel("time")
plt.ylabel("intensity")
plt.title('PTBDB Different beats')
plt.savefig("ptbdb_dif_beats.png")
plt.clf()

for category in range(mitbih_categories):
    for beat in mitbih_beats:
        if mitbih_train_y[beat]==category:
            plt.plot(range(len(mitbih_train_x[beat])),mitbih_train_x[beat],color=mitbih_color[category],label=mitbih_label[category])
            break
plt.xlabel("time")
plt.ylabel("intensity")
plt.title('MITBIH Different beats')
plt.legend()
plt.savefig("mitbih_dif_beats.png")
plt.clf()

<Figure size 432x288 with 0 Axes>

In [4]:
window=15
def make_peaks(data):
    peaks=np.zeros((len(data),2*window))
    for i in range(len(data)):
        #I started after 10 because sometimes there is a big peak at the very start
        peak=np.argmax(data[i][10:])+10
        peakinfo=data[i][peak-window:peak+window]
        for j in range(len(peakinfo)):
            peaks[i][j]=peakinfo[j]
    return pd.DataFrame(peaks)

ptbdb_peaks=make_peaks(ptbdb_train_X)
ptbdb_cat_peaks=pd.concat([ptbdb_peaks, pd.DataFrame(ptbdb_train_Y).rename(columns={0: "Category"})], axis=1)

mitbih_peaks=make_peaks(mitbih_train_X)
mitbih_cat_peaks=pd.concat([mitbih_peaks, pd.DataFrame(mitbih_train_Y).rename(columns={0: "Category"})], axis=1)

ptbdb_cat_peaks=ptbdb_cat_peaks.groupby(['Category']).mean().T
mitbih_cat_peaks=mitbih_cat_peaks.groupby(['Category']).mean().T

<Figure size 432x288 with 0 Axes>

In [5]:
ptbdb_counts=ptbdb[187].value_counts(ascending=True).sort_index()
mitbih_counts=pd.Series(mitbih_train_Y).value_counts().sort_index()


<Figure size 432x288 with 0 Axes>

In [6]:
def make_differences(X):
    differences=np.zeros((len(X),len(X[1])))
    signals=np.zeros(len(X))
    for beat in range(len(X)):
        for signal in range(1,len(X[1])):
            if X[beat][signal-1]==0 and X[beat][signal]==0:
                if (signal>=len(X[1])-2):
                    signals[beat]=signal
                    break
                else:
                    if X[beat][signal+1]==0 and X[beat][signal+2]==0:
                        signals[beat]=signal
                        break
            differences[beat][signal]=X[beat][signal]-X[beat][signal-1]
    return differences,signals

ptbdb_difCat=[[],[]]
ptbdb_differences, ptbdb_signals=make_differences(ptbdb_train_X)
for beat in range(len(ptbdb_differences)):
    ptbdb_difCat[ptbdb_train_Y[beat]].extend(ptbdb_differences[beat][:int(ptbdb_signals[beat]-1)])

X=mitbih_train_X

mitbih_difCat=[[],[],[],[],[]]
mitbih_differences, mitbih_signals=make_differences(mitbih_train_X)
for beat in range(len(mitbih_differences)):
    mitbih_difCat[mitbih_train_Y[beat]].extend(mitbih_differences[beat][:int(mitbih_signals[beat]-1)])

<Figure size 432x288 with 0 Axes>

# Training of SVCs
## check out the time it took me
the error happened after the hypertuning of the SVC

In [7]:
parameters = {'kernel':('linear', 'poly', 'rbf', 'sigmoid'), 'C':[0.1, 1, 10], 'class_weight':[None, 'balanced'],'random_state':[36]}
svc = svm.SVC(probability=True)
#ptbdb_clf_normal = RandomizedSearchCV(svc, parameters,n_jobs=-2, verbose=4, random_state=36)
#ptbdb_clf_peaks = RandomizedSearchCV(svc, parameters,n_jobs=-2, verbose=4, random_state=36)
#ptbdb_clf_normal.fit(ptbdb_train_X,ptbdb_train_Y)
#dump(ptbdb_clf_normal, 'ptbdb_clf_normal.joblib') 
#ptbdb_clf_peaks.fit(ptbdb_peaks,ptbdb_train_Y)
#dump(ptbdb_clf_peaks, 'ptbdb_clf_peaks.joblib')
#ptbdb_test_peaks=make_peaks(ptbdb_test_X)
#ptbdb_svc_normal_acc=ptbdb_clf_normal.score(ptbdb_test_X.squeeze(),ptbdb_test_Y)
#ptbdb_svc_peaks_acc=ptbdb_clf_peaks.score(ptbdb_test_peaks,ptbdb_test_Y)

#test_probs_normal=ptbdb_clf_normal.predict_proba(ptbdb_test_X)[:,1]
#test_probs_peaks=ptbdb_clf_peaks.predict_proba(ptbdb_test_peaks)[:,1]
#ptbdb_svc_normal_auroc=roc_auc_score(ptbdb_test_Y, test_probs_normal)
#ptbdb_svc_peaks_auroc = roc_auc_score(ptbdb_test_Y, test_probs_peaks)

#precision, recall, thresh = precision_recall_curve(ptbdb_test_Y,test_probs_normal)
#ptbdb_svc_peaks_auprc=auc(recall,precision)
#precision, recall, thresh = precision_recall_curve(ptbdb_test_Y,test_probs_peaks)
#ptbdb_svc_peaks_auprc=auc(recall,precision)

mitbih_clf_normal=RandomizedSearchCV(svc, parameters,n_jobs=-2, verbose=4, random_state=36)
mitbih_clf_peaks = RandomizedSearchCV(svc, parameters,n_jobs=-2, verbose=4, random_state=36)
mitbih_clf_normal.fit(mitbih_train_X, mitbih_train_Y)
print("mitbih started")
dump(mitbih_clf_normal, 'mitbih_clf_normal.joblib')
mitbih_clf_peaks.fit(mitbih_peaks,mitbih_train_Y)
dump(mitbih_clf_peaks, 'mitbih_clf_peaks.joblib')
mitbih_svc_normal_acc = mitbih_clf_normal.score(mitbih_test_X,mitbih_test_Y)
mitbih_svc_peaks_acc = mitbih_clf_peaks.score(make_peaks(mitbih_test_X),mitbih_test_Y)

[Parallel(n_jobs=-2)]: Using backend LokyBackend with 3 concurrent workers.


Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-2)]: Done  19 tasks      | elapsed: 1726.3min
[Parallel(n_jobs=-2)]: Done  30 out of  30 | elapsed: 2680.9min finished


mitbih started
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-2)]: Using backend LokyBackend with 3 concurrent workers.
[Parallel(n_jobs=-2)]: Done  19 tasks      | elapsed: 400.8min
[Parallel(n_jobs=-2)]: Done  30 out of  30 | elapsed: 540.9min finished


NameError: name 'mitbih_test_X' is not defined