In [16]:
from imblearn.ensemble import BalancedRandomForestClassifier
from sklearn.model_selection import StratifiedKFold, GridSearchCV, cross_val_score, train_test_split, RandomizedSearchCV
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline

import numpy as np
import os

from scripts.preprocessing import DWTFeatureExtractor

In [2]:
random_state = 420
dataset = np.load('data/database_1_fft.npz')
ecg_raw = dataset['ecg']
y = dataset['label']
X = DWTFeatureExtractor().transform(ecg_raw)

In [4]:
X_train, X_test, y_train, y_test = train_test_split(ecg_raw, y,
                                                    test_size=0.2,
                                                    stratify=y,
                                                    random_state=random_state)

In [None]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state)

pipeline = Pipeline([
    ('classifier', BalancedRandomForestClassifier(n_estimators=200,
                                                  verbose=1,
                                                  class_weight='balanced',
                                                  n_jobs=os.cpu_count() // 2,
                                                  random_state=random_state)),
])

In [11]:
cv_score = cross_val_score(pipeline, X, y, cv=skf,
                           scoring='balanced_accuracy',
                           n_jobs=os.cpu_count() // 2)
pipeline.fit(X_train, y_train)

[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:   15.2s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:  1.2min
[Parallel(n_jobs=8)]: Done 200 out of 200 | elapsed:  1.3min finished


In [12]:
y_pred = pipeline.predict(X_test)
print(classification_report(y_test, y_pred))

[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s


              precision    recall  f1-score   support

           0       0.54      0.10      0.17      1182
           1       0.88      0.99      0.93      7849

    accuracy                           0.87      9031
   macro avg       0.71      0.54      0.55      9031
weighted avg       0.83      0.87      0.83      9031



[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.2s
[Parallel(n_jobs=8)]: Done 200 out of 200 | elapsed:    0.2s finished


In [None]:
param_grid = {
    'classifier__n_estimators': [100, 150, 200, 350],
    'classifier__max_features': ['sqrt', 'log2']
}

grid_search = GridSearchCV(estimator=pipeline,
                           param_grid=param_grid,
                           cv=skf,
                           scoring='balanced_accuracy',
                           verbose=2)
grid_search.fit(X_train, y_train)

In [24]:
print(classification_report(y_test, grid_search.best_estimator_.predict(X_test)))
print(f'Best parameters: {grid_search.best_params_}')

[Parallel(n_jobs=6)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:    0.0s


              precision    recall  f1-score   support

           0       0.18      0.24      0.21      1182
           1       0.88      0.83      0.86      7849

    accuracy                           0.76      9031
   macro avg       0.53      0.54      0.53      9031
weighted avg       0.79      0.76      0.77      9031

Best parameters: {'classifier__max_features': 'sqrt', 'classifier__n_estimators': 100}


[Parallel(n_jobs=6)]: Done 100 out of 100 | elapsed:    0.3s finished


In [28]:
be = grid_search.best_estimator_
be.fit(X_train, y_train)

[Parallel(n_jobs=6)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:  1.3min
[Parallel(n_jobs=6)]: Done 100 out of 100 | elapsed:  3.2min finished


In [30]:
yp = be.predict(X_test)
print(classification_report(y_test, yp))

[Parallel(n_jobs=6)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:    0.1s


              precision    recall  f1-score   support

           0       0.18      0.24      0.21      1182
           1       0.88      0.83      0.86      7849

    accuracy                           0.76      9031
   macro avg       0.53      0.54      0.53      9031
weighted avg       0.79      0.76      0.77      9031



[Parallel(n_jobs=6)]: Done 100 out of 100 | elapsed:    0.4s finished
