In [57]:
from sklearn.linear_model import Perceptron
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.model_selection import cross_val_predict, cross_val_score, cross_validate
import sys
sys.path.append("..")
from general.utils import *

Define a grid-search function for tuning the Perceptron hyperparameters.

In [58]:
def grid_search_params(data_path="./data/train1_icu_data.csv",
                       label_path="./data/train1_icu_label.csv",
                       n_jobs=12, random_state=0):
    """Grid-search Perceptron hyperparameters with 3-fold stratified CV, scored by ROC AUC.

    The search space covers the same values as before (max_iter 20..200 in
    steps of 10, four penalties, three alphas, four l1_ratio values), but it
    is split into sub-grids so that parameters a penalty ignores are not
    crossed with it: ``alpha`` is only varied when a penalty is set and
    ``l1_ratio`` only for ``penalty='elasticnet'``.

    Parameters
    ----------
    data_path, label_path : str
        CSV paths handed to ``get_samples`` (called with ``ret_raw=True``);
        the last two values it returns are used as features and labels.
    n_jobs : int
        Number of parallel jobs for ``GridSearchCV``.
    random_state : int or None
        Seed for both the Perceptron's per-epoch shuffling and the
        ``StratifiedKFold`` split, so repeated runs select the same model.
        Pass ``None`` to get the previous unseeded behaviour.

    Returns
    -------
    GridSearchCV
        The fitted search; ``best_estimator_`` / ``best_params_`` can be used
        directly instead of copying values from the printed output.

    Notes
    -----
    An earlier (unseeded) run reported as best estimator:
    Perceptron(alpha=0.01, l1_ratio=0.0, max_iter=40, penalty='l1')
    """
    max_iters = list(range(20, 201, 10))
    alphas = [0.0001, 0.01, 0.]
    parameters = [
        # No regularisation: alpha and l1_ratio have no effect.
        {'penalty': [None], 'max_iter': max_iters},
        # Pure L2 / L1: l1_ratio is only used by elasticnet.
        {'penalty': ['l2', 'l1'], 'alpha': alphas, 'max_iter': max_iters},
        {'penalty': ['elasticnet'], 'alpha': alphas,
         'l1_ratio': [0.0, 0.05, 0.15, 0.25], 'max_iter': max_iters},
    ]
    clf = Perceptron(fit_intercept=True, shuffle=True, random_state=random_state)
    cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=random_state)
    gridsearch = GridSearchCV(clf, parameters, n_jobs=n_jobs, cv=cv,
                              scoring='roc_auc', verbose=2, refit=True)

    _, _, train_set, train_label = get_samples(data_path, label_path, ret_raw=True)
    gridsearch.fit(train_set, train_label.ravel())

    print(gridsearch.best_estimator_)
    return gridsearch

Instantiate the Perceptron with the chosen hyperparameters and load the training and test sets of dataset 1.

In [59]:
# grid_search_params()
# The hyperparameters below come from a (rough) GridSearchCV run of grid_search_params().
# NOTE(review): that run reported max_iter=40, while max_iter=100 is used here -- confirm which is intended.

# With shuffle=True the Perceptron reshuffles the training data after each epoch, so
# without a fixed random_state the fitted weights -- and every metric reported below --
# change from run to run. Seed it so Restart & Run All reproduces the same numbers.
SEED = 0

clf = Perceptron(fit_intercept=True, shuffle=True, max_iter=100, l1_ratio=0.0,
                 alpha=0.01, penalty='l1', random_state=SEED)

# Dataset 1: only the last two values returned by get_samples(..., ret_raw=True) are used.
_, _, train_set, train_label = get_samples("./data/train1_icu_data.csv", "./data/train1_icu_label.csv", ret_raw=True)
_, _, raw_test, test_labels = get_samples("./data/test1_icu_data.csv", "./data/test1_icu_label.csv", ret_raw=True)

In [60]:
# Fit on the training split (labels flattened with ravel()) and predict on it
# right away; fit() returns the estimator itself, so the calls can be chained.
train_pred = clf.fit(train_set, train_label.ravel()).predict(train_set)

# Predictions on the held-out test split, made with the same fitted model.
test_pred = clf.predict(raw_test)

After fitting and testing the model directly, we run 5-fold cross-validation on the training set.

In [61]:
# 5-fold cross-validation on the training split. No `scoring` is passed, so
# the estimator's default scorer is used; cross_val_score fits clones of clf,
# leaving the model fitted above untouched.
cv_score = cross_val_score(
    estimator=clf,
    X=train_set,
    y=train_label.ravel(),
    cv=5,
)

In [62]:
# Accuracy on each split; the error rate is its complement.
train_set_acc = acc_calculate(train_pred, train_label)
test_set_acc = acc_calculate(test_pred, test_labels)
train_set_err = 1 - train_set_acc
test_set_err = 1 - test_set_acc

# Summary for dataset 1, including the per-fold cross-validation scores.
print("From dataset 1:")
print("Train set accuracy: %f, train set error rate: %f" % (train_set_acc, train_set_err))
print("Test set accuracy: %f, test set error rate: %f" % (test_set_acc, test_set_err))
print("Cross validation score: ", cv_score)

From dataset 1:
Train set accuracy: 0.716800, train set error rate: 0.283200
Test set accuracy: 0.727438, test set error rate: 0.272562
Cross validation score:  [0.699 0.744 0.711 0.609 0.645]


> Train set accuracy: 0.730600, train set error rate: 0.269400 
> Test set accuracy: 0.714676, test set error rate: 0.285324

In [63]:
# Dataset 2: rebind the train/test variables used by the evaluation cells.
# Only the last two values returned by get_samples(..., ret_raw=True) are kept.
_, _, train_set, train_label = get_samples(
    "./data/train2_icu_data.csv",
    "./data/train2_icu_label.csv",
    ret_raw=True,
)
_, _, raw_test, test_labels = get_samples(
    "./data/test2_icu_data.csv",
    "./data/test2_icu_label.csv",
    ret_raw=True,
)

In [64]:
# Refit the same classifier configuration on dataset 2 and predict on both
# splits; fit() returns the estimator itself, so the first call is chained.
train_pred = clf.fit(train_set, train_label.ravel()).predict(train_set)
test_pred = clf.predict(raw_test)

# 5-fold cross-validation on the dataset 2 training split (default scorer).
cv_score = cross_val_score(
    estimator=clf,
    X=train_set,
    y=train_label.ravel(),
    cv=5,
)

# Accuracy on each split; the error rate is its complement.
train_set_acc = acc_calculate(train_pred, train_label)
test_set_acc = acc_calculate(test_pred, test_labels)
train_set_err = 1 - train_set_acc
test_set_err = 1 - test_set_acc

print("From dataset 2:")
print("Train set accuracy: %f, train set error rate: %f" % (train_set_acc, train_set_err))
print("Test set accuracy: %f, test set error rate: %f" % (test_set_acc, test_set_err))
print("Cross validation score: ", cv_score)

From dataset 2:
Train set accuracy: 0.842712, train set error rate: 0.157288
Test set accuracy: 0.784444, test set error rate: 0.215556
Cross validation score:  [0.84067797 0.81355932 0.74915254 0.79661017 0.80338983]
