In [58]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

In [59]:
df = pd.read_csv('balanced_dataset.csv')
df.drop('id', axis = 1, inplace=True)

In [60]:
y = df.pop('stroke')
X = df

In [61]:
scl = StandardScaler()
rf_pipe = Pipeline(steps=[('scaler',scl), ('RandomForest',RandomForestClassifier())], verbose=True)
nb_pipe = Pipeline(steps=[('scaler',scl), ('NaiveBayes',GaussianNB())], verbose=True)
dt_pipe = Pipeline(steps=[('scaler',scl), ('DescisionTree',DecisionTreeClassifier())], verbose=True)
nn_pipe = Pipeline(steps=[('scaler',scl), ('KNN',KNeighborsClassifier())], verbose=True)
lr_pipe = Pipeline(steps=[('scaler',scl), ('LogReg',LogisticRegression())], verbose=True)
pipes = [rf_pipe, nb_pipe, dt_pipe, nn_pipe, lr_pipe]

In [62]:
scores = []
for pipe in pipes:
    print(pipe.named_steps)
    scores.append((pipe.named_steps,cross_val_score(pipe, X, y, cv=5, scoring='precision'),cross_val_score(pipe, X, y, cv=5, scoring='recall'),cross_val_score(pipe, X, y, cv=5, scoring='f1')))

{'scaler': StandardScaler(), 'RandomForest': RandomForestClassifier()}
[Pipeline] ............ (step 1 of 2) Processing scaler, total=   0.0s
[Pipeline] ...... (step 2 of 2) Processing RandomForest, total=   0.4s
[Pipeline] ............ (step 1 of 2) Processing scaler, total=   0.0s
[Pipeline] ...... (step 2 of 2) Processing RandomForest, total=   0.4s
[Pipeline] ............ (step 1 of 2) Processing scaler, total=   0.0s
[Pipeline] ...... (step 2 of 2) Processing RandomForest, total=   0.4s
[Pipeline] ............ (step 1 of 2) Processing scaler, total=   0.0s
[Pipeline] ...... (step 2 of 2) Processing RandomForest, total=   0.4s
[Pipeline] ............ (step 1 of 2) Processing scaler, total=   0.0s
[Pipeline] ...... (step 2 of 2) Processing RandomForest, total=   0.4s
[Pipeline] ............ (step 1 of 2) Processing scaler, total=   0.0s
[Pipeline] ...... (step 2 of 2) Processing RandomForest, total=   0.4s
[Pipeline] ............ (step 1 of 2) Processing scaler, total=   0.0s
[Pipel

In [65]:
for score in scores:
    print(f'Pipeline: {score[0]}')
    print(f'Precision: {score[1]}')
    print(f'Recall: {score[2]}')
    print(f'F1-Score: {score[3]}\n')

Pipeline: {'scaler': StandardScaler(), 'RandomForest': RandomForestClassifier()}
Precision: [0.98183653 0.98380567 0.97590361 0.98480243 0.98580122]
Recall: [1. 1. 1. 1. 1.]
F1-Score: [0.98982706 0.99183673 0.98680203 0.99335718 0.99284985]

Pipeline: {'scaler': StandardScaler(), 'NaiveBayes': GaussianNB()}
Precision: [0.75712881 0.74246842 0.7400582  0.74003984 0.75435005]
Recall: [0.79136691 0.78600823 0.78497942 0.76440329 0.75823045]
F1-Score: [0.77386935 0.76361819 0.76185721 0.75202429 0.75628527]

Pipeline: {'scaler': StandardScaler(), 'DescisionTree': DecisionTreeClassifier()}
Precision: [0.94282946 0.95481336 0.94277401 0.95387635 0.94186047]
Recall: [1. 1. 1. 1. 1.]
F1-Score: [0.97348674 0.97639377 0.97297297 0.97541395 0.96909272]

Pipeline: {'scaler': StandardScaler(), 'KNN': KNeighborsClassifier()}
Precision: [0.87108326 0.870188   0.86708296 0.87096774 0.87174888]
Recall: [1. 1. 1. 1. 1.]
F1-Score: [0.93110048 0.9305888  0.92881032 0.93103448 0.93148059]

Pipeline: {'scal