In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

In [3]:
dataframe = pd.read_csv('heart.csv')

In [4]:
dataframe

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,57,0,0,140,241,0,1,123,1,0.2,1,0,3,0
299,45,1,3,110,264,0,1,132,0,1.2,1,0,3,0
300,68,1,0,144,193,1,1,141,0,3.4,1,2,3,0
301,57,1,0,130,131,0,1,115,1,1.2,1,1,3,0


In [5]:
dataframe.isnull().sum()

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64

In [6]:
dataframe['target'].value_counts()

target
1    165
0    138
Name: count, dtype: int64

In [7]:
X = dataframe.drop(columns = 'target', axis=1)
Y = dataframe['target']

In [8]:
X = np.asarray(X)
Y = np.asarray(Y)

In [9]:
# checking models with default hyperparameters
models = [LogisticRegression(max_iter=1000), SVC(kernel='linear'), KNeighborsClassifier(),RandomForestClassifier()]

In [10]:
def compare_models_cv():

    for model in models:

        cv_score = cross_val_score(model, X, Y, cv=2)
        mean_accuracy = sum(cv_score)/len(cv_score)
        mean_accuracy = mean_accuracy*100
        mean_accuracy = round(mean_accuracy, 2)

        print("Cross Validation Score of ", model, "=", cv_score)
        print("Mean accuracy of ",model, "=", mean_accuracy)
        print("---------------------------------------------------")

In [11]:
compare_models_cv()

Cross Validation Score of  LogisticRegression(max_iter=1000) = [0.86184211 0.81456954]
Mean accuracy of  LogisticRegression(max_iter=1000) = 83.82
---------------------------------------------------
Cross Validation Score of  SVC(kernel='linear') = [0.86184211 0.79470199]
Mean accuracy of  SVC(kernel='linear') = 82.83
---------------------------------------------------
Cross Validation Score of  KNeighborsClassifier() = [0.66447368 0.66225166]
Mean accuracy of  KNeighborsClassifier() = 66.34
---------------------------------------------------
Cross Validation Score of  RandomForestClassifier() = [0.84210526 0.79470199]
Mean accuracy of  RandomForestClassifier() = 81.84
---------------------------------------------------


In [12]:
models_list = [LogisticRegression(max_iter=10000), SVC(), KNeighborsClassifier(),RandomForestClassifier(random_state=0)]

In [98]:
model_hypertparameter = {
    
    'Logistic_Regression_hyperparameter':{
        
        'C':[1,5,10,20]
    },
    
    'SVC_hyperparameter':{
        
        'kernel':['linear','poly','rbf','sigmoid'],
        'C':[1,5,10,20]
    },
    
    'K_Neibghbors_hyperparameter':{
        
        'n_neighbors':[3,5,10]
    },
    
    'Random_forest_hyperparameter':{
        
        'n_estimators':[10,20,50,100]
    }
}

In [100]:
type(model_hypertparameter)

dict

In [102]:
model_hypertparameter.keys()

dict_keys(['Logistic_Regression_hyperparameter', 'SVC_hyperparameter', 'K_Neibghbors_hyperparameter', 'Random_forest_hyperparameter'])

In [104]:
model_hypertparameter['Logistic_Regression_hyperparameter']

{'C': [1, 5, 10, 20]}

In [106]:
model_keys = list(model_hypertparameter.keys())

In [108]:
model_keys[0]

'Logistic_Regression_hyperparameter'

In [110]:
model_hypertparameter[model_keys[0]]

{'C': [1, 5, 10, 20]}

In [116]:
def ModelSelection(list_models, hyperparameter_dictionary):
    result = []
    i = 0
    for model in list_models:
        key = model_keys[i]
        params = hyperparameter_dictionary[key]
        i+=1

        print(model)
        print(params)
        print("--------------------------------------------")

        classifier = GridSearchCV(model, params, cv=5)
        
        classifier.fit(X,Y)
        result.append({
            'model used': model,
            'highest score': classifier.best_score_,
            'best hyperparameters' : classifier.best_params_
        })
    result_dataframe = pd.DataFrame(result, columns=['model used', 'highest score', 'best hyperparameters'])
    return result_dataframe

In [118]:
ModelSelection(models_list, model_hypertparameter)

LogisticRegression(max_iter=10000)
{'C': [1, 5, 10, 20]}
--------------------------------------------
SVC()
{'kernel': ['linear', 'poly', 'rbf', 'sigmoid'], 'C': [1, 5, 10, 20]}
--------------------------------------------
KNeighborsClassifier()
{'n_neighbors': [3, 5, 10]}
--------------------------------------------
RandomForestClassifier(random_state=0)
{'n_estimators': [10, 20, 50, 100]}
--------------------------------------------


Unnamed: 0,model used,highest score,best hyperparameters
0,LogisticRegression(max_iter=10000),0.831585,{'C': 5}
1,SVC(),0.828306,"{'C': 1, 'kernel': 'linear'}"
2,KNeighborsClassifier(),0.64388,{'n_neighbors': 5}
3,RandomForestClassifier(random_state=0),0.841421,{'n_estimators': 100}
