**Importing the Dataset**

In [1]:
import pandas as pd
import numpy as np

# Location of the raw CSV. NOTE(review): '/content/' looks like a Google Colab
# working directory -- confirm and adjust when running elsewhere.
heart_csv_path = '/content/heart.csv'
data = pd.read_csv(heart_csv_path)

**First 5 rows of dataset**

In [2]:
data.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


**Last 5 rows of dataset**

In [3]:
data.tail()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
298,57,0,0,140,241,0,1,123,1,0.2,1,0,3,0
299,45,1,3,110,264,0,1,132,0,1.2,1,0,3,0
300,68,1,0,144,193,1,1,141,0,3.4,1,2,3,0
301,57,1,0,130,131,0,1,115,1,1.2,1,1,3,0
302,57,0,1,130,236,0,0,174,0,0.0,1,1,2,0


**Shape of dataset**

In [4]:
data.shape

(303, 14)

**Checking for null values**

In [5]:
data.isnull().sum()

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64

In [6]:
data['target'].value_counts()

target
1    165
0    138
Name: count, dtype: int64

**Splitting the dataset into features (X) and target (Y)**

In [7]:
# Label vector first, then every remaining column as the feature matrix.
Y = data['target']
X = data.drop(columns=['target'])

In [8]:
X

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,57,0,0,140,241,0,1,123,1,0.2,1,0,3
299,45,1,3,110,264,0,1,132,0,1.2,1,0,3
300,68,1,0,144,193,1,1,141,0,3.4,1,2,3
301,57,1,0,130,131,0,1,115,1,1.2,1,1,3


In [9]:
Y

0      1
1      1
2      1
3      1
4      1
      ..
298    0
299    0
300    0
301    0
302    0
Name: target, Length: 303, dtype: int64

In [10]:
# Rebind X and Y to NumPy arrays built from the current containers.
X, Y = np.asarray(X), np.asarray(Y)

In [11]:
X

array([[63.,  1.,  3., ...,  0.,  0.,  1.],
       [37.,  1.,  2., ...,  0.,  0.,  2.],
       [41.,  0.,  1., ...,  2.,  0.,  2.],
       ...,
       [68.,  1.,  0., ...,  1.,  2.,  3.],
       [57.,  1.,  0., ...,  1.,  1.,  3.],
       [57.,  0.,  1., ...,  1.,  1.,  2.]])

In [12]:
Y

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [13]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from xgboost import XGBClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier

**Candidate models (as a list)**

In [14]:
# Candidate estimators. Their order matters: Model_Selection pairs the i-th
# estimator with the i-th hyperparameter grid by position.
models = [
    LogisticRegression(max_iter=10000),
    SVC(max_iter=10000),
    KNeighborsClassifier(),
    # A random forest is a stochastic learner; without a fixed seed its
    # cross-validation score (and possibly the winning n_estimators) changes
    # on every run, so pin it for reproducible results.
    RandomForestClassifier(random_state=42)
]


**Model hyperparameters**

In [15]:
# Search grid for each candidate estimator, keyed by a short model name.
# Insertion order is significant: it has to line up with the order of `models`.
model_hyperparameters = dict(
    logistic=dict(C=[1, 5, 10, 20, 25]),
    svm=dict(
        kernel=['linear', 'poly', 'rbf', 'sigmoid'],
        C=[1, 5, 10, 20, 25],
    ),
    knn=dict(n_neighbors=[2, 3, 5, 8, 10]),
    rf=dict(n_estimators=[5, 10, 25, 50, 100]),
)


In [16]:
# Grid names in insertion order (the same order in which `models` is listed).
model_keys = [*model_hyperparameters]
print(model_keys)

['logistic', 'svm', 'knn', 'rf']


In [17]:
from sklearn.model_selection import GridSearchCV, cross_val_score

In [18]:
def Model_Selection(models, model_hyperparameters, features=None, labels=None, cv=5):
  """Grid-search each model and tabulate its best cross-validated result.

  Estimators and grids are paired by position: the i-th item of ``models`` is
  tuned with the i-th value (in insertion order) of ``model_hyperparameters``.

  Parameters
  ----------
  models : iterable of estimators
      Unfitted estimators to tune.
  model_hyperparameters : dict
      Maps a model name to the parameter grid passed to ``GridSearchCV``.
      Must hold at least as many grids as there are models.
  features, labels : array-like, optional
      Training data. When omitted, the notebook-level ``X`` and ``Y`` are used.
  cv : int or CV splitter, default 5
      Forwarded to ``GridSearchCV``.

  Returns
  -------
  pandas.DataFrame
      One row per model with the columns 'Model used', 'Highest score' and
      'Best Hyperparameters'.

  Raises
  ------
  IndexError
      If there are more models than hyperparameter grids.
  """
  models = list(models)
  # Take the grids from the argument itself rather than from a notebook-level
  # key list, so the function works with any mapping it is handed.
  param_grids = list(model_hyperparameters.values())
  if len(models) > len(param_grids):
    raise IndexError(
        f"{len(models)} models were given but only {len(param_grids)} "
        "hyperparameter grids; every model needs a grid"
    )

  # Fall back to the notebook-level data only when the caller supplied none.
  if features is None:
    features = X
  if labels is None:
    labels = Y

  result = []
  # zip stops at the shorter sequence, so surplus grids are simply ignored.
  for model, params in zip(models, param_grids):
    print(model)
    print(params)
    print("-----_____-----_____-----_____-----_____-----_____")

    classifier = GridSearchCV(model, params, cv=cv)
    classifier.fit(features, labels)

    result.append({
        'Model used' : model,
        'Highest score' : classifier.best_score_,
        'Best Hyperparameters' : classifier.best_params_
    })

  result_dataframe = pd.DataFrame(result, columns = ['Model used', 'Highest score', 'Best Hyperparameters'])
  return result_dataframe


In [19]:
Model_Selection(models, model_hyperparameters)

LogisticRegression(max_iter=10000)
{'C': [1, 5, 10, 20, 25]}
-----_____-----_____-----_____-----_____-----_____
SVC(max_iter=10000)
{'kernel': ['linear', 'poly', 'rbf', 'sigmoid'], 'C': [1, 5, 10, 20, 25]}
-----_____-----_____-----_____-----_____-----_____




KNeighborsClassifier()
{'n_neighbors': [2, 3, 5, 8, 10]}
-----_____-----_____-----_____-----_____-----_____
RandomForestClassifier()
{'n_estimators': [5, 10, 25, 50, 100]}
-----_____-----_____-----_____-----_____-----_____


Unnamed: 0,Model used,Highest score,Best Hyperparameters
0,LogisticRegression(max_iter=10000),0.831585,{'C': 5}
1,SVC(max_iter=10000),0.778415,"{'C': 1, 'kernel': 'linear'}"
2,KNeighborsClassifier(),0.64388,{'n_neighbors': 5}
3,RandomForestClassifier(),0.824863,{'n_estimators': 50}
