In [1]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

In [2]:
# Read heart.csv into a DataFrame.
# NOTE(review): absolute path; '/content/' looks like a Colab working directory — adjust when running elsewhere.
data = pd.read_csv(filepath_or_buffer='/content/heart.csv')

In [3]:
# Preview the first five rows to sanity-check the column layout.
data.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [4]:
# Column dtypes and non-null counts (used to check for missing values).
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       303 non-null    int64  
 1   sex       303 non-null    int64  
 2   cp        303 non-null    int64  
 3   trestbps  303 non-null    int64  
 4   chol      303 non-null    int64  
 5   fbs       303 non-null    int64  
 6   restecg   303 non-null    int64  
 7   thalach   303 non-null    int64  
 8   exang     303 non-null    int64  
 9   oldpeak   303 non-null    float64
 10  slope     303 non-null    int64  
 11  ca        303 non-null    int64  
 12  thal      303 non-null    int64  
 13  target    303 non-null    int64  
dtypes: float64(1), int64(13)
memory usage: 33.3 KB


In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for every column.
data.describe()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
count,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0
mean,54.366337,0.683168,0.966997,131.623762,246.264026,0.148515,0.528053,149.646865,0.326733,1.039604,1.39934,0.729373,2.313531,0.544554
std,9.082101,0.466011,1.032052,17.538143,51.830751,0.356198,0.52586,22.905161,0.469794,1.161075,0.616226,1.022606,0.612277,0.498835
min,29.0,0.0,0.0,94.0,126.0,0.0,0.0,71.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,47.5,0.0,0.0,120.0,211.0,0.0,0.0,133.5,0.0,0.0,1.0,0.0,2.0,0.0
50%,55.0,1.0,1.0,130.0,240.0,0.0,1.0,153.0,0.0,0.8,1.0,0.0,2.0,1.0
75%,61.0,1.0,2.0,140.0,274.5,0.0,1.0,166.0,1.0,1.6,2.0,1.0,3.0,1.0
max,77.0,1.0,3.0,200.0,564.0,1.0,2.0,202.0,1.0,6.2,2.0,4.0,3.0,1.0


In [6]:
# Separate the label column from the predictor columns.
y = data['target']
X = data.drop(columns=['target'])

In [7]:
# Hand plain NumPy arrays (rather than pandas objects) to the estimators below.
X, y = (np.asarray(part) for part in (X, y))

In [8]:
# Baseline estimators, compared with (mostly) default hyperparameters.
# NOTE(review): only the random forest is seeded; the decision tree has no
# random_state, so its score may differ slightly between runs — confirm.
models = [
    RandomForestClassifier(random_state=0),
    DecisionTreeClassifier(),
    KNeighborsClassifier(),
    LogisticRegression(max_iter=10000),
    SVC(kernel='linear'),
]

In [9]:
def cross(estimators=None, cv=5):
    """Print the mean cross-validation score of each estimator.

    Parameters
    ----------
    estimators : iterable of estimators, optional
        Models to evaluate. Defaults to the notebook-level ``models`` list,
        which is what the original zero-argument call evaluated.
    cv : int, default 5
        Value forwarded to ``cross_val_score`` as ``cv``.

    Returns
    -------
    None
        Results are printed, one line per estimator.

    Notes
    -----
    Reads the notebook-level ``X`` and ``y`` arrays.
    """
    if estimators is None:
        estimators = models
    for model in estimators:
        # cross_val_score fits its own clones, so this fit does not influence
        # the reported score; it is kept so every estimator is left fitted on
        # the full data after the call, exactly as before.
        model.fit(X, y)
        mean_score = cross_val_score(model, X, y, cv=cv).mean()
        print('the model is ', model, '=', mean_score)

In [10]:
# Print the mean cross-validation score of every baseline model.
cross()

the model is  RandomForestClassifier(random_state=0) = 0.838087431693989
the model is  DecisionTreeClassifier() = 0.7687431693989071
the model is  KNeighborsClassifier() = 0.643879781420765
the model is  LogisticRegression(max_iter=10000) = 0.8282513661202187
the model is  SVC(kernel='linear') = 0.8283060109289618


In [11]:
# Separate estimator instances (same constructors as `models`) used as
# templates for the grid search; their order must match the grids in `hyper_para`.
models3 = [
    RandomForestClassifier(random_state=0),
    DecisionTreeClassifier(),
    KNeighborsClassifier(),
    LogisticRegression(max_iter=10000),
    SVC(kernel='linear'),
]

In [12]:
# Candidate hyperparameter values, one grid per estimator.
# Insertion order matters: grids are matched to the estimators by position.
hyper_para = {
    'forest': {
        'n_estimators': [10, 100, 1000],
        'max_depth': list(range(1, 11)),
    },
    'tree': {
        'max_depth': list(range(1, 11)),
    },
    'knn': {
        'n_neighbors': list(range(1, 11)),
    },
    'logis': {
        'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
    },
    'svc': {
        'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
        'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    },
}

In [13]:
# Grid names in dictionary (insertion) order.
keyss = [grid_name for grid_name in hyper_para]

In [14]:
# Confirm the order of the grid names (it must line up with the order of `models3`).
print(keyss)

['forest', 'tree', 'knn', 'logis', 'svc']


In [15]:
def model_selec(models3,hyper_para):
    """Grid-search every estimator and summarise the best result of each.

    Estimators and grids are paired by position: the first estimator is
    searched over the first grid in ``hyper_para`` (insertion order), and so
    on. The pairing uses the dictionary that was passed in, so the function
    no longer depends on a separately maintained list of key names.

    Parameters
    ----------
    models3 : iterable of estimators
        Estimators to tune.
    hyper_para : dict
        Maps an arbitrary name to a parameter grid accepted by
        ``GridSearchCV``. Extra trailing grids are ignored.

    Returns
    -------
    pandas.DataFrame
        One row per estimator with the columns ``model`` (the estimator
        object as passed in), ``best_score`` and ``best_params``.

    Raises
    ------
    IndexError
        If there are more estimators than parameter grids.

    Notes
    -----
    Reads the notebook-level ``X`` and ``y`` arrays and prints each
    estimator and its grid before searching.
    """
    estimators = list(models3)
    param_grids = list(hyper_para.values())
    if len(estimators) > len(param_grids):
        raise IndexError(
            'got %d models but only %d parameter grids'
            % (len(estimators), len(param_grids))
        )

    result = []
    # zip stops at the last estimator, so surplus grids are simply unused.
    for model, params in zip(estimators, param_grids):
        print(model)
        print(params)
        print('.............')
        gs = GridSearchCV(model, params, cv=5)
        gs.fit(X, y)
        result.append({
            'model': model,
            'best_score': gs.best_score_,
            'best_params': gs.best_params_
        })
    return pd.DataFrame(result, columns=['model', 'best_score', 'best_params'])

In [16]:
# Run the grid search for every estimator and display the summary table.
model_selec(models3,hyper_para)

RandomForestClassifier(random_state=0)
{'n_estimators': [10, 100, 1000], 'max_depth': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}
.............
DecisionTreeClassifier()
{'max_depth': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}
.............
KNeighborsClassifier()
{'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}
.............
LogisticRegression(max_iter=10000)
{'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000]}
.............
SVC(kernel='linear')
{'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000], 'kernel': ['linear', 'poly', 'rbf', 'sigmoid']}
.............


Unnamed: 0,model,best_score,best_params
0,RandomForestClassifier(random_state=0),0.848142,"{'max_depth': 1, 'n_estimators': 1000}"
1,DecisionTreeClassifier(),0.805137,{'max_depth': 3}
2,KNeighborsClassifier(),0.64388,{'n_neighbors': 5}
3,LogisticRegression(max_iter=10000),0.831475,{'C': 0.1}
4,SVC(kernel='linear'),0.828306,"{'C': 1, 'kernel': 'linear'}"
