# Grid Search CV with Python

In [2]:
import pandas as pd

from sklearn.ensemble import RandomForestClassifier

from sklearn.model_selection import train_test_split

from sklearn.model_selection import GridSearchCV

from sklearn import metrics

import warnings
from sklearn import svm

warnings.filterwarnings('ignore')
from sklearn.linear_model import LogisticRegression

In [18]:
# Read the dataset from the working directory and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='heart.csv')
df.head(n=5)

Unnamed: 0,age,sex,chest pain type,resting blood pressure,serum cholestoral in mg/dl,fasting blood sugar,resting electrocardiographic results,maximum heart rate achieved,exercise induced angina,ST depression induced by exercise relative to rest,the slope of the peak exercise ST segment,number of major vessels (0-3) colored by flourosopy,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0


In [19]:
# Separate the label column from the predictor columns.
y = df['target']
X = df.drop(columns='target')

In [20]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [21]:
# Shallow baseline forest. random_state is pinned so that its score, and the
# grid search that later reuses this estimator, give the same result on every
# run instead of drifting with the forest's internal randomness.
rfc = RandomForestClassifier(max_depth=3, max_features=5, random_state=42)

In [22]:
# Train the baseline forest on the training split.
rfc.fit(X=X_train, y=y_train)

RandomForestClassifier(max_depth=3, max_features=5)

In [23]:
# Score the baseline forest on the held-out split.
rfc.score(X=X_test, y=y_test)

0.8181818181818182

In [24]:
# Logistic-regression baseline with default settings, scored on the held-out split.
logi = LogisticRegression().fit(X_train, y_train)
logi.score(X_test, y_test)

0.8116883116883117

In [25]:
# Linear-kernel SVM baseline (C=3), scored on the held-out split.
svc = svm.SVC(C=3, kernel='linear', gamma='auto').fit(X_train, y_train)
svc.score(X_test, y_test)

0.8116883116883117

In [26]:
# Search space for the random forest.
# max_features starts at 1: a value of 0 is not a valid feature count, so any
# candidate using it could never be fitted and would only waste CV folds.
forest_params = [{
    'max_depth': list(range(10, 15)),
    'max_features': list(range(1, 5)),
}]

In [27]:
# 10-fold cross-validated search over forest_params, ranked by accuracy.
clf = GridSearchCV(
    estimator=rfc,
    param_grid=forest_params,
    scoring='accuracy',
    cv=10,
)

In [28]:
# Run the search on the training split, then report the winning combination.
clf.fit(X=X_train, y=y_train)
print(clf.best_params_)

{'max_depth': 10, 'max_features': 3}


In [29]:
# Build the tuned forest from whatever the grid search actually selected,
# rather than hand-copying numbers from a printed output that can change from
# run to run. random_state is pinned so the resulting score is repeatable.
rfc1 = RandomForestClassifier(**clf.best_params_, random_state=42)

In [30]:
# Train the tuned forest on the training split.
rfc1.fit(X=X_train, y=y_train)

RandomForestClassifier(max_depth=10, max_features=3)

In [31]:
# Score the tuned forest on the held-out split.
rfc1.score(X=X_test, y=y_test)

0.9902597402597403

In [32]:
# Candidate estimators and, for each, the hyper-parameter grid to search.
# Each entry maps a display name to {'model': estimator, 'params': grid}.
model_params = {
    'svm': {
        'model': svm.SVC(gamma='auto'),
        'params': {
            'C': [1, 10, 20],
            'kernel': ['rbf', 'linear'],
        },
    },
    'random_forest': {
        # Seeded so the cross-validated scores are repeatable between runs.
        'model': RandomForestClassifier(random_state=42),
        'params': {
            'max_depth': list(range(10, 15)),
            # Starts at 1: 0 is not a valid feature count, so candidates using
            # it could never be fitted.
            'max_features': list(range(1, 5)),
        },
    },
    'logistic_regression': {
        # multi_class is left at its default; passing it explicitly is
        # deprecated in recent scikit-learn releases.
        'model': LogisticRegression(solver='liblinear'),
        'params': {
            'C': [1, 5, 10],
        },
    },
}

In [33]:
# Grid-search every entry of model_params with 5-fold CV and record the best
# cross-validated score and parameter combination found for each model.
scores = []
for model_name, mp in model_params.items():
    # A loop-local name is used so the earlier forest search held in `clf`
    # is not replaced by the last model searched here.
    search = GridSearchCV(mp['model'], mp['params'], cv=5, return_train_score=False)
    search.fit(X_train, y_train)
    scores.append({
        'model': model_name,
        'best_score': search.best_score_,
        'best_params': search.best_params_,
    })

# Stored under its own name so the dataset held in `df` is not overwritten;
# rebinding `df` would break any earlier cell that is re-run afterwards.
scores_df = pd.DataFrame(scores, columns=['model', 'best_score', 'best_params'])
scores_df

Unnamed: 0,model,best_score,best_params
0,svm,0.938646,"{'C': 1, 'kernel': 'rbf'}"
1,random_forest,0.973475,"{'max_depth': 11, 'max_features': 1}"
2,logistic_regression,0.849378,{'C': 5}
