# Grid Search CV

In [1]:
import pandas as pd
import numpy as np

In [2]:
import seaborn as sns

In [3]:
# Load seaborn's bundled "titanic" example dataset into a DataFrame.
df = sns.load_dataset("titanic")

# Parameters
## model.get_params()


In [5]:
from sklearn.linear_model import LinearRegression

# Instantiate with default settings so its parameters can be listed below.
model = LinearRegression()

In [7]:
# Show the estimator's parameters and their current values as a dict.
model.get_params()

{'copy_X': True,
 'fit_intercept': True,
 'n_jobs': None,
 'normalize': False,
 'positive': False}

In [8]:
from sklearn.ensemble import RandomForestClassifier

# Rebind `model` to a default random forest to compare its parameter set.
model = RandomForestClassifier()

In [10]:
# Show the random forest's parameters and their current values as a dict.
model.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

# Grid Search CV
## Method to select the best hyperparameters for a model using cross-validation

In [12]:
from sklearn.model_selection import GridSearchCV

In [13]:
# List the available columns before choosing features and target.
df.columns

Index(['survived', 'pclass', 'sex', 'age', 'sibsp', 'parch', 'fare',
       'embarked', 'class', 'who', 'adult_male', 'deck', 'embark_town',
       'alive', 'alone'],
      dtype='object')

In [15]:
# Feature matrix: a subset of the passenger attributes.
X = df[
    ['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare']
]
# Target vector: the survival label.
y = df["survived"]

In [17]:
# One-hot encode the categorical "sex" column; it is replaced by the
# indicator columns sex_female and sex_male.
X = pd.get_dummies(X, columns=["sex"])
X

Unnamed: 0,pclass,age,sibsp,parch,fare,sex_female,sex_male
0,3,22.0,1,0,7.2500,0,1
1,1,38.0,1,0,71.2833,1,0
2,3,26.0,0,0,7.9250,1,0
3,1,35.0,1,0,53.1000,1,0
4,3,35.0,0,0,8.0500,0,1
...,...,...,...,...,...,...,...
886,2,27.0,0,0,13.0000,0,1
887,1,19.0,0,0,30.0000,1,0
888,3,,1,2,23.4500,1,0
889,1,26.0,0,0,30.0000,0,1


In [18]:
# Confirm the feature columns after one-hot encoding.
X.columns

Index(['pclass', 'age', 'sibsp', 'parch', 'fare', 'sex_female', 'sex_male'], dtype='object')

In [20]:
# Impute missing ages with the column mean.
# The filled column is assigned back to X instead of calling
# fillna(..., inplace=True) on `X.age`: that form is an in-place update on a
# Series pulled out of the frame (chained assignment), which recent pandas
# warns about and which does not modify X under Copy-on-Write.
# NOTE(review): the mean is taken over all rows, i.e. before the
# cross-validation splits made later by GridSearchCV; consider imputing
# inside a Pipeline to avoid leaking fold information.
X["age"] = X["age"].fillna(X["age"].mean())

In [21]:
# Count remaining missing values per feature column.
X.isnull().sum()

pclass        0
age           0
sibsp         0
parch         0
fare          0
sex_female    0
sex_male      0
dtype: int64

In [22]:
# Check the number of target labels.
y.shape

(891,)

#### Model

In [23]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier with default settings; this is the
# estimator that the grid search below tunes.
model = KNeighborsClassifier()

# List the tunable parameters and their defaults.
model.get_params()

{'algorithm': 'auto',
 'leaf_size': 30,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

In [38]:
# Candidate neighbour counts: odd values only (3, 5, 7, 9), so that a vote
# between the two classes cannot end in a tie.
param_grid = {'n_neighbors': list(range(3, 10, 2))}

In [39]:
# Exhaustive search over param_grid, scoring each candidate by accuracy
# under 5-fold cross-validation (cv = number of folds).
# NOTE(review): the features are not scaled before KNN; confirm whether the
# larger-magnitude columns (e.g. fare) should be standardised first.
gridCV = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=5,
    scoring="accuracy",
)

In [40]:
# Run the grid search: fit and score every candidate in param_grid on X, y.
gridCV.fit(X,y)

GridSearchCV(cv=5, estimator=KNeighborsClassifier(),
             param_grid={'n_neighbors': [3, 5, 7, 9]}, scoring='accuracy')

In [43]:
# Report the winning parameter combination and its cross-validated score.
print("Best Parameters : ", gridCV.best_params_)
print("Best Score : ", gridCV.best_score_)

Best Parameters :  {'n_neighbors': 5}
Best Score :  0.7138597702592431


## 2nd Try: wider `n_neighbors` range plus `weights`

In [47]:
# Wider search: odd neighbour counts 3..11, crossed with both vote-weighting
# schemes.
param_grid = {
    'n_neighbors': list(range(3, 12, 2)),
    'weights': ['uniform', 'distance'],
}
# Rebuild the search over the current param_grid, again scored by accuracy
# under 5-fold cross-validation (cv = number of folds).
gridCV = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=5,
    scoring="accuracy",
)

In [48]:
# Run the second grid search: fit and score every candidate on X, y.
gridCV.fit(X,y)

GridSearchCV(cv=5, estimator=KNeighborsClassifier(),
             param_grid={'n_neighbors': [3, 5, 7, 9, 11],
                         'weights': ['uniform', 'distance']},
             scoring='accuracy')

In [49]:
# Report the winning parameter combination and its cross-validated score.
print("Best Parameters : ", gridCV.best_params_)
print("Best Score : ", gridCV.best_score_)

Best Parameters :  {'n_neighbors': 11, 'weights': 'distance'}
Best Score :  0.7183792605611701
