In [None]:
from sklearn.datasets import load_breast_cancer

In [None]:
# Load the breast-cancer dataset directly as a (features, target) pair.
X, y = load_breast_cancer(return_X_y=True)

In [None]:
from sklearn.model_selection import cross_val_score

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Score a 5-nearest-neighbour classifier on each of 5 cross-validation folds.
scores = cross_val_score(
    estimator=KNeighborsClassifier(n_neighbors=5),
    X=X,
    y=y,
    cv=5,
)
scores

array([0.88596491, 0.93859649, 0.93859649, 0.94736842, 0.92920354])

In [None]:
import numpy as np

# Show the per-fold scores rounded to two decimals for readability.
scores.round(2)

array([0.89, 0.94, 0.94, 0.95, 0.93])

In [None]:
# Mean of the per-fold scores, to two decimals.
round(scores.mean(), 2)

0.93

In [None]:
# Spread (population standard deviation) of the per-fold scores, to two decimals.
round(scores.std(), 2)

0.02

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Score a decision tree on each of 5 cross-validation folds.
# random_state is pinned because tree construction is randomised; without a
# seed, re-running this cell can produce different scores.
scores = cross_val_score(DecisionTreeClassifier(random_state=0), X, y, cv=5)
np.round(scores, 2)

array([0.91, 0.89, 0.93, 0.95, 0.89])

In [None]:
# Mean of the per-fold scores, to two decimals.
round(scores.mean(), 2)

0.92

In [None]:
# Spread (population standard deviation) of the per-fold scores, to two decimals.
round(scores.std(), 2)

0.02

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Score a random forest on each of 5 cross-validation folds.
# random_state is pinned because bootstrapping and feature sampling are
# randomised; without a seed, re-running this cell can produce different scores.
scores = cross_val_score(RandomForestClassifier(random_state=0), X, y, cv=5)
np.round(scores, 2)

array([0.92, 0.95, 0.98, 0.98, 0.98])

In [None]:
# Mean of the per-fold scores, to two decimals.
round(scores.mean(), 2)

0.96

In [None]:
# Spread (population standard deviation) of the per-fold scores, to two decimals.
round(scores.std(), 2)

0.03

# Manual grid search

In [None]:
# Reload the breast-cancer data so this section starts from a known X, y.
X, y = load_breast_cancer(return_X_y=True)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out a test set first, then carve a validation set out of what remains.
# test_size=0.25 is the size train_test_split falls back to when none is given.
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val, test_size=0.25, random_state=0
)

In [None]:
# Manual grid search over odd values of k, judged on one held-out validation split.
neighbor_list = range(1, 22, 2)

best_val_score = 0
best_param = None

for nn in neighbor_list:
    knn = KNeighborsClassifier(n_neighbors=nn)
    knn.fit(X_train, y_train)
    val_score = knn.score(X_val, y_val)

    # Strict ">" keeps the smallest k when several candidates tie.
    if val_score > best_val_score:
        best_val_score = val_score
        best_param = nn

print(f'Best Parameter: n_neighbors={best_param}')
# Round to two decimals: round() without a digits argument returns an integer,
# which collapsed the reported accuracy to 0 or 1.
print(f'Best Validation Score = {round(best_val_score, 2)} ')

# Refit the winning k on train + validation, then score once on the test set.
knn = KNeighborsClassifier(n_neighbors=best_param)
knn.fit(X_train_val, y_train_val)
test_score = knn.score(X_test, y_test)

print(f'Test score of best model is: {round(test_score,2)}')

Best Parameter: n_neighbors=5
Best Validation Score = 1 
Test score of best model is: 0.94


In [None]:
# Manual grid search over odd values of k, where each candidate is judged by its
# mean cross-validation score on the train+validation data.
# NOTE(review): the recorded output for this cell (scores of 0.01) looks like it
# came from a kernel in which X_train_val held the diabetes regression split
# rather than the breast-cancer one — TODO confirm with Restart & Run All.
neighbor_list = range(1, 22, 2)

best_val_score, best_param = 0, None

for nn in neighbor_list:
    knn = KNeighborsClassifier(n_neighbors=nn)
    val_score = cross_val_score(knn, X_train_val, y_train_val).mean()

    # Strict ">" keeps the smallest k when several candidates tie.
    if val_score > best_val_score:
        best_val_score, best_param = val_score, nn

print(f'Best Parameter: n_neighbors={best_param}')
print(f'Best Validation Score = {round(best_val_score,2)} ')

# Refit the winning k on all non-test data, then score once on the test set.
knn = KNeighborsClassifier(n_neighbors=best_param).fit(X_train_val, y_train_val)
test_score = knn.score(X_test, y_test)

print(f'Test score of best model is: {round(test_score,2)}')



Best Parameter: n_neighbors=1
Best Validation Score = 0.01 
Test score of best model is: 0.01




# Grid search with scikit-learn (GridSearchCV)

In [None]:
from sklearn.datasets import load_diabetes

# Switch to the diabetes dataset. This rebinds X, y and every split variable
# that the breast-cancer cells earlier in the notebook also use.
X, y = load_diabetes(return_X_y=True)
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val, test_size=0.25, random_state=0
)

In [None]:
# Quick sanity check of the feature-matrix and target dimensions.
(X.shape, y.shape)

((442, 10), (442,))

In [None]:
# Hyper-parameter grid: tree depths 1 through 10 crossed with four forest sizes.
param_grid = dict(
    max_depth=range(1, 11),
    n_estimators=[50, 100, 200, 300],
)

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor

# Exhaustive search over param_grid using GridSearchCV's default cross-validation.
# random_state is pinned on the forest so the selected parameters and the
# reported scores are repeatable across runs.
gs = GridSearchCV(RandomForestRegressor(random_state=0), param_grid)

In [None]:
# Run the search on all non-test data; the test split stays untouched.
gs.fit(X=X_train_val, y=y_train_val)

In [None]:
# Parameter combination the grid search selected.
gs.best_params_

{'max_depth': 5, 'n_estimators': 50}

In [None]:
# Score the fitted search object on the held-out test split
# (presumably R^2, the regressor's default score, since no scoring was passed).
gs.score(X_test, y_test)

0.2385704563428076