In [2]:
import numpy as np
from prep_terrain_data import makeTerrainData

In [3]:
features_train, labels_train, features_test, labels_test = makeTerrainData()

### the training data (features_train, labels_train) have both "fast" and "slow"
### points mixed together--separate them so we can give them different colors
### in the scatterplot and identify them visually
grade_fast = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==0]
bumpy_fast = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==0]
grade_slow = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==1]
bumpy_slow = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==1]

In [4]:
###parameters

n_estimators = [int(x) for x in np.linspace(start = 10, stop = 80, num = 10)]

max_features = ['auto','sqrt']

max_depth = [2,4]

min_sample_split = [2,5]

min_sample_leaf = [1,2]

bootstrap = [True, False]

#create parameter grid

param_grid = {
    'n_estimators' : n_estimators,

    'max_features' : max_features,

    'max_depth' : max_depth,

    'min_samples_split' : min_sample_split,

    'min_samples_leaf' : min_sample_leaf,

    'bootstrap' : bootstrap
}

In [5]:
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier()

from sklearn.model_selection import GridSearchCV

rfGrid = GridSearchCV(estimator=rf_model,param_grid=param_grid, cv=3, verbose = 2, n_jobs=4)

rfGrid.fit(features_train,labels_train)



Fitting 3 folds for each of 320 candidates, totalling 960 fits


GridSearchCV(cv=3, estimator=RandomForestClassifier(), n_jobs=4,
             param_grid={'bootstrap': [True, False], 'max_depth': [2, 4],
                         'max_features': ['auto', 'sqrt'],
                         'min_samples_leaf': [1, 2],
                         'min_samples_split': [2, 5],
                         'n_estimators': [10, 17, 25, 33, 41, 48, 56, 64, 72,
                                          80]},
             verbose=2)

In [6]:
print(rfGrid.best_params_)

print(rfGrid.best_score_)
print(rfGrid.score(features_test,labels_test))

{'bootstrap': False, 'max_depth': 4, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 72}
0.9626666666666667
0.924


In [12]:
from sklearn.neighbors import KNeighborsClassifier

rf_model = KNeighborsClassifier()

param_grid = {

    'n_neighbors' : [int(x) for x in range(2,20)],

    'weights' : ['uniform', 'distance'],

    'algorithm' : ['auto', 'ball_tree', 'kd_tree', 'brute'],

    'leaf_size' :[int(x) for x in np.linspace(start = 10, stop = 50, num = 5)],

    'p' : [1,2]

}

In [13]:
from sklearn.model_selection import GridSearchCV

rfGrid = GridSearchCV(estimator=rf_model,param_grid=param_grid, cv=3, verbose = 2, n_jobs=4)

rfGrid.fit(features_train,labels_train)

Fitting 3 folds for each of 1440 candidates, totalling 4320 fits


GridSearchCV(cv=3, estimator=KNeighborsClassifier(), n_jobs=4,
             param_grid={'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
                         'leaf_size': [10, 20, 30, 40, 50],
                         'n_neighbors': [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
                                         14, 15, 16, 17, 18, 19],
                         'p': [1, 2], 'weights': ['uniform', 'distance']},
             verbose=2)

In [14]:
print(rfGrid.best_params_)

print(rfGrid.best_score_)
print(rfGrid.score(features_test,labels_test))

{'algorithm': 'auto', 'leaf_size': 10, 'n_neighbors': 10, 'p': 1, 'weights': 'uniform'}
0.9666666666666667
0.94


In [15]:
param_grid = {

    'n_neighbors' : [int(x) for x in range(2,20)],

    'weights' : ['uniform', 'distance'],

    'algorithm' : ['auto', 'ball_tree', 'kd_tree', 'brute'],

    'leaf_size' :[int(x) for x in range(2,10)],

    'p' : [1,2]

}

rfGrid = GridSearchCV(estimator=rf_model,param_grid=param_grid, cv=3, verbose = 2, n_jobs=4)

rfGrid.fit(features_train,labels_train)

Fitting 3 folds for each of 2304 candidates, totalling 6912 fits


GridSearchCV(cv=3, estimator=KNeighborsClassifier(), n_jobs=4,
             param_grid={'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
                         'leaf_size': [2, 3, 4, 5, 6, 7, 8, 9],
                         'n_neighbors': [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
                                         14, 15, 16, 17, 18, 19],
                         'p': [1, 2], 'weights': ['uniform', 'distance']},
             verbose=2)

In [16]:
print(rfGrid.best_params_)

print(rfGrid.best_score_)
print(rfGrid.score(features_test,labels_test))

{'algorithm': 'auto', 'leaf_size': 2, 'n_neighbors': 10, 'p': 1, 'weights': 'uniform'}
0.9666666666666667
0.94


In [20]:
from sklearn.ensemble import AdaBoostClassifier

rf_model = AdaBoostClassifier()

In [21]:
param_grid = {

    'n_estimators' : [30,50,70],

    'learning_rate' : [int(x) for x in range(1,5)],

    'algorithm' : ['SAMME', 'SAMME.R'],

    'random_state' : [1,'RandomState',None]

}

In [22]:
rfGrid = GridSearchCV(estimator=rf_model,param_grid=param_grid, cv=3, verbose = 2, n_jobs=4)

rfGrid.fit(features_train,labels_train)

Fitting 3 folds for each of 72 candidates, totalling 216 fits


 0.95333333        nan 0.95333333 0.876             nan 0.876
 0.87066667        nan 0.87066667 0.92666667        nan 0.92666667
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
 0.95333333        nan 0.95333333 0.95066667        nan 0.95066667
 0.94533333        nan 0.94533333 0.924             nan 0.924
 0.94133333        nan 0.94133333 0.93866667        nan 0.93733333
 0.63066667        nan 0.63066667 0.79333333        nan 0.79333333
 0.804             nan 0.81466667 0.79866667        nan 0.796
 0.79866667        nan 0.796      0.79866667        nan 0.78266667]


GridSearchCV(cv=3, estimator=AdaBoostClassifier(), n_jobs=4,
             param_grid={'algorithm': ['SAMME', 'SAMME.R'],
                         'learning_rate': [1, 2, 3, 4],
                         'n_estimators': [30, 50, 70],
                         'random_state': [1, 'RandomState', None]},
             verbose=2)

In [23]:
print(rfGrid.best_params_)

print(rfGrid.best_score_)
print(rfGrid.score(features_test,labels_test))

{'algorithm': 'SAMME', 'learning_rate': 1, 'n_estimators': 70, 'random_state': 1}
0.9533333333333333
0.924
