In [None]:
# Search space for the randomized hyperparameter search of the random forest.
# Each list holds the candidate values for one constructor argument; the keys
# of `random_grid` must match the estimator's parameter names.
# The split criterion is not searched: squared error is kept because it is
# much faster than the other options.

# Method of selecting samples for training each tree
bootstrap = [True, False]
# Maximum number of levels in tree: 10, 20, ..., 100, plus None (no limit)
max_depth = list(range(10, 101, 10)) + [None]
# Number of features to consider at every split.
# 1.0 already means "use all features"; None is an alias for the same setting,
# so listing both would only make random search draw that option twice as often.
max_features = [1.0, 'sqrt', 'log2']
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 3, 5]
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Number of trees in random forest: 10, 20, ..., 100
n_estimators = list(range(10, 101, 10))
# Random state: pinned to a single seed instead of being searched.
# The seed is not a property of the model; picking the "best" seed only selects
# cross-validation noise and spends search iterations on it.
random_state = [0]

# Create the random grid
random_grid = {
    'bootstrap': bootstrap,
    'max_depth': max_depth,
    'max_features': max_features,
    'min_samples_leaf': min_samples_leaf,
    'min_samples_split': min_samples_split,
    'n_estimators': n_estimators,
    'random_state': random_state,
}

In [None]:
# Hyperparameter search: start from an untuned forest and let a randomized
# search draw candidate settings from `random_grid`.
rf = RandomForestRegressor()

# 100 sampled combinations, each scored with 3-fold cross-validation,
# running on all available cores (n_jobs=-1).
rf_random = RandomizedSearchCV(
    rf,
    random_grid,
    n_iter=100,
    cv=3,
    verbose=3,
    random_state=0,
    n_jobs=-1,
)

# Run the search on the training data
rf_random.fit(X_train, y_train)

In [None]:
# Report the hyperparameter combination that the randomized search selected
print("Best Params: ", rf_random.best_params_)

In [None]:
def evaluate_model(model, X_test, y_test, tolerance=10):
    """Print the share of predictions whose relative error is within ``tolerance`` percent.

    The relative error of one prediction is ``100 * |prediction - target| / |target|``.
    A prediction counts as good when that error is at most ``tolerance``
    (the boundary is inclusive).

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict(X_test)``.
    X_test : array-like
        Features passed unchanged to ``model.predict``.
    y_test : array-like
        True target values. They are read by position, so a pandas Series with
        a shuffled or non-zero-based index is handled correctly.
    tolerance : float, default 10
        Maximum relative deviation, in percent, for a prediction to count as good.

    Raises
    ------
    ValueError
        If there is no prediction/target pair to compare.

    Notes
    -----
    Targets equal to zero have an undefined relative error and are never
    counted as good predictions.
    """
    predicted = np.ravel(np.asarray(model.predict(X_test), dtype=float))
    # Converting to an array drops any pandas index, so pairing is positional
    # rather than by label.
    actual = np.ravel(np.asarray(y_test, dtype=float))

    # Compare only the overlapping prefix when the lengths differ.
    n_compared = min(len(predicted), len(actual))
    if n_compared == 0:
        raise ValueError("evaluate_model needs at least one prediction/target pair.")
    predicted = predicted[:n_compared]
    actual = actual[:n_compared]

    # Divide by the magnitude of the target so negative targets do not yield
    # negative "errors". Zero targets give inf/nan, which fail the comparison
    # below; the floating-point warnings for them are silenced.
    with np.errstate(divide='ignore', invalid='ignore'):
        errors = 100 * (np.abs(predicted - actual) / np.abs(actual))

    count_good_predictions = int(np.count_nonzero(errors <= tolerance))
    good_predictions = round(100 * (count_good_predictions / n_compared), 2)
    print(f'Percentage of predictions with less than {tolerance} % deviation: ', good_predictions, '%.')

In [None]:
# Baseline without hyperparameter tuning: a 10-tree forest with a fixed seed,
# trained on the training split and scored on the test split.
base_model = RandomForestRegressor(n_estimators=10, random_state=0).fit(X_train, y_train)
evaluate_model(base_model, X_test, y_test)

In [None]:
# Evaluating model with Hyperparameter Tuning:
# take the estimator the randomized search exposes as its best candidate and
# score it with the same helper and test data as the base model.
best_random = rf_random.best_estimator_
evaluate_model(best_random, X_test, y_test)