In [1]:
# make the directory two levels above the working directory importable
import sys
from pathlib import Path
# Grandparent of the current working directory, as an absolute path.
_project_root = Path.cwd().parent.parent.resolve()
project_path = str(_project_root)

# Re-running this cell must not pile up duplicate sys.path entries.
if project_path not in sys.path:
    sys.path.append(project_path)
    
# imports
from common.utils import get_datasets, X_TRAIN, X_TEST, Y_TRAIN, Y_TEST

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

In [2]:
# Load the datasets through the project helper; each item is indexed below
# with the X_TRAIN / Y_TRAIN / X_TEST / Y_TEST keys.
datasets = get_datasets()

In [3]:
# Fixed seed so the forests (bootstrap sampling, per-split feature sampling)
# are built identically on every Restart & Run All.
SEED = 42

# With an integer `cv`, GridSearchCV uses unshuffled folds, so the estimator
# is the only source of randomness that needs pinning.
model = RandomForestClassifier(random_state=SEED)

# Hyper-parameter candidates: 4 * 3 * 2 * 2 * 2 = 96 combinations.
param_grid = {
    "n_estimators": [100, 200, 300, 500],
    "max_depth": [30, 50, None],
    "min_samples_split": [2, 3],
    "min_samples_leaf": [1, 2],
    "bootstrap": [True, False]
}

# Exhaustive 5-fold cross-validated search ranked by accuracy, on all cores.
grid = GridSearchCV(model, param_grid, cv=5, scoring="accuracy", n_jobs=-1)

for dataset in datasets:
    # fit() clones `model` for every candidate and refits the best one on the
    # full training split, so reusing a single search object across datasets
    # carries nothing over from the previous iteration.
    grid.fit(dataset[X_TRAIN], dataset[Y_TRAIN])

    # grid.score() evaluates the refitted best estimator with the configured
    # scoring ("accuracy").
    print(f"Best params: {grid.best_params_}")
    print(f"Train accuracy: {grid.score(dataset[X_TRAIN], dataset[Y_TRAIN])}")
    print(f"Test accuracy: {grid.score(dataset[X_TEST], dataset[Y_TEST])}")
    print()

Best params: {'bootstrap': False, 'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 3, 'n_estimators': 200}
Train accuracy: 0.9871142760257714
Test accuracy: 0.7667796610169492

Best params: {'bootstrap': False, 'max_depth': 50, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}
Train accuracy: 1.0
Test accuracy: 0.7722033898305085

Best params: {'bootstrap': False, 'max_depth': 30, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}
Train accuracy: 1.0
Test accuracy: 0.7876526458616011

