In [5]:
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor
from sklearn.dummy import DummyRegressor
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [6]:
# Load the diabetes dataset
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target

In [7]:
# Train test split.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Nivel 2

In [10]:
# Define the models and parameters for GridSearchCV
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('model', DummyRegressor()),
])

param_grid = [
    {
        'model': [DummyRegressor()],
    },
    {
        'model': [RandomForestRegressor()],
        'model__n_estimators': [50, 100, 200],
        'model__max_depth': [None, 10, 20, 30]
    },
    {
        'model': [Ridge()],
        'model__alpha': [0.1, 1.0, 10.0, 100.0]
    },
]

# Perform GridSearchCV
grid_search = GridSearchCV(
    pipeline,
    param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Output the best parameters and the best score
print("Best parameters found: ", grid_search.best_params_)
print("Best score: ", grid_search.best_score_)

Best parameters found:  {'model': Ridge(), 'model__alpha': 10.0}
Best score:  -3127.99719853687


In [20]:
import numpy as np

In [21]:
N = len(y_train) // 5

for params, m, s in sorted(
        zip(
            grid_search.cv_results_['params'],
            grid_search.cv_results_['mean_test_score'],
            grid_search.cv_results_['std_test_score'],
        ), key=lambda x: x[1], reverse=True):
    print(m, s/np.sqrt(N), params)

-3127.99719853687 35.464230375076426 {'model': Ridge(), 'model__alpha': 10.0}
-3135.5262085947866 39.63988117689308 {'model': Ridge(), 'model__alpha': 1.0}
-3141.6672925900157 42.072395408706726 {'model': Ridge(), 'model__alpha': 0.1}
-3170.084337729107 36.149675163441344 {'model': Ridge(), 'model__alpha': 100.0}
-3376.1632581763506 65.32103755693501 {'model': RandomForestRegressor(), 'model__max_depth': 10, 'model__n_estimators': 200}
-3408.6992795466367 80.35876557136498 {'model': RandomForestRegressor(), 'model__max_depth': 10, 'model__n_estimators': 50}
-3415.6307570139165 65.62014799801041 {'model': RandomForestRegressor(), 'model__max_depth': 20, 'model__n_estimators': 200}
-3419.0997299154938 66.05839901626493 {'model': RandomForestRegressor(), 'model__max_depth': 30, 'model__n_estimators': 50}
-3424.162784920524 68.59814344996597 {'model': RandomForestRegressor(), 'model__max_depth': 30, 'model__n_estimators': 200}
-3443.8079333521127 67.69920038635452 {'model': RandomForestReg