In [2]:
def train_the_model(X_train, y_train, X_test):
    """
    Tune a GradientBoostingRegressor in two stages and predict on the test set.

    Stage 1 is a randomized search (50 sampled configurations, 5-fold CV,
    scored by negative mean squared error). Stage 2 is an exhaustive grid
    search over a small neighbourhood of the best stage-1 configuration.
    The best stage-2 estimator is then used to predict ``X_test``.

    Args:
        X_train: Pandas DataFrame
            The features of the training set.
        y_train: Pandas Series
            The target variable of the training set.
        X_test: Pandas DataFrame
            The features of the test set.

    Returns:
        y_pred: Numpy Array
            Contains the predicted values from the best grid-search model.
        best_model_grid:
            The best estimator found by the grid search (refit on the whole
            training set by GridSearchCV's default ``refit=True``).
    """

    # Stage 1: coarse exploration with a randomized search.
    param_grid_random = {
        'n_estimators': sp_randint(100, 500),
        'learning_rate': [0.1, 0.05, 0.01, 0.001],
        'max_depth': sp_randint(3, 10),
        'min_samples_split': sp_randint(2, 10),
        'min_samples_leaf': sp_randint(1, 5),
        'max_features': ['sqrt', 'log2', None],
    }

    model = GradientBoostingRegressor(random_state=126)

    random_search = RandomizedSearchCV(
        estimator=model,
        param_distributions=param_grid_random,
        n_iter=50,
        scoring='neg_mean_squared_error',
        cv=5,
        random_state=126,
    )
    random_search.fit(X_train, y_train)

    best_params_random = random_search.best_params_
    best_model_random = random_search.best_estimator_

    # Stage 2: local refinement around the stage-1 optimum.
    # Every list contains the value chosen by the random search itself, so
    # the grid search always re-evaluates that configuration alongside its
    # neighbours instead of only trying shifted values. The lower bounds of
    # the stage-1 distributions keep every neighbour in the valid range
    # (n_estimators >= 50, max_depth >= 2).
    n_estimators = best_params_random['n_estimators']
    max_depth = best_params_random['max_depth']
    min_samples_split = best_params_random['min_samples_split']
    min_samples_leaf = best_params_random['min_samples_leaf']

    param_grid_grid = {
        'n_estimators': [n_estimators - 50, n_estimators, n_estimators + 50],
        'learning_rate': [best_params_random['learning_rate']],
        'max_depth': [max_depth - 1, max_depth],
        'min_samples_split': [min_samples_split, min_samples_split + 2],
        'min_samples_leaf': [min_samples_leaf, min_samples_leaf + 1],
        'max_features': [best_params_random['max_features']],
    }

    # GridSearchCV clones the estimator it is given, so passing the fitted
    # stage-1 model only carries over its hyperparameters (incl. random_state).
    grid_search = GridSearchCV(
        estimator=best_model_random,
        param_grid=param_grid_grid,
        scoring='neg_mean_squared_error',
        cv=5,
    )
    grid_search.fit(X_train, y_train)

    best_model_grid = grid_search.best_estimator_

    # Predict the test set with the best model from the grid search.
    y_pred = best_model_grid.predict(X_test)

    return y_pred, best_model_grid
    