# Model training for NFL play prediction

---
 
In this notebook, we train multiple regression and ANN models to predict the yards gained per football play. As a basis, we use the preselected data created by `preprocessing.py`.

### Inline preprocessing steps:

---

As we use 5-fold cross-validation to protect the model against overfitting, we need a dynamic preprocessing approach. Therefore, a pipeline provided by `preprocessing.py` is used to process the training data of each fold. Furthermore, we use nested cross-validation to ensure the quality of our model with the selected hyperparameters.


### Contributors

All contributors are listed only under their primary task; the teams still exchanged know-how and worked on one another's approaches.

---

##### Preprocessing Team

- Tim Oliver Krause (1689074)
- Jan Thilo Viktorin (1684159)
- Joël Pflomm (1634591)

##### Model Team

- Franziska Köllschen (1981780)
- Steffen Hüls (1979863)
- Matthias Biermanns (1980701)

## Coding

### Coding Foundations

In [None]:
# basic imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict, GridSearchCV
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.neural_network import MLPRegressor
from sklearn.base import TransformerMixin, BaseEstimator
import preprocessing

# import for data visualization
import matplotlib.pyplot as plt

# imports for regression models
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import plot_tree
from sklearn.metrics import mean_squared_error, mean_absolute_error

import xgboost as xgb

In [None]:
# static variables
# One play-by-play CSV path per year; range(1999, 2024) yields 1999 through 2023.
FILE_LIST = [f'./Data/play_by_play_{year}.csv' for year in range(1999, 2024)]
# Shared preprocessing object; later cells read its run_df / pass_df frames
# and build their pipelines through it.
PREPROCESSOR = preprocessing.NFLPreprocessing(FILE_LIST)
# Column that split_feature_target separates from the features as the target.
TARGET_NAME = 'yards_gained'
# Seed used by split_feature_target when sampling the data frame.
RANDOM_STATE = 42
# Labels prepended to plot titles for the pass and run models.
LABEL_PASS = 'Pass'
LABEL_RUN = 'Run'

##### General functions

In [None]:
def split_feature_target(df, data_fraction=1):
    """Sample a fraction of ``df`` and split it into features and target.

    Args:
        df: Data frame that contains the ``TARGET_NAME`` column.
        data_fraction: Fraction of rows to keep, between 0 and 1
            (e.g. 0.05 keeps 5% of the rows). The default of 1 keeps every
            row but still shuffles them.

    Returns:
        A ``(features, target)`` tuple: the sampled frame without the target
        column, and the target column itself. Both carry a fresh 0..n-1 index.
    """
    # Seeded sampling, so the same fraction always selects the same rows.
    sample = df.sample(frac=data_fraction, random_state=RANDOM_STATE).reset_index(drop=True)
    return sample.drop(TARGET_NAME, axis=1), sample[TARGET_NAME]

def visualize_predicts(y_test, predictions, label):
    """Show a scatter plot of true target values against predicted values.

    Args:
        y_test: True target values (x axis).
        predictions: Predicted values (y axis).
        label: Model label that is prepended to the plot title.
    """
    plt.scatter(y_test, predictions)
    plt.xlabel('True Yards')
    plt.ylabel('Predicted Yards')
    plot_title = label + ' Model: True vs Predicted Yards'
    plt.title(plot_title)
    plt.show()

def plot_decision_tree(tree, label):
    """Plot one fitted decision tree with the preprocessed feature names.

    Args:
        tree: Fitted tree estimator handed to ``sklearn.tree.plot_tree``.
        label: Model label that is inserted into the plot title.
    """
    plt.figure(figsize=(15, 10))
    names = PREPROCESSOR.get_prepro_feature_names_from_pipeline()
    plot_tree(tree, feature_names=names, filled=True)
    plt.title('Decision Tree for ' + label + ' Model')
    plt.show()

def visualize_train_val_loss(training_losses, validation_losses, label):
    """Plot the training and validation loss curves in one figure.

    Args:
        training_losses: Sequence of training losses, one entry per epoch.
        validation_losses: Sequence of validation losses, one entry per epoch.
        label: Model label that is appended to the plot title.
    """
    curves = (
        (training_losses, 'Training Loss'),
        (validation_losses, 'Validation Loss'),
    )
    for losses, curve_label in curves:
        plt.plot(losses, label=curve_label)

    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Neural Network Training and Validation Loss - ' + label)
    plt.legend()
    plt.show()

def plot_feature_importances(pipeline, show_top_n=10):
    """Plot the largest feature importances of a fitted pipeline.

    Args:
        pipeline: Fitted pipeline whose ``'regressor'`` step exposes
            ``feature_importances_``.
        show_top_n: Number of features to show, taken from the top of the
            importance ranking.
    """
    importances = pipeline.named_steps['regressor'].feature_importances_
    names = PREPROCESSOR.get_prepro_feature_names_from_pipeline()

    # Rank all features by importance, most important first.
    ranking = pd.DataFrame({'Feature': names, 'Importance': importances})
    ranking = ranking.sort_values(by='Importance', ascending=False)

    top_features = ranking['Feature'].iloc[:show_top_n]
    top_importances = ranking['Importance'].iloc[:show_top_n]

    plt.figure(figsize=(10, 6))
    plt.barh(top_features, top_importances)
    plt.xlabel('Importance')
    plt.title(f'Top {show_top_n} Feature Importances')
    plt.show()

def plot_coef(pipeline, title=None):
    """Plot the coefficients of a fitted linear model as horizontal bars.

    Args:
        pipeline: Fitted pipeline whose ``'regressor'`` step exposes ``coef_``.
        title: Optional plot title. When omitted, the title is derived from
            the regressor's class name, so it always matches the plotted model.
    """
    regressor = pipeline.named_steps['regressor']
    coefs = regressor.coef_

    # Get the feature names after preprocessing
    columns = PREPROCESSOR.get_prepro_feature_names_from_pipeline()

    coef = pd.DataFrame(coefs, columns=["Coefficients"], index=columns)

    # Decorate the axes returned by DataFrame.plot. Opening a new figure after
    # plotting would put the title, zero line and margin on an empty figure
    # instead of on the coefficient chart.
    ax = coef.plot(kind="barh", figsize=(9, 7))
    if title is None:
        title = f"{type(regressor).__name__} coefficients"
    ax.set_title(title)
    ax.axvline(x=0, color=".5")
    # Leave room on the left for long feature names.
    ax.figure.subplots_adjust(left=0.3)

##### Functions for cross validation

In [None]:
def test_model(pipeline, x_train, y_train, x_test, y_test):
    """Fit a pipeline, print its error metrics on the test split, return predictions.

    Args:
        pipeline: Estimator or pipeline offering ``fit`` and ``predict``.
        x_train: Training features.
        y_train: Training target.
        x_test: Test features.
        y_test: Test target.

    Returns:
        The predictions of the fitted pipeline for ``x_test``.
    """
    pipeline.fit(x_train, y_train)

    # Make predictions on the test set
    predictions = pipeline.predict(x_test)

    # Evaluate the model
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    # Derive the RMSE from the MSE; the `squared=False` argument of
    # mean_squared_error is deprecated and removed in newer scikit-learn.
    rmse = np.sqrt(mse)

    print(f"Mean Squared Error: {mse}")
    print(f"Mean Absolute Error: {mae}")
    print(f"Root Mean Squared Error: {rmse}")
    return predictions

def test_model_k_fold(df, pipeline, data_fraction, k_folds=5):
    """Return cross-validated predictions for a sampled fraction of ``df``.

    Args:
        df: Data frame that contains the target column.
        pipeline: Estimator or pipeline to cross-validate.
        data_fraction: Fraction of ``df`` to sample before cross-validation.
        k_folds: Value passed as ``cv`` to ``cross_val_predict``.

    Returns:
        The out-of-fold predictions produced by ``cross_val_predict``.
    """
    X, y = split_feature_target(df, data_fraction)
    return cross_val_predict(pipeline, X, y, cv=k_folds)

def estimate_hyperparams(features, target, pipeline, scoring, k_folds=5, parameters=None):
    """Run a grid search over ``parameters`` and return the fitted search object.

    Args:
        features: Feature data to fit on.
        target: Target values to fit on.
        pipeline: Estimator or pipeline to tune.
        scoring: Scoring passed to ``GridSearchCV``.
        k_folds: Value passed as ``cv`` to ``GridSearchCV``.
        parameters: Parameter grid. ``None`` (the default) means an empty
            grid, i.e. the pipeline is evaluated with its current parameters.

    Returns:
        The fitted ``GridSearchCV`` instance.
    """
    # Use None as the default instead of a shared mutable dict.
    param_grid = {} if parameters is None else parameters

    # create the grid search instance
    grid_search_estimator = GridSearchCV(pipeline, param_grid, scoring=scoring, cv=k_folds, return_train_score=False)

    # run the grid search
    grid_search_estimator.fit(features, target)

    return grid_search_estimator

##### MLP Regressor Class - with history

In [None]:
class MLPWithHistory(BaseEstimator, TransformerMixin):
    """MLPRegressor wrapper that records per-epoch training and validation loss.

    During ``fit`` 20% of the given data is held out as a validation split.
    The network is trained with ``partial_fit`` for ``max_iter`` passes over
    the remaining 80%, and after every pass the mean squared error on both
    parts is appended to ``training_losses`` / ``validation_losses``.

    Args:
        mlp_params: Dict of keyword arguments for ``MLPRegressor``; ``None``
            means the ``MLPRegressor`` defaults.
    """

    def __init__(self, mlp_params=None):
        self.mlp_params = mlp_params
        self.training_losses = []
        self.validation_losses = []
        self.mlp_regressor = MLPRegressor(**(self.mlp_params or {}))

    def fit(self, X, y):
        """Train the network epoch by epoch and record the loss history.

        Every call starts from a fresh network and empty histories, so calling
        ``fit`` again neither continues the previous training nor appends to
        the previous loss curves.
        """
        self.mlp_regressor = MLPRegressor(**(self.mlp_params or {}))
        self.training_losses = []
        self.validation_losses = []

        # Hold out 20% of the data; it is only used to track the validation loss.
        X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

        for _ in range(self.mlp_regressor.max_iter):
            self.mlp_regressor.partial_fit(X_train, y_train)

            # Calculate training loss
            y_train_pred = self.mlp_regressor.predict(X_train)
            self.training_losses.append(mean_squared_error(y_train, y_train_pred))

            # Calculate validation loss
            y_val_pred = self.mlp_regressor.predict(X_val)
            self.validation_losses.append(mean_squared_error(y_val, y_val_pred))

        return self

    def transform(self, X):
        """Return ``X`` unchanged."""
        return X

    def predict(self, X):
        """Predict with the wrapped ``MLPRegressor``."""
        return self.mlp_regressor.predict(X)

    def get_params(self, deep=True):
        """Return the only tunable parameter, ``mlp_params``."""
        return {"mlp_params": self.mlp_params}

    def set_params(self, **params):
        """Update ``mlp_params`` and forward its entries to the wrapped regressor.

        A call without ``mlp_params`` leaves the estimator untouched, and
        ``mlp_params=None`` is accepted, instead of raising.
        """
        if "mlp_params" in params:
            self.mlp_params = params["mlp_params"]
            self.mlp_regressor.set_params(**(self.mlp_params or {}))
        return self

    def score(self, X, y):
        """Return the negative mean squared error, so that higher is better."""
        y_pred = self.predict(X)
        return -mean_squared_error(y, y_pred)

##### Train and Test Data

In [None]:
# Preprocessed data frames for run and pass plays.
run_df = PREPROCESSOR.run_df
pass_df = PREPROCESSOR.pass_df

# Work on a seeded 20% sample of each data frame.
run_features, run_target = split_feature_target(run_df, 0.2)
pass_features, pass_target = split_feature_target(pass_df, 0.2)

# Seed the hold-out split as well, so the metrics reported by the cells
# below are reproducible between notebook runs.
run_X_train, run_X_test, run_y_train, run_y_test = train_test_split(run_features, run_target, random_state=RANDOM_STATE)
pass_X_train, pass_X_test, pass_y_train, pass_y_test = train_test_split(pass_features, pass_target, random_state=RANDOM_STATE)

print(run_features.shape)
print(pass_features.shape)

run_features

### Linear Regression

In [None]:
# Build the preprocessing pipeline without passing a model and display it as
# the cell output.
run_pipeline = PREPROCESSOR.make_preprocessing_pipeline()

run_pipeline

In [None]:
# make pipelines from preprocessing script
# (the model is handed to make_preprocessing_pipeline directly)
pipeline = PREPROCESSOR.make_preprocessing_pipeline(LinearRegression())

# keyword arguments for the pipeline's 'outlier_remover' step, run plays
run_params = {
    'outlier_remover__kw_args': {
        'strict_factor_iqr': 1.0,
        'loose_factor_iqr': 2.0,
        'strict_columns': ['yardline_100', 'ydstogo'],
        'omit_columns': []
    }
}

# keyword arguments for the pipeline's 'outlier_remover' step, pass plays
pass_params = {
    'outlier_remover__kw_args': {
        'strict_factor_iqr': 1.5,
        'loose_factor_iqr': 3.0,
        'strict_columns': ['ydstogo'],
        'omit_columns': []
    }
}

# The same pipeline object is used for both play types: it is configured and
# fitted for runs first, then re-configured and re-fitted for passes.

# set params, test model and save predictions
pipeline = pipeline.set_params(**run_params)
run_predictions = test_model(pipeline, run_X_train, run_y_train, run_X_test, run_y_test)

# plot the coefficients while the pipeline still holds the run model
plot_coef(pipeline)

# set params, test model and save predictions
pipeline = pipeline.set_params(**pass_params)
pass_predictions = test_model(pipeline, pass_X_train, pass_y_train, pass_X_test, pass_y_test)

# visualize predictions
visualize_predicts(run_y_test, run_predictions, LABEL_RUN)
visualize_predicts(pass_y_test, pass_predictions, LABEL_PASS)

In [None]:
# make pipelines from preprocessing script
pipeline = PREPROCESSOR.make_preprocessing_pipeline(LinearRegression())

# 5-fold cross-validation; the scorer returns negative MSE, hence the sign flip
run_cv_scores = cross_val_score(pipeline, run_features, run_target, cv=5, scoring='neg_mean_squared_error')
print(f"Run Model Cross-Validation Mean Squared Error: {-np.mean(run_cv_scores)}")

# cross-validate the pass model too, like the cross-validation cells of the other models
pass_cv_scores = cross_val_score(pipeline, pass_features, pass_target, cv=5, scoring='neg_mean_squared_error')
print(f"Pass Model Cross-Validation Mean Squared Error: {-np.mean(pass_cv_scores)}")


In [None]:
# hyperparameter tuning for linear regression

# Each list entry is one candidate set of keyword arguments for the
# 'outlier_remover' step; the grid search compares the candidates.
# could be more over engineered with automatic dict creation if necessary
run_params = {
    'outlier_remover__kw_args': [
        {
            'strict_factor_iqr': 1.0,
            'loose_factor_iqr': 2.0,
            'strict_columns': ['yardline_100', 'ydstogo'],
            'omit_columns': []
        }, {
            'strict_factor_iqr': 1.5,
            'loose_factor_iqr': 3.0,
            'strict_columns': ['yardline_100', 'ydstogo'],
            'omit_columns': []
        },
    ]
}

# only a single candidate for the pass model
pass_params = {
    'outlier_remover__kw_args': [{
        'strict_factor_iqr': 1.5,
        'loose_factor_iqr': 3.0,
        'strict_columns': ['ydstogo'],
        'omit_columns': []
    }]
}

pipeline = PREPROCESSOR.make_preprocessing_pipeline(LinearRegression())

# grid search with the default 5 folds of estimate_hyperparams, scored by negative MSE
run_grid_search = estimate_hyperparams(run_features, run_target, pipeline, scoring='neg_mean_squared_error', parameters=run_params)
pass_grid_search = estimate_hyperparams(pass_features, pass_target, pipeline, scoring='neg_mean_squared_error', parameters=pass_params)

# show the best parameters and the full result table of each search
display(run_grid_search.best_params_)
display(pd.DataFrame(run_grid_search.cv_results_))
display(pass_grid_search.best_params_)
display(pd.DataFrame(pass_grid_search.cv_results_))

In [None]:
# make pipelines from preprocessing script
pipeline = PREPROCESSOR.make_preprocessing_pipeline(LinearRegression())

# best parameter sets found by the grid search objects currently in scope
run_params = run_grid_search.best_params_
pass_params = pass_grid_search.best_params_

# One pipeline object is configured and fitted for runs first, then
# re-configured and re-fitted for passes.

# set params, test model and save predictions
pipeline = pipeline.set_params(**run_params)
run_predictions = test_model(pipeline, run_X_train, run_y_train, run_X_test, run_y_test)

# set params, test model and save predictions
pipeline = pipeline.set_params(**pass_params)
pass_predictions = test_model(pipeline, pass_X_train, pass_y_train, pass_X_test, pass_y_test)

# visualize predictions
visualize_predicts(run_y_test, run_predictions, LABEL_RUN)
visualize_predicts(pass_y_test, pass_predictions, LABEL_PASS)

### Polynomial Regression

In [None]:
# basic model

# make pipelines from preprocessing script
# The model is itself a small pipeline: degree-2 polynomial features followed
# by a linear regression.
run_pipeline = PREPROCESSOR.make_preprocessing_pipeline(make_pipeline(PolynomialFeatures(2), LinearRegression()))
pass_pipeline = PREPROCESSOR.make_preprocessing_pipeline(make_pipeline(PolynomialFeatures(2), LinearRegression()))

# test model and save predictions
run_predictions = test_model(run_pipeline, run_X_train, run_y_train, run_X_test, run_y_test)
pass_predictions = test_model(pass_pipeline, pass_X_train, pass_y_train, pass_X_test, pass_y_test)

# visualize predictions
visualize_predicts(run_y_test, run_predictions, LABEL_RUN)
visualize_predicts(pass_y_test, pass_predictions, LABEL_PASS)

In [None]:
# estimating hyperparameters

# Re-sample only 1% of the data for the grid search. This rebinds the global
# run_features / pass_features used by later cells.
run_features, run_target = split_feature_target(run_df, 0.01)
pass_features, pass_target = split_feature_target(pass_df, 0.01)

# make new pipelines from preprocessing script
run_pipeline = PREPROCESSOR.make_preprocessing_pipeline(make_pipeline(PolynomialFeatures(), LinearRegression()))
pass_pipeline = PREPROCESSOR.make_preprocessing_pipeline(make_pipeline(PolynomialFeatures(), LinearRegression()))

# candidate polynomial degrees; the key addresses the PolynomialFeatures step
# inside the nested 'regressor' pipeline
parameters = {
    'regressor__polynomialfeatures__degree': [2, 3]
}

# 3-fold grid search scored by negative RMSE
run_grid_search = estimate_hyperparams(run_features, run_target, run_pipeline, 'neg_root_mean_squared_error', k_folds=3, parameters=parameters)
pass_grid_search = estimate_hyperparams(pass_features, pass_target, pass_pipeline, 'neg_root_mean_squared_error', k_folds=3, parameters=parameters)

display(run_grid_search.best_params_)
display(pass_grid_search.best_params_)

In [None]:
# cross-validation with best hyperparameters

# seeded 1% sample of each data frame
run_features, run_target = split_feature_target(run_df, 0.01)
pass_features, pass_target = split_feature_target(pass_df, 0.01)

# make new pipelines from preprocessing script
pipeline = PREPROCESSOR.make_preprocessing_pipeline(make_pipeline(PolynomialFeatures(), LinearRegression()))

run_params = run_grid_search.best_params_
pass_params = pass_grid_search.best_params_

# One pipeline object is configured for runs first and then re-configured for
# passes. The scorer returns negative MSE, hence the sign flip when printing.
pipeline = pipeline.set_params(**run_params)
run_cv_scores = cross_val_score(pipeline, run_features, run_target, cv=5, scoring='neg_mean_squared_error')
print(f"Run Model Cross-Validation Mean Squared Error: {-np.mean(run_cv_scores)}")

pipeline = pipeline.set_params(**pass_params)
pass_cv_scores = cross_val_score(pipeline, pass_features, pass_target, cv=5, scoring='neg_mean_squared_error')
print(f"Pass Model Cross-Validation Mean Squared Error: {-np.mean(pass_cv_scores)}")

### KNN

In [None]:
# Basic model

# make pipelines
# k-nearest-neighbours regression with a fixed k of 3 for both play types
run_pipeline = PREPROCESSOR.make_preprocessing_pipeline(KNeighborsRegressor(n_neighbors=3))
pass_pipeline = PREPROCESSOR.make_preprocessing_pipeline(KNeighborsRegressor(n_neighbors=3))

# test model and save predictions
run_predictions = test_model(run_pipeline, run_X_train, run_y_train, run_X_test, run_y_test)
pass_predictions = test_model(pass_pipeline, pass_X_train, pass_y_train, pass_X_test, pass_y_test)

# visualize predictions
visualize_predicts(run_y_test, run_predictions, LABEL_RUN)
visualize_predicts(pass_y_test, pass_predictions, LABEL_PASS)

In [None]:
# estimating hyperparameters

# Re-sample 10% of the data for the grid search. This rebinds the global
# run_features / pass_features used by later cells.
run_features, run_target = split_feature_target(run_df, 0.1)
pass_features, pass_target = split_feature_target(pass_df, 0.1)

# make new pipelines from preprocessing script
run_pipeline = PREPROCESSOR.make_preprocessing_pipeline(KNeighborsRegressor())
pass_pipeline = PREPROCESSOR.make_preprocessing_pipeline(KNeighborsRegressor())

# candidate neighbour counts 1 through 9
parameters = {
    'regressor__n_neighbors': range(1, 10)
}

# 3-fold grid search scored by negative RMSE
run_grid_search = estimate_hyperparams(run_features, run_target, run_pipeline, 'neg_root_mean_squared_error', k_folds=3, parameters=parameters)
pass_grid_search = estimate_hyperparams(pass_features, pass_target, pass_pipeline, 'neg_root_mean_squared_error', k_folds=3, parameters=parameters)

display(run_grid_search.best_params_)
display(pass_grid_search.best_params_)

In [None]:
# cross-validation with best hyperparameters

# use the full (shuffled) data frames for the final cross-validation
run_features, run_target = split_feature_target(run_df)
pass_features, pass_target = split_feature_target(pass_df)

# make new pipelines from preprocessing script
pipeline = PREPROCESSOR.make_preprocessing_pipeline(KNeighborsRegressor())

run_params = run_grid_search.best_params_
pass_params = pass_grid_search.best_params_

# Score `pipeline`, the object that carries the tuned parameters. Scoring
# run_pipeline / pass_pipeline would evaluate whatever configuration an
# earlier cell left in them and ignore the grid search result.
pipeline = pipeline.set_params(**run_params)
run_cv_scores = cross_val_score(pipeline, run_features, run_target, cv=5, scoring='neg_mean_squared_error')
print(f"Run Model Cross-Validation Mean Squared Error: {-np.mean(run_cv_scores)}")

pipeline = pipeline.set_params(**pass_params)
pass_cv_scores = cross_val_score(pipeline, pass_features, pass_target, cv=5, scoring='neg_mean_squared_error')
print(f"Pass Model Cross-Validation Mean Squared Error: {-np.mean(pass_cv_scores)}")

### Random Forest

In [None]:
# Basic model

# make pipelines
from sklearn.pipeline import Pipeline

# random forest with 100 trees of depth at most 5, seeded for reproducibility
run_pipeline = PREPROCESSOR.make_preprocessing_pipeline(RandomForestRegressor(n_estimators=100, random_state=42, max_depth=5))
pass_pipeline = PREPROCESSOR.make_preprocessing_pipeline(RandomForestRegressor(n_estimators=100, random_state=42, max_depth=5))

# test model and save predictions
run_predictions = test_model(run_pipeline, run_X_train, run_y_train, run_X_test, run_y_test)
plot_feature_importances(run_pipeline)

pass_predictions = test_model(pass_pipeline, pass_X_train, pass_y_train, pass_X_test, pass_y_test)
# plot the importances of the pass model here, not the run model a second time
plot_feature_importances(pass_pipeline)

# visualize predictions
visualize_predicts(run_y_test, run_predictions, LABEL_RUN)
visualize_predicts(pass_y_test, pass_predictions, LABEL_PASS)

In [None]:
# estimating hyperparameters

# Re-sample only 1% of the data for the grid search. This rebinds the global
# run_features / pass_features used by later cells.
run_features, run_target = split_feature_target(run_df, 0.01)
pass_features, pass_target = split_feature_target(pass_df, 0.01)

# make new pipelines from preprocessing script
# NOTE(review): no random_state is passed to the forests here, unlike in the
# basic model cell - confirm whether the search should be seeded.
run_pipeline = PREPROCESSOR.make_preprocessing_pipeline(RandomForestRegressor())
pass_pipeline = PREPROCESSOR.make_preprocessing_pipeline(RandomForestRegressor())

# candidate tree depths 4 through 6 combined with three forest sizes
parameters = {
    'regressor__max_depth': range(4, 7),
    'regressor__n_estimators': [20, 50, 100]
}

# 3-fold grid search scored by negative RMSE
run_grid_search = estimate_hyperparams(run_features, run_target, run_pipeline, 'neg_root_mean_squared_error', k_folds=3, parameters=parameters)
pass_grid_search = estimate_hyperparams(pass_features, pass_target, pass_pipeline, 'neg_root_mean_squared_error', k_folds=3, parameters=parameters)

display(run_grid_search.best_params_)
display(pass_grid_search.best_params_)

In [None]:
# cross-validation with best hyperparameters

# use the full (shuffled) data frames for the final cross-validation
run_features, run_target = split_feature_target(run_df)
pass_features, pass_target = split_feature_target(pass_df)

# make new pipelines from preprocessing script
pipeline = PREPROCESSOR.make_preprocessing_pipeline(RandomForestRegressor())

run_params = run_grid_search.best_params_
pass_params = pass_grid_search.best_params_

# One pipeline object is configured for runs first and then re-configured for
# passes. The scorer returns negative MSE, hence the sign flip when printing.
pipeline = pipeline.set_params(**run_params)
run_cv_scores = cross_val_score(pipeline, run_features, run_target, cv=5, scoring='neg_mean_squared_error')
print(f"Run Model Cross-Validation Mean Squared Error: {-np.mean(run_cv_scores)}")
pipeline = pipeline.set_params(**pass_params)
pass_cv_scores = cross_val_score(pipeline, pass_features, pass_target, cv=5, scoring='neg_mean_squared_error')
print(f"Pass Model Cross-Validation Mean Squared Error: {-np.mean(pass_cv_scores)}")

In [None]:
# visualize decision tree
# Fit both pipelines on the training split so that the forests expose their
# fitted trees through estimators_.
# NOTE(review): run_pipeline / pass_pipeline are whatever the most recently
# executed cell assigned. If that was the hyperparameter cell, they hold
# RandomForestRegressor() without the tuned parameters - confirm this is the
# model that should be plotted.
test_model(run_pipeline, run_X_train, run_y_train, run_X_test, run_y_test)
test_model(pass_pipeline, pass_X_train, pass_y_train, pass_X_test, pass_y_test)

# Access a specific tree from the forest (e.g., the first tree)
tree_to_plot_pass = 0
tree_to_plot_run = 0

# the fitted forests sit in the 'regressor' step of each pipeline
pass_rf_regressor = pass_pipeline.named_steps['regressor']
run_rf_regressor = run_pipeline.named_steps['regressor']

# Access the decision tree from the Random Forest
individual_tree_pass = pass_rf_regressor.estimators_[tree_to_plot_pass]
individual_tree_run = run_rf_regressor.estimators_[tree_to_plot_run]

# Plot the decision tree for passes
plot_decision_tree(individual_tree_pass, LABEL_PASS)

# Plot the decision tree for runs
plot_decision_tree(individual_tree_run, LABEL_RUN)

### XGBoost

In [None]:
# Basic model

# make pipelines
# The run and pass models differ only in their learning rate.
# NOTE(review): eval_metric presumably only matters when an evaluation set is
# passed to fit, which test_model does not do - confirm it is needed here.
run_pipeline = PREPROCESSOR.make_preprocessing_pipeline(
    xgb.XGBRegressor(
        learning_rate = 0.022,
        n_estimators  = 1000,
        max_depth     = 8,
        eval_metric='rmsle'
                           )
)
pass_pipeline = PREPROCESSOR.make_preprocessing_pipeline(
    xgb.XGBRegressor(
        learning_rate = 0.015,
        n_estimators  = 1000,
        max_depth     = 8,
        eval_metric='rmsle'
        )
)

# test model and save predictions
run_predictions = test_model(run_pipeline, run_X_train, run_y_train, run_X_test, run_y_test)
pass_predictions = test_model(pass_pipeline, pass_X_train, pass_y_train, pass_X_test, pass_y_test)

# feature importances of the fitted boosters
plot_feature_importances(run_pipeline)
plot_feature_importances(pass_pipeline)

# visualize predictions
visualize_predicts(run_y_test, run_predictions, LABEL_RUN)
visualize_predicts(pass_y_test, pass_predictions, LABEL_PASS)

In [None]:
# estimating hyperparameters

# Re-sample 5% of the data for the grid search. This rebinds the global
# run_features / pass_features used by later cells.
run_features, run_target = split_feature_target(run_df, 0.05)
pass_features, pass_target = split_feature_target(pass_df, 0.05)

# make new pipelines from preprocessing script
run_pipeline = PREPROCESSOR.make_preprocessing_pipeline(xgb.XGBRegressor())
pass_pipeline = PREPROCESSOR.make_preprocessing_pipeline(xgb.XGBRegressor())

parameters = {
    "regressor__max_depth":    [8, 10],
    "regressor__n_estimators": [1000, 1100],
    "regressor__learning_rate": [0.022, 0.015]
}

# 3-fold grid search scored by negative RMSE. Both calls use the same
# signature; a stray positional 0.05 in the pass call used to shift `scoring`
# and bind `k_folds` twice, which raises a TypeError.
run_grid_search = estimate_hyperparams(run_features, run_target, run_pipeline, 'neg_root_mean_squared_error', k_folds=3, parameters=parameters)
pass_grid_search = estimate_hyperparams(pass_features, pass_target, pass_pipeline, 'neg_root_mean_squared_error', k_folds=3, parameters=parameters)

display(run_grid_search.best_params_)
display(pass_grid_search.best_params_)

### Artificial Neural Network

In [None]:
# Basic model

# Both play types use the same network settings: one hidden layer of 50 units,
# ReLU activation, the adam solver and 100 training epochs.
run_pipeline = PREPROCESSOR.make_preprocessing_pipeline(
    MLPWithHistory(
        mlp_params={'hidden_layer_sizes': (50,),
  'activation': 'relu',
  'solver': 'adam',
  'max_iter': 100}
    )
)
pass_pipeline = PREPROCESSOR.make_preprocessing_pipeline(
    MLPWithHistory(
        mlp_params={'hidden_layer_sizes': (50,),
  'activation': 'relu',
  'solver': 'adam',
  'max_iter': 100}
    )
)

# Fit the pipelines
run_pipeline.fit(run_X_train, run_y_train)
pass_pipeline.fit(pass_X_train, pass_y_train)

# the fitted wrappers hold the recorded loss histories
run_mlp = run_pipeline.named_steps['regressor']
pass_mlp = pass_pipeline.named_steps['regressor']

# Plot the training and validation loss
visualize_train_val_loss(run_mlp.training_losses, run_mlp.validation_losses, LABEL_RUN)
visualize_train_val_loss(pass_mlp.training_losses, pass_mlp.validation_losses, LABEL_PASS)

# Evaluate the neural network for run plays
y_run_pred = run_pipeline.predict(run_X_test)
mse_run = mean_squared_error(run_y_test, y_run_pred)
print(f"Mean Squared Error (MSE) for run plays: {mse_run}")

# Evaluate the neural network for pass plays
y_pass_pred = pass_pipeline.predict(pass_X_test)
mse_pass = mean_squared_error(pass_y_test, y_pass_pred)
print(f"Mean Squared Error (MSE) for pass plays: {mse_pass}")


In [None]:
# estimating hyperparameters

# Re-sample only 1% of the data for the grid search. This rebinds the global
# run_features / pass_features.
run_features, run_target = split_feature_target(run_df, 0.01)
pass_features, pass_target = split_feature_target(pass_df, 0.01)

# make new pipelines from preprocessing script
run_pipeline = PREPROCESSOR.make_preprocessing_pipeline(MLPWithHistory())
pass_pipeline = PREPROCESSOR.make_preprocessing_pipeline(MLPWithHistory())

# Each candidate is a complete mlp_params dict; the candidates differ only in
# the hidden layer layout.
parameters = {
    'regressor__mlp_params': [
        {'hidden_layer_sizes': (10,), 'activation': 'relu', 'solver': 'adam', 'max_iter': 100},
        {'hidden_layer_sizes': (50,), 'activation': 'relu', 'solver': 'adam', 'max_iter': 100},
        {'hidden_layer_sizes': (10, 5), 'activation': 'relu', 'solver': 'adam', 'max_iter': 100},
        {'hidden_layer_sizes': (20, 10), 'activation': 'relu', 'solver': 'adam', 'max_iter': 100},
    ]
}

# 3-fold grid search scored by negative RMSE
run_estimator = estimate_hyperparams(run_features, run_target, run_pipeline, 'neg_root_mean_squared_error', k_folds=3, parameters=parameters)
pass_estimator = estimate_hyperparams(pass_features, pass_target, pass_pipeline, 'neg_root_mean_squared_error', k_folds=3, parameters=parameters)

# show the best parameters and the full result table of each search
display(run_estimator.best_params_)
display(pd.DataFrame(run_estimator.cv_results_))
display(pass_estimator.best_params_)
display(pd.DataFrame(pass_estimator.cv_results_))