## Preliminaries


This section imports the required library dependencies and sets the configuration values: the random seed and the number of cross-validation folds to be used.


In [None]:
import numpy as np
import pandas as pd
import os
import joblib

In [None]:
# Seed reused by the train/test split, the Bayesian searches, and the MLP models.
random_state = 100
# Number of cross-validation folds passed to every BayesSearchCV in this notebook.
cv_folds = 5

# Directory that receives the joblib model dumps. exist_ok=True makes the cell
# idempotent without a separate isdir() check (no check-then-create race).
os.makedirs("model_dumps", exist_ok=True)

The cell below downloads our existing dataset.


In [None]:
# download_weight is a project helper from scripts/utils. It is called here with a
# local path and a Google Drive URL.
# NOTE(review): presumably it saves the URL's content to that path (the next section
# reads "min_epsilon_dataset.csv") — confirm against scripts/utils.
from scripts.utils import download_weight
download_weight("./min_epsilon_dataset.csv", "https://drive.google.com/uc?export=download&id=14qjnVqEqE0pqzsYsjuHIQCGW7emyVazu")

This notebook trains and saves the weights of IARM. To load new weights, create an instance of an IARM object and set the weight location using the parameter `weight_loc`.


## Data Loading


This reads and stores the downloaded csv file to a variable.


In [None]:
# set the file name of the csv file that will be used for training here
CSV_FILENAME = "min_epsilon_dataset.csv"
# Raw, un-encoded dataset; later cells derive df_encoded_features from it.
df_features = pd.read_csv(CSV_FILENAME)
# Preview the first rows only.
df_features.head()

## Feature Transformation


The cells below convert `string` (and boolean) columns into `numeric` datatypes.


In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [None]:
# A single LabelEncoder instance is refit on every categorical column in turn, so
# after this cell `encoder` only holds the classes of the last column processed;
# the per-column mappings are not kept.
encoder = LabelEncoder()
# Columns whose dtype is bool or object are the ones that get integer-encoded.
categorical_columns = df_features.select_dtypes(include=[bool, object]).columns
# apply() calls encoder.fit_transform once per selected column.
encoded_columns = df_features[categorical_columns].apply(encoder.fit_transform)
encoded_columns

In [None]:
# Work on a copy so the raw df_features frame stays untouched.
df_encoded_features = df_features.copy()
# Overwrite only the categorical columns with their integer codes.
df_encoded_features[categorical_columns] = encoded_columns
df_encoded_features

## Features and Labels


Split into X (features) and Y (labels).


In [None]:
# Region names available for the feature columns.
feat_regions = ["bbox", "mask"]
# Features that are always included, regardless of the chosen color space.
non_color_features = ["w", "h", "x", "y", "obj_score", "class_score"]

# Column-name prefixes of the per-channel histogram bins for each color space.
# get_features_and_label appends "<region>_<bin index>" to each prefix.
# NOTE(review): YCBCR lists CR before CB; this only affects the column order.
color_channels = {
    "RGB": ("R_BIN_", "G_BIN_", "B_BIN_"),
    "HSV": ("H_HSV_BIN_", "S_HSV_BIN_", "V_HSV_BIN_"),
    "HSL": ("H_HSL_BIN_", "S_HSL_BIN_", "L_HSL_BIN_"),
    "LAB": ("L_LAB_BIN_", "A_LAB_BIN_", "B_LAB_BIN_"),
    "YCBCR": ("Y_BIN_", "CR_BIN_", "CB_BIN_"),
}

# Region names that can appear in a label column ("e_<label_region>_<label_model>").
label_regions = ["lbbox", "bbox", "face"]

In [None]:
# Builds the list of feature column names (one per histogram bin) and the label column
# name for a given color_space, region, label_model, and label_region.
def get_features_and_label(color_space, region, label_model="yf", label_region=None):
    """Return ``(feature_columns, label_column)`` for one configuration.

    Parameters
    ----------
    color_space : str
        Key of ``color_channels`` (e.g. ``"HSV"``).
    region : str
        Region suffix embedded in every binned feature name.
    label_model : str, default "yf"
        Model suffix of the label column.
    label_region : str or None
        Region part of the label column. When omitted it equals ``region``,
        except that ``"mask"`` maps to ``"face"``.

    Returns
    -------
    tuple[list[str], str]
        The non-color features, then 26 bins per color channel, 26 LBP bins and
        20 bins each for SOBELX, SOBELY and SOBEL; and the label name
        ``"e_<label_region>_<label_model>"``.
    """
    if label_region is None:
        label_region = "face" if region == "mask" else region

    def bin_columns(prefix, n_bins):
        # "<prefix><region>_<index>" for every bin index.
        return [f"{prefix}{region}_{idx}" for idx in range(n_bins)]

    features = list(non_color_features)
    for channel_prefix in color_channels[color_space]:
        features.extend(bin_columns(channel_prefix, 26))

    texture_and_edge_bins = (
        ("LBP_BIN_", 26),
        ("SOBELX_BIN_", 20),
        ("SOBELY_BIN_", 20),
        ("SOBEL_BIN_", 20),
    )
    for prefix, n_bins in texture_and_edge_bins:
        features.extend(bin_columns(prefix, n_bins))

    return features, f"e_{label_region}_{label_model}"

## Base Models


### Model Evaluation Functions


This imports the regression models to be considered (i.e. `RandomForestRegressor`, `SVR`, and `MLPRegressor`).


In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import make_scorer

The function below computes the root mean squared error (RMSE), defined as $\sqrt{\overline{(\hat{y}-y)^2}}$.


In [None]:
def rmse(actual, predictions):
    """Root mean squared error between ``actual`` and ``predictions``.

    Both inputs may be any array-like (lists, tuples, NumPy arrays, Series).
    They are converted to flat float arrays first, so plain Python lists work
    and an ``(n,)`` vs ``(n, 1)`` pair is compared element by element instead
    of being broadcast to an ``(n, n)`` matrix.

    Returns
    -------
    numpy.float64
        ``sqrt(mean((predictions - actual) ** 2))``.
    """
    actual = np.asarray(actual, dtype=float).ravel()
    predictions = np.asarray(predictions, dtype=float).ravel()
    return np.sqrt(np.mean(np.square(predictions - actual)))

The function below computes the mean absolute error (MAE), defined as $\overline{|\hat{y}-y|}$.


In [None]:
def mae(actual, predictions):
    """Mean absolute error between ``actual`` and ``predictions``.

    Both inputs may be any array-like. They are converted to flat float arrays
    first, so plain Python lists work and an ``(n,)`` vs ``(n, 1)`` pair is
    compared element by element instead of being broadcast.

    Returns
    -------
    numpy.float64
        ``mean(|predictions - actual|)``.
    """
    actual = np.asarray(actual, dtype=float).ravel()
    predictions = np.asarray(predictions, dtype=float).ravel()
    return np.mean(np.abs(predictions - actual))

The function is the definition for computing the mean positive error (MPE). The mean positive error is defined as the average error while disregarding underpredictions.


In [None]:
def positive_error(actual, pred):
    """Mean positive error: average overshoot over the over-predicted samples.

    Only samples where ``pred > actual`` contribute; under-predictions and exact
    hits are ignored.

    Parameters
    ----------
    actual, pred : array-like
        True and predicted values. They are flattened, so ``(n,)`` and
        ``(n, 1)`` inputs can be mixed. Inputs of different lengths now raise
        instead of being silently truncated to the shorter one.

    Returns
    -------
    float
        Mean of ``pred - actual`` over the over-predicted samples, or ``0``
        when no sample is over-predicted (including empty input).
    """
    actual = np.asarray(actual, dtype=float).ravel()
    pred = np.asarray(pred, dtype=float).ravel()

    over_predicted = pred > actual
    if not over_predicted.any():
        return 0
    return np.mean(pred[over_predicted] - actual[over_predicted])

The function is the definition for computing the concealment ratio (CR) which denotes the count of successfully predicted concealed faces over the entire count of actual concealed faces.


In [None]:
def concealment_ratio(actual, pred):
    """Fraction of samples whose prediction reaches or exceeds the true value.

    Parameters
    ----------
    actual, pred : array-like
        True and predicted values. They are flattened, so ``(n,)`` and
        ``(n, 1)`` inputs can be mixed.

    Returns
    -------
    float
        ``count(pred >= actual) / len(actual)``. Empty input returns ``0.0``
        (the loop-based version raised ``ZeroDivisionError``), mirroring
        ``positive_error`` which returns 0 when there is nothing to average.
    """
    actual = np.asarray(actual, dtype=float).ravel()
    pred = np.asarray(pred, dtype=float).ravel()

    if actual.size == 0:
        return 0.0
    return float(np.mean(pred >= actual))

The function defines a custom scorer using MPE and CR as its evaluation metrics. It is used when comparing different models trained with different hyperparameters. It has a penalty value which is used to balance whether MPE or CR is given more priority when scoring.


In [None]:
def custom_scorer(y_true, y_pred, penalty=1, pmae_target=None):
    """Score predictions as concealment ratio minus a penalized, normalized MPE.

    Parameters
    ----------
    y_true, y_pred : array-like
        True and predicted values of the fold being scored.
    penalty : float, default 1
        Weight of the normalized mean positive error; larger values favour
        low over-prediction over a high concealment ratio.
    pmae_target : float or None, default None
        Value used to normalize the mean positive error. When omitted, the mean
        of the notebook-level ``y_train`` is used (the original behaviour), so
        the train/test split cell must have been run before any scoring happens.

    Returns
    -------
    float
        ``concealment_ratio - penalty * (positive_error / pmae_target)``;
        higher is better.
    """
    pmae = positive_error(y_true, y_pred)
    concealment = concealment_ratio(y_true, y_pred)

    if pmae_target is None:
        # Hidden dependency: y_train is a global created later in the notebook.
        pmae_target = np.mean(y_train)

    pmae_norm = pmae / pmae_target

    return concealment - (pmae_norm * penalty)

# Scorer with the default penalty of 1; make_scorer forwards extra keyword
# arguments (e.g. penalty=...) to custom_scorer.
concealment_scorer = make_scorer(custom_scorer, greater_is_better=True)

## Model Training w/ Hyperparameter Tuning + Feature Selection


Select which labels and which color space attributes to use for training


In [None]:
CHOSEN_COLOR_SPACE = "HSV" #<-- pick a colorspace
CHOSEN_REGION = "bbox" #lbbox next

# Only the feature list is taken from the helper; its suggested label is discarded
# and the label column is chosen manually on the next line.
features, _ = get_features_and_label(CHOSEN_COLOR_SPACE, CHOSEN_REGION)
label = "e_face_yf" #"e_bbox_mp"  #<-- pick label

# X keeps the DataFrame (column names are needed later to report selected features);
# y is converted to a plain NumPy array.
X_features =  df_encoded_features.loc[:,  features]
y_features = df_encoded_features.loc[:, label].values

# train-test split
# 80/20 split, seeded. y_train from here is also read by custom_scorer.
X_train, X_test, y_train, y_test = train_test_split(X_features, y_features, test_size = 0.2, random_state=random_state)

In [None]:
# from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.base import clone as clone_model

### Random Forest


#### Bayesian Optimization


This defines the search space configurations to be used for the `Bayesian Optimization` search for `RandomForestRegressor`.


In [None]:
# skopt search dimensions for RandomForestRegressor; keys are its constructor
# parameter names, values are the ranges / choices sampled by BayesSearchCV.
search_space = {
    "n_estimators": Integer(100, 350),
    "criterion": Categorical(["squared_error", "absolute_error", "friedman_mse", "poisson"]),
    "max_depth": Integer(1, 300),
    "min_samples_split": Integer(2, 32),
    "min_samples_leaf": Integer(1, 20),
    "max_features": Categorical([None, "sqrt", "log2", 0.25, 0.5, 0.75]),
    "max_leaf_nodes": Integer(50, 300),
    "min_impurity_decrease": Real(0.0, 2.0),
    "bootstrap": Categorical([False, True]),
    "ccp_alpha": Real(0.0, 2.0),
}

This trains a random forest regressor while also performing hyperparameter tuning.


In [None]:
# Seed the forest itself: the search and the MLP models already use random_state,
# but an unseeded RandomForestRegressor makes every run of this cell score differently.
rfr = RandomForestRegressor(random_state=random_state)
# 50 Bayesian-optimization iterations, each cross-validated with cv_folds folds and
# ranked by concealment_scorer.
bo_rfr = BayesSearchCV(rfr, search_space, n_iter=50, n_jobs=-1, cv=cv_folds, random_state=random_state, verbose=2, scoring=concealment_scorer)
bo_rfr.fit(X_train, y_train)

# Persist the whole fitted search object; the file name encodes color space, region and label.
joblib.dump(bo_rfr, "model_dumps/rfr_tuned_" + CHOSEN_COLOR_SPACE + "_" + CHOSEN_REGION + "_" + label + ".pkl")

This computes for the performance of the trained model using the defined performance metrics.


In [None]:
# Report the tuned hyperparameters, then the four metrics on each split.
print('Best parameters found:\n', bo_rfr.best_params_)

# Held-out split.
print('Test')
y_pred = bo_rfr.predict(X_test)
print("RMSE: ", rmse(y_test, y_pred))
print("MAE: ", mae(y_test, y_pred))
print("Positive Error", positive_error(y_test, y_pred))
print("Concealment Ratio", concealment_ratio(y_test, y_pred))

# Training split, for comparison with the test numbers (y_pred is overwritten).
print('Train')
y_pred = bo_rfr.predict(X_train)
print("RMSE: ", rmse(y_train, y_pred))
print("MAE: ", mae(y_train, y_pred))
print("Positive Error", positive_error(y_train, y_pred))
print("Concealment Ratio", concealment_ratio(y_train, y_pred))

### Feature Selection


The cell below illustrates a method for training the model with a subset of the features. It selects the top K features as ranked by the `f_regression` statistical test. The value of K is treated as a hyperparameter to be determined by the Bayesian Optimization search.


In [None]:
# Seed the forest so the search scores and the refit below are reproducible.
rfr = RandomForestRegressor(random_state=random_state)
# SelectKBest(f_regression) runs inside the pipeline, so feature ranking is
# recomputed on the training part of every CV fold.
pipe = Pipeline([('selector', SelectKBest(f_regression)), ('rfr', rfr)])

# "<step>__<param>" keys address the pipeline steps. k ranges from half of the
# feature count up to one less than all features.
search_space = {
    "selector__k": Integer(X_train.shape[1] // 2, X_train.shape[1] - 1),
    "rfr__n_estimators": Integer(100, 350),
    "rfr__criterion": Categorical(["squared_error", "absolute_error", "friedman_mse", "poisson"]),
    "rfr__max_depth": Integer(1, 300),
    "rfr__min_samples_split": Integer(2, 32),
    "rfr__min_samples_leaf": Integer(1, 20),
    "rfr__max_features": Categorical([None, "sqrt", "log2", 0.25, 0.5, 0.75]),
    "rfr__max_leaf_nodes": Integer(50, 300),
    "rfr__min_impurity_decrease": Real(0.0, 2.0),
    "rfr__bootstrap": Categorical([False, True]),
    "rfr__ccp_alpha": Real(0.0, 2.0),
}

bo_rfr = BayesSearchCV(pipe, search_space, n_iter=75, n_jobs=-1, cv=cv_folds, random_state=random_state, scoring=concealment_scorer)
bo_rfr.fit(X_train, y_train)
# Dump of the full search object.
joblib.dump(bo_rfr, "model_dumps/rfr_tunedfs_bo_" + CHOSEN_COLOR_SPACE + "_" + CHOSEN_REGION + "_" + label + ".pkl")

# Boolean mask over X_train's columns marking the features kept by the best pipeline.
selected_feat = bo_rfr.best_estimator_.named_steps["selector"].get_support()
# Stand-alone copy of the best pipeline, refit on the training split and dumped
# separately from the search object.
best_rfr = clone_model(bo_rfr.best_estimator_)
best_rfr.fit(X_train, y_train)
joblib.dump(best_rfr, "model_dumps/rfr_tunedfs_" + CHOSEN_COLOR_SPACE + "_" + CHOSEN_REGION + "_" + label + ".pkl")

This computes for the performance of the trained model using the defined performance metrics.


In [None]:
# Report the tuned hyperparameters and the names of the selected feature columns.
print('Best params:\n', bo_rfr.best_params_)
print('Best features found:\n', X_train.columns[selected_feat])

# Held-out split, scored with the refit pipeline best_rfr.
print("Test")
y_pred = best_rfr.predict(X_test)
print("RMSE: ", rmse(y_test, y_pred))
print("MAE: ", mae(y_test, y_pred))
print("Positive Error", positive_error(y_test, y_pred))
print("Concealment Ratio", concealment_ratio(y_test, y_pred))

# Training split, for comparison with the test numbers (y_pred is overwritten).
print("Train")
y_pred = best_rfr.predict(X_train)
print("RMSE: ", rmse(y_train, y_pred))
print("MAE: ", mae(y_train, y_pred))
print("Positive Error", positive_error(y_train, y_pred))
print("Concealment Ratio", concealment_ratio(y_train, y_pred))

### Support Vector Machine


#### Bayesian Optimization


This defines the search space configurations to be used for the `Bayesian Optimization` search for `SVM`.


In [None]:
# skopt search dimensions for SVR; keys are its constructor parameter names.
# This rebinds search_space, replacing the random-forest space defined earlier.
search_space = {
    "kernel": Categorical(["linear", "poly", "rbf", "sigmoid"]),
    "degree": Integer(3, 20),
    "gamma": Categorical(["auto", "scale"]),
    "coef0": Real(0.0, 5.0),
    "tol": Real(0.0001, 0.1),
    "C": Real(0.0001, 1000.0),
    "epsilon": Real(0.05, .5),
    "shrinking": Categorical([False, True]),
    "max_iter": Integer(100, 5000),
}

This trains a support vector machine while also performing hyperparameter tuning.


In [None]:
# NOTE(review): features are passed to SVR unscaled here — confirm that is intended.
svr = SVR()
# 50 Bayesian-optimization iterations, each cross-validated with cv_folds folds and
# ranked by concealment_scorer.
bo_svr = BayesSearchCV(svr, search_space, n_iter=50, n_jobs=-1, cv=cv_folds, random_state=random_state, verbose=2, scoring=concealment_scorer)
bo_svr.fit(X_train, y_train)

# Persist the whole fitted search object; the file name encodes color space, region and label.
joblib.dump(bo_svr, "model_dumps/svr_tuned_" + CHOSEN_COLOR_SPACE + "_" + CHOSEN_REGION + "_" + label + ".pkl") 

In [None]:
# This computes the performance of the trained model using the defined performance metrics.

In [None]:
# Report the tuned hyperparameters, then the four metrics on each split.
print('Best parameters found:\n', bo_svr.best_params_)

# Held-out split.
print('Test')
y_pred = bo_svr.predict(X_test)
print("RMSE: ", rmse(y_test, y_pred))
print("MAE: ", mae(y_test, y_pred))
print("Positive Error", positive_error(y_test, y_pred))
print("Concealment Ratio", concealment_ratio(y_test, y_pred))

# Training split, for comparison with the test numbers (y_pred is overwritten).
print('Train')
y_pred = bo_svr.predict(X_train)
print("RMSE: ", rmse(y_train, y_pred))
print("MAE: ", mae(y_train, y_pred))
print("Positive Error", positive_error(y_train, y_pred))
print("Concealment Ratio", concealment_ratio(y_train, y_pred))

### Feature Selection


The cell below illustrates a method for training the model with a subset of the features. It selects the top K features as ranked by the `f_regression` statistical test. The value of K is treated as a hyperparameter to be determined by the Bayesian Optimization search.


In [None]:
svr = SVR()
# SelectKBest(f_regression) runs inside the pipeline, ahead of the SVR step.
pipe = Pipeline([('selector', SelectKBest(f_regression)), ('svr', svr)])

# "<step>__<param>" keys address the pipeline steps. k ranges from half of the
# feature count up to one less than all features.
search_space = {
    "selector__k": Integer(X_train.shape[1] // 2, X_train.shape[1] - 1),
    "svr__kernel": Categorical(["linear", "poly", "rbf", "sigmoid"]),
    "svr__degree": Integer(3, 20),
    "svr__gamma": Categorical(["auto", "scale"]),
    "svr__coef0": Real(0.0, 5.0),
    "svr__tol": Real(0.0001, 0.1),
    "svr__C": Real(0.0001, 1000.0),
    "svr__epsilon": Real(0.05, .5),
    "svr__shrinking": Categorical([False, True]),
    "svr__max_iter": Integer(100, 5000),
}

bo_svr = BayesSearchCV(pipe, search_space, n_iter=75, n_jobs=-1, cv=cv_folds, random_state=random_state, scoring=concealment_scorer)
bo_svr.fit(X_train, y_train)
# Dump of the full search object.
joblib.dump(bo_svr, "model_dumps/svr_tunedfs_bo_" + CHOSEN_COLOR_SPACE + "_" + CHOSEN_REGION + "_" + label + ".pkl") 

# Boolean mask over X_train's columns marking the features kept by the best pipeline.
selected_feat = bo_svr.best_estimator_.named_steps["selector"].get_support()
# Stand-alone copy of the best pipeline, refit on the training split and dumped
# separately from the search object.
best_svr = clone_model(bo_svr.best_estimator_)
best_svr.fit(X_train, y_train)
joblib.dump(best_svr, "model_dumps/svr_tunedfs_" + CHOSEN_COLOR_SPACE + "_" + CHOSEN_REGION + "_" + label + ".pkl") 

This computes for the performance of the trained model using the defined performance metrics.


In [None]:
# Report the tuned hyperparameters and the names of the selected feature columns.
print('Best params:\n', bo_svr.best_params_)
print('Best features found:\n', X_train.columns[selected_feat])

# Held-out split, scored with the refit pipeline best_svr.
print("Test")
y_pred = best_svr.predict(X_test)
print("RMSE: ", rmse(y_test, y_pred))
print("MAE: ", mae(y_test, y_pred))
print("Positive Error", positive_error(y_test, y_pred))
print("Concealment Ratio", concealment_ratio(y_test, y_pred))

# Training split, for comparison with the test numbers (y_pred is overwritten).
print("Train")
y_pred = best_svr.predict(X_train)
print("RMSE: ", rmse(y_train, y_pred))
print("MAE: ", mae(y_train, y_pred))
print("Positive Error", positive_error(y_train, y_pred))
print("Concealment Ratio", concealment_ratio(y_train, y_pred))

### Multilayer Perceptron


#### Bayesian Optimization With No Custom Loss


This defines the search space configurations to be used for the `Bayesian Optimization` search for `MLPRegressor`.


In [None]:
# First hidden-layer width: two thirds of the number of input features, plus one.
hidden_sz = X_train.shape[1] * 2 // 3 + 1
# skopt search dimensions for MLPRegressor; keys are its constructor parameter names.
# The layer sizes are not searched; they are fixed from hidden_sz when the model is built.
search_space = {
    "activation": Categorical(["identity", "logistic", "tanh", "relu"]),
    "solver": Categorical(["lbfgs", "adam", "sgd"]),
    "alpha": Real(0.00001, 0.001),
    "learning_rate": Categorical(["constant", "invscaling", "adaptive"]),
    "learning_rate_init": Real(0.0005, 0.005),
    "max_iter": Integer(200, 1000),
    "tol": Real(0.0001, 0.1),
    "momentum": Real(0.75, 0.9),
    "nesterovs_momentum": Categorical([False, True]),
    "early_stopping": Categorical([False, True]),
    "validation_fraction": Real(0.1, 0.15),
    "beta_1": Real(0.75, 0.9),
    "beta_2": Real(0.85, 0.999),
    "epsilon": Real(1e-08, 1e-07),
    "max_fun": Integer(10000, 15000),
}

This trains a multilayer perceptron using the default loss function while also performing hyperparameter tuning.


In [None]:
# Rebinds the notebook-level concealment_scorer with a larger penalty. Every later
# cell that passes scoring=concealment_scorer uses this value, while the RF and SVR
# searches above ran with the default penalty of 1.
# NOTE(review): the origin of 1.1578947368421053 is not shown in this notebook.
concealment_scorer = make_scorer(custom_scorer, greater_is_better=True, penalty=1.1578947368421053)
# Two hidden layers: hidden_sz units, then two thirds of that.
mpr = MLPRegressor(random_state=random_state, hidden_layer_sizes=(hidden_sz, hidden_sz * 2 // 3))
bo_mpr = BayesSearchCV(mpr, search_space, n_iter=50, n_jobs=-1, cv=cv_folds, random_state=random_state, verbose=2, scoring=concealment_scorer)
bo_mpr.fit(X_train, y_train)

# NOTE(review): unlike the other dumps, there is no "_" between "no_custom_loss" and
# the color space in this file name — confirm before relying on the name elsewhere.
joblib.dump(bo_mpr, "model_dumps/no_custom_loss" + CHOSEN_COLOR_SPACE + "_" + CHOSEN_REGION + "_" + label + ".pkl") 

This computes for the performance of the trained model using the defined performance metrics.


In [None]:
# Report the tuned hyperparameters, then the four metrics on each split.
print('Best parameters found:\n', bo_mpr.best_params_)

# Held-out split.
print("Test")
y_pred = bo_mpr.predict(X_test)
print("RMSE", rmse(y_test, y_pred))
print("MAE", mae(y_test, y_pred))
print("Positive Error", positive_error(y_test, y_pred))
print("Concealment Ratio", concealment_ratio(y_test, y_pred))

# Training split, for comparison with the test numbers (y_pred is overwritten).
print("Train")
y_pred = bo_mpr.predict(X_train)
print("RMSE", rmse(y_train, y_pred))
print("MAE", mae(y_train, y_pred))
print("Positive Error", positive_error(y_train, y_pred))
print("Concealment Ratio", concealment_ratio(y_train, y_pred))

### Feature Selection


The cell below illustrates a method for training the model with a subset of the features. It selects the top K features as ranked by the `f_regression` statistical test. The value of K is treated as a hyperparameter to be determined by the Bayesian Optimization search.


In [None]:
# Layer widths are derived from the full feature count (hidden_sz), not from the
# number of features kept by the selector.
mpr = MLPRegressor(random_state=random_state, hidden_layer_sizes=(hidden_sz, hidden_sz * 2 // 3))
pipe = Pipeline([('selector', SelectKBest(f_regression)), ('mpr', mpr)])

# "<step>__<param>" keys address the pipeline steps. k ranges from half of the
# feature count up to one less than all features.
search_space = {
    "selector__k": Integer(X_train.shape[1] // 2, X_train.shape[1] - 1),
    "mpr__activation": Categorical(["identity", "logistic", "tanh", "relu"]),
    "mpr__solver": Categorical(["lbfgs", "adam", "sgd"]),
    "mpr__alpha": Real(0.00001, 0.001),
    "mpr__learning_rate": Categorical(["constant", "invscaling", "adaptive"]),
    "mpr__learning_rate_init": Real(0.0005, 0.005),
    "mpr__max_iter": Integer(200, 1000),
    "mpr__tol": Real(0.0001, 0.1),
    "mpr__momentum": Real(0.75, 0.9),
    "mpr__nesterovs_momentum": Categorical([False, True]),
    "mpr__early_stopping": Categorical([False, True]),
    "mpr__validation_fraction": Real(0.1, 0.15),
    "mpr__beta_1": Real(0.75, 0.9),
    "mpr__beta_2": Real(0.85, 0.999),
    "mpr__epsilon": Real(1e-08, 1e-07),
    "mpr__max_fun": Integer(10000, 15000),
}

bo_mpr = BayesSearchCV(pipe, search_space, n_iter=76, n_jobs=-1, cv=cv_folds, random_state=random_state, verbose=2, scoring=concealment_scorer)
bo_mpr.fit(X_train, y_train)
# Dump of the full search object.
# NOTE(review): no "_" between "no_custom_loss_bo_fs" and the color space in this name.
joblib.dump(bo_mpr, "model_dumps/no_custom_loss_bo_fs" + CHOSEN_COLOR_SPACE + "_" + CHOSEN_REGION + "_" + label + ".pkl") 

# Boolean mask over X_train's columns marking the features kept by the best pipeline.
selected_feat = bo_mpr.best_estimator_.named_steps["selector"].get_support()
# Stand-alone copy of the best pipeline, refit on the training split and dumped
# separately from the search object.
best_mpr = clone_model(bo_mpr.best_estimator_)
best_mpr.fit(X_train, y_train)
joblib.dump(best_mpr, "model_dumps/no_custom_loss_fs" + CHOSEN_COLOR_SPACE + "_" + CHOSEN_REGION + "_" + label + ".pkl") 

This computes for the performance of the trained model using the defined performance metrics.


In [None]:
# Report the tuned hyperparameters and the names of the selected feature columns.
print('Best params:\n', bo_mpr.best_params_)
print('Best features found:\n', X_train.columns[selected_feat])

# Held-out split, scored with the refit pipeline best_mpr.
# "Face Percent" is the value of concealment_ratio (printed as "Concealment Ratio"
# in the other evaluation cells).
print("Test")
y_pred = best_mpr.predict(X_test)
print("RMSE: ", rmse(y_test, y_pred))
print("MAE: ", mae(y_test, y_pred))
print("Positive Error", positive_error(y_test, y_pred))
print("Face Percent", concealment_ratio(y_test, y_pred))

# Training split, for comparison with the test numbers (y_pred is overwritten).
print("Train")
y_pred = best_mpr.predict(X_train)
print("RMSE: ", rmse(y_train, y_pred))
print("MAE: ", mae(y_train, y_pred))
print("Positive Error", positive_error(y_train, y_pred))
print("Face Percent", concealment_ratio(y_train, y_pred))

#### Bayesian Optimization With Custom Loss


This defines the search space configurations to be used for the `Bayesian Optimization` search for `MLPRegressor`.


In [None]:
# First hidden-layer width: two thirds of the number of input features, plus one.
hidden_sz = X_train.shape[1] * 2 // 3 + 1
# Same dimensions as the plain MLP search, plus loss_alpha, the CustomMLP
# constructor parameter that is forwarded to custom_loss as `alpha`.
search_space = {
    "activation": Categorical(["identity", "logistic", "tanh", "relu"]),
    "solver": Categorical(["lbfgs", "adam", "sgd"]),
    "alpha": Real(0.00001, 0.001),
    "learning_rate": Categorical(["constant", "invscaling", "adaptive"]),
    "learning_rate_init": Real(0.0005, 0.005),
    "max_iter": Integer(200, 1000),
    "tol": Real(0.0001, 0.1),
    "momentum": Real(0.75, 0.9),
    "nesterovs_momentum": Categorical([False, True]),
    "early_stopping": Categorical([False, True]),
    "validation_fraction": Real(0.1, 0.15),
    "beta_1": Real(0.75, 0.9),
    "beta_2": Real(0.85, 0.999),
    "epsilon": Real(1e-08, 1e-07),
    "max_fun": Integer(10000, 15000),
    # NOTE(review): the range includes 0, which removes under-predictions from the loss.
    "loss_alpha": Integer(0, 1000),
}

This defines the custom loss function to be used for the custom MLP regressor. The custom loss function punishes the loss of underpredictions by multiplying the error with a hyperparameter alpha.


In [None]:
def custom_loss(y_true, y_pred, alpha=2):
    """Asymmetric RMSE that weights under-predictions by ``alpha``.

    Each squared error ``(y_true - y_pred) ** 2`` is multiplied by ``alpha``
    when ``y_pred < y_true`` (under-prediction) and left unchanged otherwise.
    The weighted squared errors are summed, divided by the number of samples,
    and square-rooted.

    Parameters
    ----------
    y_true, y_pred : array-like
        True and predicted values with the same number of samples. ``(n,)``
        and ``(n, 1)`` inputs can be mixed; they are flattened when their
        shapes differ.
    alpha : float, default 2
        Multiplier applied to the squared error of under-predicted samples.

    Returns
    -------
    numpy.float64
        The weighted RMSE as a scalar (the loop-based version returned a
        shape-``(1,)`` array when fed column vectors).

    Raises
    ------
    ValueError
        If ``y_true`` is empty.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    n_samples = len(y_true)
    if n_samples == 0:
        raise ValueError("custom_loss requires at least one sample")

    # Avoid broadcasting an (n,) target against an (n, 1) prediction.
    if y_true.shape != y_pred.shape:
        y_true = y_true.ravel()
        y_pred = y_pred.ravel()

    # alpha for under-predictions, 1 for over-predictions and exact hits.
    weights = np.where(y_pred < y_true, alpha, 1.0)
    mse = np.sum(weights * (y_true - y_pred) ** 2) / n_samples

    return np.sqrt(mse)

In [None]:
# This defines the custom MLP regressor that uses a custom loss function.

In [None]:
from sklearn.neural_network._base import DERIVATIVES
from sklearn.utils.extmath import safe_sparse_dot

class CustomMLP(MLPRegressor):
    """MLPRegressor trained on the asymmetric ``custom_loss``.

    ``loss_alpha`` is forwarded to ``custom_loss`` as ``alpha``; all other
    constructor arguments are passed unchanged to ``MLPRegressor``.

    NOTE(review): ``_backprop`` overrides a private scikit-learn method, so its
    signature and the helpers it calls (``_forward_pass``, ``_compute_loss_grad``)
    must match the installed scikit-learn version — confirm when upgrading.
    """

    def __init__(
        self,
        loss_alpha=2,
        hidden_layer_sizes=(100,),
        activation="relu",
        *,
        solver="adam",
        alpha=0.0001,
        batch_size="auto",
        learning_rate="constant",
        learning_rate_init=0.001,
        power_t=0.5,
        max_iter=200,
        shuffle=True,
        random_state=None,
        tol=1e-4,
        verbose=False,
        warm_start=False,
        momentum=0.9,
        nesterovs_momentum=True,
        early_stopping=False,
        validation_fraction=0.1,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-8,
        n_iter_no_change=10,
        max_fun=15000,
    ):
        super().__init__(
            hidden_layer_sizes=hidden_layer_sizes,
            activation=activation,
            solver=solver,
            alpha=alpha,
            batch_size=batch_size,
            learning_rate=learning_rate,
            learning_rate_init=learning_rate_init,
            power_t=power_t,
            max_iter=max_iter,
            shuffle=shuffle,
            random_state=random_state,
            tol=tol,
            verbose=verbose,
            warm_start=warm_start,
            momentum=momentum,
            nesterovs_momentum=nesterovs_momentum,
            early_stopping=early_stopping,
            validation_fraction=validation_fraction,
            beta_1=beta_1,
            beta_2=beta_2,
            epsilon=epsilon,
            n_iter_no_change=n_iter_no_change,
            max_fun=max_fun,
        )
        # Multiplier applied to the squared error of under-predicted samples.
        self.loss_alpha = loss_alpha

    def _backprop(self, X, y, activations, deltas, coef_grads, intercept_grads):
        """Compute the custom loss and its gradients for one batch.

        Returns ``(loss, coef_grads, intercept_grads)`` where ``loss`` is
        ``custom_loss`` plus the L2 penalty, and the gradients are those of
        that same loss. Previously the output delta was ``activations[-1] - y``,
        i.e. the gradient of the plain squared error, so ``loss_alpha`` changed
        the reported loss but never the weight updates.
        """
        n_samples = X.shape[0]

        # Forward propagate
        activations = self._forward_pass(activations)

        # Get loss. custom_loss may return a scalar or a one-element array;
        # reduce it to a plain float before doing arithmetic on it.
        data_loss = float(np.ravel(custom_loss(y, activations[-1], alpha=self.loss_alpha))[0])

        # Add L2 regularization term to loss
        values = 0
        for s in self.coefs_:
            s = s.ravel()
            values += np.dot(s, s)
        loss = data_loss + (0.5 * self.alpha) * values / n_samples

        # Backward propagate
        last = self.n_layers_ - 2

        # Gradient of custom_loss w.r.t. the network output. With
        #   L = sqrt(mean(w * (pred - y) ** 2)),  w = loss_alpha if pred < y else 1
        # dL/dpred = w * (pred - y) / (n_samples * L). The 1 / n_samples factor is
        # applied inside _compute_loss_grad, so only w * (pred - y) / L goes here.
        error = activations[-1] - y
        weights = np.where(error < 0, self.loss_alpha, 1.0)
        # When L is 0 (or not finite) every weighted error is 0 as well; divide by 1
        # to avoid 0 / 0.
        scale = data_loss if data_loss > 0 else 1.0
        deltas[last] = weights * error / scale

        # Compute gradient for the last layer
        self._compute_loss_grad(
            last, n_samples, activations, deltas, coef_grads, intercept_grads
        )

        inplace_derivative = DERIVATIVES[self.activation]
        # Iterate over the hidden layers
        for i in range(self.n_layers_ - 2, 0, -1):
            deltas[i - 1] = safe_sparse_dot(deltas[i], self.coefs_[i].T)
            inplace_derivative(activations[i], deltas[i - 1])

            self._compute_loss_grad(
                i - 1, n_samples, activations, deltas, coef_grads, intercept_grads
            )

        return loss, coef_grads, intercept_grads

This trains a custom MLP regressor that uses the custom loss function while also performing hyperparameter tuning.


In [None]:
# Same two-layer architecture as the plain MLP, but using CustomMLP so that
# loss_alpha from the search space is applied.
mpr = CustomMLP(random_state=random_state, hidden_layer_sizes=(hidden_sz, hidden_sz * 2 // 3))
# Uses whatever concealment_scorer is currently bound at notebook level.
bo_mpr = BayesSearchCV(mpr, search_space, n_iter=50, n_jobs=-1, cv=cv_folds, random_state=random_state, verbose=2, scoring=concealment_scorer)
bo_mpr.fit(X_train, y_train)

# Persist the whole fitted search object; the file name encodes color space, region and label.
joblib.dump(bo_mpr, "model_dumps/mpr_tuned_" + CHOSEN_COLOR_SPACE + "_" + CHOSEN_REGION + "_" + label + ".pkl") 

In [None]:
# This computes the performance of the trained model using the defined performance metrics.

In [None]:
# Report the tuned hyperparameters, then the four metrics on each split.
print('Best parameters found:\n', bo_mpr.best_params_)

# Held-out split.
print("Test")
y_pred = bo_mpr.predict(X_test)
print("RMSE", rmse(y_test, y_pred))
print("MAE", mae(y_test, y_pred))
print("Positive Error", positive_error(y_test, y_pred))
print("Concealment Ratio", concealment_ratio(y_test, y_pred))

# Training split, for comparison with the test numbers (y_pred is overwritten).
print("Train")
y_pred = bo_mpr.predict(X_train)
print("RMSE", rmse(y_train, y_pred))
print("MAE", mae(y_train, y_pred))
print("Positive Error", positive_error(y_train, y_pred))
print("Concealment Ratio", concealment_ratio(y_train, y_pred))

### Feature Selection


The cell below illustrates a method for training the model with a subset of the features. It selects the top K features as ranked by the `f_regression` statistical test. The value of K is treated as a hyperparameter to be determined by the Bayesian Optimization search.


In [None]:
# Layer widths are derived from the full feature count (hidden_sz), not from the
# number of features kept by the selector.
mpr = CustomMLP(random_state=random_state, hidden_layer_sizes=(hidden_sz, hidden_sz * 2 // 3))
pipe = Pipeline([('selector', SelectKBest(f_regression)), ('mpr', mpr)])

# "<step>__<param>" keys address the pipeline steps. k ranges from half of the
# feature count up to one less than all features.
search_space = {
    "selector__k": Integer(X_train.shape[1] // 2, X_train.shape[1] - 1),
    "mpr__activation": Categorical(["identity", "logistic", "tanh", "relu"]),
    "mpr__solver": Categorical(["lbfgs", "adam", "sgd"]),
    "mpr__alpha": Real(0.00001, 0.001),
    "mpr__learning_rate": Categorical(["constant", "invscaling", "adaptive"]),
    "mpr__learning_rate_init": Real(0.0005, 0.005),
    "mpr__max_iter": Integer(200, 1000),
    "mpr__tol": Real(0.0001, 0.1),
    "mpr__momentum": Real(0.75, 0.9),
    "mpr__nesterovs_momentum": Categorical([False, True]),
    "mpr__early_stopping": Categorical([False, True]),
    "mpr__validation_fraction": Real(0.1, 0.15),
    "mpr__beta_1": Real(0.75, 0.9),
    "mpr__beta_2": Real(0.85, 0.999),
    "mpr__epsilon": Real(1e-08, 1e-07),
    "mpr__max_fun": Integer(10000, 15000),
    "mpr__loss_alpha": Integer(0, 1000),
}

bo_mpr = BayesSearchCV(pipe, search_space, n_iter=76, n_jobs=-1, cv=cv_folds, random_state=random_state, verbose=2, scoring=concealment_scorer)
bo_mpr.fit(X_train, y_train)
# Dump of the full search object.
joblib.dump(bo_mpr, "model_dumps/mpr_tunedfs_bo_" + CHOSEN_COLOR_SPACE + "_" + CHOSEN_REGION + "_" + label + ".pkl") 

# Boolean mask over X_train's columns marking the features kept by the best pipeline.
selected_feat = bo_mpr.best_estimator_.named_steps["selector"].get_support()
# Stand-alone copy of the best pipeline, refit on the training split and dumped
# separately from the search object.
best_mpr = clone_model(bo_mpr.best_estimator_)
best_mpr.fit(X_train, y_train)
joblib.dump(best_mpr, "model_dumps/mpr_tunedfs_" + CHOSEN_COLOR_SPACE + "_" + CHOSEN_REGION + "_" + label + ".pkl") 

This computes for the performance of the trained model using the defined performance metrics.


In [None]:
# Report the tuned hyperparameters and the names of the selected feature columns.
print('Best params:\n', bo_mpr.best_params_)
print('Best features found:\n', X_train.columns[selected_feat])

# Held-out split, scored with the refit pipeline best_mpr.
# "Face Percent" is the value of concealment_ratio (printed as "Concealment Ratio"
# in the other evaluation cells).
print("Test")
y_pred = best_mpr.predict(X_test)
print("RMSE: ", rmse(y_test, y_pred))
print("MAE: ", mae(y_test, y_pred))
print("Positive Error", positive_error(y_test, y_pred))
print("Face Percent", concealment_ratio(y_test, y_pred))

# Training split, for comparison with the test numbers (y_pred is overwritten).
print("Train")
y_pred = best_mpr.predict(X_train)
print("RMSE: ", rmse(y_train, y_pred))
print("MAE: ", mae(y_train, y_pred))
print("Positive Error", positive_error(y_train, y_pred))
print("Face Percent", concealment_ratio(y_train, y_pred))

#


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=ea4f5838-df37-44b8-866b-b9dc1b4dd46c' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>
