In [None]:
import numpy as np
import pandas as pd
import os
import joblib

In [None]:
# Seed and fold count shared by the train/test split and every search below.
random_state = 100
cv_folds = 5

# Directory where fitted models are dumped. exist_ok=True makes the cell safe
# to re-run and removes the check-then-create race of a separate isdir() test.
os.makedirs("model_dumps", exist_ok=True)

## Data Loading

Set the filename here

In [None]:
CSV_FILENAME = "ths-st3 compiled dataset.csv" #<-- update csv name
# Read the compiled dataset and keep only the rows whose `used_mask` flag is True.
df_features = pd.read_csv(CSV_FILENAME).loc[lambda frame: frame["used_mask"] == True]
df_features.head()

```
df_features = df_features.drop(columns = ['Unnamed: 0', 'path', 'source_w', 'source_h', 'face_index'])
df_features = df_features.loc[df_features["e_bbox_yf"] < 3, :]
print(df_features.columns)
```

Encode boolean and string (object) columns as integer labels

In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [None]:
# One LabelEncoder instance is re-fit on each bool/object column in turn, so
# after this cell it only holds the classes of the last column it encoded.
encoder = LabelEncoder()
categorical_columns = df_features.select_dtypes(include=[bool, object]).columns
encoded_columns = df_features.loc[:, categorical_columns].apply(
    lambda column: encoder.fit_transform(column)
)
encoded_columns

In [None]:
# Copy of the feature frame in which every bool/object column is replaced by
# its integer-encoded version; df_features itself is left untouched.
df_encoded_features = df_features.assign(
    **{column: encoded_columns[column] for column in categorical_columns}
)
df_encoded_features

Split into X and Y

In [None]:
# Regions whose histogram features can be selected as model inputs.
feat_regions = ["bbox", "mask"]
# Columns that are always part of the feature set, whatever the color space.
non_color_features = ["w", "h", "x", "y", "obj_score", "class_score"]

# Color space -> column-name prefixes of its three channel histograms.
color_channels = dict(
    RGB=("R_BIN_", "G_BIN_", "B_BIN_"),
    HSV=("H_HSV_BIN_", "S_HSV_BIN_", "V_HSV_BIN_"),
    HSL=("H_HSL_BIN_", "S_HSL_BIN_", "L_HSL_BIN_"),
    LAB=("L_LAB_BIN_", "A_LAB_BIN_", "B_LAB_BIN_"),
    YCBCR=("Y_BIN_", "CR_BIN_", "CB_BIN_"),
)

# Regions that can appear in a label column name ("e_<region>_<model>").
label_regions = ["lbbox", "bbox", "face"]

In [None]:
def get_features_and_label(color_space, region, label_model="yf", label_region=None):
    """Build the feature column names and the label column name for one setup.

    Parameters
    ----------
    color_space : str
        Key of ``color_channels`` selecting which three channel histograms to use.
    region : str
        Region name embedded in every histogram column name.
    label_model : str, default "yf"
        Suffix of the label column.
    label_region : str or None
        Region part of the label column. When None it defaults to ``region``,
        except that the "mask" region maps to the "face" label.

    Returns
    -------
    (list of str, str)
        The feature column names (non-color features first, then the color,
        LBP and Sobel histogram bins) and the label column
        ``"e_<label_region>_<label_model>"``.
    """
    if label_region is None:
        label_region = "face" if region == "mask" else region

    # (column prefix, number of bins), listed in the order columns are emitted.
    histogram_layout = [(prefix, 26) for prefix in color_channels[color_space]]
    histogram_layout += [
        ("LBP_BIN_", 26),
        ("SOBELX_BIN_", 20),
        ("SOBELY_BIN_", 20),
        ("SOBEL_BIN_", 20),
    ]

    features = list(non_color_features)
    for prefix, n_bins in histogram_layout:
        features.extend(prefix + region + "_" + str(bin_idx) for bin_idx in range(n_bins))

    return features, "e_" + label_region + "_" + label_model

In [None]:
# ASSIGNED_COLOR_SPACE = "HSV" #<-- pick a colorspace
# LABEL_MODEL = "yf"

In [None]:
# features, label = get_features_and_label(ASSIGNED_COLOR_SPACE, "mask")
# X_features_mask =  df_encoded_features.loc[:,  features]
# y_features_mask = df_encoded_features.loc[:, label].values  #<-- pick label

# features, label = get_features_and_label(ASSIGNED_COLOR_SPACE, "bbox")
# X_features_bbox =  df_encoded_features.loc[:,  features]
# y_features_bbox = df_encoded_features.loc[:, label].values  #<-- pick label

In [None]:
# X_train_mask, X_test_mask, y_train_mask, y_test_mask = train_test_split(X_features_mask, y_features_mask, test_size = 0.2, random_state=random_state)

# print("Split shapes")
# print("X_train: ", X_train_mask.shape)
# print("y_train: ", y_train_mask.shape)
# print("X_test: ", X_test_mask.shape)
# print("y_test: ", y_test_mask.shape)

In [None]:
# X_train_bbox, X_test_bbox, y_train_bbox, y_test_bbox = train_test_split(X_features_bbox, y_features_bbox, test_size = 0.2, random_state=random_state)

# print("Split shapes")
# print("X_train: ", X_train_bbox.shape)
# print("y_train: ", y_train_bbox.shape)
# print("X_test: ", X_test_bbox.shape)
# print("y_test: ", y_test_bbox.shape)

## Base Models

### Model Evaluation Functions

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import make_scorer

In [None]:
def rmse(actual, predictions):
    """Root-mean-squared error between two equally shaped sequences.

    Inputs are converted to float arrays first, so plain lists and tuples work
    as well as numpy arrays, and pandas objects are compared by position
    rather than by index label.

    Parameters
    ----------
    actual, predictions : array-like
        Target values and predicted values. The metric is symmetric, so the
        argument order does not change the result.

    Returns
    -------
    numpy.float64
        ``sqrt(mean((predictions - actual) ** 2))``.
    """
    actual = np.asarray(actual, dtype=float)
    predictions = np.asarray(predictions, dtype=float)
    return np.sqrt(np.mean(np.square(predictions - actual)))

In [None]:
def mae(actual, predictions):
    """Mean absolute error between two equally shaped sequences.

    Inputs are converted to float arrays first, so plain lists and tuples work
    as well as numpy arrays, and pandas objects are compared by position
    rather than by index label.

    Parameters
    ----------
    actual, predictions : array-like
        Target values and predicted values. The metric is symmetric, so the
        argument order does not change the result.

    Returns
    -------
    numpy.float64
        ``mean(abs(predictions - actual))``.
    """
    actual = np.asarray(actual, dtype=float)
    predictions = np.asarray(predictions, dtype=float)
    return np.mean(np.abs(predictions - actual))

In [None]:
def positive_error(actual, pred):
    """Mean amount by which predictions exceed their targets.

    Only samples with ``pred > actual`` contribute; the result is the average
    of ``pred - actual`` over those samples.

    Parameters
    ----------
    actual, pred : array-like
        Target values and predicted values, of matching shape.

    Returns
    -------
    float
        The mean over-prediction, or 0.0 when no prediction exceeds its target
        (the previous implementation raised ZeroDivisionError in that case,
        including for empty input).
    """
    actual = np.asarray(actual, dtype=float)
    pred = np.asarray(pred, dtype=float)

    excess = pred - actual
    over_predictions = excess[excess > 0]
    if over_predictions.size == 0:
        return 0.0
    return over_predictions.mean()

In [None]:
def concealment_ratio(actual, pred):
    """Fraction of samples whose prediction is at least as large as the target.

    Pairs are formed positionally from ``pred`` and ``actual``; the count of
    pairs with ``pred >= actual`` is divided by ``len(actual)``.
    """
    covered = sum(1 for predicted, truth in zip(pred, actual) if predicted >= truth)
    return covered / len(actual)

In [None]:
def custom_scorer(y_true, y_pred, underpredict_penalty=5):
    """Asymmetric RMSE that punishes under-prediction more than over-prediction.

    Each squared error is multiplied by ``underpredict_penalty`` when the
    prediction is below the target and by 1 otherwise; the root of the mean
    weighted squared error is returned.

    Parameters
    ----------
    y_true, y_pred : array-like
        Target values and predicted values, of matching shape.
    underpredict_penalty : float, default 5
        Weight applied to the squared error of under-predicted samples. The
        default reproduces the previously hard-coded factor.

    Returns
    -------
    numpy.float64
        The weighted RMSE (lower is better).
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    squared_error = (y_true - y_pred) ** 2
    # Under-predictions (prediction strictly below target) get the heavier weight.
    weights = np.where(y_pred < y_true, underpredict_penalty, 1)
    return np.sqrt(np.mean(weights * squared_error))
# Scorer object built from custom_scorer for use as a `scoring=` argument;
# greater_is_better=False marks the metric as an error (lower is better).
concealment_scorer = make_scorer(custom_scorer, greater_is_better=False)

### Run All Base Models with CV

In [None]:
# from sklearn.ensemble import RandomForestRegressor
# from sklearn.svm import SVR
# from sklearn.neural_network import MLPRegressor
# from sklearn.model_selection import cross_validate

# models = [(RandomForestRegressor, {"random_state": random_state}), (SVR, {}), (MLPRegressor, {"random_state": random_state})]
# label_models = ["yn", "yf", "mp"]

# for cur_label_model in label_models:
#     feature_set_results = pd.DataFrame()
#     for cur_color in color_channels.keys():
#         for cur_region in feat_regions:
#             features, label = get_features_and_label(cur_color, cur_region, label_model=cur_label_model, label_region="lbbox")
#             X = df_encoded_features.loc[:,  features]
#             y = df_encoded_features.loc[:, label].values
#             for model, params in models:
#                 row = {}
#                 model = model(**params)
#                 scores = cross_validate(model, X, y, cv=cv_folds, scoring=('r2', 'neg_root_mean_squared_error', 'neg_mean_absolute_error'), return_train_score=True)
#                 row["color_space"] = cur_color
#                 row["extract_region"] = cur_region
#                 row["perturb_region"] = label
#                 row["model"] = type(model).__name__
#                 print(cur_color, cur_region, label, type(model).__name__)
#                 test_names = ["test_r2", "test_neg_root_mean_squared_error", "test_neg_mean_absolute_error", "train_r2", "train_neg_root_mean_squared_error", "train_neg_mean_absolute_error"]
#                 for test_name in test_names:
#                     row[test_name + "_ave"] = np.mean(scores[test_name])
#                     row[test_name + "_std"] = np.std(scores[test_name])
#                     print('\tave', test_name, row[test_name + "_ave"])
#                     print('\tstd', test_name, row[test_name + "_std"])
#                     for it, val in enumerate(scores[test_name]):
#                         row[test_name + "_" + str(it)] = val
#                 feature_set_results = pd.concat([feature_set_results, pd.DataFrame(row, index=[0])], ignore_index=True)

#     feature_set_results.to_csv(os.path.join(os.getcwd(), "faceseg-outs", cur_label_model + "_feature_set_results_" + LABEL_MODEL + ".csv"))
# raise Exception("STOP")

### Random Forest

Train RF model

filename = "model_dumps/rfr_base_mask_" + ASSIGNED_COLOR_SPACE + ".pkl"

if (os.path.isfile(filename)):
    rfr = joblib.load(filename) 
else:
    rfr = RandomForestRegressor(random_state = random_state)
    rfr.fit(X_train_mask, y_train_mask)
    joblib.dump(rfr, filename) 
    
rfr_pred = rfr.predict(X_test_mask)
print("RMSE:", rmse(rfr_pred, y_test_mask))
print("MAE:", mae(rfr_pred, y_test_mask))

filename = "model_dumps/rfr_base_bbox_" + ASSIGNED_COLOR_SPACE + ".pkl"

if (os.path.isfile(filename)):
    rfr = joblib.load(filename) 
else:
    rfr = RandomForestRegressor(random_state = random_state)
    rfr.fit(X_train_bbox, y_train_bbox)
    joblib.dump(rfr, filename) 
    
rfr_pred = rfr.predict(X_test_bbox)
print("RMSE:", rmse(rfr_pred, y_test_bbox))
print("MAE:", mae(rfr_pred, y_test_bbox))

### Support Vector Machine

filename = "model_dumps/svr_base_mask_" + ASSIGNED_COLOR_SPACE + ".pkl"
if (os.path.isfile(filename)):
    svr = joblib.load(filename) 
else:
    svr = SVR()
    svr.fit(X_train_mask, y_train_mask)
    joblib.dump(svr, filename) 
    
svr_pred = svr.predict(X_test_mask)
print("RMSE:", rmse(svr_pred, y_test_mask))
print("MAE:", mae(svr_pred, y_test_mask))

filename = "model_dumps/svr_base_bbox_" + ASSIGNED_COLOR_SPACE + ".pkl"

if (os.path.isfile(filename)):
    svr = joblib.load(filename) 
else:
    svr = SVR()
    svr.fit(X_train_bbox, y_train_bbox)
    joblib.dump(svr, filename)
    
svr_pred = svr.predict(X_test_bbox)
print("RMSE:", rmse(svr_pred, y_test_bbox))
print("MAE:", mae(svr_pred, y_test_bbox))

### Multilayer Perceptrons



filename = "model_dumps/mpr_base_mask_" + ASSIGNED_COLOR_SPACE + ".pkl"

if (os.path.isfile(filename)):
    mpr = joblib.load(filename) 
else:
    mpr = MLPRegressor(random_state = random_state)
    mpr.fit(X_train_mask, y_train_mask)
    joblib.dump(mpr, filename) 
    
mpr_pred = mpr.predict(X_test_mask)
print("RMSE:", rmse(mpr_pred, y_test_mask))
print("MAE:", mae(mpr_pred, y_test_mask))

filename = "model_dumps/mpr_base_bbox_e" + ASSIGNED_COLOR_SPACE + ".pkl"

if (os.path.isfile(filename)):
    mpr = joblib.load(filename) 
else:
    mpr = MLPRegressor(random_state = random_state)
    mpr.fit(X_train_bbox, y_train_bbox)
    joblib.dump(mpr, filename) 

mpr_pred = mpr.predict(X_test_bbox)
print("RMSE:", rmse(mpr_pred, y_test_bbox))
print("MAE:", mae(mpr_pred, y_test_bbox))

___

raise Exception("STOP HERE")

## Feature selection and Hyperparameter tuning



In [None]:
CHOSEN_COLOR_SPACE = "HSV" #<-- pick a colorspace
CHOSEN_REGION = "bbox" #lbbox next

# Only the feature list comes from the helper; the label it proposes is
# discarded and the label column is chosen by hand on the next line.
features, _ = get_features_and_label(CHOSEN_COLOR_SPACE, CHOSEN_REGION)
label = "e_face_yf" #"e_bbox_mp"  #<-- pick label

X_features = df_encoded_features[features]
y_features = df_encoded_features[label].values

# 80/20 hold-out split, seeded so every model sees the same partition.
X_train, X_test, y_train, y_test = train_test_split(
    X_features,
    y_features,
    test_size=0.2,
    random_state=random_state,
)

In [None]:
# from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.base import clone as clone_model

### Random Forest

**Grid Search**

param_space = [
    {
        "n_estimators": list(range(100, 351, 50)),
        "criterion": ["squared_error", "absolute_error", "friedman_mse", "poisson"],
        "max_depth": [None, 1, 10, 30, 80, 150],
        "min_samples_split": [2, 4, 8, 16, 32],
        "min_samples_leaf": [1, 5, 10, 20],
        "max_features": [None, "sqrt", "log2", 0.25, 0.5, 0.75],
        "max_leaf_nodes": [None, 50, 100, 300],
        "min_impurity_decrease": [0.0, 0.5],
        "bootstrap": [False, True],
        "ccp_alpha": [0.0, 0.5, 1.0],
    }
]

rfr = RandomForestRegressor()
gs_rfr = GridSearchCV(rfr, param_space, n_jobs=-1, cv=cv_folds, scoring=concealment_scorer)
gs_rfr.fit(X_train, y_train)

print('Best parameters found:\n', gs_rfr.best_params_)
y_pred = gs_rfr.predict(X_val)
print("Best accuracy: ", rmse(y_val, y_pred))
print(classification_report(y_val, y_pred))

**Bayesian Optimization**

In [None]:
# search_space = {
#     "n_estimators": Integer(100, 350),
#     "criterion": Categorical(["squared_error", "absolute_error", "friedman_mse", "poisson"]),
#     "max_depth": Integer(1, 300),
#     "min_samples_split": Integer(2, 32),
#     "min_samples_leaf": Integer(1, 20),
#     "max_features": Categorical([None, "sqrt", "log2", 0.25, 0.5, 0.75]),
#     "max_leaf_nodes": Integer(50, 300),
#     "min_impurity_decrease": Real(0.0, 2.0),
#     "bootstrap": Categorical([False, True]),
#     "ccp_alpha": Real(0.0, 2.0),
# }

In [None]:
# rfr = RandomForestRegressor()
# bo_rfr = BayesSearchCV(rfr, search_space, n_iter=50, n_jobs=-1, cv=cv_folds, random_state=random_state, verbose=2, scoring=concealment_scorer)
# bo_rfr.fit(X_train, y_train)

# joblib.dump(bo_rfr, "model_dumps/rfr_tuned_" + CHOSEN_COLOR_SPACE + "_" + CHOSEN_REGION + "_" + label + ".pkl") 

In [None]:
# print('Best parameters found:\n', bo_rfr.best_params_)

# print('Test')
# y_pred = bo_rfr.predict(X_test)
# print("RMSE: ", rmse(y_test, y_pred))
# print("MAE: ", mae(y_test, y_pred))
# print("Positive Error", positive_error(y_test, y_pred))
# print("Concealment Ratio", concealment_ratio(y_test, y_pred))

# print('Train')
# y_pred = bo_rfr.predict(X_train)
# print("RMSE: ", rmse(y_train, y_pred))
# print("MAE: ", mae(y_train, y_pred))
# print("Positive Error", positive_error(y_train, y_pred))
# print("Concealment Ratio", concealment_ratio(y_train, y_pred))

### Feature Selection

In [None]:
# rfr = RandomForestRegressor()
# pipe = Pipeline([('selector', SelectKBest(f_regression)), ('rfr', rfr)])

# search_space = {
#     "selector__k": Integer(X_train.shape[1] // 2, X_train.shape[1] - 1),
#     "rfr__n_estimators": Integer(100, 350),
#     "rfr__criterion": Categorical(["squared_error", "absolute_error", "friedman_mse", "poisson"]),
#     "rfr__max_depth": Integer(1, 300),
#     "rfr__min_samples_split": Integer(2, 32),
#     "rfr__min_samples_leaf": Integer(1, 20),
#     "rfr__max_features": Categorical([None, "sqrt", "log2", 0.25, 0.5, 0.75]),
#     "rfr__max_leaf_nodes": Integer(50, 300),
#     "rfr__min_impurity_decrease": Real(0.0, 2.0),
#     "rfr__bootstrap": Categorical([False, True]),
#     "rfr__ccp_alpha": Real(0.0, 2.0),
# }

# bo_rfr = BayesSearchCV(pipe, search_space, n_iter=75, n_jobs=-1, cv=cv_folds, random_state=random_state, scoring=concealment_scorer)
# bo_rfr.fit(X_train, y_train)
# joblib.dump(bo_rfr, "model_dumps/rfr_tunedfs_bo_" + CHOSEN_COLOR_SPACE + "_" + CHOSEN_REGION + "_" + label + ".pkl") 

# selected_feat = bo_rfr.best_estimator_.named_steps["selector"].get_support()
# best_rfr = clone_model(bo_rfr.best_estimator_)
# best_rfr.fit(X_train, y_train)
# joblib.dump(best_rfr, "model_dumps/rfr_tunedfs_" + CHOSEN_COLOR_SPACE + "_" + CHOSEN_REGION + "_" + label + ".pkl") 

In [None]:
# print('Best params:\n', bo_rfr.best_params_)
# print('Best features found:\n', X_train.columns[selected_feat])

# print("Test")
# y_pred = best_rfr.predict(X_test)
# print("RMSE: ", rmse(y_test, y_pred))
# print("MAE: ", mae(y_test, y_pred))
# print("Positive Error", positive_error(y_test, y_pred))
# print("Concealment Ratio", concealment_ratio(y_test, y_pred))

# print("Train")
# y_pred = best_rfr.predict(X_train)
# print("RMSE: ", rmse(y_train, y_pred))
# print("MAE: ", mae(y_train, y_pred))
# print("Positive Error", positive_error(y_train, y_pred))
# print("Concealment Ratio", concealment_ratio(y_train, y_pred))

### Support Vector Machine

**Grid Search**

```
param_space = [
    {
        "kernel": "poly",
        "degree": [3, 5, 10, 15, 20],
        "gamma": ["auto", "scale"],
        "coef0": [0.0, 2.5, 5.0],
        "tol": [0.0001, 0.001, 0.01, 0.1],
        "C": [0.0001, 0.01, 1.0, 100.0, 1000.0],
        "epsilon": [0.05, 0.1, .5],
        "shrinking": [False, True],
        "max_iter": [-1, 100, 500, 1000],
    },
    {
        "kernel": "rbf",
        "gamma": ["auto", "scale"],
        "tol": [0.0001, 0.001, 0.01, 0.1],
        "C": [0.0001, 0.01, 1.0, 100.0, 1000.0],
        "epsilon": [0.05, 0.1, .5],
        "shrinking": [False, True],
        "max_iter": [-1, 100, 500, 1000],
    },
    {
        "kernel": "sigmoid",
        "gamma": ["auto", "scale"],
        "coef0": [0.0, 2.5, 5.0],
        "tol": [0.0001, 0.001, 0.01, 0.1],
        "C": [0.0001, 0.01, 1.0, 100.0, 1000.0],
        "epsilon": [0.05, 0.1, .5],
        "shrinking": [False, True],
        "max_iter": [-1, 100, 500, 1000],
    },
    {
        "kernel": ["linear"],
        "tol": [0.0001, 0.001, 0.01, 0.1],
        "C": [0.0001, 0.01, 1.0, 100.0, 1000.0],
        "epsilon": [0.05, 0.1, .5],
        "shrinking": [False, True],
        "max_iter": [-1, 100, 500, 1000],
    }
]
```

```
svr = SVR()
gs_svr = GridSearchCV(svr, param_space, n_jobs=-1, cv=cv_folds)
gs_svr.fit(X_train, y_train)
```

```
print('Best parameters found:\n', gs_svr.best_params_)
y_pred = gs_svr.predict(X_val)
print("Best accuracy: ", rmse(y_val, y_pred))
print(classification_report(y_val, y_pred))
```

**Bayesian Optimization**

In [None]:
# Search space for the bare-SVR Bayesian search in the next cell: one entry per
# SVR constructor argument, given either as a list of choices (Categorical) or
# as a (low, high) numeric range (Integer / Real).
search_space = {
    "kernel": Categorical(["linear", "poly", "rbf", "sigmoid"]),
    "degree": Integer(3, 20),
    "gamma": Categorical(["auto", "scale"]),
    "coef0": Real(0.0, 5.0),
    "tol": Real(0.0001, 0.1),
    "C": Real(0.0001, 1000.0),
    "epsilon": Real(0.05, .5),
    "shrinking": Categorical([False, True]),
    # Unlike the disabled grid search, the unlimited setting (-1) is not part
    # of this range, so every candidate has a finite iteration cap.
    "max_iter": Integer(100, 5000),
}

In [None]:
# Bayesian hyper-parameter search for a bare SVR: 50 candidates, each scored
# with cv_folds-fold cross-validation using the asymmetric concealment_scorer.
svr = SVR()
bo_svr = BayesSearchCV(svr, search_space, n_iter=50, n_jobs=-1, cv=cv_folds, random_state=random_state, verbose=2, scoring=concealment_scorer)
bo_svr.fit(X_train, y_train)

# Persist the whole search object; the file name records color space, region and label.
joblib.dump(bo_svr, "model_dumps/svr_tuned_" + CHOSEN_COLOR_SPACE + "_" + CHOSEN_REGION + "_" + label + ".pkl") 

In [None]:
print('Best parameters found:\n', bo_svr.best_params_)

# Report the same four metrics on the held-out split first, then on the
# training split; y_pred ends up holding the training-set predictions.
for split_name, X_split, y_split in (("Test", X_test, y_test), ("Train", X_train, y_train)):
    print(split_name)
    y_pred = bo_svr.predict(X_split)
    print("RMSE: ", rmse(y_split, y_pred))
    print("MAE: ", mae(y_split, y_pred))
    print("Positive Error", positive_error(y_split, y_pred))
    print("Concealment Ratio", concealment_ratio(y_split, y_pred))

### Feature Selection

In [None]:
# Joint search over feature selection and SVR hyper-parameters.
# NOTE: this cell rebinds `svr`, `search_space` and `bo_svr` from the cells above.
svr = SVR()
# Univariate selection (f_regression scores) feeds the k best columns to the SVR.
pipe = Pipeline([('selector', SelectKBest(f_regression)), ('svr', svr)])

# Pipeline parameters use the "<step>__<param>" naming; k ranges from half of
# the feature columns up to all but one of them.
search_space = {
    "selector__k": Integer(X_train.shape[1] // 2, X_train.shape[1] - 1),
    "svr__kernel": Categorical(["linear", "poly", "rbf", "sigmoid"]),
    "svr__degree": Integer(3, 20),
    "svr__gamma": Categorical(["auto", "scale"]),
    "svr__coef0": Real(0.0, 5.0),
    "svr__tol": Real(0.0001, 0.1),
    "svr__C": Real(0.0001, 1000.0),
    "svr__epsilon": Real(0.05, .5),
    "svr__shrinking": Categorical([False, True]),
    "svr__max_iter": Integer(100, 5000),
}

# 75 candidates, scored with the asymmetric concealment_scorer.
bo_svr = BayesSearchCV(pipe, search_space, n_iter=75, n_jobs=-1, cv=cv_folds, random_state=random_state, scoring=concealment_scorer)
bo_svr.fit(X_train, y_train)
joblib.dump(bo_svr, "model_dumps/svr_tunedfs_bo_" + CHOSEN_COLOR_SPACE + "_" + CHOSEN_REGION + "_" + label + ".pkl") 

# Boolean mask over X_train's columns marking the features the best pipeline kept.
selected_feat = bo_svr.best_estimator_.named_steps["selector"].get_support()
# Unfitted copy of the best pipeline, trained again on the full training split
# and saved separately from the search object.
best_svr = clone_model(bo_svr.best_estimator_)
best_svr.fit(X_train, y_train)
joblib.dump(best_svr, "model_dumps/svr_tunedfs_" + CHOSEN_COLOR_SPACE + "_" + CHOSEN_REGION + "_" + label + ".pkl") 

In [None]:
print('Best params:\n', bo_svr.best_params_)
print('Best features found:\n', X_train.columns[selected_feat])

# Report the same four metrics on the held-out split first, then on the
# training split; y_pred ends up holding the training-set predictions.
for split_name, X_split, y_split in (("Test", X_test, y_test), ("Train", X_train, y_train)):
    print(split_name)
    y_pred = best_svr.predict(X_split)
    print("RMSE: ", rmse(y_split, y_pred))
    print("MAE: ", mae(y_split, y_pred))
    print("Positive Error", positive_error(y_split, y_pred))
    print("Concealment Ratio", concealment_ratio(y_split, y_pred))

### Multilayer Perceptron

**Grid Search**

param_space = [
    {
        "solver": "lbfgs",
        "hidden_layer_sizes": [(100,), (50, 50,), (50, 25, 25,)], # pick better ones
        "activation": ["identity", "logistic", "tanh", "relu"],
        "alpha": [0.00001, 0.0001, 0.001],
        "max_iter": [200, 500, 1000],
        "random_state": random_state,
        "tol": [0.0001, 0.001, 0.01, 0.1],
        "max_fun": [10000, 15000],
    },
    {
        "solver": "adam",
        "hidden_layer_sizes": [(100,), (50, 50,), (50, 25, 25,)], # pick better ones
        "activation": ["identity", "logistic", "tanh", "relu"],
        "alpha": [0.00001, 0.0001, 0.001],
        "max_iter": [200, 500, 1000],
        "random_state": random_state,
        "tol": [0.0001, 0.001, 0.01, 0.1],
        "batch_size": ['auto', n_samples // 5, n_samples // 10],
        "learning_rate_init": [0.0005, 0.001, 0.005],
        "shuffle": [False, True],
        "early_stopping": [False, True],
        "validation_fraction": [0.1, 0.15],
        "n_iter_no_change": [10, 15],
        # not sure abt these values, should we be changing these?
        "beta_1": [0.75, 0.9],
        "beta_2": [0.85, 0.999],
        "epsilon": [1e-07, 1e-08],
    },
    {
        "solver": "sgd",
        "hidden_layer_sizes": [(100,), (50, 50,), (50, 25, 25,)], # pick better ones
        "activation": ["identity", "logistic", "tanh", "relu"],
        "alpha": [0.00001, 0.0001, 0.001],
        "max_iter": [200, 500, 1000],
        "random_state": random_state,
        "tol": [0.0001, 0.001, 0.01, 0.1],
        "batch_size": ['auto', n_samples // 5, n_samples // 10],
        "learning_rate_init": [0.0005, 0.001, 0.005],
        "shuffle": [False, True],
        "early_stopping": [False, True],
        "validation_fraction": [0.1, 0.15],
        "n_iter_no_change": [10, 15],
        "learning_rate": ["constant", "invscaling", "adaptive"],
        "momentum": [0.75, 0.9],
        "nesterovs_momentum": [False, True],
    }
]

```
mpr = MLPRegressor()
gs_mpr = GridSearchCV(mpr, param_space, n_jobs=-1, cv=cv_folds)
gs_mpr.fit(X_train, y_train)
```

```
print('Best parameters found:\n', gs_mpr.best_params_)
y_pred = gs_mpr.predict(X_val)
print("Best accuracy: ", rmse(y_val, y_pred))
print(classification_report(y_val, y_pred))
```

**Bayesian Optimization**

In [None]:
# Width of the first hidden layer: two thirds of the number of input features
# (integer division) plus one. The search cells derive the second layer from it.
hidden_sz = X_train.shape[1] * 2 // 3 + 1
# Search space for the MLP Bayesian search: one entry per MLPRegressor
# constructor argument. hidden_layer_sizes is deliberately absent; it is fixed
# when the estimator is constructed.
search_space = {
    "activation": Categorical(["identity", "logistic", "tanh", "relu"]),
    "solver": Categorical(["lbfgs", "adam", "sgd"]),
    "alpha": Real(0.00001, 0.001),
    "learning_rate": Categorical(["constant", "invscaling", "adaptive"]),
    "learning_rate_init": Real(0.0005, 0.005),
    "max_iter": Integer(200, 1000),
    "shuffle": Categorical([False, True]),
    "tol": Real(0.0001, 0.1),
    "momentum": Real(0.75, 0.9),
    "nesterovs_momentum": Categorical([False, True]),
    "early_stopping": Categorical([False, True]),
    "validation_fraction": Real(0.1, 0.15),
    "beta_1": Real(0.75, 0.9),
    "beta_2": Real(0.85, 0.999),
    "epsilon": Real(1e-08, 1e-07),
    "n_iter_no_change": Integer(10, 15),
    "max_fun": Integer(10000, 15000),
}

In [None]:
def custom_loss(y_true, y_pred, underpredict_penalty=2):
    """Asymmetric RMSE used as the training loss reported by CustomMLP.

    Each squared error is multiplied by ``underpredict_penalty`` when the
    prediction is below the target and by 1 otherwise; the root of the mean
    weighted squared error is returned.

    The computation is vectorised because this function is called once per
    backpropagation step. It always returns a scalar, including for the 2-D
    column arrays the MLP passes in (the loop version returned a one-element
    array in that case).

    Parameters
    ----------
    y_true, y_pred : array-like
        Target values and predicted values, of matching shape.
    underpredict_penalty : float, default 2
        Weight applied to the squared error of under-predicted samples. The
        default reproduces the previously hard-coded factor.

    Returns
    -------
    numpy.float64
        The weighted RMSE.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    squared_error = (y_true - y_pred) ** 2
    # Under-predictions (prediction strictly below target) get the heavier weight.
    weights = np.where(y_pred < y_true, underpredict_penalty, 1)
    return np.sqrt(np.mean(weights * squared_error))

# Alternative formulation kept for reference (disabled):
# def custom_loss(y_true, y_pred):
#     return np.mean((y_true - y_pred) ** 2 * (np.sign(y_true - y_pred) + 1) ** 2)

In [None]:
from sklearn.neural_network._base import DERIVATIVES
from sklearn.utils.extmath import safe_sparse_dot

class CustomMLP(MLPRegressor):
    """MLPRegressor trained on the asymmetric ``custom_loss`` instead of squared error.

    ``_backprop`` is overridden so that both the reported loss and the
    output-layer gradient come from the weighted RMSE
    ``L = sqrt(mean(w * (y - p) ** 2))``, where ``w`` is
    ``_UNDERPREDICT_PENALTY`` for under-predicted samples and 1 otherwise.
    Previously only the reported loss was customised while the gradient
    remained that of the plain squared error, so the penalty never influenced
    the fitted weights.

    The analytic gradient below matches the loop/vectorised ``custom_loss``
    with its default penalty; if that function is swapped for a different
    formulation the gradient must be updated accordingly. Single-output
    regression is assumed.
    """

    # Weight on under-predicted samples; must mirror the penalty in custom_loss.
    _UNDERPREDICT_PENALTY = 2.0

    def _backprop(self, X, y, activations, deltas, coef_grads, intercept_grads):
        """Compute the loss and its gradients w.r.t. every coefficient and intercept.

        Same contract as the parent implementation: returns
        ``(loss, coef_grads, intercept_grads)`` with the gradient lists filled
        in place.
        """
        n_samples = X.shape[0]

        # Forward propagate
        activations = self._forward_pass(activations)

        # Data term of the loss. custom_loss may hand back a one-element array
        # for 2-D targets, so it is reduced to a plain float.
        data_loss = float(np.asarray(custom_loss(y, activations[-1])).ravel()[0])
        loss = data_loss

        # Add L2 regularization term to loss
        values = 0
        for s in self.coefs_:
            s = s.ravel()
            values += np.dot(s, s)
        loss += (0.5 * self.alpha) * values / n_samples

        # Backward propagate
        last = self.n_layers_ - 2

        # Output-layer delta for the weighted RMSE with an identity output:
        #   dL/dp_i = w_i * (p_i - y_i) / (n_samples * L)
        # _compute_loss_grad divides by n_samples itself, so the delta stored
        # here is w * (p - y) / L. The clamp avoids 0/0 on a perfect fit.
        residual = activations[-1] - y
        weights = np.where(residual < 0, self._UNDERPREDICT_PENALTY, 1.0)
        deltas[last] = weights * residual / max(data_loss, np.finfo(float).eps)

        # Compute gradient for the last layer
        self._compute_loss_grad(
            last, n_samples, activations, deltas, coef_grads, intercept_grads
        )

        inplace_derivative = DERIVATIVES[self.activation]
        # Iterate over the hidden layers
        for i in range(self.n_layers_ - 2, 0, -1):
            deltas[i - 1] = safe_sparse_dot(deltas[i], self.coefs_[i].T)
            inplace_derivative(activations[i], deltas[i - 1])

            self._compute_loss_grad(
                i - 1, n_samples, activations, deltas, coef_grads, intercept_grads
            )

        return loss, coef_grads, intercept_grads

In [None]:
# Bayesian hyper-parameter search for the custom-loss MLP with two hidden
# layers (hidden_sz, then two thirds of hidden_sz): 50 candidates, each scored
# with cv_folds-fold cross-validation using the asymmetric concealment_scorer.
mpr = CustomMLP(random_state=random_state, hidden_layer_sizes=(hidden_sz, hidden_sz * 2 // 3))
bo_mpr = BayesSearchCV(mpr, search_space, n_iter=50, n_jobs=-1, cv=cv_folds, random_state=random_state, verbose=2, scoring=concealment_scorer)
bo_mpr.fit(X_train, y_train)

# Persist the whole search object; the file name records color space, region and label.
joblib.dump(bo_mpr, "model_dumps/mpr_tuned_" + CHOSEN_COLOR_SPACE + "_" + CHOSEN_REGION + "_" + label + ".pkl") 

In [None]:
# print('Best parameters found:\n', bo_mpr.best_params_)

# Report the same four metrics on the held-out split first, then on the
# training split; y_pred ends up holding the training-set predictions.
for split_name, X_split, y_split in (("Test", X_test, y_test), ("Train", X_train, y_train)):
    print(split_name)
    y_pred = bo_mpr.predict(X_split)
    print("RMSE", rmse(y_split, y_pred))
    print("MAE", mae(y_split, y_pred))
    print("Positive Error", positive_error(y_split, y_pred))
    print("Concealment Ratio", concealment_ratio(y_split, y_pred))

### Feature Selection

In [None]:
# Joint search over feature selection and MLP hyper-parameters.
# NOTE: this cell rebinds `mpr`, `pipe`, `search_space` and `bo_mpr` from the cells above.
mpr = CustomMLP(random_state=random_state, hidden_layer_sizes=(hidden_sz, hidden_sz * 2 // 3))
# Univariate selection (f_regression scores) feeds the k best columns to the MLP.
pipe = Pipeline([('selector', SelectKBest(f_regression)), ('mpr', mpr)])

# Pipeline parameters use the "<step>__<param>" naming; k ranges from half of
# the feature columns up to all but one of them.
search_space = {
    "selector__k": Integer(X_train.shape[1] // 2, X_train.shape[1] - 1),
    "mpr__activation": Categorical(["identity", "logistic", "tanh", "relu"]),
    "mpr__solver": Categorical(["lbfgs", "adam", "sgd"]),
    "mpr__alpha": Real(0.00001, 0.001),
    "mpr__learning_rate": Categorical(["constant", "invscaling", "adaptive"]),
    "mpr__learning_rate_init": Real(0.0005, 0.005),
    "mpr__max_iter": Integer(200, 1000),
    "mpr__shuffle": Categorical([False, True]),
    "mpr__tol": Real(0.0001, 0.1),
    "mpr__momentum": Real(0.75, 0.9),
    "mpr__nesterovs_momentum": Categorical([False, True]),
    "mpr__early_stopping": Categorical([False, True]),
    "mpr__validation_fraction": Real(0.1, 0.15),
    "mpr__beta_1": Real(0.75, 0.9),
    "mpr__beta_2": Real(0.85, 0.999),
    "mpr__epsilon": Real(1e-08, 1e-07),
    "mpr__n_iter_no_change": Integer(10, 15),
    "mpr__max_fun": Integer(10000, 15000),
}

# NOTE(review): n_iter is 76 here but 75 in the SVR feature-selection search —
# confirm whether the difference is intentional.
bo_mpr = BayesSearchCV(pipe, search_space, n_iter=76, n_jobs=-1, cv=cv_folds, random_state=random_state, scoring=concealment_scorer)
bo_mpr.fit(X_train, y_train)
joblib.dump(bo_mpr, "model_dumps/mpr_tunedfs_bo_" + CHOSEN_COLOR_SPACE + "_" + CHOSEN_REGION + "_" + label + ".pkl") 

# Boolean mask over X_train's columns marking the features the best pipeline kept.
selected_feat = bo_mpr.best_estimator_.named_steps["selector"].get_support()
# Unfitted copy of the best pipeline, trained again on the full training split
# and saved separately from the search object.
best_mpr = clone_model(bo_mpr.best_estimator_)
best_mpr.fit(X_train, y_train)
joblib.dump(best_mpr, "model_dumps/mpr_tunedfs_" + CHOSEN_COLOR_SPACE + "_" + CHOSEN_REGION + "_" + label + ".pkl") 

In [None]:
print('Best params:\n', bo_mpr.best_params_)
print('Best features found:\n', X_train.columns[selected_feat])

# Report the same four metrics on the held-out split first, then on the
# training split; y_pred ends up holding the training-set predictions.
for split_name, X_split, y_split in (("Test", X_test, y_test), ("Train", X_train, y_train)):
    print(split_name)
    y_pred = best_mpr.predict(X_split)
    print("RMSE: ", rmse(y_split, y_pred))
    print("MAE: ", mae(y_split, y_pred))
    print("Positive Error", positive_error(y_split, y_pred))
    print("Face Percent", concealment_ratio(y_split, y_pred))

### Hidden Layer Sizes Experimentation

mlp_params = dict([('activation', 'relu'), ('alpha', 0.0006077414104241369), ('beta_1', 0.75), ('beta_2', 0.9174938922030286), ('early_stopping', True), ('epsilon', 1e-08), ('learning_rate', 'invscaling'), ('learning_rate_init', 0.0005), ('max_fun', 10000), ('max_iter', 1000), ('momentum', 0.75), ('n_iter_no_change', 15), ('nesterovs_momentum', True), ('shuffle', False), ('solver', 'adam'), ('tol', 0.0001), ('validation_fraction', 0.15)])

In [None]:
"""new_mlp = MLPRegressor(random_state=random_state, **mlp_params)

search_space = {
    "selector__k": Integer(1, X_train.shape[1] - 1),
    "layers__sz0": Integer(1, X_train.shape[1] * 2 // 3),
    "layers__sz1": Integer(1, X_train.shape[1] * 4 // 9),
    "layers__sz2": Integer(1, X_train.shape[1] * 8 // 27),
    "layers__sz3": Integer(1, X_train.shape[1] * 16 // 81),
    "layers__d": Integer(1, 4),
}

from sklearn.base import BaseEstimator, TransformerMixin
class Layers(BaseEstimator, TransformerMixin):
    def __init__(self, d=4, *args, **kwargs):
        for i in range(d):
            if i < len(args):
                setattr(self, "sz" + str(i), args[i])
            else:
                setattr(self, "sz" + str(i), 1)
                
        self.set_params(**kwargs)
        self.d = d
    
    def get_params(self, deep=False):
        sizes = {"sz" + str(i): getattr(self, "sz" + str(i)) for i in range(self.d)}
        sizes["d"] = self.d
        return sizes
    
    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
    
    def fit(self, X, y=None):
        return self

    def transform(self, X, y=None):
        global new_mlp
        new_mlp.set_params(hidden_layer_sizes=tuple(getattr(self, "sz" + str(i)) for i in range(self.d)))
        return X
    
pipe = Pipeline([('selector', SelectKBest(f_regression)), ('layers', Layers()), ('mpr', new_mlp)])
search = BayesSearchCV(pipe, search_space, n_iter=50, n_jobs=-1, cv=cv_folds, random_state=random_state)"""

from sklearn.base import BaseEstimator, RegressorMixin

# Tuned MLP hyper-parameters. Repeated in this cell so it runs on a fresh
# kernel without relying on a non-code cell having been executed.
mlp_params = {
    'activation': 'relu',
    'alpha': 0.0006077414104241369,
    'beta_1': 0.75,
    'beta_2': 0.9174938922030286,
    'early_stopping': True,
    'epsilon': 1e-08,
    'learning_rate': 'invscaling',
    'learning_rate_init': 0.0005,
    'max_fun': 10000,
    'max_iter': 1000,
    'momentum': 0.75,
    'n_iter_no_change': 15,
    'nesterovs_momentum': True,
    'shuffle': False,
    'solver': 'adam',
    'tol': 0.0001,
    'validation_fraction': 0.15,
}

new_mlp = MLPRegressor(random_state=random_state, **mlp_params)

search_space = {
    "selector__k": Integer(X_train.shape[1] // 3, X_train.shape[1] - 1),
    "layers__sz": Integer(1, X_train.shape[1] - 1),
    "layers__d": Integer(1, 3),
}


class Layers(RegressorMixin, BaseEstimator):
    """Regressor wrapper that exposes MLP depth and width as two searchable parameters.

    The wrapped estimator is cloned at fit time and given
    ``hidden_layer_sizes = (sz,) * d``. The previous version was a pass-through
    transformer that mutated the global ``new_mlp`` from ``transform``; since
    Pipeline and BayesSearchCV fit *clones* of their steps, that mutation never
    reached the model being trained and the ``sz``/``d`` search had no effect.

    Parameters
    ----------
    d : int, default 3
        Number of hidden layers.
    sz : int, default 100
        Number of units in every hidden layer.
    estimator : estimator or None
        Template regressor accepting ``hidden_layer_sizes``; a default
        ``MLPRegressor()`` is used when None.
    """

    def __init__(self, d=3, sz=100, estimator=None):
        self.d = d
        self.sz = sz
        self.estimator = estimator

    def fit(self, X, y):
        """Fit a fresh copy of the template with ``d`` hidden layers of ``sz`` units."""
        template = MLPRegressor() if self.estimator is None else self.estimator
        # Searched values may arrive as numpy integers; convert to plain ints.
        hidden_layer_sizes = tuple(int(self.sz) for _ in range(int(self.d)))
        self.estimator_ = clone_model(template).set_params(hidden_layer_sizes=hidden_layer_sizes)
        self.estimator_.fit(X, y)
        return self

    def predict(self, X):
        """Predict with the fitted inner regressor."""
        return self.estimator_.predict(X)


pipe = Pipeline([('selector', SelectKBest(f_regression)), ('layers', Layers(estimator=new_mlp))])
search = BayesSearchCV(pipe, search_space, n_iter=15, n_jobs=-1, cv=cv_folds, random_state=random_state)

search.fit(X_train, y_train)
selected_feat = search.best_estimator_.named_steps["selector"].get_support()
# The pipeline performs the feature selection itself, so it is refit on (and
# later predicts from) the full-width frames, like the other feature-selection cells.
best_mpr = clone_model(search.best_estimator_)
best_mpr.fit(X_train, y_train)

print('Best params:\n', search.best_params_)
print('Best features found:\n', X_train.columns[selected_feat])

print("Test")
y_pred = best_mpr.predict(X_test)
print("RMSE: ", rmse(y_test, y_pred))
print("MAE: ", mae(y_test, y_pred))

print("Train")
y_pred = best_mpr.predict(X_train)
print("RMSE: ", rmse(y_train, y_pred))
print("MAE: ", mae(y_train, y_pred))

## PyTorch MLP

In [None]:
# def custom_loss(y_true, y_pred):
    
# #     y_true = np.array(y_true)
# #     y_pred = np.array(y_pred)
    
#     squared_error_sum = 0
    
#     for a, b in zip(y_true, y_pred):
#         if b < a: #underpredict
#             squared_error_sum += ((a - b) ** 2 ) * 2 #penalize
#         else: #overpredict or just right
#             squared_error_sum += ((a - b) ** 2 )
    
#     mse = squared_error_sum / len(y_true)
    
#     rmse = np.sqrt(mse)
    
#     return rmse

# def baseline_model():
#     # create model
#     model = Sequential()
#     model.add(Dense(13, input_shape=(13,), kernel_initializer='normal', activation='relu'))
#     model.add(Dense(1, kernel_initializer='normal'))

#     model.compile(loss='mean_squared_error', optimizer='adam')
#     return model

# estimator = KerasRegressor(model=baseline_model, epochs=100, batch_size=5, verbose=0)

In [None]:
# import numpy as np
# import torch
# import torch.nn as nn
# import torch.nn.init as init
# import torch.optim as optim
# from skorch import NeuralNetClassifier
# from sklearn.model_selection import GridSearchCV

In [None]:
# param_space = [
#     {
#         "solver": "lbfgs",
#         "hidden_layer_sizes": [(100,), (50, 50,), (50, 25, 25,)], # pick better ones
#         "activation": ["identity", "logistic", "tanh", "relu"],
#         "alpha": [0.00001, 0.0001, 0.001],
#         "max_iter": [200, 500, 1000],
#         "tol": [0.0001, 0.001, 0.01, 0.1],
#         "max_fun": [10000, 15000],
#     },
#     {
#         "solver": "adam",
#         "hidden_layer_sizes": [(100,), (50, 50,), (50, 25, 25,)], # pick better ones
#         "activation": ["identity", "logistic", "tanh", "relu"],
#         "alpha": [0.00001, 0.0001, 0.001],
#         "max_iter": [200, 500, 1000],
#         "tol": [0.0001, 0.001, 0.01, 0.1],
#         "batch_size": ['auto', n_samples // 5, n_samples // 10],
#         "learning_rate_init": [0.0005, 0.001, 0.005],
#         "shuffle": [False, True],
#         "early_stopping": [False, True],
#         "validation_fraction": [0.1, 0.15],
#         "n_iter_no_change": [10, 15],
#         # not sure abt these values, should we be changing these?
#         "beta_1": [0.75, 0.9],
#         "beta_2": [0.85, 0.999],
#         "epsilon": [1e-07, 1e-08],
#     },
#     {
#         "solver": "sgd",
#         "hidden_layer_sizes": [(100,), (50, 50,), (50, 25, 25,)], # pick better ones
#         "activation": ["identity", "logistic", "tanh", "relu"],
#         "alpha": [0.00001, 0.0001, 0.001],
#         "max_iter": [200, 500, 1000],
#         "tol": [0.0001, 0.001, 0.01, 0.1],
#         "batch_size": ['auto', n_samples // 5, n_samples // 10],
#         "learning_rate_init": [0.0005, 0.001, 0.005],
#         "shuffle": [False, True],
#         "early_stopping": [False, True],
#         "validation_fraction": [0.1, 0.15],
#         "n_iter_no_change": [10, 15],
#         "learning_rate": ["constant", "invscaling", "adaptive"],
#         "momentum": [0.75, 0.9],
#         "nesterovs_momentum": [False, True],
#     }
# ]

In [None]:
# class DataLoader(object):
#     def __init__(self, x, y, batch_size=128, shuffle=True):
#         self.x = x
#         self.y = y
#         self.batch_size = batch_size
#         self.shuffle = shuffle
#         self.start_idx = 0
#         self.data_size = x.shape[0]
#         if self.shuffle:
#             self.reset()
    
#     def reset(self):
#         self.x, self.y = shuffle(self.x, self.y)
    
#     def __iter__(self):
#         return self
    
#     def __next__(self):
#         if self.start_idx >= self.data_size:
#             if self.shuffle:
#                 self.reset()
#             self.start_idx = 0
#             raise StopIteration
    
#         batch_x = self.x[self.start_idx:self.start_idx+self.batch_size]
#         batch_y = self.y[self.start_idx:self.start_idx+self.batch_size]

#         batch_x = torch.tensor(batch_x, dtype=torch.float, device=device)
#         batch_y = torch.tensor(batch_y, dtype=torch.float, device=device)

#         self.start_idx += self.batch_size

#         return (batch_x,batch_y)

In [None]:
# class CustomMLPRegressor(nn.Module):
#     def __init__(self, in_dim, hidden_layers=(50, 50), activation=nn.ReLU, dropout_rate=0.5, weight_constraint=1.0, weight_init=torch.nn.init.xavier_uniform_):
#         super(CustomMLPRegressor, self).__init__()
        
#         self.layers = []
#         self.activation = activation()
        
#         assert(len(hidden_layers) > 0)
        
#         for i, count in enumerate(hidden_layers):
#             if i == 0:
#                 self.layers.append(nn.Linear(self.in_dim, count))
#             if i == len(hidden_layers) - 1:
#                 self.layers.append(nn.Linear(count, 1))
#             elif i != 0 and i != len(hidden_layers) - 1:
#                 self.layers.append(nn.Linear(hidden_layers[i - 1], count))
#             weight_init(self.layers[-1].weight)
        
#         self.dropout = nn.Dropout(dropout_rate)
    
#     def forward(self, x):
#         # maxnorm weight before actual forward pass
#         with torch.no_grad():
#             for layer in self.layers:
#                 norm = layer.weight.norm(2, dim=0, keepdim=True).clamp(min=self.weight_constraint / 2)
#                 desired = torch.clamp(norm, max=self.weight_constraint)
#                 layer.weight *= (desired / norm)
         
#         for i, layer in enumerate(self.layers):
#             if i == len(hidden_layers) - 1:
#                 x = self.dropout(x)
#             x = self.activation(layer(x))
            
#         return x

## Evaluation

#