In [1]:
import pandas as pd

In [2]:
# Load the demo table. pandas opens and closes the file itself, decoding it
# as UTF-8; skipinitialspace drops blanks that follow each delimiter.
df = pd.read_csv('vr_demo.csv', skipinitialspace=True, encoding='utf-8')
df

Unnamed: 0,Base MD.Sample,Lat.Any,Long.Any,VR Mean.Any
0,1740.0,47.122103,-47.957358,0.33
1,1920.0,47.122103,-47.957358,0.39
2,2730.0,47.122103,-47.957358,0.42
3,3990.0,47.122103,-47.957358,0.49
4,4410.0,47.122103,-47.957358,0.59
...,...,...,...,...
283,2460.0,46.458936,-48.282383,0.45
284,2560.0,46.458936,-48.282383,0.47
285,2740.0,46.458936,-48.282383,0.49
286,3580.0,46.458936,-48.282383,0.67


In [3]:
def get_n_hidden_units(n_rows: int, n_features: int) -> int:
    """Pick a hidden-layer width from the size of the data set.

    The width is one fifth of ``n_rows * n_features``, clamped to the range
    10..100 and truncated to an integer.

    Args:
        n_rows: Number of rows in the data set.
        n_features: Number of feature columns in the data set.

    Returns:
        The number of hidden units, between 10 and 100 inclusive.
    """
    suggested = (n_features * n_rows) / 5
    # Apply the floor first, then the ceiling.
    at_least_floor = max(10, suggested)
    capped = min(100, at_least_floor)
    return int(capped)

# df.shape counts every column, including the target held in the last
# column, so subtract one to pass the number of input features only.
n_rows, n_columns = df.shape
hidden_layer_sizes = [get_n_hidden_units(n_rows, n_columns - 1)]
print(f"Hidden layer sizes: {hidden_layer_sizes}")

Hidden layer sizes: [100]


In [4]:
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.impute import KNNImputer
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.pipeline import Pipeline
import numpy as np

# One seed drives both the train/test split and the network initialisation.
seed = 42

# The last column is the regression target; every other column is an input.
column_names = df.columns.values
features, targets = column_names[:-1], column_names[-1]

X = df[features]
y = df[targets]

# Hold out 30% of the rows for the final evaluation.
X_trn, X_test, y_trn, y_test = train_test_split(
    X, y, train_size=0.7, random_state=seed
)

# Baseline network using the layer sizes computed earlier.
regressor = MLPRegressor(
    hidden_layer_sizes=hidden_layer_sizes,
    random_state=seed,
)

def build_and_eval_pipeline(regressor):
    """Fit a scale -> impute -> regress pipeline and print its test scores.

    Reads the module-level ``X_trn``/``y_trn`` split for fitting and
    ``X_test``/``y_test`` for scoring. Nothing is returned; R² and RMSE are
    printed.

    Args:
        regressor: Estimator used as the final ``'Regression'`` step. It is
            fitted in place as part of the pipeline.
    """
    steps = [
        ('Scale', StandardScaler()),
        ('Impute', KNNImputer()),
        ('Regression', regressor),
    ]
    model = Pipeline(steps=steps)
    model.fit(X_trn, y_trn)

    # Score on the held-out rows only.
    predictions = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, predictions))
    r2 = r2_score(y_test, predictions)
    print(f"R²: {r2:.3f}, RMSE: {rmse:.2f}")
    
# Baseline run: fit and score the single MLPRegressor configured above.
build_and_eval_pipeline(regressor)

R²: 0.579, RMSE: 0.19


In [5]:
# try with grid search
from sklearn.model_selection import GridSearchCV

# Candidate architectures and solvers for the grid search. Each architecture
# is a tuple of layer widths; the trailing comma in (100,) matters, because
# a bare (100) is the integer 100 rather than a one-element tuple.
param_grid = {
    "hidden_layer_sizes": [
        (100,),
        (50, 50), (60, 40), (40, 60),
        (33, 33, 33), (25, 50, 25)],
    "solver": ["lbfgs", "adam"]}

from warnings import catch_warnings, simplefilter
from sklearn.exceptions import ConvergenceWarning

# Exhaustive search over param_grid, starting from the baseline regressor.
# NOTE(review): build_and_eval_pipeline places this search after the scaler
# and imputer, so those steps are fitted on all of X_trn before the search's
# internal cross-validation splits it. The CV scores may therefore be
# slightly optimistic; searching over the whole pipeline would avoid that.
regressorSearch = GridSearchCV(regressor, param_grid)

# Silence ConvergenceWarning only while the search runs, so the filter does
# not stay active for the rest of the kernel session.
with catch_warnings():
    simplefilter("ignore", category=ConvergenceWarning)
    build_and_eval_pipeline(regressorSearch)

regressorSearch.best_params_

R²: 0.752, RMSE: 0.14


{'hidden_layer_sizes': (40, 60), 'solver': 'lbfgs'}

In [6]:
# Tabulate every grid-search candidate, best-ranked first.
cv_results = pd.DataFrame(regressorSearch.cv_results_)
cv_results.sort_values(by="rank_test_score")

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_hidden_layer_sizes,param_solver,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
6,0.4154,0.005986,0.0004,0.00049,"(40, 60)",lbfgs,"{'hidden_layer_sizes': (40, 60), 'solver': 'lb...",0.814267,0.821731,0.787829,0.845131,0.807507,0.815293,0.018696,1
10,0.146394,0.001609,0.000606,0.000495,"(25, 50, 25)",lbfgs,"{'hidden_layer_sizes': (25, 50, 25), 'solver':...",0.838262,0.819093,0.763246,0.829664,0.798316,0.809716,0.026796,2
2,0.3284,0.007089,0.0002,0.0004,"(50, 50)",lbfgs,"{'hidden_layer_sizes': (50, 50), 'solver': 'lb...",0.824083,0.815841,0.811264,0.822782,0.771867,0.809168,0.019226,3
8,0.1402,0.001619,0.0004,0.00049,"(33, 33, 33)",lbfgs,"{'hidden_layer_sizes': (33, 33, 33), 'solver':...",0.828242,0.772697,0.793491,0.847903,0.802464,0.808959,0.0264,4
0,0.170344,0.00621,0.0006,0.00049,100,lbfgs,"{'hidden_layer_sizes': 100, 'solver': 'lbfgs'}",0.844172,0.800767,0.77911,0.830084,0.776532,0.806133,0.02704,5
4,0.329003,0.005694,0.0004,0.00049,"(60, 40)",lbfgs,"{'hidden_layer_sizes': (60, 40), 'solver': 'lb...",0.843498,0.768765,0.789869,0.825148,0.798559,0.805168,0.026349,6
9,0.049013,0.000883,0.000388,0.000475,"(33, 33, 33)",adam,"{'hidden_layer_sizes': (33, 33, 33), 'solver':...",0.657136,0.701316,0.365388,0.695285,0.625344,0.608894,0.124818,7
1,0.036599,0.00393,0.000801,0.0004,100,adam,"{'hidden_layer_sizes': 100, 'solver': 'adam'}",0.623808,0.629558,0.436627,0.64513,0.620215,0.591068,0.077689,8
3,0.075403,0.001354,0.0006,0.00049,"(50, 50)",adam,"{'hidden_layer_sizes': (50, 50), 'solver': 'ad...",0.605194,0.607961,0.450982,0.657758,0.596065,0.583592,0.069717,9
7,0.1142,0.018638,0.0004,0.00049,"(40, 60)",adam,"{'hidden_layer_sizes': (40, 60), 'solver': 'ad...",0.587058,0.640221,0.446612,0.663922,0.544942,0.576551,0.077047,10
