In [1]:
import pandas as pd

In [2]:
# Hand the path straight to pandas: it opens and closes the file itself, and
# the encoding is declared once, on the call that does the decoding.
df = pd.read_csv('vr_demo.csv', skipinitialspace=True, encoding='utf-8')
df

Unnamed: 0,Base MD.Sample,Lat.Any,Long.Any,VR Mean.Any
0,1740.0,47.122103,-47.957358,0.33
1,1920.0,47.122103,-47.957358,0.39
2,2730.0,47.122103,-47.957358,0.42
3,3990.0,47.122103,-47.957358,0.49
4,4410.0,47.122103,-47.957358,0.59
...,...,...,...,...
283,2460.0,46.458936,-48.282383,0.45
284,2560.0,46.458936,-48.282383,0.47
285,2740.0,46.458936,-48.282383,0.49
286,3580.0,46.458936,-48.282383,0.67


In [3]:
def get_n_hidden_units(n_rows: int, n_features: int) -> int:
    """Choose a hidden-layer width from the size of the data set.

    The raw estimate is ``n_features * n_rows / 5``. It is raised to at
    least 10, capped at 100, and finally truncated to an ``int``.

    Args:
        n_rows: Number of rows in the data set.
        n_features: Number of feature columns in the data set.

    Returns:
        The number of hidden units, always between 10 and 100 inclusive.
    """
    estimate = (n_features * n_rows) / 5
    # Lower bound first, then upper bound, mirroring max() followed by min().
    at_least_floor = estimate if estimate > 10 else 10
    clamped = at_least_floor if at_least_floor < 100 else 100
    return int(clamped)

# df.shape[1] counts every column, including the target held in the last
# column, so subtract one to pass the number of input features only.
hidden_layer_sizes = [get_n_hidden_units(len(df), df.shape[1] - 1)]
print(f"Hidden layer sizes: {hidden_layer_sizes}")

Hidden layer sizes: [100]


In [4]:
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.pipeline import Pipeline
import numpy as np

# One seed is reused for the train/test split and for the MLP regressor.
seed = 42

# The last column is the regression target; all columns before it are inputs.
features = df.columns.values[:-1]
targets = df.columns.values[-1]
X = df[features]
y = df[targets]

# 70 % of the rows are used for training; the remainder is held out for testing.
X_trn, X_test, y_trn, y_test = train_test_split(
    X,
    y,
    train_size=0.7,
    random_state=seed,
)

regressor = MLPRegressor(
    hidden_layer_sizes=hidden_layer_sizes,
    random_state=seed,
)

def build_and_eval_pipeline(regressor):
    """Fit a scale-then-regress pipeline and print its test-set scores.

    The pipeline standardises the inputs with ``StandardScaler`` and passes
    them to ``regressor``. It is fitted on the notebook-level ``X_trn`` /
    ``y_trn`` and evaluated on ``X_test`` / ``y_test``.

    Args:
        regressor: Estimator used as the final step of the pipeline.

    Returns:
        None. R² and RMSE on the test split are printed.
    """
    steps = [
        ('Scale', StandardScaler()),
        ('Regression', regressor),
    ]
    model = Pipeline(steps=steps)
    model.fit(X_trn, y_trn)

    predictions = model.predict(X_test)
    r2 = r2_score(y_test, predictions)
    mse = mean_squared_error(y_test, predictions)
    rmse = np.sqrt(mse)
    print(f"R²: {r2:.3f}, RMSE: {rmse:.2f}")
    
# Baseline run: the MLP with a single hidden layer defined above.
build_and_eval_pipeline(regressor)

R²: 0.579, RMSE: 0.19


In [5]:
# try with grid search
from sklearn.model_selection import GridSearchCV

# Candidate network shapes (one tuple entry per hidden layer) and solvers.
param_grid = {
    "hidden_layer_sizes": [
        (100,),  # trailing comma makes this a one-layer tuple, not the int 100
        (50, 50), (60, 40), (40, 60),
        (33, 33, 33), (25, 50, 25)],
    "solver": ["lbfgs", "adam"]}

from warnings import catch_warnings, simplefilter
from sklearn.exceptions import ConvergenceWarning

# Exhaustive search over param_grid, wrapped around the baseline regressor.
regressorSearch = GridSearchCV(regressor, param_grid)

# Silence ConvergenceWarning only while the search is fitted; the previous
# warning filters are restored on exit, so later cells still report it.
with catch_warnings():
    simplefilter("ignore", category=ConvergenceWarning)
    build_and_eval_pipeline(regressorSearch)

# Best hyper-parameter combination found by the search.
regressorSearch.best_params_

R²: 0.752, RMSE: 0.14


{'hidden_layer_sizes': (40, 60), 'solver': 'lbfgs'}