In [1]:
import pandas as pd
import numpy as np
from joblib import dump, load

from sklearn.base import clone
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from xgboost import XGBRegressor

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import GridSearchCV

# Helper Functions

In [2]:
# Global random seed, reused as `random_state` by the estimators defined below.
seed = 42
# Also seed NumPy's global RNG so that code drawing directly from `np.random`
# (the perturbation noise in compute_stability) gives the same numbers on a
# fresh Restart & Run All.
np.random.seed(seed)

In [3]:
def compute_metrics(y_true, y_pred):
    """Return the (RMSE, MAE, R2) triple for a set of predictions.

    RMSE is the square root of the mean squared error; MAE and R2 come
    straight from the corresponding scikit-learn metric functions.
    """
    mse = mean_squared_error(y_true, y_pred)
    return (
        np.sqrt(mse),
        mean_absolute_error(y_true, y_pred),
        r2_score(y_true, y_pred),
    )

In [4]:
def compute_stability(model, X, y, noise_level=0.01, n_trials=5, random_state=None):
    """
    Measure how much a fitted model's RMSE moves when numeric features are perturbed.

    For each trial, zero-mean Gaussian noise is added to a copy of the numeric
    columns of ``X``; the noise std-dev of a column is ``noise_level`` times
    that column's sample std-dev. The perturbed RMSE is compared with the
    RMSE on the unperturbed ``X``.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict``.
    X : pandas.DataFrame
        Feature frame; only columns with a numeric dtype receive noise.
    y : array-like
        True target values aligned with the rows of ``X``.
    noise_level : float, default 0.01
        Noise std-dev expressed as a fraction of each feature's std-dev.
    n_trials : int, default 5
        Number of independent perturbations to average over.
    random_state : int or None, default None
        ``None`` draws from NumPy's global RNG (the previous behaviour, so a
        prior ``np.random.seed`` call still applies). An integer uses a
        dedicated generator, making the result repeatable for that call
        regardless of global state.

    Returns
    -------
    float
        Mean of ``(perturbed_rmse - base_rmse) / base_rmse`` over the trials
        (lower = more stable).
    """
    numeric_cols = X.select_dtypes(include=[np.number]).columns
    base_rmse = np.sqrt(mean_squared_error(y, model.predict(X)))

    # The np.random module and a RandomState instance expose the same
    # `.normal` signature, so either can serve as the noise source.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    # Loop-invariant: every trial perturbs a fresh copy of the same X.
    noise_scale = noise_level * X[numeric_cols].std()
    noise_shape = X[numeric_cols].shape

    rel_changes = []
    for _ in range(n_trials):
        Xp = X.copy()
        # The per-column scale broadcasts across rows.
        noise = rng.normal(0, noise_scale, size=noise_shape)
        Xp[numeric_cols] += noise
        rmse_p = np.sqrt(mean_squared_error(y, model.predict(Xp)))
        rel_changes.append((rmse_p - base_rmse) / base_rmse)

    # Return average relative change (lower = more stable)
    return np.mean(rel_changes)

# Model Definition

In [None]:
# Baseline estimators keyed by display name; the same keys index `param_grids`.
# Every estimator takes its random_state from the shared `seed` so the whole
# set is re-seeded from one place.
models = {
    'Ridge': Ridge(random_state=seed, alpha=1.0),
    'RandomForest': RandomForestRegressor(n_estimators=100, random_state=seed),
    'XGBoost': XGBRegressor(
        n_estimators=100,
        eval_metric='rmse',
        random_state=seed,
        learning_rate=0.1,
        ),
    'MLP': MLPRegressor(
        hidden_layer_sizes=(64, 64),    # implemented a simple MLP with sklearn for easy compatibility with the rest of the code
        activation='relu',
        solver='adam',
        max_iter=200,
        random_state=seed
        )
}

In [6]:
# Hyper-parameter search space for each estimator, keyed by the estimator's name.
param_grids = dict(
    Ridge=dict(
        alpha=[0.01, 0.1, 1.0, 10.0, 100.0],
    ),
    RandomForest=dict(
        n_estimators=[100, 200],
        max_depth=[None, 10, 20],
    ),
    XGBoost=dict(
        n_estimators=[100, 200],
        learning_rate=[0.01, 0.1],
        max_depth=[3, 6],
        subsample=[0.8, 1.0],
    ),
    MLP=dict(
        hidden_layer_sizes=[(64,), (64, 64)],
        alpha=[1e-4, 1e-3, 1e-2],
        learning_rate_init=[1e-3, 1e-2],
        max_iter=[300],
    ),
)

In [7]:
# Load the pre-built experiment datasets from the joblib file.
# Forward slashes are used because the previous backslash form relied on
# invalid escape sequences ('\d', '\e') and only resolved on Windows;
# forward slashes work on every platform.
# NOTE: joblib.load unpickles the file, so only load files from a trusted source.
datasets = load('../data/experimental/experiment_datasets_2.joblib')

# Manual Training

In [6]:
# Unpack the train and test frames of the baseline 'full' within-sample split.
train_df, test_df = (
    datasets['baselines']['full']['within_sample']['within_sample'][part]
    for part in ('train', 'test')
)

train_df.shape, test_df.shape

((291, 82), (73, 82))

In [7]:
# Separate each frame into its feature matrix and the 'totalEsg' target column.
X_train, y_train = train_df.drop(columns=['totalEsg']), train_df['totalEsg']
X_test, y_test = test_df.drop(columns=['totalEsg']), test_df['totalEsg']

In [8]:
# Sanity check: features and target must have the same number of rows within each split.
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((291, 81), (291,), (73, 81), (73,))

In [9]:
# `model` is the very object stored in `models` (not a copy), so fitting it
# below also fits the dictionary entry.
model = models['Ridge']  # Choose the model you want to use

In [10]:
# Display the selected estimator so its configuration is visible in the notebook.
model

In [11]:
# Fit the selected estimator on the training split, using its constructor
# settings as-is (no grid search in this manual walkthrough).
model.fit(X_train, y_train)

  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T


In [12]:
# Predict the target for the held-out test split.
y_pred = model.predict(X_test)

In [13]:
# Score the test predictions, then measure sensitivity to small feature noise.
rmse, mae, r2 = compute_metrics(y_true=y_test, y_pred=y_pred)
stability = compute_stability(
    model=model,
    X=X_test,
    y=y_test,
    noise_level=0.01,
    n_trials=5,
)

In [14]:
# Collect the four scores under the same column names used by the iterative runs.
manual_results = dict(RMSE=rmse, MAE=mae, R2=r2, Stability=stability)

In [15]:
# Render the scores as a one-row table; an explicit index is needed because
# every value in the dict is a scalar.
pd.DataFrame(manual_results, index=[0])

Unnamed: 0,RMSE,MAE,R2,Stability
0,44.795695,15.571981,-32.245542,-0.001084


# Iterative Training

In [8]:
def train_and_evaluate(model_name, model, X_train, y_train, X_test, y_test, param_grid):
    """Tune one model by grid search and score the winner on the test split.

    A 3-fold GridSearchCV (scored by negative RMSE, all cores) is fitted on
    the training data; its best estimator then predicts the test set.

    Returns a dict with the keys 'model', 'RMSE', 'MAE', 'R2' and 'Stability'.
    """
    # GridSearchCV.fit returns the fitted search object itself.
    search = GridSearchCV(
        estimator=model,
        param_grid=param_grid,
        scoring='neg_root_mean_squared_error',
        cv=3,
        n_jobs=-1,
    ).fit(X_train, y_train)
    tuned = search.best_estimator_

    # Accuracy on the untouched test data.
    rmse, mae, r2 = compute_metrics(y_test, tuned.predict(X_test))

    # Robustness of that accuracy to small perturbations of the test features.
    stability = compute_stability(tuned, X_test, y_test,
                                  noise_level=0.01, n_trials=5)

    return dict(
        model=model_name,
        RMSE=rmse,
        MAE=mae,
        R2=r2,
        Stability=stability,
    )

In [9]:
def prepare_data(df, target_col='totalEsg'):
    """Split a frame into (features, target).

    The features are every column except ``target_col``; the target is that
    column as a Series. The input frame is left unmodified.
    """
    features = df.drop(labels=[target_col], axis='columns')
    target = df[target_col]
    return features, target

In [None]:
def process_dataset(train_df, test_df, models, param_grids, scenario_info):
    """Tune and score every model on one train/test pair.

    Returns a list with one record per entry in ``models``; each record is
    ``scenario_info`` merged with the scores from ``train_and_evaluate``
    (score keys take precedence on a name clash).
    """
    # Split once; the same arrays are reused by every model.
    X_train, y_train = prepare_data(train_df)
    X_test, y_test = prepare_data(test_df)

    return [
        {
            **scenario_info,
            **train_and_evaluate(
                name, estimator,
                X_train, y_train,
                X_test, y_test,
                param_grids[name],
            ),
        }
        for name, estimator in models.items()
    ]

In [11]:
# Spot-check the size of one region-holdout training split.
datasets['baselines']['full']['region_holdout']['europe_n_central_asia']['train'].shape

(275, 82)

In [12]:
results = []  # collects one record (dict) per run / scenario / split / context / model
n_runs = 5 # number of runs for statistical comparison

In [None]:
%%time
for run in range(n_runs):
    for scenario_type, scenario_dict in datasets.items():        # e.g., 'baselines', 'diversified'
        for scenario_name, splits in scenario_dict.items():      # e.g., 'full', 'constrained', 'max_balanced', etc.
            for split_type, data_group in splits.items():        # 'within_sample', 'region_holdout', 'size_holdout'
                # if split_type == 'original_data':
                #     continue  # skip original data entries

                # Handle different split types
                if split_type == 'within_sample':
                    data = data_group['within_sample'] # for within_sample, the data structure is one level deeper

                    scenario_info = {
                        'run': run,
                        'scenario_type': scenario_type,
                        'scenario': scenario_name,
                        'split': split_type,
                        'context': 'within_sample',
                    }
                    
                    results.extend(process_dataset(
                        data['train'], 
                        data['test'], 
                        models,
                        param_grids,
                        scenario_info
                    ))
                
                elif split_type in ['region_holdout', 'size_holdout']:
                    for context_name, data in data_group.items():
                        if 'train' not in data or 'test' not in data:
                            continue # skip if train/test not available

                        scenario_info = {
                            'run': run,
                            'scenario_type': scenario_type,
                            'scenario': scenario_name,
                            'split': split_type,
                            'context': context_name
                        }
                        
                        results.extend(process_dataset(
                            data['train'],
                            data['test'],
                            models,
                            param_grids,
                            scenario_info
                        ))

results_df = pd.DataFrame(results)

Data shapes - Train: (291, 83), Test: (73, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (301, 82), Test: (63, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (275, 82), Test: (89, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (322, 82), Test: (42, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (250, 82), Test: (114, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (327, 82), Test: (37, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (345, 82), Test: (19, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (251, 83), Test: (113, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (249, 83), Test: (115, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (228, 83), Test: (136, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (113, 83), Test: (29, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (87, 78), Test: (55, 78)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (55, 78), Test: (87, 78)


Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (81, 82), Test: (61, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (61, 82), Test: (81, 82)


Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (563, 83), Test: (141, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (572, 82), Test: (132, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (572, 82), Test: (132, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (616, 82), Test: (88, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (572, 82), Test: (132, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (616, 82), Test: (88, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (572, 82), Test: (132, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (440, 83), Test: (264, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (484, 83), Test: (220, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (484, 83), Test: (220, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (320, 83), Test: (80, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (325, 82), Test: (75, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (325, 82), Test: (75, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (350, 82), Test: (50, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (325, 82), Test: (75, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (350, 82), Test: (50, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (325, 82), Test: (75, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (250, 83), Test: (150, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (275, 83), Test: (125, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (275, 83), Test: (125, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (291, 83), Test: (73, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (301, 82), Test: (63, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (275, 82), Test: (89, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (322, 82), Test: (42, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (250, 82), Test: (114, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (327, 82), Test: (37, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (345, 82), Test: (19, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (251, 83), Test: (113, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (249, 83), Test: (115, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (228, 83), Test: (136, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (113, 83), Test: (29, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (87, 78), Test: (55, 78)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (55, 78), Test: (87, 78)


Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (81, 82), Test: (61, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (61, 82), Test: (81, 82)


Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (563, 83), Test: (141, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (572, 82), Test: (132, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (572, 82), Test: (132, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (616, 82), Test: (88, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (572, 82), Test: (132, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (616, 82), Test: (88, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (572, 82), Test: (132, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (440, 83), Test: (264, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (484, 83), Test: (220, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (484, 83), Test: (220, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (320, 83), Test: (80, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (325, 82), Test: (75, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (325, 82), Test: (75, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (350, 82), Test: (50, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (325, 82), Test: (75, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (350, 82), Test: (50, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (325, 82), Test: (75, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (250, 83), Test: (150, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (275, 83), Test: (125, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (275, 83), Test: (125, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (291, 83), Test: (73, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (301, 82), Test: (63, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (275, 82), Test: (89, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (322, 82), Test: (42, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (250, 82), Test: (114, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (327, 82), Test: (37, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (345, 82), Test: (19, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (251, 83), Test: (113, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (249, 83), Test: (115, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (228, 83), Test: (136, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (113, 83), Test: (29, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (87, 78), Test: (55, 78)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (55, 78), Test: (87, 78)


Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (81, 82), Test: (61, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (61, 82), Test: (81, 82)


Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (563, 83), Test: (141, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (572, 82), Test: (132, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (572, 82), Test: (132, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (616, 82), Test: (88, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (572, 82), Test: (132, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (616, 82), Test: (88, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (572, 82), Test: (132, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (440, 83), Test: (264, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (484, 83), Test: (220, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (484, 83), Test: (220, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (320, 83), Test: (80, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (325, 82), Test: (75, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (325, 82), Test: (75, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (350, 82), Test: (50, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (325, 82), Test: (75, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (350, 82), Test: (50, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (325, 82), Test: (75, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (250, 83), Test: (150, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (275, 83), Test: (125, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (275, 83), Test: (125, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (291, 83), Test: (73, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (301, 82), Test: (63, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (275, 82), Test: (89, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (322, 82), Test: (42, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (250, 82), Test: (114, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (327, 82), Test: (37, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (345, 82), Test: (19, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (251, 83), Test: (113, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (249, 83), Test: (115, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (228, 83), Test: (136, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (113, 83), Test: (29, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (87, 78), Test: (55, 78)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (55, 78), Test: (87, 78)


Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (81, 82), Test: (61, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (61, 82), Test: (81, 82)


Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (563, 83), Test: (141, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (572, 82), Test: (132, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (572, 82), Test: (132, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (616, 82), Test: (88, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (572, 82), Test: (132, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (616, 82), Test: (88, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (572, 82), Test: (132, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (440, 83), Test: (264, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (484, 83), Test: (220, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (484, 83), Test: (220, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (320, 83), Test: (80, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (325, 82), Test: (75, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (325, 82), Test: (75, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (350, 82), Test: (50, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (325, 82), Test: (75, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (350, 82), Test: (50, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (325, 82), Test: (75, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (250, 83), Test: (150, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (275, 83), Test: (125, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (275, 83), Test: (125, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (291, 83), Test: (73, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (301, 82), Test: (63, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (275, 82), Test: (89, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (322, 82), Test: (42, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (250, 82), Test: (114, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (327, 82), Test: (37, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (345, 82), Test: (19, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (251, 83), Test: (113, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (249, 83), Test: (115, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (228, 83), Test: (136, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (113, 83), Test: (29, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (87, 78), Test: (55, 78)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (55, 78), Test: (87, 78)


Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (81, 82), Test: (61, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (61, 82), Test: (81, 82)


Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (563, 83), Test: (141, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (572, 82), Test: (132, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (572, 82), Test: (132, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (616, 82), Test: (88, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (572, 82), Test: (132, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (616, 82), Test: (88, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (572, 82), Test: (132, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (440, 83), Test: (264, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (484, 83), Test: (220, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (484, 83), Test: (220, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (320, 83), Test: (80, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (325, 82), Test: (75, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (325, 82), Test: (75, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (350, 82), Test: (50, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (325, 82), Test: (75, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (350, 82), Test: (50, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (325, 82), Test: (75, 82)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (250, 83), Test: (150, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (275, 83), Test: (125, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Data shapes - Train: (275, 83), Test: (125, 83)


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
Parameters: { "boosting_type" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


CPU times: total: 21min 28s
Wall time: 28min 4s


In [19]:
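# %%time
# # NOTE: every line of this cell is commented out, so nothing here runs.
# # It is a GridSearchCV-tuned variant of the scenario/split/model evaluation loop.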
# for scenario_type, scenario_dict in datasets.items():        # e.g., 'baselines', 'diversified'
#     for scenario_name, splits in scenario_dict.items():      # e.g., 'full', 'constrained', 'max_balanced', etc.
#         for split_type, data_group in splits.items():        # 'within_sample', 'region_holdout', 'size_holdout'
#             if split_type == 'original_data':
#                 continue  # Skip original data entries
            
#             # if split_type == 'region_holdout':
#             #     continue # TEMP TO CHECK SMTH

#             # For within-sample, we have one group; for others, multiple contexts
#             # contexts = {'within_sample': data_group} if split_type == 'within_sample' else data_group

#             # for context_name, data in contexts.items():
#             #     print(f"Scenario: {scenario_type}, {scenario_name}, Split: {split_type}, Context: {context_name}")

#             # Handle different split types
#             if split_type == 'within_sample':
#                 # For within_sample, data structure is one level deeper
#                 data = data_group['within_sample']
#                 print(f"Scenario: {scenario_type}, {scenario_name}, Split: {split_type}")
#                 if 'train' not in data or 'test' not in data:
#                     continue
                    
#                 train_df = data['train']
#                 test_df = data['test']

#                 print(train_df.shape, test_df.shape)

#                 # Separate features and target
#                 X_train = train_df.drop(columns=['totalEsg'])
#                 y_train = train_df['totalEsg']
#                 X_test  = test_df.drop(columns=['totalEsg'])
#                 y_test  = test_df['totalEsg']
                
#                 # Train and evaluate models
#                 for model_name, model in models.items():
#                     # Prepare grid search
#                     grid = GridSearchCV(
#                         estimator=model,
#                         param_grid=param_grids[model_name],
#                         cv=3,
#                         scoring='neg_root_mean_squared_error',
#                         n_jobs=-1
#                     )
#                     # Tune on training data
#                     grid.fit(X_train, y_train)
#                     best_model = grid.best_estimator_

#                     # train
#                     # model.fit(X_train, y_train)


#                     # predict
#                     # y_pred = model.predict(X_test)
#                     y_pred = best_model.predict(X_test)

#                     # compute metrics
#                     rmse, mae, r2 = compute_metrics(y_test, y_pred)
#                     stability = compute_stability(best_model, X_test, y_test,
#                                                 noise_level=0.01, n_trials=5)
                    
#                     results.append({
#                         'scenario_type': scenario_type,
#                         'scenario': scenario_name,
#                         'split': split_type,
#                         'context': 'within_sample',
#                         'model': model_name,
#                         'RMSE': rmse,
#                         'MAE': mae,
#                         'R2': r2,
#                         'Stability': stability
#                     })
            
#             else:
#                 # For region_holdout and size_holdout, process each context
#                 for context_name, data in data_group.items():
#                     print(f"Scenario: {scenario_type}, {scenario_name}, Split: {split_type}, Context: {context_name}")
#                     if 'train' not in data or 'test' not in data:
#                         continue
                        
#                     train_df = data['train']
#                     test_df = data['test']

#                     print(train_df.shape, test_df.shape)

#                     # Separate features and target
#                     X_train = train_df.drop(columns=['totalEsg'])
#                     y_train = train_df['totalEsg']
#                     X_test  = test_df.drop(columns=['totalEsg'])
#                     y_test  = test_df['totalEsg']
                    
#                     # Train and evaluate models
#                     for model_name, model in models.items():
#                         # Prepare grid search
#                         grid = GridSearchCV(
#                             estimator=model,
#                             param_grid=param_grids[model_name],
#                             cv=3,
#                             scoring='neg_root_mean_squared_error',
#                             n_jobs=-1
#                         )
#                         # Tune on training data
#                         grid.fit(X_train, y_train)
#                         best_model = grid.best_estimator_

#                         # train
#                         # model.fit(X_train, y_train)


#                         # predict
#                         # y_pred = model.predict(X_test)
#                         y_pred = best_model.predict(X_test)

#                         # compute metrics
#                         rmse, mae, r2 = compute_metrics(y_test, y_pred)
#                         stability = compute_stability(best_model, X_test, y_test,
#                                                   noise_level=0.01, n_trials=5)
                        
#                         results.append({
#                             'scenario_type': scenario_type,
#                             'scenario': scenario_name,
#                             'split': split_type,
#                             'context': context_name,
#                             'model': model_name,
#                             'RMSE': rmse,
#                             'MAE': mae,
#                             'R2': r2,
#                             'Stability': stability
#                         })

# results_df = pd.DataFrame(results)

In [14]:
# Turn the accumulated `results` records into a single DataFrame for analysis.
# NOTE(review): `results` is presumably the list of per-model metric dicts
# appended by the evaluation loop in an earlier cell — confirm it is populated
# before running this cell on a fresh kernel.
results_df = pd.DataFrame(results)

In [15]:
# Show the results table (the notebook renders a truncated view of long frames).
results_df

Unnamed: 0,run,scenario_type,scenario,split,context,model,RMSE,MAE,R2,Stability
0,0.0,baselines,full,within_sample,within_sample,Ridge,3.605126e+01,1.317664e+01,-2.053286e+01,-0.001012
1,0.0,baselines,full,within_sample,within_sample,RandomForest,6.853874e+00,5.183024e+00,2.217250e-01,0.033225
2,0.0,baselines,full,within_sample,within_sample,XGBoost,6.959850e+00,5.228697e+00,1.974713e-01,0.048310
3,0.0,baselines,full,within_sample,within_sample,MLP,1.995883e+08,3.212502e+07,-6.599803e+14,0.000861
4,,baselines,full,region_holdout,east_asia_n_pacific,Ridge,1.221359e+02,6.728334e+01,-2.619003e+02,0.002003
...,...,...,...,...,...,...,...,...,...,...
695,,diversified,median_balanced,size_holdout,Mid-Cap,MLP,1.501225e+09,2.323859e+08,-4.152267e+16,0.000683
696,,diversified,median_balanced,size_holdout,Small-Cap,Ridge,1.530604e+01,9.344703e+00,-1.927470e+00,0.000056
697,,diversified,median_balanced,size_holdout,Small-Cap,RandomForest,8.895846e+00,6.472343e+00,1.112487e-02,-0.000920
698,,diversified,median_balanced,size_holdout,Small-Cap,XGBoost,9.466702e+00,6.653255e+00,-1.198616e-01,-0.006356


In [17]:
# Highest R2 found in any row of results_df (all scenarios, splits and models).
results_df['R2'].max()

0.8708870271267248

In [None]:
# Columns that together identify one within-sample baseline.
baseline_keys = ['scenario_type', 'scenario', 'model']

# 1. Extract within-sample RMSE per scenario/model.
#    Repeated within-sample rows for the same key (e.g. one per run) are
#    averaged so that every key appears exactly once; without this, the left
#    merge below would duplicate every matching row of results_df.
within = (
    results_df.loc[results_df['split'] == 'within_sample', baseline_keys + ['RMSE']]
    .groupby(baseline_keys, as_index=False, sort=False)['RMSE']
    .mean()
    .rename(columns={'RMSE': 'RMSE_within'})
)

# 2. Merge to get RMSE_within alongside all rows.
#    validate='many_to_one' makes pandas raise if the baseline keys are ever
#    not unique, instead of silently multiplying rows.
merged = results_df.merge(
    within,
    on=baseline_keys,
    how='left',
    validate='many_to_one',
)

# 3. Compute Cross-Context Generalization Score:
#    Transfer Score = 1 - (RMSE_holdout / RMSE_within)
#    0 means the hold-out RMSE equals the within-sample RMSE; negative values
#    mean the hold-out error is larger than the within-sample error.
#    For within-sample rows, set NaN (the score is only defined for hold-outs).
merged['CrossContextScore'] = np.where(
    merged['split'] == 'within_sample',
    np.nan,
    1 - merged['RMSE'] / merged['RMSE_within']
)