In [1]:
from sklearn.linear_model import LinearRegression, Ridge, MultiTaskLasso, ElasticNet, Lasso, RidgeCV, LassoCV, ElasticNetCV, MultiTaskLassoCV, MultiTaskElasticNetCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, HistGradientBoostingRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, KFold
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
import matplotlib.pyplot as plt
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
import numpy as np
import pandas as pd
from xgboost import XGBRegressor  
from lightgbm import LGBMRegressor
from sklearn.feature_selection import SelectKBest, mutual_info_regression, f_regression, SequentialFeatureSelector, RFE
from minepy import MINE
from sklearn.pipeline import Pipeline


# Load the combined dataset (path is relative to the working directory).
data = pd.read_csv('Combined.csv')

# Inputs are the columns Feature_1..Feature_50, targets are Output_1..Output_5.
feature_columns = [f'Feature_{number}' for number in range(1, 51)]
target_columns = [f'Output_{number}' for number in range(1, 6)]
X = data[feature_columns]
y = data[target_columns]

# Two-stage split: hold out 25% of the rows, then cut that hold-out in half
# to obtain equally sized validation and test sets.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.25, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)

# Report the shape of every split as a quick sanity check.
for split_label, split_X, split_y in (
    ("Training set:", X_train, y_train),
    ("Validation set:", X_val, y_val),
    ("Test set:", X_test, y_test),
):
    print(split_label, split_X.shape, split_y.shape)

# One independent, initially empty result bucket per model family.
results = {
    family: {}
    for family in (
        'Inherently Multi-Output',
        'Single-Output (adapted to Multi-Output)',
    )
}


Training set: (600, 50) (600, 5)
Validation set: (100, 50) (100, 5)
Test set: (100, 50) (100, 5)


In [None]:
# Search spaces for estimators that accept a 2-D target directly.
# Key convention: 'model' holds the estimator that fills the pipeline's
# 'model' step, and every 'model__<param>' key addresses a parameter of
# that step.
model_param_grid = {
    'Linear Regression': {
        'model': [LinearRegression()]
    },
    'Polynomial Regression': {
        'model': [Pipeline([
            ('poly', PolynomialFeatures()),
            ('linear', LinearRegression())
        ])],
        'model__poly__degree': [2, 3],
        'model__poly__interaction_only': [False, True]
    },
    'Ridge Regression': {
        'model': [Ridge()],
        'model__alpha': [0.01, 0.1, 1, 10, 100]
    },
    'Lasso Regression': {
        'model': [Lasso(max_iter=10000)],
        'model__alpha': [0.01, 0.1, 1, 10, 100]
    },
    'ElasticNet Regression': {
        'model': [ElasticNet(max_iter=10000)],
        'model__alpha': [0.01, 0.1, 1, 10, 100],
        'model__l1_ratio': [0.1, 0.5, 0.9]
    },
    'Decision Tree Regressor': {
        'model': [DecisionTreeRegressor(random_state=42)],
        'model__max_depth': [5, 10, 20],
        'model__min_samples_split': [2, 5, 10]
    },
    'Random Forest Regressor': {
        'model': [RandomForestRegressor(random_state=42)],
        'model__n_estimators': [50, 100, 200],
        'model__max_depth': [5, 10, None]
    },
    'Extra Trees Regressor': {
        'model': [ExtraTreesRegressor(random_state=42)],
        'model__n_estimators': [50, 100, 200],
        'model__max_depth': [5, 10, None]
    },  # this comma was missing, which made the whole cell a SyntaxError
    'MLP Regressor': {
        'model': [MLPRegressor(random_state=42, max_iter=10000, early_stopping=True)],
        'model__hidden_layer_sizes': [(50,), (100,), (50, 50)],
        'model__alpha': [0.001, 0.01, 0.1, 1.0],
        'model__learning_rate_init': [0.0001, 0.001, 0.01],
        'model__solver': ['adam', 'sgd'],
        'model__activation': ['relu', 'tanh'],
        'model__learning_rate': ['constant', 'adaptive'],
    }
}

# Search spaces for single-output estimators that are wrapped in
# MultiOutputRegressor before tuning. 'base_model' is the estimator to wrap
# (the tuning loop removes it from the grid before the search), and
# 'model__estimator__<param>' addresses a parameter of the wrapped estimator.
# These entries previously floated outside any dictionary.
model_param_grid_non_inherent = {
    'SVR (MultiOutput)': {
        'model__estimator__kernel': ['linear', 'rbf'],
        'model__estimator__C': [0.1, 1, 10],
        'base_model': SVR()  # Base estimator for MultiOutputRegressor
    },
    'KNeighbors Regressor (MultiOutput)': {
        'model__estimator__n_neighbors': [3, 5, 10],
        'model__estimator__weights': ['uniform', 'distance'],
        'base_model': KNeighborsRegressor()
    },
    'Gradient Boosting Regressor (MultiOutput)': {
        'model__estimator__n_estimators': [50, 100, 200],
        'model__estimator__learning_rate': [0.01, 0.1, 0.2],
        'model__estimator__max_depth': [3, 5, 10],
        'base_model': GradientBoostingRegressor()
    },
    'AdaBoost Regressor (MultiOutput)': {
        'model__estimator__n_estimators': [50, 100, 200],
        'model__estimator__learning_rate': [0.01, 0.1, 1.0],
        'base_model': AdaBoostRegressor()
    },
    'Hist Gradient Boosting Regressor (MultiOutput)': {
        'model__estimator__max_iter': [100, 200],
        'model__estimator__learning_rate': [0.01, 0.1, 0.2],
        'base_model': HistGradientBoostingRegressor(random_state=42)
    },
    'XGBoost Regressor (MultiOutput)': {
        'model__estimator__n_estimators': [50, 100, 200],
        'model__estimator__learning_rate': [0.01, 0.1, 0.2],
        'model__estimator__max_depth': [3, 5, 10],
        'base_model': XGBRegressor(random_state=42)
    }
}

In [None]:
# Search spaces for estimators that accept a 2-D target directly.
# Key convention: 'model' holds the estimator that fills the pipeline's
# 'model' step, and every 'model__<param>' key addresses a parameter of
# that step.
model_param_grid = {
    'Linear Regression': {
        'model': [LinearRegression()]
    },
    'Polynomial Regression': {
        'model': [Pipeline([
            ('poly', PolynomialFeatures()),
            ('linear', LinearRegression())
        ])],
        'model__poly__degree': [2, 3],
        'model__poly__interaction_only': [False, True]
    },
    'Ridge Regression': {
        'model': [Ridge()],
        'model__alpha': [0.01, 0.1, 1, 10, 100]
    },
    'ElasticNet Regression': {
        'model': [ElasticNet(max_iter=10000)],
        'model__alpha': [0.01, 0.1, 1, 10, 100],
        'model__l1_ratio': [0.1, 0.5, 0.9]
    },
    'Random Forest Regressor': {
        'model': [RandomForestRegressor(random_state=42)],
        'model__n_estimators': [50, 100, 200],
        'model__max_depth': [5, 10, None]
    },
    'MLP Regressor': {
        'model': [MLPRegressor(random_state=42, max_iter=10000, early_stopping=True)],
        'model__hidden_layer_sizes': [(50,), (100,), (50, 50)],
        'model__alpha': [0.001, 0.01, 0.1, 1.0],
        'model__learning_rate_init': [0.0001, 0.001, 0.01],
        'model__solver': ['adam', 'sgd'],
        'model__activation': ['relu', 'tanh'],
        'model__learning_rate': ['constant', 'adaptive'],
    }
}

# Search spaces for single-output estimators that are wrapped in
# MultiOutputRegressor before tuning. 'base_model' is the estimator to wrap
# (the tuning loop removes it from the grid before the search), and
# 'model__estimator__<param>' addresses a parameter of the wrapped estimator.
model_param_grid_non_inherent = {
    'SVR (MultiOutput)': {
        'model__estimator__kernel': ['linear', 'rbf'],
        'model__estimator__C': [0.1, 1, 10],
        'base_model': SVR()  # Base estimator for MultiOutputRegressor
    },
    'KNeighbors Regressor (MultiOutput)': {
        'model__estimator__n_neighbors': [3, 5, 10],
        'model__estimator__weights': ['uniform', 'distance'],
        'base_model': KNeighborsRegressor()
    },
    'AdaBoost Regressor (MultiOutput)': {
        'model__estimator__n_estimators': [50, 100, 200],
        'model__estimator__learning_rate': [0.01, 0.1, 1.0],
        'base_model': AdaBoostRegressor()
    },
    'Hist Gradient Boosting Regressor (MultiOutput)': {
        'model__estimator__max_iter': [100, 200],
        'model__estimator__learning_rate': [0.01, 0.1, 0.2],
        'base_model': HistGradientBoostingRegressor(random_state=42)
    },
    # This entry used the "inherent" schema ('model': [...], 'model__<param>'),
    # which has no 'base_model' for the wrapping loop to pick up and whose
    # parameter names would target the wrapper instead of the wrapped
    # estimator. It now follows the same schema as its siblings.
    # NOTE(review): CatBoostRegressor is not imported in the notebook's import
    # cell; add `from catboost import CatBoostRegressor` there before running.
    'CatBoost Regressor': {
        'model__estimator__iterations': [100, 200, 500],  # Same as n_estimators
        'model__estimator__learning_rate': np.logspace(-3, 0, 4),  # Learning rate for CatBoost
        'model__estimator__depth': [3, 5, 10],  # Similar to max_depth in XGBoost
        'model__estimator__l2_leaf_reg': np.logspace(-3, 2, 5),  # Equivalent to reg_lambda
        'base_model': CatBoostRegressor(verbose=0, random_state=42)
    }
}


In [3]:
from statsmodels.stats.outliers_influence import variance_inflation_factor

# Pool the training and validation rows for the collinearity check.
X_train_val = pd.concat([X_train, X_val], axis=0)

# Standardise the pooled features and rebuild a DataFrame that carries the
# original column labels.
scaler = StandardScaler()
scaled_values = scaler.fit_transform(X_train_val)
X_train_val_scaled = pd.DataFrame(scaled_values, columns=X_train_val.columns)

# Function to calculate VIF for all features
def calculate_vif(X):
    """Compute the variance inflation factor (VIF) of every column of ``X``.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature table; each column is scored against all the others.
        NOTE(review): no constant column is added here, so the caller is
        presumably expected to pass centred (e.g. standardised) data.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``'Feature'`` and ``'VIF'``, ordered from the highest
        VIF to the lowest, with a fresh 0..n-1 index.
    """
    feature_matrix = X.values
    scores = [
        variance_inflation_factor(feature_matrix, column_position)
        for column_position in range(X.shape[1])
    ]
    vif_table = pd.DataFrame({'Feature': X.columns, 'VIF': scores})
    # Highest VIF first, then discard the pre-sort index.
    ranked = vif_table.sort_values(by="VIF", ascending=False)
    return ranked.reset_index(drop=True)

# VIF table for the standardised features.
vif_data = calculate_vif(X_train_val_scaled)

# Show the full ranking.
print("\nVIF Scores (after scaling):")
print(vif_data)

# Recover the numeric suffix of every "Feature_<n>" label once, then use it
# to separate Feature 1-25 from Feature 26-50.
feature_number = vif_data['Feature'].str.split('_').str[1].astype(int)
vif_below_25 = vif_data[feature_number <= 25]
vif_above_25 = vif_data[feature_number > 25]

# Number of features per group whose VIF exceeds 10.
count_below_25 = len(vif_below_25[vif_below_25['VIF'] > 10])
count_above_25 = len(vif_above_25[vif_above_25['VIF'] > 10])

print(f"Number of features with VIF > 10 in Feature 1-25: {count_below_25}")
print(f"Number of features with VIF > 10 in Feature 26-50: {count_above_25}")


VIF Scores (after scaling):
       Feature         VIF
0    Feature_4  167.653940
1   Feature_15  157.184582
2   Feature_25  153.335490
3   Feature_19  141.294080
4   Feature_12   78.641846
5   Feature_22   54.993071
6   Feature_18   51.318104
7   Feature_16   48.392623
8    Feature_2   47.562583
9    Feature_7   16.861612
10  Feature_17   10.612797
11  Feature_11    6.542223
12   Feature_6    6.521484
13  Feature_21    5.888673
14   Feature_9    5.744592
15   Feature_3    5.734591
16  Feature_14    5.700567
17  Feature_13    5.339858
18  Feature_20    4.841857
19  Feature_23    4.809383
20   Feature_8    4.712241
21   Feature_1    4.417642
22  Feature_24    4.299875
23  Feature_36    4.155770
24  Feature_48    3.703300
25  Feature_31    3.464973
26  Feature_26    3.333339
27  Feature_27    3.145881
28  Feature_49    3.130971
29  Feature_38    3.062360
30  Feature_41    3.041318
31  Feature_42    3.034389
32   Feature_5    3.006772
33  Feature_43    2.931877
34  Feature_34    2.901118

In [8]:
def clean_params_with_model(grid):
    """Summarise the winning configuration of a fitted grid search.

    Parameters
    ----------
    grid : fitted search object
        Must expose ``best_params_`` and a ``best_estimator_`` pipeline that
        contains a step named ``'model'``.

    Returns
    -------
    dict
        ``best_params_`` with every key shortened to the text after its last
        ``'__'``, plus a ``'model'`` entry. That entry is the tuned
        ``'model'`` step itself, except when the step is a
        ``MultiOutputRegressor``: then a newly constructed
        ``MultiOutputRegressor`` around the step's ``estimator`` is stored.
    """
    best_params = {}
    for qualified_name, chosen_value in grid.best_params_.items():
        # 'model__estimator__C' -> 'C'; names that share a last segment collide.
        short_name = qualified_name.split('__')[-1]
        best_params[short_name] = chosen_value

    tuned_step = grid.best_estimator_['model']
    best_params['model'] = (
        MultiOutputRegressor(estimator=tuned_step.estimator)
        if isinstance(tuned_step, MultiOutputRegressor)
        else tuned_step
    )
    return best_params


# Combine training and validation sets; cross-validation replaces the fixed
# validation split during tuning.
X_train_val = pd.concat([X_train, X_val])
y_train_val = pd.concat([y_train, y_val])

# Define a 5-fold cross-validator (shuffled, fixed seed so every model is
# scored on the same folds).
cv = KFold(n_splits=5, shuffle=True, random_state=42)

# Define models and their parameter grids.
# 'model' supplies the estimator for the pipeline's 'model' step and
# 'model__<param>' addresses a parameter of that step.
model_param_grid = {
    'Linear Regression': {
        'model': [LinearRegression()]
    },
    'Ridge Regression': {
        'model': [Ridge()],
        'model__alpha': [0.01, 0.1, 1, 10, 100]
    }
}

# Initialize results dictionary
results = {'Inherently Multi-Output': {}}

for name, param_grid in model_param_grid.items():
    print(f"Tuning and evaluating: {name}")
    pipeline = Pipeline([('model', param_grid['model'][0])])  # Start with the model from the grid

    # Score MSE and R^2 in the same search: the winner is still chosen (and
    # refit) by MSE, and its R^2 is read from the stored fold scores instead
    # of cross-validating the best estimator a second time on the same folds.
    grid = GridSearchCV(
        pipeline,
        param_grid,
        cv=cv,
        scoring=['neg_mean_squared_error', 'r2'],
        refit='neg_mean_squared_error',
        n_jobs=-1,
    )
    grid.fit(X_train_val, y_train_val)

    # Clean parameters and add best model
    best_params = clean_params_with_model(grid)

    # Cross-validation results of the selected candidate
    best_neg_mse = grid.best_score_
    best_mse = -best_neg_mse  # Convert to positive
    best_r2 = grid.cv_results_['mean_test_r2'][grid.best_index_]

    # Save standardized results
    results['Inherently Multi-Output'][name] = {
        'Best Parameters': best_params,
        'Cross-Validation Mean MSE': best_mse,
        'Cross-Validation Mean R^2': best_r2
    }


Tuning and evaluating: Linear Regression
Tuning and evaluating: Ridge Regression


In [9]:
# Define models and their parameter grids.
# 'base_model' is the single-output estimator to wrap in MultiOutputRegressor;
# 'model__estimator__<param>' addresses a parameter of that wrapped estimator.
model_param_grid_non_inherent = {
    'SVR (MultiOutput)': {
        'model__estimator__kernel': ['linear', 'rbf'],
        'model__estimator__C': [0.1, 1, 10],
        'base_model': SVR()  # Base estimator for MultiOutputRegressor
    },
    'KNeighbors Regressor (MultiOutput)': {
        'model__estimator__n_neighbors': [3, 5, 10],
        'model__estimator__weights': ['uniform', 'distance'],
        'base_model': KNeighborsRegressor()
    }
}

# Initialize results dictionary
results['Single-Output (adapted to Multi-Output)'] = {}

# Standardize Single-Output (Adapted to Multi-Output) Models
for name, param_grid in model_param_grid_non_inherent.items():
    print(f"Tuning and evaluating: {name}")

    # 'base_model' is not a hyper-parameter, so it must not reach the search.
    # Build a filtered copy rather than popping it: the grid dictionary stays
    # intact and this loop can be re-run without a KeyError.
    search_space = {
        key: values for key, values in param_grid.items() if key != 'base_model'
    }

    # Define the MultiOutputRegressor with the specified base model
    base_model = MultiOutputRegressor(param_grid['base_model'])

    # Create pipeline
    pipeline = Pipeline([('model', base_model)])

    # Score MSE and R^2 in the same search: the winner is still chosen (and
    # refit) by MSE, and its R^2 is read from the stored fold scores instead
    # of cross-validating the best estimator a second time on the same folds.
    grid = GridSearchCV(
        pipeline,
        search_space,
        cv=cv,
        scoring=['neg_mean_squared_error', 'r2'],
        refit='neg_mean_squared_error',
        n_jobs=-1,
    )
    grid.fit(X_train_val, y_train_val)

    # Clean parameters and add best model
    best_params = clean_params_with_model(grid)

    # Cross-validation results of the selected candidate
    best_neg_mse = grid.best_score_
    best_mse = -best_neg_mse  # Convert to positive
    best_r2 = grid.cv_results_['mean_test_r2'][grid.best_index_]

    # Save standardized results
    results['Single-Output (adapted to Multi-Output)'][name] = {
        'Best Parameters': best_params,
        'Cross-Validation Mean MSE': best_mse,
        'Cross-Validation Mean R^2': best_r2
    }

Tuning and evaluating: SVR (MultiOutput)
Tuning and evaluating: KNeighbors Regressor (MultiOutput)


In [10]:
# Display the cross-validation summary collected for both model families.
results

{'Inherently Multi-Output': {'Linear Regression': {'Best Parameters': {'model': LinearRegression(),
    'copy_X': True,
    'fit_intercept': True,
    'n_jobs': None,
    'positive': False},
   'Cross-Validation Mean MSE': 0.004857987153760264,
   'Cross-Validation Mean R^2': 0.7543301290317218},
  'Ridge Regression': {'Best Parameters': {'model': Ridge(alpha=0.01),
    'alpha': 0.01,
    'copy_X': True,
    'fit_intercept': True,
    'max_iter': None,
    'positive': False,
    'random_state': None,
    'solver': 'auto',
    'tol': 0.0001},
   'Cross-Validation Mean MSE': 0.004823524288458389,
   'Cross-Validation Mean R^2': 0.7576450703351456}},
 'Single-Output (adapted to Multi-Output)': {'SVR (MultiOutput)': {'Best Parameters': {'model': MultiOutputRegressor(estimator=SVR(C=10)),
    'C': 10,
    'cache_size': 200,
    'coef0': 0.0,
    'degree': 3,
    'epsilon': 0.1,
    'gamma': 'scale',
    'kernel': 'rbf',
    'max_iter': -1,
    'shrinking': True,
    'tol': 0.001,
    'verbo

In [None]:
{'Inherently Multi-Output': {'Linear Regression': {'Best Parameters': {'model': LinearRegression()},
   'Cross-Validation Mean MSE': 0.004857987153760264,
   'Cross-Validation Mean R^2': 0,
   'Cross-Validation Mean RMSE': 0.06969926221819184,
   'Training RMSE': 0.06424419536046766,
   'Training Time (seconds)': 3.6064281463623047},
  'Polynomial Regression': {'Best Parameters': {'model': Pipeline(steps=[('poly', PolynomialFeatures(degree=3)),
                    ('linear', LinearRegression())]),
    'degree': 3,
    'interaction_only': False},
   'Cross-Validation Mean MSE': 0.0018547076742969327,
   'Cross-Validation Mean R^2': 0,
   'Cross-Validation Mean RMSE': 0.04306631716663189,
   'Training RMSE': 4.93947692342546e-15,
   'Training Time (seconds)': 17.90014410018921},
  'Ridge Regression': {'Best Parameters': {'model': Ridge(alpha=0.01),
    'alpha': 0.01},
   'Cross-Validation Mean MSE': 0.004823524288458389,
   'Cross-Validation Mean R^2': 0,
   'Cross-Validation Mean RMSE': 0.06945159673080518,
   'Training RMSE': 0.06483304135596038,
   'Training Time (seconds)': 0.05801820755004883},
  'Lasso Regression': {'Best Parameters': {'model': Lasso(alpha=0.01, max_iter=10000),
    'alpha': 0.01},
   'Cross-Validation Mean MSE': 0.015213622332485296,
   'Cross-Validation Mean R^2': 0,
   'Cross-Validation Mean RMSE': 0.12334351354037754,
   'Training RMSE': 0.1231867521187686,
   'Training Time (seconds)': 0.05580902099609375},
  'ElasticNet Regression': {'Best Parameters': {'model': ElasticNet(alpha=0.01, l1_ratio=0.1, max_iter=10000),
    'alpha': 0.01,
    'l1_ratio': 0.1},
   'Cross-Validation Mean MSE': 0.009584443755940784,
   'Cross-Validation Mean R^2': 0,
   'Cross-Validation Mean RMSE': 0.09790017239995435,
   'Training RMSE': 0.09757134651315037,
   'Training Time (seconds)': 0.1472609043121338},
  'Decision Tree Regressor': {'Best Parameters': {'model': DecisionTreeRegressor(max_depth=10, min_samples_split=10, random_state=42),
    'max_depth': 10,
    'min_samples_split': 10},
   'Cross-Validation Mean MSE': 0.0021434064801014903,
   'Cross-Validation Mean R^2': 0,
   'Cross-Validation Mean RMSE': 0.04629693812879519,
   'Training RMSE': 0.02146894066587302,
   'Training Time (seconds)': 0.30356597900390625},
  'Random Forest Regressor': {'Best Parameters': {'model': RandomForestRegressor(max_depth=10, n_estimators=200, random_state=42),
    'max_depth': 10,
    'n_estimators': 200},
   'Cross-Validation Mean MSE': 0.0011659760861937906,
   'Cross-Validation Mean R^2': 0,
   'Cross-Validation Mean RMSE': 0.03414639199379329,
   'Training RMSE': 0.01386447595690514,
   'Training Time (seconds)': 14.692317962646484},
  'Extra Trees Regressor': {'Best Parameters': {'model': ExtraTreesRegressor(n_estimators=200, random_state=42),
    'max_depth': None,
    'n_estimators': 200},
   'Cross-Validation Mean MSE': 0.0008755586600433284,
   'Cross-Validation Mean R^2': 0,
   'Cross-Validation Mean RMSE': 0.02958984048695309,
   'Training RMSE': 4.1132105921053872e-16,
   'Training Time (seconds)': 5.419846773147583},
  'MLP Regressor': {'Best Parameters': {'model': MLPRegressor(alpha=0.1, hidden_layer_sizes=(50, 50), max_iter=10000,
                 random_state=42),
    'alpha': 0.1,
    'hidden_layer_sizes': (50, 50)},
   'Cross-Validation Mean MSE': 0.0030404589366212625,
   'Cross-Validation Mean R^2': 0,
   'Cross-Validation Mean RMSE': 0.055140356696536365,
   'Training RMSE': 0.052504725131995146,
   'Training Time (seconds)': 1.3190867900848389}},
 'Single-Output (adapted to Multi-Output)': {'SVR (MultiOutput)': {'Best Parameters': {'C': 10,
    'kernel': 'rbf',
    'model': MultiOutputRegressor(estimator=SVR(C=10))},
   'Cross-Validation Mean MSE': 0.003626830749264604,
   'Cross-Validation Mean R^2': 0,
   'Cross-Validation Mean RMSE': 0.06022317451998528,
   'Training RMSE': 0.05686616163717545,
   'Training Time (seconds)': 0.31290698051452637},
  'KNeighbors Regressor (MultiOutput)': {'Best Parameters': {'n_neighbors': 3,
    'weights': 'distance',
    'model': MultiOutputRegressor(estimator=KNeighborsRegressor(n_neighbors=3,
                                                       weights='distance'))},
   'Cross-Validation Mean MSE': 0.0010907914842112122,
   'Cross-Validation Mean R^2': 0,
   'Cross-Validation Mean RMSE': 0.03302713254600242,
   'Training RMSE': 7.955895616343309e-09,
   'Training Time (seconds)': 0.22691583633422852},
  'Gradient Boosting Regressor (MultiOutput)': {'Best Parameters': {'learning_rate': 0.1,
    'max_depth': 3,
    'n_estimators': 200,
    'model': MultiOutputRegressor(estimator=GradientBoostingRegressor(n_estimators=200))},
   'Cross-Validation Mean MSE': 0.0008403602411317434,
   'Cross-Validation Mean R^2': 0,
   'Cross-Validation Mean RMSE': 0.028988967576161512,
   'Training RMSE': 0.005220915659566175,
   'Training Time (seconds)': 175.91037797927856},
  'AdaBoost Regressor (MultiOutput)': {'Best Parameters': {'learning_rate': 0.1,
    'n_estimators': 200,
    'model': MultiOutputRegressor(estimator=AdaBoostRegressor(learning_rate=0.1,
                                                     n_estimators=200))},
   'Cross-Validation Mean MSE': 0.001400265679510122,
   'Cross-Validation Mean R^2': 0,
   'Cross-Validation Mean RMSE': 0.03742012399111101,
   'Training RMSE': 0.029891522192936976,
   'Training Time (seconds)': 39.81767296791077},
  'Hist Gradient Boosting Regressor (MultiOutput)': {'Best Parameters': {'learning_rate': 0.1,
    'max_iter': 200,
    'model': MultiOutputRegressor(estimator=HistGradientBoostingRegressor(max_iter=200,
                                                                 random_state=42))},
   'Cross-Validation Mean MSE': 0.0007780435908282246,
   'Cross-Validation Mean R^2': 0,
   'Cross-Validation Mean RMSE': 0.02789343275447152,
   'Training RMSE': 0.0027981055670238918,
   'Training Time (seconds)': 97.65210294723511},
  'XGBoost Regressor (MultiOutput)': {'Best Parameters': {'learning_rate': 0.1,
    'max_depth': 3,
    'n_estimators': 200,
    'model': MultiOutputRegressor(estimator=XGBRegressor(base_score=None, booster=None,
                                                callbacks=None,
                                                colsample_bylevel=None,
                                                colsample_bynode=None,
                                                colsample_bytree=None, device=None,
                                                early_stopping_rounds=None,
                                                enable_categorical=False,
                                                eval_metric=None,
                                                feature_types=None, gamma=None,
                                                grow_policy=None,
                                                importance_type=None,
                                                interaction_constraints=None,
                                                learning_rate=0.1, max_bin=None,
                                                max_cat_threshold=None,
                                                max_cat_to_onehot=None,
                                                max_delta_step=None, max_depth=3,
                                                max_leaves=None,
                                                min_child_weight=None, missing=nan,
                                                monotone_constraints=None,
                                                multi_strategy=None,
                                                n_estimators=200, n_jobs=None,
                                                num_parallel_tree=None,
                                                random_state=42, ...))},
   'Cross-Validation Mean MSE': 0.0008293260407401294,
   'Cross-Validation Mean R^2': 0,
   'Cross-Validation Mean RMSE': 0.028798021472665955,
   'Training RMSE': 0.0057045095006792584,
   'Training Time (seconds)': 110.89792799949646}}}

In [None]:
{'5 features': {'Avg MI': ['Feature_18',
   'Feature_2',
   'Feature_25',
   'Feature_19',
   'Feature_17'],
  'Avg MIC': ['Feature_18',
   'Feature_2',
   'Feature_19',
   'Feature_25',
   'Feature_17'],
  'Avg Correlation': ['Feature_7',
   'Feature_18',
   'Feature_2',
   'Feature_25',
   'Feature_19'],
  'Avg ANOVA F-value': ['Feature_18',
   'Feature_7',
   'Feature_2',
   'Feature_19',
   'Feature_25'],
  'Avg Distance Correlation': ['Feature_7',
   'Feature_18',
   'Feature_2',
   'Feature_19',
   'Feature_25'],
  'Most voted': ['Feature_18',
   'Feature_19',
   'Feature_12',
   'Feature_2',
   'Feature_17'],
  'Linear Regression': ['Feature_10',
   'Feature_17',
   'Feature_18',
   'Feature_20',
   'Feature_22'],
  'Polynomial Regression': ['Feature_2',
   'Feature_17',
   'Feature_18',
   'Feature_19',
   'Feature_24'],
  'Ridge Regression': ['Feature_10',
   'Feature_17',
   'Feature_18',
   'Feature_20',
   'Feature_22'],
  'Lasso Regression': ['Feature_1',
   'Feature_3',
   'Feature_5',
   'Feature_7',
   'Feature_18'],
  'ElasticNet Regression': ['Feature_1',
   'Feature_2',
   'Feature_7',
   'Feature_18',
   'Feature_22'],
  'Decision Tree Regressor': ['Feature_12',
   'Feature_17',
   'Feature_18',
   'Feature_19',
   'Feature_45'],
  'Random Forest Regressor': ['Feature_12',
   'Feature_18',
   'Feature_19',
   'Feature_21',
   'Feature_45'],
  'Extra Trees Regressor': ['Feature_12',
   'Feature_18',
   'Feature_19',
   'Feature_20',
   'Feature_45'],
  'MLP Regressor': ['Feature_1',
   'Feature_7',
   'Feature_13',
   'Feature_16',
   'Feature_18'],
  'SVR (MultiOutput)': ['Feature_4',
   'Feature_12',
   'Feature_18',
   'Feature_19',
   'Feature_22'],
  'KNeighbors Regressor (MultiOutput)': ['Feature_12',
   'Feature_18',
   'Feature_19',
   'Feature_20',
   'Feature_35'],
  'Gradient Boosting Regressor (MultiOutput)': ['Feature_12',
   'Feature_18',
   'Feature_19',
   'Feature_21',
   'Feature_32'],
  'AdaBoost Regressor (MultiOutput)': ['Feature_12',
   'Feature_17',
   'Feature_18',
   'Feature_19',
   'Feature_20'],
  'Hist Gradient Boosting Regressor (MultiOutput)': ['Feature_12',
   'Feature_17',
   'Feature_18',
   'Feature_19',
   'Feature_45'],
  'XGBoost Regressor (MultiOutput)': ['Feature_12',
   'Feature_18',
   'Feature_19',
   'Feature_20',
   'Feature_21']},
 '10 features': {'Avg MI': ['Feature_18',
   'Feature_2',
   'Feature_25',
   'Feature_19',
   'Feature_17',
   'Feature_4',
   'Feature_3',
   'Feature_15',
   'Feature_7',
   'Feature_12'],
  'Avg MIC': ['Feature_18',
   'Feature_2',
   'Feature_19',
   'Feature_25',
   'Feature_17',
   'Feature_15',
   'Feature_4',
   'Feature_12',
   'Feature_3',
   'Feature_13'],
  'Avg Correlation': ['Feature_7',
   'Feature_18',
   'Feature_2',
   'Feature_25',
   'Feature_19',
   'Feature_22',
   'Feature_15',
   'Feature_16',
   'Feature_12',
   'Feature_13'],
  'Avg ANOVA F-value': ['Feature_18',
   'Feature_7',
   'Feature_2',
   'Feature_19',
   'Feature_25',
   'Feature_22',
   'Feature_15',
   'Feature_16',
   'Feature_12',
   'Feature_17'],
  'Avg Distance Correlation': ['Feature_7',
   'Feature_18',
   'Feature_2',
   'Feature_19',
   'Feature_25',
   'Feature_15',
   'Feature_22',
   'Feature_16',
   'Feature_12',
   'Feature_13'],
  'Most voted': ['Feature_18',
   'Feature_19',
   'Feature_12',
   'Feature_20',
   'Feature_2',
   'Feature_17',
   'Feature_32',
   'Feature_22',
   'Feature_21',
   'Feature_25'],
  'Linear Regression': ['Feature_1',
   'Feature_5',
   'Feature_10',
   'Feature_13',
   'Feature_17',
   'Feature_18',
   'Feature_20',
   'Feature_22',
   'Feature_32',
   'Feature_37'],
  'Polynomial Regression': ['Feature_1',
   'Feature_2',
   'Feature_17',
   'Feature_18',
   'Feature_19',
   'Feature_20',
   'Feature_21',
   'Feature_22',
   'Feature_24',
   'Feature_32'],
  'Ridge Regression': ['Feature_1',
   'Feature_5',
   'Feature_10',
   'Feature_13',
   'Feature_17',
   'Feature_18',
   'Feature_20',
   'Feature_22',
   'Feature_32',
   'Feature_37'],
  'Lasso Regression': ['Feature_1',
   'Feature_3',
   'Feature_5',
   'Feature_6',
   'Feature_7',
   'Feature_8',
   'Feature_9',
   'Feature_10',
   'Feature_11',
   'Feature_18'],
  'ElasticNet Regression': ['Feature_1',
   'Feature_2',
   'Feature_7',
   'Feature_13',
   'Feature_18',
   'Feature_20',
   'Feature_21',
   'Feature_22',
   'Feature_24',
   'Feature_32'],
  'Decision Tree Regressor': ['Feature_4',
   'Feature_12',
   'Feature_17',
   'Feature_18',
   'Feature_19',
   'Feature_20',
   'Feature_21',
   'Feature_23',
   'Feature_45',
   'Feature_50'],
  'Random Forest Regressor': ['Feature_12',
   'Feature_17',
   'Feature_18',
   'Feature_19',
   'Feature_20',
   'Feature_21',
   'Feature_32',
   'Feature_39',
   'Feature_44',
   'Feature_45'],
  'Extra Trees Regressor': ['Feature_11',
   'Feature_12',
   'Feature_14',
   'Feature_18',
   'Feature_19',
   'Feature_20',
   'Feature_21',
   'Feature_32',
   'Feature_44',
   'Feature_45'],
  'MLP Regressor': ['Feature_1',
   'Feature_2',
   'Feature_7',
   'Feature_12',
   'Feature_13',
   'Feature_16',
   'Feature_18',
   'Feature_20',
   'Feature_21',
   'Feature_25'],
  'SVR (MultiOutput)': ['Feature_4',
   'Feature_12',
   'Feature_15',
   'Feature_18',
   'Feature_19',
   'Feature_22',
   'Feature_27',
   'Feature_37',
   'Feature_38',
   'Feature_41'],
  'KNeighbors Regressor (MultiOutput)': ['Feature_12',
   'Feature_13',
   'Feature_18',
   'Feature_19',
   'Feature_20',
   'Feature_33',
   'Feature_35',
   'Feature_36',
   'Feature_40',
   'Feature_44'],
  'Gradient Boosting Regressor (MultiOutput)': ['Feature_2',
   'Feature_12',
   'Feature_18',
   'Feature_19',
   'Feature_20',
   'Feature_21',
   'Feature_32',
   'Feature_39',
   'Feature_45',
   'Feature_46'],
  'AdaBoost Regressor (MultiOutput)': ['Feature_2',
   'Feature_12',
   'Feature_17',
   'Feature_18',
   'Feature_19',
   'Feature_20',
   'Feature_29',
   'Feature_32',
   'Feature_39',
   'Feature_45'],
  'Hist Gradient Boosting Regressor (MultiOutput)': ['Feature_11',
   'Feature_12',
   'Feature_17',
   'Feature_18',
   'Feature_19',
   'Feature_21',
   'Feature_25',
   'Feature_32',
   'Feature_39',
   'Feature_45'],
  'XGBoost Regressor (MultiOutput)': ['Feature_11',
   'Feature_12',
   'Feature_18',
   'Feature_19',
   'Feature_20',
   'Feature_21',
   'Feature_32',
   'Feature_39',
   'Feature_43',
   'Feature_45']},
 '15 features': {'Avg MI': ['Feature_18',
   'Feature_2',
   'Feature_25',
   'Feature_19',
   'Feature_17',
   'Feature_4',
   'Feature_3',
   'Feature_15',
   'Feature_7',
   'Feature_12',
   'Feature_22',
   'Feature_16',
   'Feature_21',
   'Feature_13',
   'Feature_6'],
  'Avg MIC': ['Feature_18',
   'Feature_2',
   'Feature_19',
   'Feature_25',
   'Feature_17',
   'Feature_15',
   'Feature_4',
   'Feature_12',
   'Feature_3',
   'Feature_13',
   'Feature_7',
   'Feature_16',
   'Feature_22',
   'Feature_21',
   'Feature_9'],
  'Avg Correlation': ['Feature_7',
   'Feature_18',
   'Feature_2',
   'Feature_25',
   'Feature_19',
   'Feature_22',
   'Feature_15',
   'Feature_16',
   'Feature_12',
   'Feature_13',
   'Feature_4',
   'Feature_17',
   'Feature_11',
   'Feature_38',
   'Feature_24'],
  'Avg ANOVA F-value': ['Feature_18',
   'Feature_7',
   'Feature_2',
   'Feature_19',
   'Feature_25',
   'Feature_22',
   'Feature_15',
   'Feature_16',
   'Feature_12',
   'Feature_17',
   'Feature_4',
   'Feature_13',
   'Feature_38',
   'Feature_11',
   'Feature_32'],
  'Avg Distance Correlation': ['Feature_7',
   'Feature_18',
   'Feature_2',
   'Feature_19',
   'Feature_25',
   'Feature_15',
   'Feature_22',
   'Feature_16',
   'Feature_12',
   'Feature_13',
   'Feature_4',
   'Feature_17',
   'Feature_11',
   'Feature_38',
   'Feature_6'],
  'Most voted': ['Feature_18',
   'Feature_19',
   'Feature_17',
   'Feature_12',
   'Feature_2',
   'Feature_22',
   'Feature_13',
   'Feature_20',
   'Feature_25',
   'Feature_11',
   'Feature_32',
   'Feature_7',
   'Feature_21',
   'Feature_4',
   'Feature_15'],
  'Linear Regression': ['Feature_1',
   'Feature_4',
   'Feature_5',
   'Feature_10',
   'Feature_13',
   'Feature_17',
   'Feature_18',
   'Feature_19',
   'Feature_20',
   'Feature_22',
   'Feature_25',
   'Feature_29',
   'Feature_32',
   'Feature_37',
   'Feature_41'],
  'Polynomial Regression': ['Feature_1',
   'Feature_2',
   'Feature_3',
   'Feature_11',
   'Feature_17',
   'Feature_18',
   'Feature_19',
   'Feature_20',
   'Feature_21',
   'Feature_22',
   'Feature_24',
   'Feature_30',
   'Feature_32',
   'Feature_39',
   'Feature_41'],
  'Ridge Regression': ['Feature_1',
   'Feature_5',
   'Feature_10',
   'Feature_13',
   'Feature_17',
   'Feature_18',
   'Feature_19',
   'Feature_20',
   'Feature_22',
   'Feature_25',
   'Feature_29',
   'Feature_32',
   'Feature_37',
   'Feature_41',
   'Feature_48'],
  'Lasso Regression': ['Feature_1',
   'Feature_3',
   'Feature_5',
   'Feature_6',
   'Feature_7',
   'Feature_8',
   'Feature_9',
   'Feature_10',
   'Feature_11',
   'Feature_12',
   'Feature_13',
   'Feature_14',
   'Feature_16',
   'Feature_17',
   'Feature_18'],
  'ElasticNet Regression': ['Feature_1',
   'Feature_2',
   'Feature_3',
   'Feature_6',
   'Feature_7',
   'Feature_13',
   'Feature_17',
   'Feature_18',
   'Feature_19',
   'Feature_20',
   'Feature_21',
   'Feature_22',
   'Feature_24',
   'Feature_26',
   'Feature_32'],
  'Decision Tree Regressor': ['Feature_2',
   'Feature_4',
   'Feature_12',
   'Feature_17',
   'Feature_18',
   'Feature_19',
   'Feature_20',
   'Feature_21',
   'Feature_22',
   'Feature_23',
   'Feature_25',
   'Feature_26',
   'Feature_32',
   'Feature_45',
   'Feature_50'],
  'Random Forest Regressor': ['Feature_5',
   'Feature_9',
   'Feature_11',
   'Feature_12',
   'Feature_17',
   'Feature_18',
   'Feature_19',
   'Feature_20',
   'Feature_21',
   'Feature_27',
   'Feature_32',
   'Feature_39',
   'Feature_43',
   'Feature_44',
   'Feature_45'],
  'Extra Trees Regressor': ['Feature_2',
   'Feature_11',
   'Feature_12',
   'Feature_14',
   'Feature_17',
   'Feature_18',
   'Feature_19',
   'Feature_20',
   'Feature_21',
   'Feature_22',
   'Feature_28',
   'Feature_32',
   'Feature_44',
   'Feature_45',
   'Feature_49'],
  'MLP Regressor': ['Feature_1',
   'Feature_2',
   'Feature_4',
   'Feature_7',
   'Feature_12',
   'Feature_13',
   'Feature_16',
   'Feature_17',
   'Feature_18',
   'Feature_20',
   'Feature_21',
   'Feature_22',
   'Feature_23',
   'Feature_24',
   'Feature_25'],
  'SVR (MultiOutput)': ['Feature_3',
   'Feature_4',
   'Feature_7',
   'Feature_12',
   'Feature_15',
   'Feature_17',
   'Feature_18',
   'Feature_19',
   'Feature_22',
   'Feature_27',
   'Feature_36',
   'Feature_37',
   'Feature_38',
   'Feature_39',
   'Feature_41'],
  'KNeighbors Regressor (MultiOutput)': ['Feature_11',
   'Feature_12',
   'Feature_13',
   'Feature_15',
   'Feature_18',
   'Feature_19',
   'Feature_20',
   'Feature_33',
   'Feature_35',
   'Feature_36',
   'Feature_37',
   'Feature_40',
   'Feature_42',
   'Feature_44',
   'Feature_49'],
  'Gradient Boosting Regressor (MultiOutput)': ['Feature_2',
   'Feature_11',
   'Feature_12',
   'Feature_18',
   'Feature_19',
   'Feature_20',
   'Feature_21',
   'Feature_27',
   'Feature_32',
   'Feature_37',
   'Feature_39',
   'Feature_42',
   'Feature_45',
   'Feature_46',
   'Feature_49'],
  'AdaBoost Regressor (MultiOutput)': ['Feature_2',
   'Feature_6',
   'Feature_7',
   'Feature_12',
   'Feature_13',
   'Feature_17',
   'Feature_18',
   'Feature_19',
   'Feature_20',
   'Feature_21',
   'Feature_25',
   'Feature_27',
   'Feature_31',
   'Feature_32',
   'Feature_45'],
  'Hist Gradient Boosting Regressor (MultiOutput)': ['Feature_11',
   'Feature_12',
   'Feature_17',
   'Feature_18',
   'Feature_19',
   'Feature_21',
   'Feature_25',
   'Feature_27',
   'Feature_32',
   'Feature_36',
   'Feature_39',
   'Feature_40',
   'Feature_41',
   'Feature_45',
   'Feature_49'],
  'XGBoost Regressor (MultiOutput)': ['Feature_2',
   'Feature_11',
   'Feature_12',
   'Feature_17',
   'Feature_18',
   'Feature_19',
   'Feature_20',
   'Feature_21',
   'Feature_27',
   'Feature_32',
   'Feature_39',
   'Feature_43',
   'Feature_45',
   'Feature_46',
   'Feature_49']}}