# General Imports

**Importing all libraries**

In [20]:
from sklearn.datasets import make_regression
import numpy as np

from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import RFE
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import cross_val_predict
from sklearn.inspection import permutation_importance

import plotly.graph_objects as go
from bayes_opt import BayesianOptimization
import matplotlib.pyplot as plt

import warnings

Disable all warnings

In [21]:
# Disable all warnings
warnings.filterwarnings("ignore")

# Enable warnings again
## warnings.filterwarnings("default")

In [22]:
# Generate regression toy data
n_samples = 1000
n_features = 5
X, y = make_regression(n_samples=n_samples, n_features=n_features, random_state=42)

# Split data into training and testing sets
train_ratio = 0.8
train_size = int(train_ratio * n_samples)

X_train = X[:train_size]
y_train = y[:train_size]

# Test data
X_test = X[train_size:]
y_test = y[train_size:]

In [23]:
def bo_params_generic(model, params, X_train, y_train):
    # Create the model instance with the specified parameters
    regressor = model(**params)
    
    # Assuming you have X_train, y_train defined for regression
    scores = cross_val_score(regressor, X_train, y_train, cv=10, scoring='neg_root_mean_squared_error')
    return -scores.mean()

In [24]:

#results = dt_bo.maximize(n_iter=5, init_points=20)

In [25]:
all_models = {}

# MLR

**feature selection**

In [26]:
# Assuming you have X_train and y_train defined for training data

# Initialize the Linear Regression model
lr_model = LinearRegression()

# Fit the Linear Regression model to the training data
lr_model.fit(X_train, y_train)

# Get feature coefficients
coefficients = lr_model.coef_

# Create a list of feature names or indices paired with their coefficients
feature_coefficients = list(zip(range(X_train.shape[1]), coefficients))

# Sort the features based on absolute coefficient values in descending order
feature_coefficients.sort(key=lambda x: abs(x[1]), reverse=True)

# Print the ranked feature coefficients
print("Feature Coefficients:")
for feature_index, coefficient in feature_coefficients:
    print(f"Feature {feature_index}: {coefficient}")

Feature Coefficients:
Feature 2: 46.07121713482753
Feature 3: 28.6279862111941
Feature 4: 24.74629812331462
Feature 1: 18.993474366101992
Feature 0: 16.823657910849178


**Hyperparameter optimalisatie**

In [27]:
# Define the MLR model evaluation function using cross-validation
def evaluate_mlr_model(fit_intercept):
    # Convert fit_intercept to a boolean value
    fit_intercept = bool(fit_intercept)
    
    # Create and configure the MLR model
    model = LinearRegression(fit_intercept=fit_intercept)
    
    # Perform cross-validation on the training data
    scores = cross_val_score(model, X_train, y_train, cv=5, scoring='neg_mean_squared_error')
    
    # Return the negative mean squared error (Bayesian Optimization maximizes the objective)
    return np.mean(scores)

# Define the parameter ranges for Bayesian Optimization
params_ranges = {
    'fit_intercept': (0, 1)
}

# Perform Bayesian Optimization
mlr_bo = BayesianOptimization(f=evaluate_mlr_model, pbounds=params_ranges)
mlr_bo.maximize(n_iter=10, init_points=5)

# Get the best hyperparameters
best_params = mlr_bo.max['params']
best_fit_intercept = bool(best_params['fit_intercept'])

# Create the best MLR model with the tuned hyperparameters
best_model_mlr = LinearRegression(fit_intercept=best_fit_intercept)

# Fit the best model to the training data
best_model_mlr.fit(X_train, y_train)

# Calculate evaluation metrics
y_pred = best_model_mlr.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
aed = np.abs(y_test.mean() - y_pred)
r2 = r2_score(y_test, y_pred)
n = len(X_test)
k = X_test.shape[1]
r2_adj = 1 - (1 - r2) * ((n - 1) / (n - k - 1))

# Print the evaluation metrics
print("RMSE:", rmse)
print("MSE:", mse)
print("MAE:", mae)
print("AED:", aed)
print("R2:", r2)
print("Adjusted R2:", r2_adj)

|   iter    |  target   | fit_in... |
-------------------------------------
| [0m1        [0m | [0m-2.967e-2[0m | [0m0.477    [0m |
| [0m2        [0m | [0m-2.967e-2[0m | [0m0.8143   [0m |
| [0m3        [0m | [0m-2.967e-2[0m | [0m0.8421   [0m |
| [0m4        [0m | [0m-2.967e-2[0m | [0m0.6926   [0m |
| [0m5        [0m | [0m-2.967e-2[0m | [0m0.04117  [0m |
| [0m6        [0m | [0m-2.967e-2[0m | [0m0.0001245[0m |
| [0m7        [0m | [0m-2.967e-2[0m | [0m0.9998   [0m |
| [0m8        [0m | [0m-2.967e-2[0m | [0m0.2635   [0m |
| [0m9        [0m | [0m-2.967e-2[0m | [0m5.436e-05[0m |
| [0m10       [0m | [0m-2.967e-2[0m | [0m0.9999   [0m |
| [0m11       [0m | [0m-2.967e-2[0m | [0m5.989e-05[0m |
| [0m12       [0m | [0m-2.967e-2[0m | [0m0.7488   [0m |
| [0m13       [0m | [0m-2.967e-2[0m | [0m0.6068   [0m |
| [0m14       [0m | [0m-2.967e-2[0m | [0m0.7373   [0m |
| [0m15       [0m | [0m-2.967e-2[0m | [0m0.04081  

# Support Vector Machines

**Feature selection**

In [28]:
# Assuming you have X_train and y_train defined for training data

# Initialize the SVM model
svm_model = SVR(kernel='rbf')  # Replace 'rbf' with your desired kernel

# Fit the SVM model to the training data
svm_model.fit(X_train, y_train)

# Compute permutation importances
result = permutation_importance(svm_model, X_train, y_train, n_repeats=10, random_state=42)

# Get feature importances
importances = result.importances_mean

# Create a list of feature names or indices paired with their importances
feature_importances = list(zip(range(X_train.shape[1]), importances))

# Sort the features based on importance in descending order
feature_importances.sort(key=lambda x: x[1], reverse=True)

# Print the ranked feature importances
print("Feature Importances:")
for feature_index, importance in feature_importances:
    print(f"Feature {feature_index}: {importance}")

Feature Importances:
Feature 2: 0.5100907245192264
Feature 3: 0.1812236125303029
Feature 4: 0.13843292474891247
Feature 1: 0.10140750241353605
Feature 0: 0.07508208211413692


**Hyperparameter optimalisatie**

In [29]:
# Define the SVM model evaluation function using cross-validation
def evaluate_svm_model(C, epsilon, gamma):
    # Create and configure the SVM model
    model = SVR(C=C, epsilon=epsilon, gamma=gamma)
    
    # Perform cross-validation on the training data
    scores = cross_val_score(model, X_train, y_train, cv=5, scoring='neg_mean_squared_error')
    
    # Return the negative mean squared error (Bayesian Optimization maximizes the objective)
    return np.mean(scores)

# Define the parameter ranges for Bayesian Optimization
params_ranges = {
    'C': (0.1, 10),
    'epsilon': (0.01, 1),
    'gamma': (0.001, 0.1)
}

# Perform Bayesian Optimization
svm_bo = BayesianOptimization(f=evaluate_svm_model, pbounds=params_ranges)
svm_bo.maximize(n_iter=10, init_points=5)

# Get the best hyperparameters
best_params = svm_bo.max['params']
best_C = best_params['C']
best_epsilon = best_params['epsilon']
best_gamma = best_params['gamma']

# Create the best SVM model with the tuned hyperparameters
best_model_svm = SVR(C=best_C, epsilon=best_epsilon, gamma=best_gamma)

# Fit the best model to the training data
best_model_svm.fit(X_train, y_train)

# Calculate evaluation metrics
y_pred = best_model_svm.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
aed = np.abs(y_test.mean() - y_pred)
r2 = r2_score(y_test, y_pred)
n = len(X_test)
k = X_test.shape[1]
r2_adj = 1 - (1 - r2) * ((n - 1) / (n - k - 1))

# Print the evaluation metrics
print("RMSE:", rmse)
print("MSE:", mse)
print("MAE:", mae)
print("AED:", aed)
print("R2:", r2)
print("Adjusted R2:", r2_adj)

|   iter    |  target   |     C     |  epsilon  |   gamma   |
-------------------------------------------------------------
| [0m1        [0m | [0m-59.1    [0m | [0m7.93     [0m | [0m0.5033   [0m | [0m0.0278   [0m |
| [0m2        [0m | [0m-165.8   [0m | [0m4.952    [0m | [0m0.2117   [0m | [0m0.05368  [0m |
| [0m3        [0m | [0m-317.7   [0m | [0m3.953    [0m | [0m0.9692   [0m | [0m0.08861  [0m |
| [0m4        [0m | [0m-98.0    [0m | [0m4.555    [0m | [0m0.1411   [0m | [0m0.03447  [0m |
| [0m5        [0m | [0m-3.253e+0[0m | [0m1.86     [0m | [0m0.5456   [0m | [0m0.004355 [0m |
| [0m6        [0m | [0m-209.1   [0m | [0m10.0     [0m | [0m1.0      [0m | [0m0.1      [0m |
| [0m7        [0m | [0m-3.391e+0[0m | [0m6.564    [0m | [0m1.0      [0m | [0m0.001    [0m |
| [0m8        [0m | [0m-119.5   [0m | [0m8.98     [0m | [0m0.1639   [0m | [0m0.05013  [0m |
| [0m9        [0m | [0m-3.143e+0[0m | [0m8.749    [0m 

# Random Forest

**Feature selection**

In [30]:
# Assuming you have X_train and y_train defined for training data

# Initialize the Random Forest model
rf_model = RandomForestRegressor()

# Fit the Random Forest model to the training data
rf_model.fit(X_train, y_train)

# Get feature importances
importances = rf_model.feature_importances_

# Create a list of feature names or indices paired with their importances
feature_importances = list(zip(range(X_train.shape[1]), importances))

# Sort the features based on importance in descending order
feature_importances.sort(key=lambda x: x[1], reverse=True)

# Print the ranked feature importances
print("Feature Importances:")
for feature_index, importance in feature_importances:
    print(f"Feature {feature_index}: {importance}")


Feature Importances:
Feature 2: 0.5529367952796153
Feature 3: 0.1720251981639729
Feature 4: 0.13322349028164135
Feature 1: 0.0838755989380649
Feature 0: 0.05793891733670567


**Hyperparameter optimalisatie**

In [31]:
params_ranges = {
    'n_estimators': (10, 100),
    'max_depth': (1, 20),
    'min_samples_leaf': (1, 10),
    'min_weight_fraction_leaf': (0.0, 0.5),
    'max_features': (0.1, 1),
    'max_leaf_nodes': (10, 100)
}

# Example usage with Random Forest
model = RandomForestRegressor
dt_bo = BayesianOptimization(f=lambda n_estimators, max_depth, min_samples_leaf, min_weight_fraction_leaf,
                                    max_features, max_leaf_nodes: bo_params_generic(model, {
                                        'n_estimators': int(round(n_estimators)),
                                        'max_depth': int(round(max_depth)),
                                        'min_samples_leaf': round(min_samples_leaf),
                                        'min_weight_fraction_leaf': min_weight_fraction_leaf,
                                        'max_features': max_features,
                                        'max_leaf_nodes': int(round(max_leaf_nodes))
                                    }, X_train, y_train),
                             pbounds=params_ranges)
results = dt_bo.maximize(n_iter=5, init_points=20)
params = dt_bo.max['params']

# Creating a model with the best hyperparameters
best_model_random_forest = model(
    n_estimators=int(round(params['n_estimators'])),
    max_depth=int(round(params['max_depth'])),
    min_samples_leaf=round(params['min_samples_leaf']),
    min_weight_fraction_leaf=params['min_weight_fraction_leaf'],
    max_features=params['max_features'],
    max_leaf_nodes=int(round(params['max_leaf_nodes']))
)

# Fit the model
best_model_random_forest.fit(X_train, y_train)

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

y_pred = best_model_random_forest.predict(X_test)

mse_scores = mean_squared_error(y_test, y_pred)
mae_scores = mean_absolute_error(y_test, y_pred)
aed_scores = np.abs(y_test.mean() - y_pred)
r2_scores = r2_score(y_test, y_pred)

n = len(X_test)
k = X_test.shape[1]
r2_adj_scores = 1 - (1 - r2_scores) * ((n - 1) / (n - k - 1))
rmse_scores = np.sqrt(mse_scores)

print("MSE:", mse_scores)
print("MAE:", mae_scores)
print("AED:", aed_scores)
print("R2:", r2_scores)
print("Adjusted R2:", r2_adj_scores)
print("RMSE:", rmse_scores)


|   iter    |  target   | max_depth | max_fe... | max_le... | min_sa... | min_we... | n_esti... |
-------------------------------------------------------------------------------------------------
| [0m1        [0m | [0m51.26    [0m | [0m10.38    [0m | [0m0.6169   [0m | [0m26.0     [0m | [0m1.41     [0m | [0m0.3626   [0m | [0m92.35    [0m |
| [95m2        [0m | [95m53.26    [0m | [95m18.45    [0m | [95m0.5303   [0m | [95m84.86    [0m | [95m8.705    [0m | [95m0.4477   [0m | [95m16.65    [0m |
| [0m3        [0m | [0m40.12    [0m | [0m16.31    [0m | [0m0.8918   [0m | [0m60.06    [0m | [0m8.622    [0m | [0m0.1275   [0m | [0m17.4     [0m |
| [0m4        [0m | [0m47.05    [0m | [0m12.36    [0m | [0m0.827    [0m | [0m94.01    [0m | [0m4.661    [0m | [0m0.2731   [0m | [0m63.03    [0m |
| [0m5        [0m | [0m47.85    [0m | [0m14.73    [0m | [0m0.378    [0m | [0m30.01    [0m | [0m8.245    [0m | [0m0.1692   [0m | [0m12

# neural network

**Feature selection**

In [32]:
# Assuming you have X and y defined for the dataset

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the input features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize the neural network regressor
nn_model = MLPRegressor(hidden_layer_sizes=(10, 10))  # Adjust the architecture as needed

# Fit the neural network model to the training data
nn_model.fit(X_train_scaled, y_train)

# Perform feature selection using Recursive Feature Elimination (RFE)
selector = RFE(estimator=nn_model, n_features_to_select=10)  # Adjust n_features_to_select as needed
selector.fit(X_train_scaled, y_train)

# Transform the training and testing sets to keep only the selected features
X_train_selected = selector.transform(X_train_scaled)
X_test_selected = selector.transform(X_test_scaled)

# Print the selected feature support
selected_support = selector.support_
print("Selected Feature Support:")
print(selected_support)

Selected Feature Support:
[ True  True  True  True  True]


**Hyperparameter optimalisatie**

In [33]:
params_ranges = {
    'hidden_layer_sizes': (10, 100),
    'alpha': (0.0001, 0.1),
    'learning_rate_init': (0.001, 0.1),
    'max_iter': (100, 1000),
}

# Example usage with Neural Network
model = MLPRegressor
dt_bo = BayesianOptimization(f=lambda hidden_layer_sizes, alpha, learning_rate_init, max_iter:
                                    bo_params_generic(model, {
                                        'hidden_layer_sizes': (int(round(hidden_layer_sizes)),),
                                        'alpha': alpha,
                                        'learning_rate_init': learning_rate_init,
                                        'max_iter': int(round(max_iter))
                                    }, X_train, y_train),
                             pbounds=params_ranges)

results = dt_bo.maximize(n_iter=5, init_points=20)
params = dt_bo.max['params']

# Creating a model with the best hyperparameters
best_model_neural_network = model(
    hidden_layer_sizes=(int(round(params['hidden_layer_sizes'])),),
    alpha=params['alpha'],
    learning_rate_init=params['learning_rate_init'],
    max_iter=int(round(params['max_iter']))
)


# Fit the model
best_model_neural_network.fit(X_train, y_train)


# Calculate the evaluation metrics
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
aed = np.abs(y_test.mean() - y_pred)
r2 = r2_score(y_test, y_pred)
n = len(X_test)
k = X_test.shape[1]
r2_adj = 1 - (1 - r2) * ((n - 1) / (n - k - 1))

# Print the evaluation metrics
print("RMSE:", rmse)
print("MSE:", mse)
print("MAE:", mae)
print("AED:", aed)
print("R2:", r2)
print("Adjusted R2:", r2_adj)


|   iter    |  target   |   alpha   | hidden... | learni... | max_iter  |
-------------------------------------------------------------------------
| [0m1        [0m | [0m0.2209   [0m | [0m0.0555   [0m | [0m33.54    [0m | [0m0.05828  [0m | [0m805.2    [0m |
| [95m2        [0m | [95m0.2213   [0m | [95m0.01985  [0m | [95m54.01    [0m | [95m0.06171  [0m | [95m313.9    [0m |
| [95m3        [0m | [95m6.177    [0m | [95m0.02684  [0m | [95m22.36    [0m | [95m0.007285 [0m | [95m124.8    [0m |
| [0m4        [0m | [0m0.1912   [0m | [0m0.04255  [0m | [0m81.1     [0m | [0m0.05324  [0m | [0m316.6    [0m |
| [0m5        [0m | [0m0.5038   [0m | [0m0.04832  [0m | [0m31.35    [0m | [0m0.03214  [0m | [0m185.9    [0m |
| [0m6        [0m | [0m0.2045   [0m | [0m0.07295  [0m | [0m38.66    [0m | [0m0.08226  [0m | [0m300.7    [0m |
| [0m7        [0m | [0m0.2572   [0m | [0m0.08061  [0m | [0m30.82    [0m | [0m0.08347  [0m | [0m19

In [34]:
# Append model to list
all_models['Random forest'] = best_model_random_forest
all_models['SVM'] = best_model_svm
all_models['Neural Network'] = best_model_neural_network
all_models['MLR']= best_model_mlr

# plotting the bar chart

**using train dataset**

In [35]:
# Evaluation metric labels
metric_labels = ['RMSE', 'MSE', 'MAE', 'AED', 'R2', 'Adjusted R2']

# Calculate evaluation metrics using cross-validation for each model
metrics = {
    'RMSE': lambda y_true, y_pred: np.sqrt(mean_squared_error(y_true, y_pred)),
    'MSE': mean_squared_error,
    'MAE': mean_absolute_error,
    'AED': lambda y_true, y_pred: np.abs(np.mean(y_true) - y_pred),
    'R2': r2_score,
    'Adjusted R2': lambda y_true, y_pred: 1 - ((1 - r2_score(y_true, y_pred)) * (len(y_true) - 1) / (len(y_true) - X_train.shape[1] - 1))
}

model_names = ['Random Forest', 'SVM', 'Neural Network', 'MLR']

models = {
    'Random Forest': best_model_random_forest,
    'SVM': best_model_svm,
    'Neural Network': best_model_neural_network,
    'MLR': best_model_mlr
}

metric_scores = {metric: [] for metric in metric_labels}

for model_name in model_names:
    model = models[model_name]
    y_pred = cross_val_predict(model, X_train, y_train, cv=5)
    for metric in metric_labels:
        metric_scores[metric].append(metrics[metric](y_train, y_pred))

# Plotting the bar chart
fig = go.Figure()

for metric in metric_labels:
    fig.add_trace(go.Bar(
        x=model_names,
        y=metric_scores[metric],
        name=metric
    ))

# Updating the layout
fig.update_layout(
    title='Evaluation Metrics Comparison',
    xaxis_title='Models',
    yaxis_title='Scores',
    barmode='group'
)

# Display the plot
fig.show()


**using test dataset**

In [36]:
# Evaluation metric labels
metric_labels = ['RMSE', 'MSE', 'MAE', 'AED', 'R2', 'Adjusted R2']

# Calculate evaluation metrics using cross-validation for each model
metrics = {
    'RMSE': lambda y_true, y_pred: np.sqrt(mean_squared_error(y_true, y_pred)),
    'MSE': mean_squared_error,
    'MAE': mean_absolute_error,
    'AED': lambda y_true, y_pred: np.abs(np.mean(y_true) - y_pred),
    'R2': r2_score,
    'Adjusted R2': lambda y_true, y_pred: 1 - ((1 - r2_score(y_true, y_pred)) * (len(y_true) - 1) / (len(y_true) - X_train.shape[1] - 1))
}

model_names = ['Random Forest', 'SVM', 'Neural Network', 'MLR']

models = {
    'Random Forest': best_model_random_forest,
    'SVM': best_model_svm,
    'Neural Network': best_model_neural_network,
    'MLR': best_model_mlr
}

# Train each model on the training data and predict the test data
predictions = {}
for model_name in model_names:
    model = models[model_name]
    model.fit(X_train, y_train)  # Train the model
    predictions[model_name] = model.predict(X_test)  # Predict the test data

# Calculate each metric for each model's predictions
metric_scores = {metric: [] for metric in metric_labels}
for model_name in model_names:
    y_pred = predictions[model_name]
    for metric in metric_labels:
        metric_scores[metric].append(metrics[metric](y_test, y_pred))  # Use the test data here

# Plotting the bar chart
fig = go.Figure()

for metric in metric_labels:
    fig.add_trace(go.Bar(
        x=model_names,
        y=metric_scores[metric],
        name=metric
    ))

# Updating the layout
fig.update_layout(
    title='Evaluation Metrics Comparison',
    xaxis_title='Models',
    yaxis_title='Scores',
    barmode='group'
)

# Display the plot
fig.show()