In [5]:
from sklearn.datasets import make_regression
import numpy as np
import warnings

# Disable all warnings
warnings.filterwarnings("ignore")

# Your code here without warnings

# Enable warnings again (optional)
warnings.filterwarnings("default")

# Generate regression toy data
n_samples = 1000
n_features = 5
X, y = make_regression(n_samples=n_samples, n_features=n_features, random_state=42)

# Split data into training and testing sets
train_ratio = 0.8
train_size = int(train_ratio * n_samples)

X_train = X[:train_size]
y_train = y[:train_size]

# Test data
X_test = X[train_size:]
y_test = y[train_size:]

In [6]:
from sklearn.model_selection import cross_val_score


def bo_params_generic(model, params, X_train, y_train):
    # Create the model instance with the specified parameters
    regressor = model(**params)
    
    # Assuming you have X_train, y_train defined for regression
    scores = cross_val_score(regressor, X_train, y_train, cv=10, scoring='neg_root_mean_squared_error')
    return -scores.mean()

In [7]:

#results = dt_bo.maximize(n_iter=5, init_points=20)

In [8]:
all_models = {}

# MLR

**feature selection**

In [9]:
from sklearn.linear_model import LinearRegression

# Assuming you have X_train and y_train defined for training data

# Initialize the Linear Regression model
lr_model = LinearRegression()

# Fit the Linear Regression model to the training data
lr_model.fit(X_train, y_train)

# Get feature coefficients
coefficients = lr_model.coef_

# Create a list of feature names or indices paired with their coefficients
feature_coefficients = list(zip(range(X_train.shape[1]), coefficients))

# Sort the features based on absolute coefficient values in descending order
feature_coefficients.sort(key=lambda x: abs(x[1]), reverse=True)

# Print the ranked feature coefficients
print("Feature Coefficients:")
for feature_index, coefficient in feature_coefficients:
    print(f"Feature {feature_index}: {coefficient}")

Feature Coefficients:
Feature 2: 46.07121713482753
Feature 3: 28.6279862111941
Feature 4: 24.74629812331462
Feature 1: 18.993474366101992
Feature 0: 16.823657910849178


**Hyperparameter optimalisatie**
#heb deze even uitgecomment omdat hij breekt als er geen min en max values zijn gedefinieerd

In [10]:
import subprocess
import sys
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import cross_val_score
from bayes_opt import BayesianOptimization
from sklearn.linear_model import LinearRegression

def install(package):
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])

install("bayesian-optimization")

# Define the MLR model evaluation function using cross-validation
def evaluate_mlr_model(fit_intercept):
    # Convert fit_intercept to a boolean value
    fit_intercept = bool(fit_intercept)
    
    # Create and configure the MLR model
    model = LinearRegression(fit_intercept=fit_intercept)
    
    # Perform cross-validation on the training data
    scores = cross_val_score(model, X_train, y_train, cv=5, scoring='neg_mean_squared_error')
    
    # Return the negative mean squared error (Bayesian Optimization maximizes the objective)
    return np.mean(scores)

# Define the parameter ranges for Bayesian Optimization
params_ranges = {
    'fit_intercept': (0, 1)
}

# Perform Bayesian Optimization
mlr_bo = BayesianOptimization(f=evaluate_mlr_model, pbounds=params_ranges)
mlr_bo.maximize(n_iter=10, init_points=5)

# Get the best hyperparameters
best_params = mlr_bo.max['params']
best_fit_intercept = bool(best_params['fit_intercept'])

# Create the best MLR model with the tuned hyperparameters
best_model_mlr = LinearRegression(fit_intercept=best_fit_intercept)

# Fit the best model to the training data
best_model_mlr.fit(X_train, y_train)

# Calculate evaluation metrics
y_pred = best_model_mlr.predict(X_train)
rmse = np.sqrt(mean_squared_error(y_train, y_pred))
mse = mean_squared_error(y_train, y_pred)
mae = mean_absolute_error(y_train, y_pred)
aed = np.abs(y_train.mean() - y_pred)
r2 = r2_score(y_train, y_pred)
n = len(X_train)
k = X_train.shape[1]
r2_adj = 1 - (1 - r2) * ((n - 1) / (n - k - 1))

# Print the evaluation metrics
print("RMSE:", rmse)
print("MSE:", mse)
print("MAE:", mae)
print("AED:", aed)
print("R2:", r2)
print("Adjusted R2:", r2_adj)

|   iter    |  target   | fit_in... |
-------------------------------------
| [0m1        [0m | [0m-2.967e-2[0m | [0m0.9833   [0m |
| [0m2        [0m | [0m-2.967e-2[0m | [0m0.5384   [0m |
| [0m3        [0m | [0m-2.967e-2[0m | [0m0.3624   [0m |
| [0m4        [0m | [0m-2.967e-2[0m | [0m0.4769   [0m |
| [0m5        [0m | [0m-2.967e-2[0m | [0m0.303    [0m |
| [0m6        [0m | [0m-2.967e-2[0m | [0m2.899e-05[0m |
| [0m7        [0m | [0m-2.967e-2[0m | [0m1.0      [0m |
| [0m8        [0m | [0m-2.967e-2[0m | [0m0.3602   [0m |
| [0m9        [0m | [0m-2.967e-2[0m | [0m0.0006675[0m |
| [0m10       [0m | [0m-2.967e-2[0m | [0m0.9998   [0m |
| [0m11       [0m | [0m-2.967e-2[0m | [0m0.0003239[0m |
| [0m12       [0m | [0m-2.967e-2[0m | [0m0.9771   [0m |
| [0m13       [0m | [0m-2.967e-2[0m | [0m0.08925  [0m |
| [0m14       [0m | [0m-2.967e-2[0m | [0m0.5353   [0m |
| [0m15       [0m | [0m-2.967e-2[0m | [0m0.3202   

# Support Vector Machines

**Feature selection**

In [11]:
from sklearn.inspection import permutation_importance
from sklearn.svm import SVR

# Assuming you have X_train and y_train defined for training data

# Initialize the SVM model
svm_model = SVR(kernel='rbf')  # Replace 'rbf' with your desired kernel

# Fit the SVM model to the training data
svm_model.fit(X_train, y_train)

# Compute permutation importances
result = permutation_importance(svm_model, X_train, y_train, n_repeats=10, random_state=42)

# Get feature importances
importances = result.importances_mean

# Create a list of feature names or indices paired with their importances
feature_importances = list(zip(range(X_train.shape[1]), importances))

# Sort the features based on importance in descending order
feature_importances.sort(key=lambda x: x[1], reverse=True)

# Print the ranked feature importances
print("Feature Importances:")
for feature_index, importance in feature_importances:
    print(f"Feature {feature_index}: {importance}")

Feature Importances:
Feature 2: 0.5100907245192264
Feature 3: 0.1812236125303029
Feature 4: 0.13843292474891247
Feature 1: 0.10140750241353605
Feature 0: 0.07508208211413692


**Hyperparameter optimalisatie**

In [12]:
import subprocess
import sys
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVR
from bayes_opt import BayesianOptimization

def install(package):
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])

install("bayesian-optimization")
 
# Define the SVM model evaluation function using cross-validation
def evaluate_svm_model(C, epsilon, gamma):
    # Create and configure the SVM model
    model = SVR(C=C, epsilon=epsilon, gamma=gamma)
    
    # Perform cross-validation on the training data
    scores = cross_val_score(model, X_train, y_train, cv=5, scoring='neg_mean_squared_error')
    
    # Return the negative mean squared error (Bayesian Optimization maximizes the objective)
    return np.mean(scores)

# Define the parameter ranges for Bayesian Optimization
params_ranges = {
    'C': (0.1, 10),
    'epsilon': (0.01, 1),
    'gamma': (0.001, 0.1)
}

# Perform Bayesian Optimization
svm_bo = BayesianOptimization(f=evaluate_svm_model, pbounds=params_ranges)
svm_bo.maximize(n_iter=10, init_points=5)

# Get the best hyperparameters
best_params = svm_bo.max['params']
best_C = best_params['C']
best_epsilon = best_params['epsilon']
best_gamma = best_params['gamma']

# Create the best SVM model with the tuned hyperparameters
best_model_svm = SVR(C=best_C, epsilon=best_epsilon, gamma=best_gamma)

# Fit the best model to the training data
best_model_svm.fit(X_train, y_train)

# Calculate evaluation metrics
y_pred = best_model_svm.predict(X_train)
rmse = np.sqrt(mean_squared_error(y_train, y_pred))
mse = mean_squared_error(y_train, y_pred)
mae = mean_absolute_error(y_train, y_pred)
aed = np.abs(y_train.mean() - y_pred)
r2 = r2_score(y_train, y_pred)
n = len(X_train)
k = X_train.shape[1]
r2_adj = 1 - (1 - r2) * ((n - 1) / (n - k - 1))

# Print the evaluation metrics
print("RMSE:", rmse)
print("MSE:", mse)
print("MAE:", mae)
print("AED:", aed)
print("R2:", r2)
print("Adjusted R2:", r2_adj)

|   iter    |  target   |     C     |  epsilon  |   gamma   |
-------------------------------------------------------------
| [0m1        [0m | [0m-86.18   [0m | [0m8.172    [0m | [0m0.8988   [0m | [0m0.0371   [0m |
| [95m2        [0m | [95m-54.55   [0m | [95m5.671    [0m | [95m0.1908   [0m | [95m0.01185  [0m |
| [0m3        [0m | [0m-191.4   [0m | [0m2.43     [0m | [0m0.231    [0m | [0m0.04242  [0m |
| [0m4        [0m | [0m-3.377e+0[0m | [0m0.2177   [0m | [0m0.768    [0m | [0m0.06261  [0m |
| [0m5        [0m | [0m-864.5   [0m | [0m1.892    [0m | [0m0.8337   [0m | [0m0.02563  [0m |
| [0m6        [0m | [0m-75.6    [0m | [0m4.026    [0m | [0m0.9872   [0m | [0m0.02489  [0m |
| [0m7        [0m | [0m-208.1   [0m | [0m10.0     [0m | [0m0.01     [0m | [0m0.1      [0m |
| [0m8        [0m | [0m-263.9   [0m | [0m7.268    [0m | [0m0.01     [0m | [0m0.1      [0m |
| [0m9        [0m | [0m-356.5   [0m | [0m4.022    

# Random Forest

**Feature selection**

In [13]:
from sklearn.ensemble import RandomForestRegressor

# Assuming you have X_train and y_train defined for training data

# Initialize the Random Forest model
rf_model = RandomForestRegressor()

# Fit the Random Forest model to the training data
rf_model.fit(X_train, y_train)

# Get feature importances
importances = rf_model.feature_importances_

# Create a list of feature names or indices paired with their importances
feature_importances = list(zip(range(X_train.shape[1]), importances))

# Sort the features based on importance in descending order
feature_importances.sort(key=lambda x: x[1], reverse=True)

# Print the ranked feature importances
print("Feature Importances:")
for feature_index, importance in feature_importances:
    print(f"Feature {feature_index}: {importance}")


Feature Importances:
Feature 2: 0.5481717110748182
Feature 3: 0.17688212054349414
Feature 4: 0.13537528435417223
Feature 1: 0.08283216041124039
Feature 0: 0.056738723616274925


**Hyperparameter optimalisatie**

In [14]:
import subprocess
import sys
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor
from bayes_opt import BayesianOptimization

def install(package):
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])

install("bayesian-optimization")

params_ranges = {
    'n_estimators': (10, 100),
    'max_depth': (1, 20),
    'min_samples_leaf': (1, 10),
    'min_weight_fraction_leaf': (0.0, 0.5),
    'max_features': (0.1, 1),
    'max_leaf_nodes': (10, 100)
}

# Example usage with Random Forest
model = RandomForestRegressor
dt_bo = BayesianOptimization(f=lambda n_estimators, max_depth, min_samples_leaf, min_weight_fraction_leaf,
                                    max_features, max_leaf_nodes: bo_params_generic(model, {
                                        'n_estimators': int(round(n_estimators)),
                                        'max_depth': int(round(max_depth)),
                                        'min_samples_leaf': round(min_samples_leaf),
                                        'min_weight_fraction_leaf': min_weight_fraction_leaf,
                                        'max_features': max_features,
                                        'max_leaf_nodes': int(round(max_leaf_nodes))
                                    }, X_train, y_train),
                             pbounds=params_ranges)
results = dt_bo.maximize(n_iter=5, init_points=20)
params = dt_bo.max['params']

# Creating a model with the best hyperparameters
best_model_random_forest = model(
    n_estimators=int(round(params['n_estimators'])),
    max_depth=int(round(params['max_depth'])),
    min_samples_leaf=round(params['min_samples_leaf']),
    min_weight_fraction_leaf=params['min_weight_fraction_leaf'],
    max_features=params['max_features'],
    max_leaf_nodes=int(round(params['max_leaf_nodes']))
)

# Fit the model
best_model_random_forest.fit(X_train, y_train)

# Calculate evaluation metrics
mse_scores = -cross_val_score(best_model_random_forest, X_train, y_train, cv=5, scoring='neg_mean_squared_error')
mae_scores = -cross_val_score(best_model_random_forest, X_train, y_train, cv=5, scoring='neg_mean_absolute_error')
aed_scores = np.abs(y_train.mean() - best_model_random_forest.predict(X_train))
r2_scores = cross_val_score(best_model_random_forest, X_train, y_train, cv=5, scoring='r2')
n = len(X_train)
k = X_train.shape[1]
r2_adj_scores = 1 - (1 - r2_scores) * ((n - 1) / (n - k - 1))
rmse_scores = np.sqrt(-mse_scores)

# Print the evaluation metrics
print("Average MSE:", np.mean(mse_scores))
print("Average MAE:", np.mean(mae_scores))
print("Average AED:", np.mean(aed_scores))
print("Average R2:", np.mean(r2_scores))
print("Average Adjusted R2:", np.mean(r2_adj_scores))
print("Average RMSE:", np.mean(rmse_scores))

|   iter    |  target   | max_depth | max_fe... | max_le... | min_sa... | min_we... | n_esti... |
-------------------------------------------------------------------------------------------------
| [0m1        [0m | [0m52.89    [0m | [0m15.61    [0m | [0m0.5747   [0m | [0m70.34    [0m | [0m3.68     [0m | [0m0.4699   [0m | [0m82.83    [0m |
| [95m2        [0m | [95m55.94    [0m | [95m16.28    [0m | [95m0.2042   [0m | [95m63.94    [0m | [95m3.206    [0m | [95m0.32     [0m | [95m44.38    [0m |
| [0m3        [0m | [0m31.81    [0m | [0m4.128    [0m | [0m0.9186   [0m | [0m80.97    [0m | [0m6.259    [0m | [0m9.358e-05[0m | [0m32.07    [0m |
| [0m4        [0m | [0m53.07    [0m | [0m12.65    [0m | [0m0.5922   [0m | [0m89.88    [0m | [0m4.731    [0m | [0m0.3492   [0m | [0m78.99    [0m |
| [0m5        [0m | [0m52.85    [0m | [0m6.812    [0m | [0m0.3202   [0m | [0m40.15    [0m | [0m9.837    [0m | [0m0.2598   [0m | [0m87

  rmse_scores = np.sqrt(-mse_scores)


# neural network

**Feature selection**

In [15]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.feature_selection import RFE

# Assuming you have X and y defined for the dataset

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the input features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize the neural network regressor
nn_model = MLPRegressor(hidden_layer_sizes=(10, 10))  # Adjust the architecture as needed

# Fit the neural network model to the training data
nn_model.fit(X_train_scaled, y_train)

# Perform feature selection using Recursive Feature Elimination (RFE)
selector = RFE(estimator=nn_model, n_features_to_select=10)  # Adjust n_features_to_select as needed
selector.fit(X_train_scaled, y_train)

# Transform the training and testing sets to keep only the selected features
X_train_selected = selector.transform(X_train_scaled)
X_test_selected = selector.transform(X_test_scaled)

# Print the selected feature support
selected_support = selector.support_
print("Selected Feature Support:")
print(selected_support)



Selected Feature Support:
[ True  True  True  True  True]




**Hyperparameter optimalisatie**

In [16]:
import subprocess
import sys
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import cross_val_score
from sklearn.neural_network import MLPRegressor
from bayes_opt import BayesianOptimization

def install(package):
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])

install("bayesian-optimization")

params_ranges = {
    'hidden_layer_sizes': (10, 100),
    'alpha': (0.0001, 0.1),
    'learning_rate_init': (0.001, 0.1),
    'max_iter': (100, 1000),
}

# Example usage with Neural Network
model = MLPRegressor
dt_bo = BayesianOptimization(f=lambda hidden_layer_sizes, alpha, learning_rate_init, max_iter:
                                    bo_params_generic(model, {
                                        'hidden_layer_sizes': (int(round(hidden_layer_sizes)),),
                                        'alpha': alpha,
                                        'learning_rate_init': learning_rate_init,
                                        'max_iter': int(round(max_iter))
                                    }, X_train, y_train),
                             pbounds=params_ranges)

results = dt_bo.maximize(n_iter=5, init_points=20)
params = dt_bo.max['params']

# Creating a model with the best hyperparameters
best_model_neural_network = model(
    hidden_layer_sizes=(int(round(params['hidden_layer_sizes'])),),
    alpha=params['alpha'],
    learning_rate_init=params['learning_rate_init'],
    max_iter=int(round(params['max_iter']))
)

# Fit the model
best_model_neural_network.fit(X_train, y_train)

# Calculate RMSE using cross-validation
rmse_scores = np.sqrt(-cross_val_score(best_model_neural_network, X_train, y_train, cv=5, scoring='neg_mean_squared_error'))
mse_scores = -cross_val_score(best_model_neural_network, X_train, y_train, cv=5, scoring='neg_mean_squared_error')
mae_scores = -cross_val_score(best_model_neural_network, X_train, y_train, cv=5, scoring='neg_mean_absolute_error')
aed_scores = np.abs(y_train.mean() - best_model_neural_network.predict(X_train))
r2_scores = cross_val_score(best_model_neural_network, X_train, y_train, cv=5, scoring='r2')
n = len(X_train)
k = X_train.shape[1]
r2_adj_scores = 1 - (1 - r2_scores) * ((n - 1) / (n - k - 1))

# Print the evaluation metrics
print("Average RMSE:", np.mean(rmse_scores))
print("Average MSE:", np.mean(mse_scores))
print("Average MAE:", np.mean(mae_scores))
print("Average AED:", np.mean(aed_scores))
print("Average R2:", np.mean(r2_scores))
print("Average Adjusted R2:", np.mean(r2_adj_scores))

|   iter    |  target   |   alpha   | hidden... | learni... | max_iter  |
-------------------------------------------------------------------------
| [0m1        [0m | [0m0.1852   [0m | [0m0.03139  [0m | [0m84.7     [0m | [0m0.07962  [0m | [0m349.4    [0m |
| [95m2        [0m | [95m0.1935   [0m | [95m0.09015  [0m | [95m42.34    [0m | [95m0.07618  [0m | [95m830.6    [0m |
| [95m3        [0m | [95m0.2235   [0m | [95m0.06534  [0m | [95m58.41    [0m | [95m0.04498  [0m | [95m453.7    [0m |
| [95m4        [0m | [95m0.2381   [0m | [95m0.03393  [0m | [95m30.17    [0m | [95m0.05675  [0m | [95m620.8    [0m |
| [0m5        [0m | [0m0.157    [0m | [0m0.08428  [0m | [0m96.38    [0m | [0m0.08924  [0m | [0m482.5    [0m |
| [0m6        [0m | [0m0.192    [0m | [0m0.02933  [0m | [0m82.95    [0m | [0m0.08571  [0m | [0m496.9    [0m |




| [95m7        [0m | [95m20.25    [0m | [95m0.03001  [0m | [95m58.48    [0m | [95m0.00122  [0m | [95m132.1    [0m |
| [0m8        [0m | [0m0.2043   [0m | [0m0.01045  [0m | [0m32.59    [0m | [0m0.09857  [0m | [0m445.2    [0m |
| [0m9        [0m | [0m0.2079   [0m | [0m0.006594 [0m | [0m73.33    [0m | [0m0.08097  [0m | [0m915.7    [0m |
| [0m10       [0m | [0m0.1816   [0m | [0m0.05991  [0m | [0m20.23    [0m | [0m0.07396  [0m | [0m599.6    [0m |
| [0m11       [0m | [0m0.1795   [0m | [0m0.03801  [0m | [0m85.84    [0m | [0m0.07726  [0m | [0m253.4    [0m |
| [0m12       [0m | [0m0.2609   [0m | [0m0.007347 [0m | [0m25.08    [0m | [0m0.03559  [0m | [0m562.1    [0m |




| [0m13       [0m | [0m0.2173   [0m | [0m0.09961  [0m | [0m68.29    [0m | [0m0.07596  [0m | [0m183.4    [0m |




| [0m14       [0m | [0m0.2361   [0m | [0m0.02291  [0m | [0m23.3     [0m | [0m0.09363  [0m | [0m245.3    [0m |
| [0m15       [0m | [0m0.2539   [0m | [0m0.07923  [0m | [0m37.53    [0m | [0m0.02367  [0m | [0m751.0    [0m |
| [0m16       [0m | [0m0.1702   [0m | [0m0.001399 [0m | [0m76.1     [0m | [0m0.09781  [0m | [0m486.0    [0m |
| [0m17       [0m | [0m0.2256   [0m | [0m0.03636  [0m | [0m61.82    [0m | [0m0.05978  [0m | [0m373.9    [0m |




| [0m18       [0m | [0m0.2955   [0m | [0m0.04551  [0m | [0m67.97    [0m | [0m0.01245  [0m | [0m727.2    [0m |
| [0m19       [0m | [0m0.2388   [0m | [0m0.01095  [0m | [0m38.8     [0m | [0m0.04985  [0m | [0m802.8    [0m |
| [0m20       [0m | [0m0.1949   [0m | [0m0.08682  [0m | [0m46.91    [0m | [0m0.08679  [0m | [0m776.7    [0m |




| [0m21       [0m | [0m1.147    [0m | [0m0.0261   [0m | [0m58.16    [0m | [0m0.01299  [0m | [0m132.6    [0m |




| [0m22       [0m | [0m0.2641   [0m | [0m0.07904  [0m | [0m28.47    [0m | [0m0.08526  [0m | [0m175.4    [0m |




| [0m23       [0m | [0m0.2107   [0m | [0m0.05191  [0m | [0m97.6     [0m | [0m0.03411  [0m | [0m161.2    [0m |




| [0m24       [0m | [0m0.3693   [0m | [0m0.08322  [0m | [0m58.22    [0m | [0m0.03174  [0m | [0m133.6    [0m |
| [0m25       [0m | [0m0.1752   [0m | [0m0.04281  [0m | [0m25.29    [0m | [0m0.0855   [0m | [0m620.2    [0m |




Average RMSE: 20.50553064188731
Average MSE: 442.8608892104947
Average MAE: 15.229374723044737
Average AED: 43.56978996248231
Average R2: 0.9028233365981115
Average Adjusted R2: 0.9022113928739183




In [18]:
# Append model to list
all_models['Random forest'] = best_model_random_forest
all_models['SVM'] = best_model_svm
all_models['Neural Network'] = best_model_neural_network
all_models['MLR']= best_model_mlr

In [28]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import cross_val_predict
import plotly.graph_objects as go

# Evaluation metric labels
metric_labels = ['RMSE', 'MSE', 'MAE', 'AED', 'R2', 'Adjusted R2']

# Calculate evaluation metrics using cross-validation for each model
rmse_scores = []
mse_scores = []
mae_scores = []
aed_scores = []
r2_scores = []
r2_adj_scores = []

model_names = ['Random Forest', 'SVM', 'Neural Network', 'MLR']

models = {
    'Random Forest': best_model_random_forest,
    'SVM': best_model_svm,
    'Neural Network': best_model_neural_network,
    'MLR': best_model_mlr
}

for model_name in model_names:
    model = models[model_name]
    y_pred = cross_val_predict(model, X_train, y_train, cv=5)
    rmse_scores.append(np.sqrt(mean_squared_error(y_train, y_pred)))
    mse_scores.append(mean_squared_error(y_train, y_pred))
    mae_scores.append(mean_absolute_error(y_train, y_pred))
    aed_scores.append(np.abs(y_train.mean() - y_pred))
    r2_scores.append(r2_score(y_train, y_pred))
    r2_adj_scores.append(1 - ((1 - r2_score(y_train, y_pred)) * (len(X_train) - 1) / (len(X_train) - X_train.shape[1] - 1)))

# Plotting the bar chart
fig = go.Figure()

fig.add_trace(go.Bar(
    x=model_names,
    y=rmse_scores,
    name='RMSE'
))

fig.add_trace(go.Bar(
    x=model_names,
    y=mse_scores,
    name='MSE'
))

fig.add_trace(go.Bar(
    x=model_names,
    y=mae_scores,
    name='MAE'
))

fig.add_trace(go.Bar(
    x=model_names,
    y=aed_scores,
    name='AED'
))

fig.add_trace(go.Bar(
    x=model_names,
    y=r2_scores,
    name='R2'
))

fig.add_trace(go.Bar(
    x=model_names,
    y=r2_adj_scores,
    name='Adjusted R2'
))

# Updating the layout
fig.update_layout(
    title='Evaluation Metrics Comparison',
    xaxis_title='Models',
    yaxis_title='Scores',
    barmode='group'
)

# Display the plot
fig.show()




Stochastic Optimizer: Maximum iterations (132) reached and the optimization hasn't converged yet.


Stochastic Optimizer: Maximum iterations (132) reached and the optimization hasn't converged yet.


Stochastic Optimizer: Maximum iterations (132) reached and the optimization hasn't converged yet.


Stochastic Optimizer: Maximum iterations (132) reached and the optimization hasn't converged yet.


Stochastic Optimizer: Maximum iterations (132) reached and the optimization hasn't converged yet.

