# General Imports

**Importing all libraries**

In [1]:
from sklearn.datasets import make_regression
import numpy as np
import pandas as pd

from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import RFE
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import cross_val_predict
from sklearn.inspection import permutation_importance

import plotly.graph_objects as go
from bayes_opt import BayesianOptimization
import matplotlib.pyplot as plt

import warnings

Disable all warnings

In [2]:
# Disable all warnings
warnings.filterwarnings("ignore")

# Enable warnings again
## warnings.filterwarnings("default")

In [3]:
df_finished = pd.read_csv('data_finish_prep.csv')

In [4]:
df_finished.head()

Unnamed: 0,verschil_Lengte,verschil_Gewicht,verschil_6 MWT,verschil_TUG,verschil_BMI,verschil_Conditie,verschil_Lenigheid,verschil_Knijpkracht,NederlandseAntillenEnAruba,GeboorteRelatief,...,Ondergewicht,MatigOvergewicht,ErnstigOvergewichtObesitas,VoldoetAanBeweegrichtlijn,Mantelzorger,ZwaarBelasteMantelzorgers,UrenMantelzorgPerWeek,Rokers,VoldoetAanRichtlijnAlcoholgebruik,OvermatigDrinker
0,0.0,-2.3,-52.0,2.94,-0.906068,-0.1252,,30.8,-0.063003,-0.933187,...,0.570059,0.531251,0.934008,-0.796811,0.94649,-0.053768,-0.233017,1.074978,0.932579,-0.702505
1,0.0,-7.2,-104.0,-2.43,-3.13314,-0.384503,-4.0,11.5,-0.063003,-0.933187,...,0.570059,0.531251,0.934008,-0.796811,0.94649,-0.053768,-0.233017,1.074978,0.932579,-0.702505
2,1.0,-4.9,63.0,-1.92,-2.143719,0.080729,7.5,2.7,-0.063003,-0.933187,...,0.570059,0.531251,0.934008,-0.796811,0.94649,-0.053768,-0.233017,1.074978,0.932579,-0.702505
3,0.0,-1.3,23.0,-2.07,-0.521359,0.020537,4.25,-1.7,-0.063003,-0.933187,...,0.570059,0.531251,0.934008,-0.796811,0.94649,-0.053768,-0.233017,1.074978,0.932579,-0.702505
4,0.0,2.0,126.0,0.69,0.8,0.221264,-2.5,5.5,-0.063003,-0.933187,...,0.570059,0.531251,0.934008,-0.796811,0.94649,-0.053768,-0.233017,1.074978,0.932579,-0.702505


In [5]:
df_finished.dropna(inplace=True)

In [6]:
# Making dataframes for each dependent variable

# Fixing 6MWT column name
df_finished = df_finished.rename(columns={'verschil_6 MWT': 'verschil_6_MWT'})

# Verschil in gewicht
df_target_gewicht = df_finished['verschil_Gewicht']
df_test_Gewicht = df_finished.drop(['verschil_Gewicht'], axis=1)

# Verschil in 6MWT
df_target_6_MWT = df_finished['verschil_6_MWT']
df_test_6_MWT = df_finished.drop(['verschil_6_MWT'], axis=1)

# Verschil in TUG
df_target_TUG = df_finished['verschil_TUG']
df_test_TUG = df_finished.drop(['verschil_TUG'], axis=1)


# Verschil in BMI
df_target_BMI = df_finished['verschil_BMI']
df_test_BMI = df_finished.drop(['verschil_BMI'], axis=1)


# Verschil in Conditie
df_target_Conditie = df_finished['verschil_Conditie']
df_test_Conditie = df_finished.drop(['verschil_Conditie'], axis=1)


# Verschil in Lenigheid
df_target_Lenigheid = df_finished['verschil_Lenigheid']
df_test_Lenigheid = df_finished.drop(['verschil_Lenigheid'], axis=1)


# Verschil in Knijpkracht
df_target_Knijpkracht = df_finished['verschil_Knijpkracht']
df_test_Knijpkracht = df_finished.drop(['verschil_Knijpkracht'], axis=1)

In [7]:
# Count the number of rows
num_rows = df_finished.shape[0]

# Print the number of rows
print("Number of rows:", num_rows)

Number of rows: 1099


In [8]:
# Split data for Verschil in Gewicht
X_train_Gewicht, X_test_Gewicht, y_train_Gewicht, y_test_Gewicht = train_test_split(
    df_test_Gewicht, df_target_gewicht, test_size=0.2, random_state=42)

# Split data for Verschil in 6MWT
X_train_6_MWT, X_test_6_MWT, y_train_6_MWT, y_test_6_MWT = train_test_split(
    df_test_6_MWT, df_target_6_MWT, test_size=0.2, random_state=42)

# Split data for Verschil in TUG
X_train_TUG, X_test_TUG, y_train_TUG, y_test_TUG = train_test_split(
    df_test_TUG, df_target_TUG, test_size=0.2, random_state=42)

# Split data for Verschil in BMI
X_train_BMI, X_test_BMI, y_train_BMI, y_test_BMI = train_test_split(
    df_test_BMI, df_target_BMI, test_size=0.2, random_state=42)

# Split data for Verschil in Conditie
X_train_Conditie, X_test_Conditie, y_train_Conditie, y_test_Conditie = train_test_split(
    df_test_Conditie, df_target_Conditie, test_size=0.2, random_state=42)

# Split data for Verschil in Lenigheid
X_train_Lenigheid, X_test_Lenigheid, y_train_Lenigheid, y_test_Lenigheid = train_test_split(
    df_test_Lenigheid, df_target_Lenigheid, test_size=0.2, random_state=42)

# Split data for Verschil in Knijpkracht
X_train_Knijpkracht, X_test_Knijpkracht, y_train_Knijpkracht, y_test_Knijpkracht = train_test_split(
    df_test_Knijpkracht, df_target_Knijpkracht, test_size=0.2, random_state=42)

# # Split data into training and testing sets
# train_ratio = 0.8
# train_size = int(train_ratio * num_rows)

# X_train = X[:train_size]
# y_train = y[:train_size]

# # Test data
# X_test = X[train_size:]
# y_test = y[train_size:]

In [9]:
# Concatenate the feature variables into X
X = np.concatenate((X_train_Gewicht, X_train_6_MWT, X_train_TUG, X_train_BMI, X_train_Conditie, X_train_Lenigheid, X_train_Knijpkracht))

# Concatenate the target variables into y
y = np.concatenate((y_train_Gewicht, y_train_6_MWT, y_train_TUG, y_train_BMI, y_train_Conditie, y_train_Lenigheid, y_train_Knijpkracht))

# Test data
X_test = np.concatenate((X_test_Gewicht, X_test_6_MWT, X_test_TUG, X_test_BMI, X_test_Conditie, X_test_Lenigheid, X_test_Knijpkracht))
y_test = np.concatenate((y_test_Gewicht, y_test_6_MWT, y_test_TUG, y_test_BMI, y_test_Conditie, y_test_Lenigheid, y_test_Knijpkracht))

# Split data into training and testing sets
train_ratio = 0.8
train_size = int(train_ratio * len(X))

X_train = X[:train_size]
y_train = y[:train_size]

In [10]:
def bo_params_generic(model, params, X_train, y_train):
    # Create the model instance with the specified parameters
    regressor = model(**params)
    
    # Assuming you have X_train, y_train defined for regression
    scores = cross_val_score(regressor, X_train, y_train, cv=10, scoring='neg_root_mean_squared_error')
    return -scores.mean()

In [11]:

#results = dt_bo.maximize(n_iter=5, init_points=20)

In [12]:
all_models = {}

# MLR

**feature selection**

In [13]:
# Assuming you have X_train and y_train defined for training data

# Initialize the Linear Regression model
lr_model = LinearRegression()

# Fit the Linear Regression model to the training data
lr_model.fit(X_train, y_train)

# Get feature coefficients
coefficients = lr_model.coef_

# Create a list of feature names or indices paired with their coefficients
feature_coefficients = list(zip(range(X_train.shape[1]), coefficients))

# Sort the features based on absolute coefficient values in descending order
feature_coefficients.sort(key=lambda x: abs(x[1]), reverse=True)

# Print the ranked feature coefficients
print("Feature Coefficients:")
for feature_index, coefficient in feature_coefficients:
    print(f"Feature {feature_index}: {coefficient}")

Feature Coefficients:
Feature 50: 148.18228651982133
Feature 60: -147.53045902153002
Feature 33: 77.98641807699985
Feature 54: 66.6448127066419
Feature 34: 63.29586560353903
Feature 59: -53.950473011576264
Feature 32: 53.229963200358185
Feature 35: -49.14526062800492
Feature 30: -29.928311411819603
Feature 58: 28.649953909340002
Feature 45: 28.581497333744974
Feature 55: 28.36333581776917
Feature 65: 27.48371236077007
Feature 31: -27.01639096894545
Feature 57: 25.941948982519058
Feature 56: 25.643573112531143
Feature 29: -25.02547400814238
Feature 9: -17.629037971976828
Feature 14: 16.966229886396405
Feature 38: -15.533730125218412
Feature 47: -15.412505845506297
Feature 40: 14.584594837611832
Feature 51: -14.531648801348645
Feature 28: 13.929463847988018
Feature 74: -13.794527656437573
Feature 62: -13.59782701125398
Feature 52: -13.387682983132803
Feature 73: -12.802850456650564
Feature 46: 12.693166874416104
Feature 48: 11.940274169104939
Feature 43: 11.290722082790644
Feature 37: 11

**Hyperparameter optimalisatie**

In [14]:
# Define the MLR model evaluation function using cross-validation
def evaluate_mlr_model(fit_intercept):
    # Convert fit_intercept to a boolean value
    fit_intercept = bool(fit_intercept)
    
    # Create and configure the MLR model
    model = LinearRegression(fit_intercept=fit_intercept)
    
    # Perform cross-validation on the training data
    scores = cross_val_score(model, X_train, y_train, cv=5, scoring='neg_mean_squared_error')
    
    # Return the negative mean squared error (Bayesian Optimization maximizes the objective)
    return np.mean(scores)

# Define the parameter ranges for Bayesian Optimization
params_ranges = {
    'fit_intercept': (0, 1)
}

# Perform Bayesian Optimization
mlr_bo = BayesianOptimization(f=evaluate_mlr_model, pbounds=params_ranges)
mlr_bo.maximize(n_iter=10, init_points=5)

# Get the best hyperparameters
best_params = mlr_bo.max['params']
best_fit_intercept = bool(best_params['fit_intercept'])

# Create the best MLR model with the tuned hyperparameters
best_model_mlr = LinearRegression(fit_intercept=best_fit_intercept)

# Fit the best model to the training data
best_model_mlr.fit(X_train, y_train)

# Calculate evaluation metrics
y_pred = best_model_mlr.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
aed = np.abs(y_test.mean() - y_pred)
r2 = r2_score(y_test, y_pred)
n = len(X_test)
k = X_test.shape[1]
r2_adj = 1 - (1 - r2) * ((n - 1) / (n - k - 1))

# Print the evaluation metrics
print("RMSE:", rmse)
print("MSE:", mse)
print("MAE:", mae)
print("AED:", aed)
print("R2:", r2)
print("Adjusted R2:", r2_adj)

|   iter    |  target   | fit_in... |
-------------------------------------
| [0m1        [0m | [0m-1.451e+0[0m | [0m0.9777   [0m |
| [0m2        [0m | [0m-1.451e+0[0m | [0m0.1801   [0m |
| [0m3        [0m | [0m-1.451e+0[0m | [0m0.8115   [0m |
| [0m4        [0m | [0m-1.451e+0[0m | [0m0.07396  [0m |
| [0m5        [0m | [0m-1.451e+0[0m | [0m0.5982   [0m |
| [0m6        [0m | [0m-1.451e+0[0m | [0m0.0002154[0m |
| [0m7        [0m | [0m-1.451e+0[0m | [0m0.3309   [0m |
| [0m8        [0m | [0m-1.451e+0[0m | [0m0.5086   [0m |
| [0m9        [0m | [0m-1.451e+0[0m | [0m0.9998   [0m |
| [0m10       [0m | [0m-1.451e+0[0m | [0m0.9999   [0m |
| [0m11       [0m | [0m-1.451e+0[0m | [0m6.875e-05[0m |
| [0m12       [0m | [0m-1.451e+0[0m | [0m0.9018   [0m |
| [0m13       [0m | [0m-1.451e+0[0m | [0m0.6319   [0m |
| [0m14       [0m | [0m-1.451e+0[0m | [0m0.00012  [0m |
| [0m15       [0m | [0m-1.451e+0[0m | [0m0.0001581

# Feature Selection

In [None]:
# Insert Feature Selection here

# Support Vector Machines

**Feature selection**

In [17]:
# Assuming you have X_train and y_train defined for training data

# Initialize the SVM model
svm_model = SVR(kernel='linear')  # Replace 'rbf' with your desired kernel

# Fit the SVM model to the training data
svm_model.fit(X_train, y_train)

model.n_jobs = -1

# Compute permutation importances
result = permutation_importance(svm_model, X_train, y_train, n_repeats=10, random_state=42)

# Get feature importances
importances = result.importances_mean

# Create a list of feature names or indices paired with their importances
feature_importances = list(zip(range(X_train.shape[1]), importances))

# Sort the features based on importance in descending order
feature_importances.sort(key=lambda x: x[1], reverse=True)


# Print the ranked feature importances
print("Feature Importances:")
for feature_index, importance in feature_importances:
    print(f"Feature {feature_index}: {importance}")

**Hyperparameter optimalisatie**

In [None]:
# Define the SVM model evaluation function using cross-validation
def evaluate_svm_model(C, epsilon, gamma):
    # Create and configure the SVM model
    model = SVR(C=C, epsilon=epsilon, gamma=gamma)
    
    # Perform cross-validation on the training data
    scores = cross_val_score(model, X_train, y_train, cv=5, scoring='neg_mean_squared_error')
    
    # Return the negative mean squared error (Bayesian Optimization maximizes the objective)
    return np.mean(scores)

# Define the parameter ranges for Bayesian Optimization
params_ranges = {
    'C': (0.1, 10),
    'epsilon': (0.01, 1),
    'gamma': (0.001, 0.1)
}

# Perform Bayesian Optimization
svm_bo = BayesianOptimization(f=evaluate_svm_model, pbounds=params_ranges)
svm_bo.maximize(n_iter=10, init_points=5)

# Get the best hyperparameters
best_params = svm_bo.max['params']
best_C = best_params['C']
best_epsilon = best_params['epsilon']
best_gamma = best_params['gamma']

# Create the best SVM model with the tuned hyperparameters
best_model_svm = SVR(C=best_C, epsilon=best_epsilon, gamma=best_gamma)

# Fit the best model to the training data
best_model_svm.fit(X_train, y_train)

# Calculate evaluation metrics
y_pred = best_model_svm.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
aed = np.abs(y_test.mean() - y_pred)
r2 = r2_score(y_test, y_pred)
n = len(X_test)
k = X_test.shape[1]
r2_adj = 1 - (1 - r2) * ((n - 1) / (n - k - 1))

# Print the evaluation metrics
print("RMSE:", rmse)
print("MSE:", mse)
print("MAE:", mae)
print("AED:", aed)
print("R2:", r2)
print("Adjusted R2:", r2_adj)

# Random Forest

**Feature selection**

In [None]:
# Assuming you have X_train and y_train defined for training data

# Initialize the Random Forest model
rf_model = RandomForestRegressor()

# Fit the Random Forest model to the training data
rf_model.fit(X_train, y_train)

# Get feature importances
importances = rf_model.feature_importances_

# Create a list of feature names or indices paired with their importances
feature_importances = list(zip(range(X_train.shape[1]), importances))

# Sort the features based on importance in descending order
feature_importances.sort(key=lambda x: x[1], reverse=True)

# Print the ranked feature importances
print("Feature Importances:")
for feature_index, importance in feature_importances:
    print(f"Feature {feature_index}: {importance}")


**Hyperparameter optimalisatie**

In [None]:
params_ranges = {
    'n_estimators': (10, 100),
    'max_depth': (1, 20),
    'min_samples_leaf': (1, 10),
    'min_weight_fraction_leaf': (0.0, 0.5),
    'max_features': (0.1, 1),
    'max_leaf_nodes': (10, 100)
}

# Example usage with Random Forest
model = RandomForestRegressor
dt_bo = BayesianOptimization(f=lambda n_estimators, max_depth, min_samples_leaf, min_weight_fraction_leaf,
                                    max_features, max_leaf_nodes: bo_params_generic(model, {
                                        'n_estimators': int(round(n_estimators)),
                                        'max_depth': int(round(max_depth)),
                                        'min_samples_leaf': round(min_samples_leaf),
                                        'min_weight_fraction_leaf': min_weight_fraction_leaf,
                                        'max_features': max_features,
                                        'max_leaf_nodes': int(round(max_leaf_nodes))
                                    }, X_train, y_train),
                             pbounds=params_ranges)
results = dt_bo.maximize(n_iter=5, init_points=20)
params = dt_bo.max['params']

# Creating a model with the best hyperparameters
best_model_random_forest = model(
    n_estimators=int(round(params['n_estimators'])),
    max_depth=int(round(params['max_depth'])),
    min_samples_leaf=round(params['min_samples_leaf']),
    min_weight_fraction_leaf=params['min_weight_fraction_leaf'],
    max_features=params['max_features'],
    max_leaf_nodes=int(round(params['max_leaf_nodes']))
)

# Fit the model
best_model_random_forest.fit(X_train, y_train)

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

y_pred = best_model_random_forest.predict(X_test)

mse_scores = mean_squared_error(y_test, y_pred)
mae_scores = mean_absolute_error(y_test, y_pred)
aed_scores = np.abs(y_test.mean() - y_pred)
r2_scores = r2_score(y_test, y_pred)

n = len(X_test)
k = X_test.shape[1]
r2_adj_scores = 1 - (1 - r2_scores) * ((n - 1) / (n - k - 1))
rmse_scores = np.sqrt(mse_scores)

print("MSE:", mse_scores)
print("MAE:", mae_scores)
print("AED:", aed_scores)
print("R2:", r2_scores)
print("Adjusted R2:", r2_adj_scores)
print("RMSE:", rmse_scores)


# neural network

**Feature selection**

In [None]:
# Assuming you have X and y defined for the dataset

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the input features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize the neural network regressor
nn_model = MLPRegressor(hidden_layer_sizes=(10, 10))  # Adjust the architecture as needed

# Fit the neural network model to the training data
nn_model.fit(X_train_scaled, y_train)

# Perform feature selection using Recursive Feature Elimination (RFE)
selector = RFE(estimator=nn_model, n_features_to_select=10)  # Adjust n_features_to_select as needed
selector.fit(X_train_scaled, y_train)

# Transform the training and testing sets to keep only the selected features
X_train_selected = selector.transform(X_train_scaled)
X_test_selected = selector.transform(X_test_scaled)

# Print the selected feature support
selected_support = selector.support_
print("Selected Feature Support:")
print(selected_support)

**Hyperparameter optimalisatie**

In [None]:
params_ranges = {
    'hidden_layer_sizes': (10, 100),
    'alpha': (0.0001, 0.1),
    'learning_rate_init': (0.001, 0.1),
    'max_iter': (100, 1000),
}

# Example usage with Neural Network
model = MLPRegressor
dt_bo = BayesianOptimization(f=lambda hidden_layer_sizes, alpha, learning_rate_init, max_iter:
                                    bo_params_generic(model, {
                                        'hidden_layer_sizes': (int(round(hidden_layer_sizes)),),
                                        'alpha': alpha,
                                        'learning_rate_init': learning_rate_init,
                                        'max_iter': int(round(max_iter))
                                    }, X_train, y_train),
                             pbounds=params_ranges)

results = dt_bo.maximize(n_iter=5, init_points=20)
params = dt_bo.max['params']

# Creating a model with the best hyperparameters
best_model_neural_network = model(
    hidden_layer_sizes=(int(round(params['hidden_layer_sizes'])),),
    alpha=params['alpha'],
    learning_rate_init=params['learning_rate_init'],
    max_iter=int(round(params['max_iter']))
)


# Fit the model
best_model_neural_network.fit(X_train, y_train)


# Calculate the evaluation metrics
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
aed = np.abs(y_test.mean() - y_pred)
r2 = r2_score(y_test, y_pred)
n = len(X_test)
k = X_test.shape[1]
r2_adj = 1 - (1 - r2) * ((n - 1) / (n - k - 1))

# Print the evaluation metrics
print("RMSE:", rmse)
print("MSE:", mse)
print("MAE:", mae)
print("AED:", aed)
print("R2:", r2)
print("Adjusted R2:", r2_adj)


In [None]:
# Append model to list
all_models['Random forest'] = best_model_random_forest
all_models['SVM'] = best_model_svm
all_models['Neural Network'] = best_model_neural_network
all_models['MLR']= best_model_mlr

# plotting the bar chart

**using train dataset**

In [None]:
# Evaluation metric labels
metric_labels = ['RMSE', 'MSE', 'MAE', 'AED', 'R2', 'Adjusted R2']

# Calculate evaluation metrics using cross-validation for each model
metrics = {
    'RMSE': lambda y_true, y_pred: np.sqrt(mean_squared_error(y_true, y_pred)),
    'MSE': mean_squared_error,
    'MAE': mean_absolute_error,
    'AED': lambda y_true, y_pred: np.abs(np.mean(y_true) - y_pred),
    'R2': r2_score,
    'Adjusted R2': lambda y_true, y_pred: 1 - ((1 - r2_score(y_true, y_pred)) * (len(y_true) - 1) / (len(y_true) - X_train.shape[1] - 1))
}

model_names = ['Random Forest', 'SVM', 'Neural Network', 'MLR']

models = {
    'Random Forest': best_model_random_forest,
    'SVM': best_model_svm,
    'Neural Network': best_model_neural_network,
    'MLR': best_model_mlr
}

metric_scores = {metric: [] for metric in metric_labels}

for model_name in model_names:
    model = models[model_name]
    y_pred = cross_val_predict(model, X_train, y_train, cv=5)
    for metric in metric_labels:
        metric_scores[metric].append(metrics[metric](y_train, y_pred))

# Plotting the bar chart
fig = go.Figure()

for metric in metric_labels:
    fig.add_trace(go.Bar(
        x=model_names,
        y=metric_scores[metric],
        name=metric
    ))

# Updating the layout
fig.update_layout(
    title='Evaluation Metrics Comparison',
    xaxis_title='Models',
    yaxis_title='Scores',
    barmode='group'
)

# Display the plot
fig.show()


**using test dataset**

In [None]:
# Evaluation metric labels
metric_labels = ['RMSE', 'MSE', 'MAE', 'AED', 'R2', 'Adjusted R2']

# Calculate evaluation metrics using cross-validation for each model
metrics = {
    'RMSE': lambda y_true, y_pred: np.sqrt(mean_squared_error(y_true, y_pred)),
    'MSE': mean_squared_error,
    'MAE': mean_absolute_error,
    'AED': lambda y_true, y_pred: np.abs(np.mean(y_true) - y_pred),
    'R2': r2_score,
    'Adjusted R2': lambda y_true, y_pred: 1 - ((1 - r2_score(y_true, y_pred)) * (len(y_true) - 1) / (len(y_true) - X_train.shape[1] - 1))
}

model_names = ['Random Forest', 'SVM', 'Neural Network', 'MLR']

models = {
    'Random Forest': best_model_random_forest,
    'SVM': best_model_svm,
    'Neural Network': best_model_neural_network,
    'MLR': best_model_mlr
}

# Train each model on the training data and predict the test data
predictions = {}
for model_name in model_names:
    model = models[model_name]
    model.fit(X_train, y_train)  # Train the model
    predictions[model_name] = model.predict(X_test)  # Predict the test data

# Calculate each metric for each model's predictions
metric_scores = {metric: [] for metric in metric_labels}
for model_name in model_names:
    y_pred = predictions[model_name]
    for metric in metric_labels:
        metric_scores[metric].append(metrics[metric](y_test, y_pred))  # Use the test data here

# Plotting the bar chart
fig = go.Figure()

for metric in metric_labels:
    fig.add_trace(go.Bar(
        x=model_names,
        y=metric_scores[metric],
        name=metric
    ))

# Updating the layout
fig.update_layout(
    title='Evaluation Metrics Comparison',
    xaxis_title='Models',
    yaxis_title='Scores',
    barmode='group'
)

# Display the plot
fig.show()