In [15]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

from sklearn.metrics import (
    mean_absolute_error, mean_squared_error, r2_score,
    accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
)

# Regression models
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, VotingRegressor
from sklearn.neighbors import KNeighborsRegressor

# Classification models
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

# Load your dataset
df = pd.read_csv("heart.csv")

print("Raw data preview:")
display(df.head())
print("Shape:", df.shape)


Raw data preview:


Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0


Shape: (1025, 14)


In [4]:
# ---- 1.1 Choose your targets ----
# TODO: adjust these if your column names are different

target_clf = 'target'  
target_reg = 'chol'     
# Keep a copy of original
df_original = df.copy()

# ---- 1.2 Feature Engineering on full df ----

# Age group bins
df['age_group'] = pd.cut(
    df['age'],
    bins=[0, 35, 45, 55, 65, 100],
    labels=['<35', '35-44', '45-54', '55-64', '65+']
)

# Cholesterol per age
if 'chol' in df.columns:
    df['chol_per_age'] = df['chol'] / df['age']

# Blood pressure / cholesterol ratio
if 'trestbps' in df.columns and 'chol' in df.columns:
    df['bp_chol_ratio'] = df['trestbps'] / df['chol']

print("After feature engineering:")
display(df.head())


After feature engineering:


Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target,age_group,chol_per_age,bp_chol_ratio
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0,45-54,4.076923,0.589623
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0,45-54,3.830189,0.689655
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0,65+,2.485714,0.833333
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0,55-64,3.327869,0.729064
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0,55-64,4.741935,0.469388


In [5]:
# Separate targets
y_clf = df[target_clf]      # classification target
y_reg = df[target_reg]      # regression target

# Drop targets from features
X = df.drop(columns=[target_clf, target_reg])

print("Feature columns:")
print(X.columns.tolist())


Feature columns:
['age', 'sex', 'cp', 'trestbps', 'fbs', 'restecg', 'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'age_group', 'chol_per_age', 'bp_chol_ratio']


In [6]:
# ---- Missing values ----
for col in X.columns:
    if X[col].dtype == 'object' or str(X[col].dtype) == 'category':
        X[col] = X[col].fillna(X[col].mode()[0])
    else:
        X[col] = X[col].fillna(X[col].mean())

# ---- Encode categoricals ----
cat_cols = X.select_dtypes(include=['object', 'category']).columns

# Binary categoricals → LabelEncoder
for col in cat_cols:
    if X[col].nunique() <= 2:
        X[col] = LabelEncoder().fit_transform(X[col])

# Remaining categoricals → One-hot
cat_cols = X.select_dtypes(include=['object', 'category']).columns
X = pd.get_dummies(X, columns=cat_cols, drop_first=True)

print("After encoding:")
display(X.head())
print("New shape:", X.shape)


After encoding:


Unnamed: 0,age,sex,cp,trestbps,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,chol_per_age,bp_chol_ratio,age_group_35-44,age_group_45-54,age_group_55-64,age_group_65+
0,52,1,0,125,0,1,168,0,1.0,2,2,3,4.076923,0.589623,False,True,False,False
1,53,1,0,140,1,0,155,1,3.1,0,0,3,3.830189,0.689655,False,True,False,False
2,70,1,0,145,0,1,125,1,2.6,0,0,3,2.485714,0.833333,False,False,False,True
3,61,1,0,148,0,1,161,0,0.0,2,1,3,3.327869,0.729064,False,False,True,False
4,62,0,0,138,1,1,106,0,1.9,1,3,2,4.741935,0.469388,False,False,True,False


New shape: (1025, 18)


In [12]:
# ---- Train/Val/Test Split ----
idx_train, temp_idx = train_test_split(X.index, test_size=0.3, random_state=42)
idx_val, idx_test = train_test_split(temp_idx, test_size=0.5, random_state=42)

X_train = X.iloc[idx_train]
X_val   = X.iloc[idx_val]
X_test  = X.iloc[idx_test]

# Targets - classification
y_clf_train = y_clf.iloc[idx_train]
y_clf_val   = y_clf.iloc[idx_val]
y_clf_test  = y_clf.iloc[idx_test]

# Targets - regression
y_reg_train = y_reg.iloc[idx_train]
y_reg_val   = y_reg.iloc[idx_val]
y_reg_test  = y_reg.iloc[idx_test]

print("Shapes:")
print("X_train:", X_train.shape, "X_val:", X_val.shape, "X_test:", X_test.shape)

Shapes:
X_train: (717, 18) X_val: (154, 18) X_test: (154, 18)


In [13]:
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_val_s   = scaler.transform(X_val)
X_test_s  = scaler.transform(X_test)


In [14]:
# ==========================================================
#   REGRESSION MODELS: TRAINING + METRICS + RESULTS TABLE
# ==========================================================

from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# -----------------------------------------
# Define all regression models
# -----------------------------------------
reg_models = {
    "LinearRegression": LinearRegression(),
    "DecisionTreeRegressor": DecisionTreeRegressor(random_state=42),
    "RandomForestRegressor": RandomForestRegressor(n_estimators=200, random_state=42),
    "GradientBoostingRegressor": GradientBoostingRegressor(random_state=42),
    "KNNRegressor": KNeighborsRegressor(n_neighbors=5)
}

# -----------------------------------------
# Train & evaluate each model
# -----------------------------------------
reg_results = []

for name, model in reg_models.items():
    # Train
    model.fit(X_train_s, y_reg_train)

    # Predictions
    y_val_pred = model.predict(X_val_s)
    y_test_pred = model.predict(X_test_s)

    # Store performance
    reg_results.append({
        "Model": name,
        "Val_MAE": mean_absolute_error(y_reg_val, y_val_pred),
        "Val_MSE": mean_squared_error(y_reg_val, y_val_pred),
        "Val_R2":  r2_score(y_reg_val, y_val_pred),
        "Test_MAE": mean_absolute_error(y_reg_test, y_test_pred),
        "Test_MSE": mean_squared_error(y_reg_test, y_test_pred),
        "Test_R2":  r2_score(y_reg_test, y_test_pred)
    })

# -----------------------------------------
# Convert to DataFrame
# -----------------------------------------
reg_results_df = pd.DataFrame(reg_results)

print("Regression model evaluation results:")
display(reg_results_df.sort_values("Val_MSE"))



Regression model evaluation results:


Unnamed: 0,Model,Val_MAE,Val_MSE,Val_R2,Test_MAE,Test_MSE,Test_R2
0,LinearRegression,6.982202,193.036769,0.953499,5.99239,123.950448,0.952678
3,GradientBoostingRegressor,4.74128,613.612434,0.852185,3.503054,311.089186,0.881232
2,RandomForestRegressor,4.180325,751.843538,0.818886,2.421364,375.480768,0.856649
1,DecisionTreeRegressor,3.720779,914.123377,0.779794,1.928571,457.227273,0.82544
4,KNNRegressor,23.335065,1484.304935,0.642442,20.302597,1024.694805,0.608792


In [16]:
# Select top 3 regression models by Val_MSE
top3_reg = reg_results_df.sort_values("Val_MSE").head(3)["Model"].tolist()
print("Top 3 regression models:", top3_reg)

# Use same top 3 models for Bayesian averaging
val_mse_top3 = reg_results_df.set_index("Model").loc[top3_reg, "Val_MSE"]

# Convert MSE to weights (smaller MSE => larger weight)
inv_errors = np.exp(-val_mse_top3)
weights_reg = inv_errors / inv_errors.sum()
print("Bayesian regression weights:")
display(weights_reg)

# Collect predictions
val_preds = np.array([reg_models[name].predict(X_val_s) for name in top3_reg])
test_preds = np.array([reg_models[name].predict(X_test_s) for name in top3_reg])

w = weights_reg.values.reshape(-1, 1)

y_val_bayes_reg = (w * val_preds).sum(axis=0)
y_test_bayes_reg = (w * test_preds).sum(axis=0)

bayesian_reg_metrics = {
    "Model": "BayesianEnsemble_Reg",
    "Val_MAE": mean_absolute_error(y_reg_val, y_val_bayes_reg),
    "Val_MSE": mean_squared_error(y_reg_val, y_val_bayes_reg),
    "Val_R2":  r2_score(y_reg_val, y_val_bayes_reg),
    "Test_MAE": mean_absolute_error(y_reg_test, y_test_bayes_reg),
    "Test_MSE": mean_squared_error(y_reg_test, y_test_bayes_reg),
    "Test_R2":  r2_score(y_reg_test, y_test_bayes_reg),
}

reg_results_df = pd.concat(
    [reg_results_df, pd.DataFrame([bayesian_reg_metrics])],
    ignore_index=True
)

print("Final regression results (including ensembles):")
display(reg_results_df.sort_values("Val_MSE"))

Top 3 regression models: ['LinearRegression', 'GradientBoostingRegressor', 'RandomForestRegressor']
Bayesian regression weights:


Model
LinearRegression              1.000000e+00
GradientBoostingRegressor    2.219777e-183
RandomForestRegressor         0.000000e+00
Name: Val_MSE, dtype: float64

Final regression results (including ensembles):


Unnamed: 0,Model,Val_MAE,Val_MSE,Val_R2,Test_MAE,Test_MSE,Test_R2
0,LinearRegression,6.982202,193.036769,0.953499,5.99239,123.950448,0.952678
5,BayesianEnsemble_Reg,6.982202,193.036769,0.953499,5.99239,123.950448,0.952678
3,GradientBoostingRegressor,4.74128,613.612434,0.852185,3.503054,311.089186,0.881232
2,RandomForestRegressor,4.180325,751.843538,0.818886,2.421364,375.480768,0.856649
1,DecisionTreeRegressor,3.720779,914.123377,0.779794,1.928571,457.227273,0.82544
4,KNNRegressor,23.335065,1484.304935,0.642442,20.302597,1024.694805,0.608792


In [17]:
binary = (len(np.unique(y_clf)) == 2)

clf_models = {
    "LogisticRegression": LogisticRegression(max_iter=2000),
    "DecisionTree": DecisionTreeClassifier(random_state=42),
    "RandomForest": RandomForestClassifier(n_estimators=200, random_state=42),
    "GradientBoosting": GradientBoostingClassifier(random_state=42),
    "KNN": KNeighborsClassifier(n_neighbors=5),
    "SVC": SVC(probability=True, random_state=42)
}

clf_results = []

for name, model in clf_models.items():
    model.fit(X_train_s, y_clf_train)
    
    y_val_pred = model.predict(X_val_s)
    y_test_pred = model.predict(X_test_s)
    
    y_val_prob = model.predict_proba(X_val_s)[:, 1] if binary else None
    y_test_prob = model.predict_proba(X_test_s)[:, 1] if binary else None
    
    val_acc  = accuracy_score(y_clf_val, y_val_pred)
    val_prec = precision_score(y_clf_val, y_val_pred, average="binary" if binary else "macro")
    val_rec  = recall_score(y_clf_val, y_val_pred, average="binary" if binary else "macro")
    val_f1   = f1_score(y_clf_val, y_val_pred, average="binary" if binary else "macro")
    val_auc  = roc_auc_score(y_clf_val, y_val_prob) if binary else np.nan
    
    test_acc  = accuracy_score(y_clf_test, y_test_pred)
    test_prec = precision_score(y_clf_test, y_test_pred, average="binary" if binary else "macro")
    test_rec  = recall_score(y_clf_test, y_test_pred, average="binary" if binary else "macro")
    test_f1   = f1_score(y_clf_test, y_test_pred, average="binary" if binary else "macro")
    test_auc  = roc_auc_score(y_clf_test, y_test_prob) if binary else np.nan
    
    clf_results.append({
        "Model": name,
        "Val_Accuracy": val_acc,
        "Val_Precision": val_prec,
        "Val_Recall": val_rec,
        "Val_F1": val_f1,
        "Val_ROC_AUC": val_auc,
        "Test_Accuracy": test_acc,
        "Test_Precision": test_prec,
        "Test_Recall": test_rec,
        "Test_F1": test_f1,
        "Test_ROC_AUC": test_auc
    })

clf_results_df = pd.DataFrame(clf_results)
print("Classification model comparison:")
display(clf_results_df.sort_values("Val_F1", ascending=False))


Classification model comparison:


Unnamed: 0,Model,Val_Accuracy,Val_Precision,Val_Recall,Val_F1,Val_ROC_AUC,Test_Accuracy,Test_Precision,Test_Recall,Test_F1,Test_ROC_AUC
1,DecisionTree,0.987013,1.0,0.967742,0.983607,0.983871,0.974026,1.0,0.954023,0.976471,0.977011
2,RandomForest,0.974026,1.0,0.935484,0.966667,0.999299,0.967532,1.0,0.942529,0.970414,1.0
3,GradientBoosting,0.967532,0.952381,0.967742,0.96,0.981417,0.961039,0.987952,0.942529,0.964706,0.987648
5,SVC,0.915584,0.84507,0.967742,0.902256,0.977034,0.935065,0.932584,0.954023,0.943182,0.977526
4,KNN,0.857143,0.80303,0.854839,0.828125,0.957661,0.844156,0.870588,0.850575,0.860465,0.949305
0,LogisticRegression,0.831169,0.730769,0.919355,0.814286,0.911115,0.824675,0.833333,0.862069,0.847458,0.903929


In [None]:
top_metric = "Val_F1"  # you could also choose "Val_ROC_AUC" if binary

top3_clf = clf_results_df.sort_values(top_metric, ascending=False).head(3)["Model"].tolist()
print("Top 3 classification models:", top3_clf)

voting_clf = VotingClassifier(
    estimators=[(name, clf_models[name]) for name in top3_clf],
    voting="soft"
)

voting_clf.fit(X_train_s, y_clf_train)

y_val_pred_v = voting_clf.predict(X_val_s)
y_test_pred_v = voting_clf.predict(X_test_s)

y_val_prob_v = voting_clf.predict_proba(X_val_s)[:, 1] if binary else None
y_test_prob_v = voting_clf.predict_proba(X_test_s)[:, 1] if binary else None

voting_clf_metrics = {
    "Model": "VotingClassifier",
    "Val_Accuracy": accuracy_score(y_clf_val, y_val_pred_v),
    "Val_Precision": precision_score(y_clf_val, y_val_pred_v, average="binary" if binary else "macro"),
    "Val_Recall": recall_score(y_clf_val, y_val_pred_v, average="binary" if binary else "macro"),
    "Val_F1": f1_score(y_clf_val, y_val_pred_v, average="binary" if binary else "macro"),
    "Val_ROC_AUC": roc_auc_score(y_clf_val, y_val_prob_v) if binary else np.nan,
    "Test_Accuracy": accuracy_score(y_clf_test, y_test_pred_v),
    "Test_Precision": precision_score(y_clf_test, y_test_pred_v, average="binary" if binary else "macro"),
    "Test_Recall": recall_score(y_clf_test, y_test_pred_v, average="binary" if binary else "macro"),
    "Test_F1": f1_score(y_clf_test, y_test_pred_v, average="binary" if binary else "macro"),
    "Test_ROC_AUC": roc_auc_score(y_clf_test, y_test_prob_v) if binary else np.nan
}

clf_results_df = pd.concat(
    [clf_results_df, pd.DataFrame([voting_clf_metrics])],
    ignore_index=True
)

print("With VotingClassifier added:")
display(clf_results_df.sort_values("Val_F1", ascending=False))


Top 3 classification models: ['DecisionTree', 'RandomForest', 'GradientBoosting']
With VotingClassifier added:


Unnamed: 0,Model,Val_Accuracy,Val_Precision,Val_Recall,Val_F1,Val_ROC_AUC,Test_Accuracy,Test_Precision,Test_Recall,Test_F1,Test_ROC_AUC
1,DecisionTree,0.987013,1.0,0.967742,0.983607,0.983871,0.974026,1.0,0.954023,0.976471,0.977011
6,VotingClassifier,0.987013,1.0,0.967742,0.983607,0.994039,0.974026,1.0,0.954023,0.976471,0.997941
2,RandomForest,0.974026,1.0,0.935484,0.966667,0.999299,0.967532,1.0,0.942529,0.970414,1.0
3,GradientBoosting,0.967532,0.952381,0.967742,0.96,0.981417,0.961039,0.987952,0.942529,0.964706,0.987648
5,SVC,0.915584,0.84507,0.967742,0.902256,0.977034,0.935065,0.932584,0.954023,0.943182,0.977526
4,KNN,0.857143,0.80303,0.854839,0.828125,0.957661,0.844156,0.870588,0.850575,0.860465,0.949305
0,LogisticRegression,0.831169,0.730769,0.919355,0.814286,0.911115,0.824675,0.833333,0.862069,0.847458,0.903929


In [None]:
# Use F1 scores as performance measure for weights
val_f1_top3 = clf_results_df.set_index("Model").loc[top3_clf, "Val_F1"]

scores = np.exp(val_f1_top3)
weights_clf = scores / scores.sum()
print("Bayesian classification weights:")
display(weights_clf)

# Collect probabilities
val_probs = np.array([
    clf_models[name].predict_proba(X_val_s)[:, 1]
    for name in top3_clf
])
test_probs = np.array([
    clf_models[name].predict_proba(X_test_s)[:, 1]
    for name in top3_clf
])

w = weights_clf.values.reshape(-1, 1)

y_val_prob_bayes = (w * val_probs).sum(axis=0)
y_test_prob_bayes = (w * test_probs).sum(axis=0)

y_val_pred_bayes = (y_val_prob_bayes >= 0.5).astype(int)
y_test_pred_bayes = (y_test_prob_bayes >= 0.5).astype(int)

bayes_clf_metrics = {
    "Model": "BayesianEnsemble_Clf",
    "Val_Accuracy": accuracy_score(y_clf_val, y_val_pred_bayes),
    "Val_Precision": precision_score(y_clf_val, y_val_pred_bayes, average="binary"),
    "Val_Recall": recall_score(y_clf_val, y_val_pred_bayes, average="binary"),
    "Val_F1": f1_score(y_clf_val, y_val_pred_bayes, average="binary"),
    "Val_ROC_AUC": roc_auc_score(y_clf_val, y_val_prob_bayes),
    "Test_Accuracy": accuracy_score(y_clf_test, y_test_pred_bayes),
    "Test_Precision": precision_score(y_clf_test, y_test_pred_bayes, average="binary"),
    "Test_Recall": recall_score(y_clf_test, y_test_pred_bayes, average="binary"),
    "Test_F1": f1_score(y_clf_test, y_test_pred_bayes, average="binary"),
    "Test_ROC_AUC": roc_auc_score(y_clf_test, y_test_prob_bayes)
}

clf_results_df = pd.concat(
    [clf_results_df, pd.DataFrame([bayes_clf_metrics])],
    ignore_index=True
)

print("Final classification results (including ensembles):")
display(clf_results_df.sort_values("Val_F1", ascending=False))


Bayesian classification weights:


Model
DecisionTree        0.337852
RandomForest        0.332177
GradientBoosting    0.329970
Name: Val_F1, dtype: float64

Final classification results (including ensembles):


Unnamed: 0,Model,Val_Accuracy,Val_Precision,Val_Recall,Val_F1,Val_ROC_AUC,Test_Accuracy,Test_Precision,Test_Recall,Test_F1,Test_ROC_AUC
1,DecisionTree,0.987013,1.0,0.967742,0.983607,0.983871,0.974026,1.0,0.954023,0.976471,0.977011
7,BayesianEnsemble_Clf,0.987013,1.0,0.967742,0.983607,0.994039,0.974026,1.0,0.954023,0.976471,0.997941
6,VotingClassifier,0.987013,1.0,0.967742,0.983607,0.994039,0.974026,1.0,0.954023,0.976471,0.997941
2,RandomForest,0.974026,1.0,0.935484,0.966667,0.999299,0.967532,1.0,0.942529,0.970414,1.0
3,GradientBoosting,0.967532,0.952381,0.967742,0.96,0.981417,0.961039,0.987952,0.942529,0.964706,0.987648
5,SVC,0.915584,0.84507,0.967742,0.902256,0.977034,0.935065,0.932584,0.954023,0.943182,0.977526
4,KNN,0.857143,0.80303,0.854839,0.828125,0.957661,0.844156,0.870588,0.850575,0.860465,0.949305
0,LogisticRegression,0.831169,0.730769,0.919355,0.814286,0.911115,0.824675,0.833333,0.862069,0.847458,0.903929


In [18]:
# Clean up / round regression metrics
reg_table = (
    reg_results_df
    .copy()
    .round(4)  # fewer decimals = nicer table
    .sort_values("Val_MSE")
)

# Clean up / round classification metrics
clf_table = (
    clf_results_df
    .copy()
    .round(4)
    .sort_values("Val_F1", ascending=False)
)

display(reg_table)
display(clf_table)


Unnamed: 0,Model,Val_MAE,Val_MSE,Val_R2,Test_MAE,Test_MSE,Test_R2
0,LinearRegression,6.9822,193.0368,0.9535,5.9924,123.9504,0.9527
5,BayesianEnsemble_Reg,6.9822,193.0368,0.9535,5.9924,123.9504,0.9527
3,GradientBoostingRegressor,4.7413,613.6124,0.8522,3.5031,311.0892,0.8812
2,RandomForestRegressor,4.1803,751.8435,0.8189,2.4214,375.4808,0.8566
1,DecisionTreeRegressor,3.7208,914.1234,0.7798,1.9286,457.2273,0.8254
4,KNNRegressor,23.3351,1484.3049,0.6424,20.3026,1024.6948,0.6088


Unnamed: 0,Model,Val_Accuracy,Val_Precision,Val_Recall,Val_F1,Val_ROC_AUC,Test_Accuracy,Test_Precision,Test_Recall,Test_F1,Test_ROC_AUC
1,DecisionTree,0.987,1.0,0.9677,0.9836,0.9839,0.974,1.0,0.954,0.9765,0.977
2,RandomForest,0.974,1.0,0.9355,0.9667,0.9993,0.9675,1.0,0.9425,0.9704,1.0
3,GradientBoosting,0.9675,0.9524,0.9677,0.96,0.9814,0.961,0.988,0.9425,0.9647,0.9876
5,SVC,0.9156,0.8451,0.9677,0.9023,0.977,0.9351,0.9326,0.954,0.9432,0.9775
4,KNN,0.8571,0.803,0.8548,0.8281,0.9577,0.8442,0.8706,0.8506,0.8605,0.9493
0,LogisticRegression,0.8312,0.7308,0.9194,0.8143,0.9111,0.8247,0.8333,0.8621,0.8475,0.9039


In [19]:
reg_table.to_html("regression_results.html", index=False)
clf_table.to_html("classification_results.html", index=False)


In [20]:
reg_results_df.head()


Unnamed: 0,Model,Val_MAE,Val_MSE,Val_R2,Test_MAE,Test_MSE,Test_R2
0,LinearRegression,6.982202,193.036769,0.953499,5.99239,123.950448,0.952678
1,DecisionTreeRegressor,3.720779,914.123377,0.779794,1.928571,457.227273,0.82544
2,RandomForestRegressor,4.180325,751.843538,0.818886,2.421364,375.480768,0.856649
3,GradientBoostingRegressor,4.74128,613.612434,0.852185,3.503054,311.089186,0.881232
4,KNNRegressor,23.335065,1484.304935,0.642442,20.302597,1024.694805,0.608792


In [21]:
reg_table = (
    reg_results_df
    .copy()
    .round(4)
    .sort_values("Val_MSE")
)
display(reg_table)


Unnamed: 0,Model,Val_MAE,Val_MSE,Val_R2,Test_MAE,Test_MSE,Test_R2
0,LinearRegression,6.9822,193.0368,0.9535,5.9924,123.9504,0.9527
5,BayesianEnsemble_Reg,6.9822,193.0368,0.9535,5.9924,123.9504,0.9527
3,GradientBoostingRegressor,4.7413,613.6124,0.8522,3.5031,311.0892,0.8812
2,RandomForestRegressor,4.1803,751.8435,0.8189,2.4214,375.4808,0.8566
1,DecisionTreeRegressor,3.7208,914.1234,0.7798,1.9286,457.2273,0.8254
4,KNNRegressor,23.3351,1484.3049,0.6424,20.3026,1024.6948,0.6088
