In [None]:
# Overfitting check.
# For every tuned model: report 5-fold cross-validated RMSE on the training
# split, then compare RMSE on the training split against the validation split.
# NOTE: this cell reads `best_models`, `model_rmse`, `X_train`/`y_train`,
# `X_valid`/`y_valid` and the numpy/sklearn imports from the kernel; they must
# already exist (the training cell creates them) before this cell is run.
OVERFIT_MARGIN = 0.5  # validation RMSE may exceed training RMSE by at most this much


def _rmse(y_true, y_pred):
    """Root-mean-squared error between true targets and predictions."""
    return np.sqrt(mean_squared_error(y_true, y_pred))


for model_type, best_model in best_models.items():
    # The scorer returns negated RMSE (higher is better), so flip the sign.
    scores = -cross_val_score(
        best_model,
        X_train,
        y_train,
        cv=5,
        scoring="neg_root_mean_squared_error",
        n_jobs=-1,
    )

    print(f"\n🔹 {model_type} - Cross-Validation Scores for each fold:")
    for fold, score in enumerate(scores, start=1):
        print(f"   Fold {fold}: {score:.4f}")

    print(f"   Mean RMSE: {np.mean(scores):.4f}")
    print(f"   Std Dev RMSE: {np.std(scores):.4f}")

    # Errors of the already-fitted model on both splits.
    y_train_pred = best_model.predict(X_train)
    y_valid_pred = best_model.predict(X_valid)
    train_rmse = _rmse(y_train, y_train_pred)
    valid_rmse = _rmse(y_valid, y_valid_pred)
    model_rmse[model_type] = valid_rmse

    print(f"\n🔹 {model_type} - RMSE:")
    print(f"   Training: {train_rmse:.4f}")
    print(f"   Validation: {valid_rmse:.4f}")

    # Flag models whose training error is better than validation error by
    # more than the allowed margin.
    if train_rmse < valid_rmse - OVERFIT_MARGIN:
        print(f"   ⚠️ Overfitting detected!")

print("\n✅ Model evaluation completed!")


🔹 RandomForest - Cross-Validation Scores for each fold:
   Fold 1: 3.6087
   Fold 2: 3.6142
   Fold 3: 7.9325
   Fold 4: 3.5038
   Fold 5: 2.8360
   Mean RMSE: 4.2990
   Std Dev RMSE: 1.8396

🔹 RandomForest - RMSE:
   Training: 2.2522
   Validation: 3.3317
   ⚠️ Overfitting detected!

🔹 XGBoost - Cross-Validation Scores for each fold:
   Fold 1: 2.6608
   Fold 2: 2.9765
   Fold 3: 7.1679
   Fold 4: 3.0604
   Fold 5: 3.1611
   Mean RMSE: 3.8053
   Std Dev RMSE: 1.6896

🔹 XGBoost - RMSE:
   Training: 0.8147
   Validation: 3.5599
   ⚠️ Overfitting detected!

🔹 LightGBM - Cross-Validation Scores for each fold:
   Fold 1: 3.8977
   Fold 2: 3.6099
   Fold 3: 9.5952
   Fold 4: 3.7956
   Fold 5: 3.5085
   Mean RMSE: 4.8814
   Std Dev RMSE: 2.3609

🔹 LightGBM - RMSE:
   Training: 2.2944
   Validation: 3.5820
   ⚠️ Overfitting detected!

🔹 GradientBoosting - Cross-Validation Scores for each fold:
   Fold 1: 3.0181
   Fold 2: 3.4260
   Fold 3: 11.7643
   Fold 4: 3.0790
   Fold 5: 2.5210
   Mean 

In [None]:
#best till now 1.8
from pathlib import Path

import joblib
import numpy as np
import optuna
import pandas as pd
from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from xgboost import XGBRegressor

# Load datasets. The directory is named once so it can be changed in a single place.
DATA_DIR = Path(r"C:\Users\Ayush Pandita\OneDrive\Desktop\YR 3 SEM 6\AOMLM1\M1")
train_df = pd.read_csv(DATA_DIR / "train.csv")
test_df = pd.read_csv(DATA_DIR / "test.csv")

# Separate the target from the features and remove 'uid', which is just an
# identifier. The test uids are kept for the submission file.
y = train_df.pop("output_electricity_generation")
train_df = train_df.drop(columns=["uid"])
test_uid = test_df.pop("uid")

# Encode categorical column 'day'.
# The encoder is fitted on the labels of BOTH files: LabelEncoder.transform
# raises ValueError on labels it has not seen, which would happen for any day
# value (including the "Unknown" placeholder) that occurs only in the test set.
# Only the label vocabulary is shared here, no target information.
train_df["day"] = train_df["day"].fillna("Unknown")
test_df["day"] = test_df["day"].fillna("Unknown")
le = LabelEncoder()
le.fit(pd.concat([train_df["day"], test_df["day"]], ignore_index=True))
train_df["day"] = le.transform(train_df["day"])
test_df["day"] = le.transform(test_df["day"])

# Split training data BEFORE fitting any statistics-based transformer, so the
# validation rows cannot influence the imputation medians or the scaling
# mean/variance (which would make validation scores optimistic).
X_train, X_valid, y_train, y_valid = train_test_split(train_df, y, test_size=0.3, random_state=42)

# Median imputation followed by standard scaling, both fitted on X_train only.
imputer = SimpleImputer(strategy="median")
scaler = StandardScaler()
scaler.fit(imputer.fit_transform(X_train))


def _preprocess(frame):
    """Impute and scale `frame` with the fitted transformers, keeping its columns and index."""
    values = scaler.transform(imputer.transform(frame))
    return pd.DataFrame(values, columns=frame.columns, index=frame.index)


X_train = _preprocess(X_train)
X_valid = _preprocess(X_valid)
# Keep `train_df` / `test_df` as fully preprocessed frames, as before, but now
# using statistics learned from the training split only.
train_df = _preprocess(train_df)
test_df = _preprocess(test_df)

# Define model mappings: display name -> estimator class
model_dict = {
    "RandomForest": RandomForestRegressor,
    "XGBoost": XGBRegressor,
    "LightGBM": LGBMRegressor,
    "GradientBoosting": GradientBoostingRegressor,
    "CatBoost": CatBoostRegressor,
}

# Define hyperparameter optimization using Optuna
def objective(trial, model_type):
    """Optuna objective: train one candidate model and return its validation RMSE.

    Samples a hyperparameter set for ``model_type`` from ``trial``, fits the
    corresponding estimator from the module-level ``model_dict`` on
    ``X_train``/``y_train`` and scores it on ``X_valid``/``y_valid``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample hyperparameters (``suggest_*`` methods).
    model_type : str
        Key of ``model_dict`` selecting the estimator and its search space.

    Returns
    -------
    float
        Root-mean-squared error on the validation split (lower is better).

    Raises
    ------
    KeyError
        If ``model_type`` is not a key of ``model_dict``.
    ValueError
        If ``model_type`` is in ``model_dict`` but has no search space here.
    """
    # Look the estimator up first so an unknown name fails before any sampling.
    model_class = model_dict[model_type]

    if model_type == "RandomForest":
        params = {
            "n_estimators": trial.suggest_int("n_estimators", 100, 1500, step=100),
            "max_depth": trial.suggest_int("max_depth", 5, 50),
            "min_samples_split": trial.suggest_int("min_samples_split", 2, 10),
            "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 5),
            "max_features": trial.suggest_categorical("max_features", ["sqrt", "log2"]),
        }
    elif model_type in ["XGBoost", "LightGBM"]:
        params = {
            "n_estimators": trial.suggest_int("n_estimators", 100, 1000, step=50),
            "max_depth": trial.suggest_int("max_depth", 3, 15),
            "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
            "subsample": trial.suggest_float("subsample", 0.5, 1.0),
            "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
        }
        if model_type == "LightGBM":
            # LightGBM ignores `subsample` unless bagging is enabled through
            # `subsample_freq` > 0 (default 0), so the tuned value would be a
            # no-op. The fixed value is registered through the trial so that
            # it also appears in `study.best_params` and any model rebuilt
            # from those params matches the one evaluated here.
            params["subsample_freq"] = trial.suggest_int("subsample_freq", 1, 1)
    elif model_type == "GradientBoosting":
        params = {
            "n_estimators": trial.suggest_int("n_estimators", 100, 1000, step=50),
            "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
            "max_depth": trial.suggest_int("max_depth", 3, 15),
            "min_samples_split": trial.suggest_int("min_samples_split", 2, 10),
        }
    elif model_type == "CatBoost":
        params = {
            "n_estimators": trial.suggest_int("n_estimators", 100, 1000, step=50),
            "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
            "depth": trial.suggest_int("depth", 4, 12),
            "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 1.0, 10.0),
        }
    else:
        # Registered in model_dict but no search space defined above.
        raise ValueError(f"No hyperparameter search space defined for {model_type!r}")

    model = model_class(**params, random_state=42)

    if model_type == "CatBoost":
        # Silence CatBoost's per-iteration training log.
        model.fit(X_train, y_train, verbose=0)
    else:
        model.fit(X_train, y_train)

    y_pred = model.predict(X_valid)
    rmse = np.sqrt(mean_squared_error(y_valid, y_pred))
    return rmse

# Optimize models
N_TRIALS = 10  # Optuna trials per model; adjust as needed
SEARCH_SEED = 42  # seed for the Optuna sampler so the search is reproducible

best_params = {}
best_models = {}
model_rmse = {}

for model_type in model_dict.keys():
    print(f"\n🔍 Optimizing {model_type}...")
    # Without a seeded sampler every run proposes different hyperparameters,
    # so "Restart & Run All" would not reproduce the reported best params.
    # TPESampler is Optuna's default sampler; only the seed is added.
    study = optuna.create_study(
        direction="minimize",
        sampler=optuna.samplers.TPESampler(seed=SEARCH_SEED),
    )
    # Bind the current model_type as a default argument so the callback does
    # not depend on the loop variable's later value.
    study.optimize(lambda trial, mt=model_type: objective(trial, mt), n_trials=N_TRIALS)

    best_params[model_type] = study.best_params
    print(f"✅ Best params for {model_type}: {study.best_params}")

    # Train best model with optimized parameters
    best_model = model_dict[model_type](**study.best_params, random_state=42)
    if model_type == "CatBoost":
        # Silence CatBoost's per-iteration training log.
        best_model.fit(X_train, y_train, verbose=0)
    else:
        best_model.fit(X_train, y_train)

    best_models[model_type] = best_model

# Model evaluation: per-fold cross-validated RMSE on the training split,
# followed by RMSE of the fitted model on the training and validation splits.
for model_type, best_model in best_models.items():
    # This scorer yields negated RMSE, so negate once to get positive errors.
    scores = -cross_val_score(
        best_model,
        X_train,
        y_train,
        cv=5,
        scoring="neg_root_mean_squared_error",
        n_jobs=-1,
    )

    print(f"\n🔹 {model_type} - Cross-Validation Scores for each fold:")
    for fold, score in enumerate(scores, start=1):
        print(f"   Fold {fold}: {score:.4f}")

    print(f"   Mean RMSE: {np.mean(scores):.4f}")
    print(f"   Std Dev RMSE: {np.std(scores):.4f}")

    # Predictions of the already-fitted model on both splits.
    y_train_pred = best_model.predict(X_train)
    y_valid_pred = best_model.predict(X_valid)

    train_mse = mean_squared_error(y_train, y_train_pred)
    valid_mse = mean_squared_error(y_valid, y_valid_pred)
    train_rmse = np.sqrt(train_mse)
    valid_rmse = np.sqrt(valid_mse)

    # The validation RMSE is what the later model selection compares.
    model_rmse[model_type] = valid_rmse

    print(f"\n🔹 {model_type} - RMSE:")
    print(f"   Training: {train_rmse:.4f}")
    print(f"   Validation: {valid_rmse:.4f}")

    # Save best models
    joblib.dump(best_model, f"{model_type}_best_model.pkl")

# Pick the model with the lowest validation RMSE recorded in `model_rmse`.
best_model_name, _ = min(model_rmse.items(), key=lambda item: item[1])
print(f"\n🏆 Best Model: {best_model_name} with RMSE {model_rmse[best_model_name]:.4f}")

# Predict on the test features with the winning model and write the
# submission file (one row per test uid).
winning_model = best_models[best_model_name]
predictions = winning_model.predict(test_df)
submission = pd.DataFrame(
    {
        'uid': test_uid,
        'output_electricity_generation': predictions,
    }
)
submission.to_csv(f"submission_{best_model_name}.csv", index=False)

print("\n✅ Model training and evaluation completed!")


[I 2025-02-09 12:40:44,937] A new study created in memory with name: no-name-8b6395ca-3f3a-4c31-a1ff-dac50f737b9f



🔍 Optimizing RandomForest...


[I 2025-02-09 12:41:37,021] Trial 0 finished with value: 3.5114918586617017 and parameters: {'n_estimators': 200, 'max_depth': 48, 'min_samples_split': 7, 'min_samples_leaf': 5, 'max_features': 'log2'}. Best is trial 0 with value: 3.5114918586617017.
[I 2025-02-09 12:45:27,351] Trial 1 finished with value: 3.1790434465681545 and parameters: {'n_estimators': 900, 'max_depth': 44, 'min_samples_split': 5, 'min_samples_leaf': 3, 'max_features': 'sqrt'}. Best is trial 1 with value: 3.1790434465681545.
[I 2025-02-09 12:48:16,241] Trial 2 finished with value: 3.7441918102709684 and parameters: {'n_estimators': 1300, 'max_depth': 12, 'min_samples_split': 7, 'min_samples_leaf': 1, 'max_features': 'log2'}. Best is trial 1 with value: 3.1790434465681545.
[I 2025-02-09 12:51:16,884] Trial 3 finished with value: 3.272393535629359 and parameters: {'n_estimators': 1100, 'max_depth': 16, 'min_samples_split': 6, 'min_samples_leaf': 2, 'max_features': 'sqrt'}. Best is trial 1 with value: 3.1790434465681

✅ Best params for RandomForest: {'n_estimators': 500, 'max_depth': 41, 'min_samples_split': 3, 'min_samples_leaf': 1, 'max_features': 'log2'}


[I 2025-02-09 13:08:35,364] A new study created in memory with name: no-name-d9e8aebb-9666-467b-817b-16d4ddda161a



🔍 Optimizing XGBoost...


[I 2025-02-09 13:08:38,970] Trial 0 finished with value: 4.151409979408958 and parameters: {'n_estimators': 350, 'max_depth': 9, 'learning_rate': 0.013088745642507327, 'subsample': 0.9795861925304128, 'colsample_bytree': 0.6488622273295308}. Best is trial 0 with value: 4.151409979408958.
[I 2025-02-09 13:09:10,882] Trial 1 finished with value: 2.951173690336279 and parameters: {'n_estimators': 700, 'max_depth': 13, 'learning_rate': 0.046918369164281395, 'subsample': 0.7332403069345503, 'colsample_bytree': 0.8003748732457715}. Best is trial 1 with value: 2.951173690336279.
[I 2025-02-09 13:09:15,517] Trial 2 finished with value: 3.5681339843139077 and parameters: {'n_estimators': 950, 'max_depth': 5, 'learning_rate': 0.08565178392346952, 'subsample': 0.5215497970057072, 'colsample_bytree': 0.6841127377529034}. Best is trial 1 with value: 2.951173690336279.
[I 2025-02-09 13:09:22,915] Trial 3 finished with value: 4.105363380934197 and parameters: {'n_estimators': 1000, 'max_depth': 7, 'l

✅ Best params for XGBoost: {'n_estimators': 700, 'max_depth': 13, 'learning_rate': 0.046918369164281395, 'subsample': 0.7332403069345503, 'colsample_bytree': 0.8003748732457715}


[I 2025-02-09 13:10:32,405] A new study created in memory with name: no-name-3d61c8c7-386a-4dfa-98d7-b7efaa29527d



🔍 Optimizing LightGBM...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002054 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2385
[LightGBM] [Info] Number of data points in the train set: 35280, number of used features: 12
[LightGBM] [Info] Start training from score 832.333268


[I 2025-02-09 13:10:33,189] Trial 0 finished with value: 10.49538387618337 and parameters: {'n_estimators': 250, 'max_depth': 9, 'learning_rate': 0.011805199235543707, 'subsample': 0.6496422719493807, 'colsample_bytree': 0.5785583602440723}. Best is trial 0 with value: 10.49538387618337.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003463 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2385
[LightGBM] [Info] Number of data points in the train set: 35280, number of used features: 12
[LightGBM] [Info] Start training from score 832.333268


[I 2025-02-09 13:10:34,041] Trial 1 finished with value: 3.631580298316197 and parameters: {'n_estimators': 350, 'max_depth': 9, 'learning_rate': 0.1278026155673484, 'subsample': 0.5312823695541172, 'colsample_bytree': 0.7041171926669108}. Best is trial 1 with value: 3.631580298316197.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000920 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2385
[LightGBM] [Info] Number of data points in the train set: 35280, number of used features: 12
[LightGBM] [Info] Start training from score 832.333268


[I 2025-02-09 13:10:35,739] Trial 2 finished with value: 3.4532766421648566 and parameters: {'n_estimators': 950, 'max_depth': 4, 'learning_rate': 0.077001323068476, 'subsample': 0.7272715788734312, 'colsample_bytree': 0.9649642267230222}. Best is trial 2 with value: 3.4532766421648566.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002113 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2385
[LightGBM] [Info] Number of data points in the train set: 35280, number of used features: 12
[LightGBM] [Info] Start training from score 832.333268


[I 2025-02-09 13:10:36,432] Trial 3 finished with value: 3.6404985922637625 and parameters: {'n_estimators': 300, 'max_depth': 11, 'learning_rate': 0.060158544427364374, 'subsample': 0.9191359817353719, 'colsample_bytree': 0.6325772896570988}. Best is trial 2 with value: 3.4532766421648566.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002423 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2385
[LightGBM] [Info] Number of data points in the train set: 35280, number of used features: 12
[LightGBM] [Info] Start training from score 832.333268


[I 2025-02-09 13:10:37,617] Trial 4 finished with value: 5.125616878653271 and parameters: {'n_estimators': 400, 'max_depth': 15, 'learning_rate': 0.011091131347591945, 'subsample': 0.8378899492395326, 'colsample_bytree': 0.7365510158911401}. Best is trial 2 with value: 3.4532766421648566.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001924 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2385
[LightGBM] [Info] Number of data points in the train set: 35280, number of used features: 12
[LightGBM] [Info] Start training from score 832.333268


[I 2025-02-09 13:10:38,855] Trial 5 finished with value: 3.8043234330362568 and parameters: {'n_estimators': 650, 'max_depth': 10, 'learning_rate': 0.14514852547759488, 'subsample': 0.8954368123897141, 'colsample_bytree': 0.5494220832628092}. Best is trial 2 with value: 3.4532766421648566.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003198 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2385
[LightGBM] [Info] Number of data points in the train set: 35280, number of used features: 12
[LightGBM] [Info] Start training from score 832.333268


[I 2025-02-09 13:10:39,964] Trial 6 finished with value: 3.268614701540949 and parameters: {'n_estimators': 500, 'max_depth': 11, 'learning_rate': 0.11464977643910952, 'subsample': 0.5477839034073746, 'colsample_bytree': 0.7171640000215329}. Best is trial 6 with value: 3.268614701540949.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002124 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2385
[LightGBM] [Info] Number of data points in the train set: 35280, number of used features: 12
[LightGBM] [Info] Start training from score 832.333268


[I 2025-02-09 13:10:42,521] Trial 7 finished with value: 4.097816555545008 and parameters: {'n_estimators': 950, 'max_depth': 8, 'learning_rate': 0.011457942337368134, 'subsample': 0.7439036436816231, 'colsample_bytree': 0.9014753555955672}. Best is trial 6 with value: 3.268614701540949.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003491 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2385
[LightGBM] [Info] Number of data points in the train set: 35280, number of used features: 12
[LightGBM] [Info] Start training from score 832.333268


[I 2025-02-09 13:10:44,685] Trial 8 finished with value: 3.179164803704245 and parameters: {'n_estimators': 1000, 'max_depth': 12, 'learning_rate': 0.20568752568338688, 'subsample': 0.5118196188442821, 'colsample_bytree': 0.8217539207569675}. Best is trial 8 with value: 3.179164803704245.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002328 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2385
[LightGBM] [Info] Number of data points in the train set: 35280, number of used features: 12
[LightGBM] [Info] Start training from score 832.333268


[I 2025-02-09 13:10:46,177] Trial 9 finished with value: 4.468761211904513 and parameters: {'n_estimators': 600, 'max_depth': 5, 'learning_rate': 0.025074259700154203, 'subsample': 0.7926627482581106, 'colsample_bytree': 0.9180099951043781}. Best is trial 8 with value: 3.179164803704245.


✅ Best params for LightGBM: {'n_estimators': 1000, 'max_depth': 12, 'learning_rate': 0.20568752568338688, 'subsample': 0.5118196188442821, 'colsample_bytree': 0.8217539207569675}
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002549 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2385
[LightGBM] [Info] Number of data points in the train set: 35280, number of used features: 12
[LightGBM] [Info] Start training from score 832.333268


[I 2025-02-09 13:10:48,083] A new study created in memory with name: no-name-fcd48200-1c07-4761-8f3c-a3edd7df9a56



🔍 Optimizing GradientBoosting...


[I 2025-02-09 13:15:14,294] Trial 0 finished with value: 3.127142969859859 and parameters: {'n_estimators': 500, 'learning_rate': 0.027522794249439302, 'max_depth': 7, 'min_samples_split': 5}. Best is trial 0 with value: 3.127142969859859.
[I 2025-02-09 13:23:35,844] Trial 1 finished with value: 2.171810332394153 and parameters: {'n_estimators': 850, 'learning_rate': 0.19524239703981452, 'max_depth': 9, 'min_samples_split': 5}. Best is trial 1 with value: 2.171810332394153.
[I 2025-02-09 13:28:08,875] Trial 2 finished with value: 3.9097983724599144 and parameters: {'n_estimators': 600, 'learning_rate': 0.08603658430842517, 'max_depth': 7, 'min_samples_split': 3}. Best is trial 1 with value: 2.171810332394153.
[I 2025-02-09 13:29:29,007] Trial 3 finished with value: 4.58962598805335 and parameters: {'n_estimators': 300, 'learning_rate': 0.04907146495345831, 'max_depth': 4, 'min_samples_split': 9}. Best is trial 1 with value: 2.171810332394153.
[I 2025-02-09 13:32:14,668] Trial 4 finishe

✅ Best params for GradientBoosting: {'n_estimators': 1000, 'learning_rate': 0.01463774638712544, 'max_depth': 14, 'min_samples_split': 6}


[I 2025-02-09 14:29:30,625] A new study created in memory with name: no-name-29bec701-4e95-4d57-8ffa-9b2ba07d8887



🔍 Optimizing CatBoost...


[I 2025-02-09 14:29:34,982] Trial 0 finished with value: 5.091010470106603 and parameters: {'n_estimators': 250, 'learning_rate': 0.14025053311151797, 'depth': 8, 'l2_leaf_reg': 9.368038203046853}. Best is trial 0 with value: 5.091010470106603.
[I 2025-02-09 14:29:41,431] Trial 1 finished with value: 6.851753611781016 and parameters: {'n_estimators': 850, 'learning_rate': 0.013825592257571217, 'depth': 5, 'l2_leaf_reg': 7.897817288241968}. Best is trial 0 with value: 5.091010470106603.
[I 2025-02-09 14:29:51,414] Trial 2 finished with value: 6.273354785911684 and parameters: {'n_estimators': 650, 'learning_rate': 0.011313995684151836, 'depth': 8, 'l2_leaf_reg': 2.8141480125917204}. Best is trial 0 with value: 5.091010470106603.
[I 2025-02-09 14:30:09,310] Trial 3 finished with value: 4.312262688541774 and parameters: {'n_estimators': 800, 'learning_rate': 0.03758300232334099, 'depth': 9, 'l2_leaf_reg': 3.9883436520073596}. Best is trial 3 with value: 4.312262688541774.
[I 2025-02-09 14

✅ Best params for CatBoost: {'n_estimators': 800, 'learning_rate': 0.03758300232334099, 'depth': 9, 'l2_leaf_reg': 3.9883436520073596}

🔹 RandomForest - Cross-Validation Scores for each fold:
   Fold 1: 3.2016
   Fold 2: 3.0721
   Fold 3: 8.0480
   Fold 4: 2.9902
   Fold 5: 2.4806
   Mean RMSE: 3.9585
   Std Dev RMSE: 2.0593

🔹 RandomForest - RMSE:
   Training: 1.5224
   Validation: 2.8289

🔹 XGBoost - Cross-Validation Scores for each fold:
   Fold 1: 2.4055
   Fold 2: 3.1403
   Fold 3: 8.0603
   Fold 4: 3.1599
   Fold 5: 2.8123
   Mean RMSE: 3.9156
   Std Dev RMSE: 2.0904

🔹 XGBoost - RMSE:
   Training: 0.1521
   Validation: 2.9512

🔹 LightGBM - Cross-Validation Scores for each fold:
   Fold 1: 3.3853
   Fold 2: 3.6500
   Fold 3: 11.1793
   Fold 4: 3.6330
   Fold 5: 3.0276
   Mean RMSE: 4.9750
   Std Dev RMSE: 3.1103

🔹 LightGBM - RMSE:
   Training: 0.8599
   Validation: 3.1792

🔹 GradientBoosting - Cross-Validation Scores for each fold:
   Fold 1: 2.0243
   Fold 2: 5.0594
   Fold 3: 