# **Leave-One-Out Cross-Validation**
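<div style="margin-top:10px; text-align:justify;">
This script performs nested Leave-One-Out Cross-Validation (LOOCV) to evaluate an ElasticNet regression model. It automatically tunes the ElasticNet hyperparameters (alpha and l1_ratio) within each outer fold, so the reported performance estimate is not biased by the tuning step. A second option further below runs a plain (non-nested) LOOCV with fixed, user-supplied hyperparameters.
</div>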

In [None]:
# Reference concentrations for each element, keyed by element symbol.
# Fill in each list with one value per sample; the functions below pair these
# values positionally with the rows of peak_max_df[element], so the order must
# match that DataFrame's index.
element_concentrations = {
    'Al': [],  # 👈 Input element concentrations here
    'Cu': [],
    'Zn': [],
    'Mn': [],
    'Fe': [],
    'Mg': [],
    'Si': [],
    'Ni': [],
}

In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import LeaveOneOut, GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import warnings
from sklearn.exceptions import ConvergenceWarning

# Suppress scikit-learn ConvergenceWarning messages for the whole session.
# NOTE(review): this filter is process-wide, so it also hides genuine
# non-convergence of the ElasticNet solver — confirm that is acceptable.
warnings.filterwarnings("ignore", category=ConvergenceWarning)

def nested_loocv_elasticnet(
    element,
    emission_lines,
    peak_max_df,
    element_concentrations=None,  # Manual target values
    sample_colors_fn=None,
    y_concentration=None,
):
    """Evaluate an ElasticNet model with nested leave-one-out cross-validation.

    The outer loop holds out one sample at a time. For each outer fold an
    inner leave-one-out grid search over ``alpha`` and ``l1_ratio`` is run on
    the remaining samples, and the best pipeline (StandardScaler + ElasticNet)
    predicts the held-out sample.

    Args:
        element: Key used to look up the element's DataFrame in
            ``peak_max_df`` (and its targets when ``element_concentrations``
            is a dict).
        emission_lines: List of column names of ``peak_max_df[element]`` to
            use as features.
        peak_max_df: Mapping from element to a DataFrame whose index holds the
            sample names and whose columns include ``emission_lines``.
        element_concentrations: Target values. Either a dict keyed by element
            (the entry for ``element`` is used) or a plain sequence with one
            value per sample.
        sample_colors_fn: Unused; kept for interface compatibility.
        y_concentration: Target values as a plain sequence, one per sample.
            Takes precedence over ``element_concentrations`` when given.

    Returns:
        Tuple ``(results_df, y_true_all, y_pred_all)``: a per-fold DataFrame
        (left-out sample, training samples, true/predicted value, absolute
        error, best hyperparameters) and the true/predicted values as arrays.

    Raises:
        ValueError: If no target values are supplied, if their number does not
            match the number of samples, or if fewer than three samples are
            available.
    """
    print("🚀 Starting nested LOOCV for ElasticNet")

    # Resolve the target vector from whichever argument the caller supplied.
    if y_concentration is not None:
        targets = y_concentration
    elif element_concentrations is None:
        raise ValueError(f"Missing concentration values for element '{element}'.")
    elif isinstance(element_concentrations, dict):
        if element not in element_concentrations:
            raise ValueError(f"Missing concentration values for element '{element}'.")
        targets = element_concentrations[element]
    else:
        targets = element_concentrations

    df = peak_max_df[element]
    X = df[emission_lines].values
    y = np.asarray(targets, dtype=float)
    sample_names = df.index.tolist()

    print("✅ Data shape:", X.shape, "| y:", y.shape)

    n_samples = len(X)
    if y.ndim != 1 or len(y) != n_samples:
        raise ValueError(
            f"Expected {n_samples} concentration values for element '{element}', "
            f"got an array of shape {y.shape}."
        )
    # The inner leave-one-out search runs on n_samples - 1 rows and itself
    # needs at least two of them.
    if n_samples < 3:
        raise ValueError(
            f"Nested LOOCV needs at least 3 samples, got {n_samples}."
        )

    param_grid = {
        "elasticnet__alpha": np.logspace(-4, 1, 10),
        "elasticnet__l1_ratio": [0.1, 0.3, 0.5, 0.7, 0.9, 1.0],
    }

    outer_cv = LeaveOneOut()
    y_pred_all = []
    y_true_all = []
    all_records = []

    for fold, (train_idx, test_idx) in enumerate(outer_cv.split(X), start=1):
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]

        test_sample_name = sample_names[test_idx[0]]
        train_sample_names = [sample_names[i] for i in train_idx]

        print(f"\n🔁 Fold {fold}/{n_samples}")
        print(f"🔹 Left-Out Sample: {test_sample_name}")
        print(f"🔸 Training Samples: {train_sample_names}")

        # Scaling lives inside the pipeline so it is re-fit on every inner
        # training split and never sees the held-out sample.
        pipe = make_pipeline(StandardScaler(), ElasticNet(max_iter=100000))
        search = GridSearchCV(
            pipe,
            param_grid,
            scoring='neg_mean_squared_error',
            cv=LeaveOneOut(),  # same splits as the former cv=len(X_train)
        )
        search.fit(X_train, y_train)

        y_true = float(y_test[0])
        y_pred = float(search.best_estimator_.predict(X_test)[0])

        y_pred_all.append(y_pred)
        y_true_all.append(y_true)

        all_records.append({
            "Left-Out Sample": test_sample_name,
            "Training Samples": train_sample_names,
            "True": round(y_true, 4),
            "Predicted": round(y_pred, 4),
            "Abs Error": round(abs(y_true - y_pred), 4),
            "Best Alpha": round(float(search.best_params_['elasticnet__alpha']), 6),
            "Best L1 Ratio": round(float(search.best_params_['elasticnet__l1_ratio']), 2),
        })

    y_pred_all = np.array(y_pred_all)
    y_true_all = np.array(y_true_all)

    r2 = r2_score(y_true_all, y_pred_all)
    # Take the square root explicitly: the `squared=False` keyword of
    # mean_squared_error is deprecated and absent from recent scikit-learn.
    rmse = float(np.sqrt(mean_squared_error(y_true_all, y_pred_all)))
    mae = mean_absolute_error(y_true_all, y_pred_all)

    print(f"\n📊 Final Evaluation for ElasticNet on {element}:")
    print(f"R²:   {r2:.3f}")
    print(f"RMSE: {rmse:.4f}")
    print(f"MAE:  {mae:.4f}")

    results_df = pd.DataFrame(all_records)
    return results_df, y_true_all, y_pred_all

# Manually entered Cu target values, passed below as the regression targets.
# NOTE(review): presumably one value per row of peak_max_df["Cu"], in index
# order — confirm against the data.
cu_concentration = [1.315, 4.653, 1.36, 0.0908, 0.0931, 2.08, 2.46, 1.77]

# Run the nested LOOCV for Cu using three Cu emission-line columns as features.
# `peak_max_df` is not defined in this cell; it must already exist in the
# notebook session.
results_df, y_true, y_pred = nested_loocv_elasticnet(
    element="Cu",
    emission_lines=["Cu 324.75 nm", "Cu 327.40 nm", "Cu 510.55 nm"],
    peak_max_df=peak_max_df,
    y_concentration=cu_concentration
)

print("\n✅ Full Fold Results:")
# Bare expression: shown as the cell's output when run in a notebook.
results_df


# **Second Option**

In [None]:
def perform_elasticnet_loocv_full_report(
    element,
    emission_lines,
    peak_max_df,
    element_concentrations=None,
    assign_sample_colors=None,
    cap_width=0.006,
    return_model=False,
    alpha=1.0,
    l1_ratio=0.5  # balance between L1 (lasso) and L2 (ridge)
):
    """Run leave-one-out cross-validation of a fixed-hyperparameter ElasticNet.

    Each sample is held out once; a StandardScaler + ElasticNet pipeline with
    the given ``alpha`` and ``l1_ratio`` is fit on the rest and predicts the
    held-out sample. No hyperparameter tuning is performed.

    Args:
        element: Key into ``peak_max_df`` and ``element_concentrations``.
        emission_lines: List of column names of ``peak_max_df[element]`` to
            use as features.
        peak_max_df: Mapping from element to a DataFrame whose index holds the
            sample labels and whose columns include ``emission_lines``.
        element_concentrations: Dict mapping element to its target values, one
            per sample.
        assign_sample_colors: Unused; kept for interface compatibility.
        cap_width: Unused; kept for interface compatibility.
        return_model: When true, also fit a model on all samples and return it.
        alpha: ElasticNet regularisation strength.
        l1_ratio: ElasticNet mix between the L1 and L2 penalties.

    Returns:
        ``(summary_df, details_df)``, or ``(summary_df, details_df,
        final_model)`` when ``return_model`` is true. ``summary_df`` has one
        row with RMSECV, R2 and MAE; ``details_df`` has one row per held-out
        sample.

    Raises:
        ValueError: If the element has no concentration values, if their
            number does not match the number of samples, or if fewer than two
            samples are available.
    """
    import numpy as np
    import pandas as pd

    if element_concentrations is None or element not in element_concentrations:
        raise ValueError(f"Missing concentration values for element '{element}'.")

    df = peak_max_df[element]
    X = df[emission_lines].values
    y = np.array(element_concentrations[element], dtype=float)
    sample_labels = df.index.tolist()

    n_samples = len(X)
    if y.ndim != 1 or len(y) != n_samples:
        raise ValueError(
            f"Expected {n_samples} concentration values for element '{element}', "
            f"got an array of shape {y.shape}."
        )
    if n_samples < 2:
        raise ValueError(f"LOOCV needs at least 2 samples, got {n_samples}.")

    # scikit-learn is imported only after the cheap input checks have passed.
    from sklearn.linear_model import ElasticNet
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
    from sklearn.model_selection import LeaveOneOut

    def _build_model():
        # Scaling is part of the pipeline so it is re-fit on each training split.
        return make_pipeline(
            StandardScaler(),
            ElasticNet(alpha=alpha, l1_ratio=l1_ratio, max_iter=10000),
        )

    y_true_cv = []
    y_pred_cv = []
    detailed_records = []

    print(f"LOOCV performance for element '{element}' with ElasticNet (alpha={alpha}, l1_ratio={l1_ratio}):")

    for train_idx, test_idx in LeaveOneOut().split(X):
        test_sample = sample_labels[test_idx[0]]
        train_samples = [sample_labels[i] for i in train_idx]

        model = _build_model()
        model.fit(X[train_idx], y[train_idx])
        y_pred = float(model.predict(X[test_idx])[0])
        y_true = float(y[test_idx][0])

        y_true_cv.append(y_true)
        y_pred_cv.append(y_pred)

        abs_error = abs(y_true - y_pred)
        # Relative error is undefined for a zero reference value.
        rel_error = abs_error / y_true * 100 if y_true != 0 else np.nan

        detailed_records.append({
            "Left-Out Sample": test_sample,
            "Training Samples": train_samples,
            "True": round(y_true, 4),
            "Predicted": round(y_pred, 4),
            "Abs Error": round(abs_error, 4),
            "Rel Error (%)": round(rel_error, 2)
        })

    r2 = r2_score(y_true_cv, y_pred_cv)
    # Take the square root explicitly: the `squared=False` keyword of
    # mean_squared_error is deprecated and absent from recent scikit-learn.
    rmse = float(np.sqrt(mean_squared_error(y_true_cv, y_pred_cv)))
    mae = mean_absolute_error(y_true_cv, y_pred_cv)

    print(f"RMSECV={rmse:.4f}, R²={r2:.4f}, MAE={mae:.4f}")

    summary_df = pd.DataFrame([{
        "RMSECV": rmse,
        "R2": r2,
        "MAE": mae
    }])
    details_df = pd.DataFrame(detailed_records)

    if not return_model:
        return summary_df, details_df

    # Final model is trained on every sample, using the same fixed settings.
    final_model = _build_model()
    final_model.fit(X, y)
    return summary_df, details_df, final_model

# Run the fixed-hyperparameter LOOCV for Mn using two Mn emission-line columns.
# `peak_max_df` and `element_concentrations` are not defined in this cell; they
# must already exist in the notebook session.
summary_df, details_df = perform_elasticnet_loocv_full_report(
    element="Mn",
    emission_lines=["Mn 403.08 nm", "Mn 403.31 nm"],
    peak_max_df=peak_max_df,
    element_concentrations=element_concentrations,
    alpha=0.1,
    l1_ratio=0.7,
    return_model=False  # two-tuple return: no final model is fitted
)

# Overall metrics first, then the per-sample breakdown.
print(summary_df)
print(details_df)
