# **Leave-One-Out Cross-Validation**

This script performs Lasso regression on spectral data to predict elemental concentrations. It uses Leave-One-Out Cross-Validation (LOOCV) to evaluate model performance. The dataset is split so that each sample takes a turn being the "left-out" test sample, while the remaining samples form the training set. This process repeats for every sample in the dataset.

In [None]:
element_concentrations = {
    'Al': [..........................], 👈#Input element concentrations here
    'Cu': [..........................],
    'Zn': [..........................],
    'Mn': [..........................],
    'Fe': [..........................],
    'Mg': [..........................],
    'Si': [..........................],
    'Ni': [..........................]
}

In [None]:
def perform_lasso_loocv_full_report(
    element,
    features,
    peak_max_df,
    element_concentrations=None,
    alpha=0.01,
    return_model=False
):
    """Evaluate a standardized Lasso model for one element with LOOCV.

    Every sample is held out once: a ``StandardScaler`` + ``Lasso`` pipeline
    is fitted on the remaining samples and used to predict the held-out one.
    The pooled predictions are scored, and a header line plus a one-line
    metric summary are printed.

    Parameters
    ----------
    element : str
        Key used to look up both ``peak_max_df`` and
        ``element_concentrations``.
    features : list of str
        Column labels of ``peak_max_df[element]`` used as predictors.
    peak_max_df : mapping
        ``peak_max_df[element]`` must be a DataFrame holding the ``features``
        columns; its index supplies the sample labels.
    element_concentrations : mapping, optional
        ``element_concentrations[element]`` is the sequence of target values,
        one per row of ``peak_max_df[element]`` and in the same row order.
    alpha : float, default 0.01
        Regularization strength passed to ``Lasso``.
    return_model : bool, default False
        When true, additionally fit a pipeline on all samples and return it.

    Returns
    -------
    summary_df : pandas.DataFrame
        One row with the columns ``RMSECV``, ``R2`` and ``MAE``.
    details_df : pandas.DataFrame
        One row per left-out sample: its label, the training labels, the true
        and predicted values, and the absolute / relative errors.
    final_model : sklearn.pipeline.Pipeline
        Only returned when ``return_model`` is true.

    Raises
    ------
    ValueError
        If no concentrations are available for ``element``, or if their
        number does not match the number of samples.
    """
    import numpy as np
    import pandas as pd

    if element_concentrations is None or element not in element_concentrations:
        raise ValueError(f"Missing concentration values for element '{element}'.")

    df = peak_max_df[element]
    X = df[features].values
    y = np.array(element_concentrations[element], dtype=float)
    sample_labels = df.index.tolist()

    # Targets are paired with samples purely by position, so a length mismatch
    # would either crash with an opaque IndexError or, worse, silently drop the
    # surplus values. Reject it up front.
    if y.ndim != 1 or y.shape[0] != X.shape[0]:
        raise ValueError(
            f"Element '{element}': expected {X.shape[0]} concentration values "
            f"(one per sample), got an array of shape {y.shape}."
        )

    # scikit-learn is loaded only after the inputs have been validated, so bad
    # arguments fail fast without paying for the import.
    from sklearn.linear_model import Lasso
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
    from sklearn.model_selection import LeaveOneOut

    def build_model():
        # The scaler lives inside the pipeline so it is re-fitted on each
        # training fold and never sees the held-out sample.
        return make_pipeline(StandardScaler(), Lasso(alpha=alpha, max_iter=10000))

    y_true_cv = []
    y_pred_cv = []
    detailed_records = []

    print(f"LOOCV performance for element '{element}' with Lasso (alpha={alpha}):")

    for train_idx, test_idx in LeaveOneOut().split(X):
        model = build_model()
        model.fit(X[train_idx], y[train_idx])

        # Leave-one-out test folds always contain exactly one sample.
        held_out = test_idx[0]
        y_true = y[held_out]
        y_pred = model.predict(X[test_idx])[0]

        y_true_cv.append(y_true)
        y_pred_cv.append(y_pred)

        abs_error = abs(y_true - y_pred)
        # Divide by the magnitude so the percentage is never negative; it is
        # undefined for a zero target.
        rel_error = abs_error / abs(y_true) * 100 if y_true != 0 else np.nan

        detailed_records.append({
            "Left-Out Sample": sample_labels[held_out],
            "Training Samples": [sample_labels[i] for i in train_idx],
            "True": round(y_true, 4),
            "Predicted": round(y_pred, 4),
            "Abs Error": round(abs_error, 4),
            "Rel Error (%)": round(rel_error, 2),
        })

    r2 = r2_score(y_true_cv, y_pred_cv)
    # `squared=False` was removed from mean_squared_error in scikit-learn 1.6;
    # taking the square root explicitly works on every version.
    rmse = float(np.sqrt(mean_squared_error(y_true_cv, y_pred_cv)))
    mae = mean_absolute_error(y_true_cv, y_pred_cv)

    print(f"  RMSECV={rmse:.4f}, R²={r2:.4f}, MAE={mae:.4f}")

    summary_df = pd.DataFrame([{
        "RMSECV": rmse,
        "R2": r2,
        "MAE": mae
    }])
    details_df = pd.DataFrame(detailed_records)

    if not return_model:
        return summary_df, details_df

    # The returned model is trained on every sample; the LOOCV metrics above
    # are the estimate of how it generalizes.
    final_model = build_model()
    final_model.fit(X, y)
    return summary_df, details_df, final_model


# Example usage: run the LOOCV report for Cu using three Cu wavelength
# features. Requires `peak_max_df` and `element_concentrations` to be defined
# by earlier cells.
cu_features = ["Cu 324.75 nm", "Cu 327.40 nm", "Cu 510.55 nm"]

summary_df, details_df = perform_lasso_loocv_full_report(
    "Cu",
    cu_features,
    peak_max_df,
    element_concentrations=element_concentrations,
    alpha=0.01,
    return_model=False,
)

# Aggregate metrics first, then the per-sample breakdown.
print("\nLOOCV summary:", summary_df, sep="\n")

import IPython.display as disp

print("\nDetailed predictions:")
report_columns = [
    "Left-Out Sample",
    "Training Samples",
    "True",
    "Predicted",
    "Abs Error",
    "Rel Error (%)",
]
disp.display(details_df[report_columns])