# **Elastic Net Regression**
<div style="margin-top:10px; text-align:justify">
This model combines Ridge and LASSO regression: it adds two penalties (L1 and L2) to the model. The L1 penalty helps remove unimportant variables, whereas the L2 penalty helps when variables are strongly correlated. Together these penalties shrink the coefficients, and some can become exactly zero, which often gives better predictions. <br><br>

⚙ Key parameters:<br><br>
- alpha: Controls the overall strength of regularization (how much you penalize large coefficients).

- l1_ratio: Controls the mix between L1 and L2 penalties:

    - l1_ratio=1 means pure Lasso (L1 only).

    - l1_ratio=0 means pure Ridge (L2 only).

    - Values in between blend both penalties (e.g., 0.1, 0.5, 0.9).

⚛ Workflow:
- Normalize the spectral data (mean = 0, variance = 1) using StandardScaler.

- Fit the ElasticNet regression model with specified alpha and l1_ratio.

- Predict concentrations.

- Calculate and visualize errors (absolute, relative) and model statistics (R², RMSE, MAE).

- Display results in an interactive plot and data table using Bokeh.

</div>

In [None]:
# Concentrations per element, keyed by element symbol. perform_elasticnet_with_table
# reads element_concentrations[element] as the regression target and plots it on
# the "Certified Concentration (%)" axis.
# NOTE: this cell is a fill-in template. Each dotted placeholder list must be
# replaced with real numbers, and the 👈 marker on the 'Al' line must be removed,
# before the cell can run.
# Presumably each list needs one value per sample, in the same order as the rows
# of the matching peak-intensity table - confirm against the data-loading cell.
element_concentrations = {
    'Al': [..........................], 👈#Input element concentrations here
    'Cu': [..........................],
    'Zn': [..........................],
    'Mn': [..........................],
    'Fe': [..........................],
    'Mg': [..........................],
    'Si': [..........................],
    'Ni': [..........................]
}

In [None]:
from sklearn.linear_model import ElasticNet
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, Label, DataTable, TableColumn, NumberFormatter, PreText
from bokeh.layouts import column
from bokeh.transform import factor_mark
from bokeh.io import output_file
import numpy as np
import pandas as pd

# Configure Bokeh to write its output to this HTML file; the show() call inside
# perform_elasticnet_with_table renders the layout through this setting.
output_file("bokeh_elasticnet.html")

# Sample color/marker mapping
def assign_sample_colors(sample_labels):
    """Return one ``(color, marker)`` tuple per entry of *sample_labels*.

    The four ``BAM-*`` reference samples are drawn in blue and the four
    ``ERM-*`` samples in red; within each group the samples are told apart
    by marker shape. Any label that is not one of the eight known names
    gets the fallback style ``("gray", "x")``.

    The result is a list in the same order as the input.
    """
    marker_cycle = ("circle", "square", "triangle", "diamond")
    color_groups = (
        ("blue", ("BAM-308", "BAM-311", "BAM-M308a", "BAM-M318")),
        ("red", ("ERM-EB313", "ERM-EB314a", "ERM-EB315a", "ERM-EB317")),
    )

    # Build the name -> (color, marker) lookup group by group.
    styles_by_sample = {}
    for group_color, group_names in color_groups:
        for sample_name, shape in zip(group_names, marker_cycle):
            styles_by_sample[sample_name] = (group_color, shape)

    fallback_style = ("gray", "x")
    return [styles_by_sample.get(name, fallback_style) for name in sample_labels]

def perform_elasticnet_with_table(
    element,
    features,
    peak_max_df,
    element_concentrations,
    alpha=0.01,
    l1_ratio=0.5,
    cap_width=0.003,
    return_model=False
):
    """Fit a standardized ElasticNet model for one element and display the results.

    The features are standardized with ``StandardScaler`` and regressed onto
    the element's concentrations with ``ElasticNet``. The model is then used
    to predict the same samples it was fitted on, so every reported error and
    statistic (R², RMSE, MAE, relative RMSE) is an in-sample figure.

    A Bokeh layout is shown containing a predicted-vs-certified scatter plot
    with error bars of ± the absolute error, a per-sample data table, and a
    one-paragraph text summary.

    Args:
        element: Key used to look up both ``peak_max_df`` and
            ``element_concentrations`` (e.g. ``"Cu"``).
        features: List of column names selected from ``peak_max_df[element]``
            as regressors; also joined with " + " in the plot title.
        peak_max_df: Container indexed by element whose entries are
            DataFrame-like (column selection, ``.values`` and ``.index`` are
            used). The index supplies the sample labels.
        element_concentrations: Mapping from element to a sequence of target
            concentrations, converted to a float array. It must contain one
            value per row of ``peak_max_df[element]``.
        alpha: Regularization strength passed to ``ElasticNet``.
        l1_ratio: L1/L2 mixing parameter passed to ``ElasticNet``.
        cap_width: Half-width, in x-axis units, of the horizontal caps drawn
            at both ends of each error bar.
        return_model: When true, also return the fitted pipeline.

    Returns:
        A ``pandas.DataFrame`` holding the plotted/tabulated columns, or a
        ``(DataFrame, fitted_pipeline)`` tuple when ``return_model`` is true.

    Raises:
        ValueError: If the concentration vector is not one-dimensional or its
            length differs from the number of feature rows.
    """
    df = peak_max_df[element]
    X = df[features].values
    y = np.array(element_concentrations[element], dtype=float)
    sample_labels = df.index.tolist()

    # Fail early with a message that names the element instead of letting the
    # mismatch surface from inside scikit-learn.
    if y.shape != (X.shape[0],):
        raise ValueError(
            f"element_concentrations[{element!r}] has shape {y.shape}, "
            f"expected ({X.shape[0]},) to match the rows of peak_max_df[{element!r}]"
        )

    model = make_pipeline(StandardScaler(), ElasticNet(alpha=alpha, l1_ratio=l1_ratio, max_iter=10000))
    model.fit(X, y)
    y_pred = model.predict(X)

    abs_error = np.abs(y_pred - y)
    # Relative error in percent; NaN where the certified value is zero. The
    # masked divide skips those entries, so no divide-by-zero warning is raised.
    rel_error = np.full_like(y, np.nan)
    np.divide(abs_error, y, out=rel_error, where=(y != 0))
    rel_error *= 100
    pred_error_str = [f"{pred:.2f} ± {err:.2f}" for pred, err in zip(y_pred, abs_error)]

    # Goodness-of-fit statistics, computed once and reused by label and summary.
    r2 = r2_score(y, y_pred)
    rmse = np.sqrt(mean_squared_error(y, y_pred))
    mae = mean_absolute_error(y, y_pred)
    rel_rmse = (rmse / np.mean(y)) * 100

    sample_colors, sample_markers = zip(*assign_sample_colors(sample_labels))
    source_data = {
        "x": y,
        "y": y_pred,
        "sample": sample_labels,
        "color": sample_colors,
        "marker": sample_markers,
        "lower": y_pred - abs_error,
        "upper": y_pred + abs_error,
        "x_left": y - cap_width,
        "x_right": y + cap_width,
        "Observed (%)": y,
        "Predicted (%)": y_pred,
        "Abs Deviation": abs_error,
        "Rel Deviation (%)": rel_error,
        "Predicted ± Error": pred_error_str
    }
    source = ColumnDataSource(data=source_data)

    # Common range of observed and predicted values, used for the 1:1 line
    # and for positioning the statistics label.
    min_val = float(min(y.min(), y_pred.min()))
    max_val = float(max(y.max(), y_pred.max()))

    p = figure(title=f"ElasticNet Regression for {element}\n{' + '.join(features)}",
               x_axis_label="Certified Concentration (%)",
               y_axis_label="Predicted Concentration (%)",
               width=980, height=600)

    p.scatter('x', 'y', source=source, size=16,
              color='color',
              marker=factor_mark('marker',
                                 markers=["circle", "square", "triangle", "diamond", "x"],
                                 factors=["circle", "square", "triangle", "diamond", "x"]),
              legend_field='sample')

    # Error bars: a vertical stem plus a horizontal cap at each end.
    p.segment(x0='x', y0='lower', x1='x', y1='upper', source=source, line_color="black", line_width=2)
    p.segment(x0='x_left', y0='lower', x1='x_right', y1='lower', source=source, line_color="black", line_width=2)
    p.segment(x0='x_left', y0='upper', x1='x_right', y1='upper', source=source, line_color="black", line_width=2)
    p.line([min_val, max_val], [min_val, max_val], line_dash='dashed', color='gray', legend_label="Perfect Fit")

    label = Label(
        x=max_val * 0.4,
        y=min_val * 1.05,
        text=f"R²: {r2:.3f}\nRMSE: {rmse:.3f}\nMAE: {mae:.3f}",
        text_font_size='14pt',
        background_fill_color='white',
        background_fill_alpha=0.7
    )
    p.add_layout(label)
    p.legend.visible = False

    columns = [
        TableColumn(field="sample", title="Sample"),
        TableColumn(field="Observed (%)", title="Observed", formatter=NumberFormatter(format="0.000")),
        TableColumn(field="Predicted (%)", title="Predicted", formatter=NumberFormatter(format="0.000")),
        TableColumn(field="Abs Deviation", title="Abs Error", formatter=NumberFormatter(format="0.000")),
        TableColumn(field="Rel Deviation (%)", title="Rel Error (%)", formatter=NumberFormatter(format="0.00")),
        TableColumn(field="Predicted ± Error", title="Predicted ± Error")
    ]
    data_table = DataTable(source=source, columns=columns, width=850, height=300)

    summary = PreText(text=f"ElasticNet for {element} using {len(features)} lines, α={alpha}, l1_ratio={l1_ratio}\n"
                           f"R²: {r2:.3f}, RMSE: {rmse:.4f}, "
                           f"MAE: {mae:.4f}, Relative RMSE: {rel_rmse:.2f}%",
                      width=850)

    show(column(p, data_table, summary))

    results = pd.DataFrame(source_data)
    if return_model:
        return results, model
    return results

# Example usage: fit Cu from three Cu emission lines and keep the fitted model.
# `peak_max_dict` is not defined in this cell; it has to exist already
# (presumably produced by an earlier peak-extraction cell - confirm).
results_elasticnet, model_elasticnet = perform_elasticnet_with_table(
    element="Cu",
    # 👈 It can be Mn, Mg, Si, or Zn (presumably with `element` and these
    # line names changed together - confirm against the available columns).
    features=["Cu 324.75 nm", "Cu 327.40 nm", "Cu 510.55 nm"],
    peak_max_df=peak_max_dict,  # your peak intensities DataFrame/dict
    element_concentrations=element_concentrations,
    alpha=0.01,
    l1_ratio=0.5,  # balance between Lasso (1) and Ridge (0)
    return_model=True
)