# **Peak Intensity Extraction for Multiple Elements**
<div style="margin-top:10px; text-align:justify;">
This script identifies and extracts maximum intensity values at known emission peaks for various key elements like Cu, Mn, Mg, Si, Zn and others from SNV/MSC/TSN-normalized spectra.

<br>🧪 How it works:<br>

- Define emission lines:

    - Each element (like Cu, Zn, Mn) has known wavelength peaks where it emits strongly.

    - These are stored in dictionaries keyed by element name, with a wavelength window (min, max) for each peak.

- Find max intensity:

    - For each sample spectrum, the script searches within the defined wavelength window (peak region).

    - It then finds the maximum intensity value in that region for each element’s peak.

- Return as a structured DataFrame:

    - One DataFrame per element with:

        - Rows = sample names

        - Columns = specific peaks

    - All element DataFrames are combined into a single large table for analysis or modeling.

</div>

In [1]:
# Emission lines per element. Every entry is a (Peak_Pos, Peak_Min, Peak_Max)
# tuple: the nominal line position followed by the lower and upper bound of
# the wavelength window searched around it.
emission_lines = {
    'Cu': [
        (324.75, 324.68, 324.80),
        (327.40, 327.30, 327.50),
    ],
    'Mn': [
        (403.08, 403.00, 403.20),
        (403.31, 403.25, 403.40),
    ],
    'Zn': [
        (330.29, 330.20, 330.40),
        (334.50, 334.30, 334.60),
    ],
    'Al': [
        (308.21, 308.00, 308.35),
        (309.28, 309.00, 309.40),
    ],
    'Si': [
        (251.61, 251.50, 251.70),
        (288.16, 288.00, 288.30),
    ],
    'Fe': [
        (373.71, 373.65, 373.80),
    ],
    'Mg': [
        (285.21, 285.00, 285.30),
    ],
    'Ni': [
        (341.35, 341.00, 341.55),
    ],
    'Pb': [
        (261.42, 261.30, 261.50),
    ],
}

In [2]:
def get_peak_max(df, element, peak_pos, peak_min, peak_max):
    """Return the maximum intensity of every sample inside one peak window.

    Args:
        df: DataFrame with a 'wavelength' column plus one intensity column
            per sample.
        element: Element label stored under the 'Element' key of the result.
        peak_pos: Nominal peak position stored under 'Peak_Pos (nm)'.
        peak_min: Lower bound of the wavelength window (inclusive).
        peak_max: Upper bound of the wavelength window (inclusive).

    Returns:
        dict mapping each sample column name to its maximum value within the
        window, plus the 'Element' and 'Peak_Pos (nm)' entries.
    """
    roi_df = df[(df['wavelength'] >= peak_min) & (df['wavelength'] <= peak_max)]
    # Select sample columns by name rather than by position, so the result is
    # correct even when 'wavelength' is not the first column.
    sample_cols = [col for col in df.columns if col != 'wavelength']
    max_vals = {col: roi_df[col].max() for col in sample_cols}
    max_vals.update({'Element': element, 'Peak_Pos (nm)': peak_pos})
    return max_vals

In [None]:
# ✅ Peak selection per element.
# Maps element -> {peak label: (window_min, window_max)}. The label carries
# the nominal line position; the tuple bounds the wavelength window that is
# searched for the maximum intensity. Insertion order determines the column
# order of the extracted tables.
# NOTE(review): the 'Mn 403.44 nm' window (403.20-403.55) overlaps the
# 'Mn 403.08 nm' and 'Mn 403.31 nm' windows, so these three columns may report
# the same maximum -- confirm this is intended.
peak_definitions = {
    'Cu': {
        'Cu 324.75 nm': (324.50, 324.90),
        'Cu 327.40 nm': (327.20, 327.60),
        'Cu 510.55 nm': (510.30, 510.65),
    },
    'Si': {
        'Si 251.61 nm': (251.50, 251.80),
        'Si 252.85 nm': (252.70, 252.95),
        'Si 288.16 nm': (288.10, 288.30),
        'Si 390.55 nm': (390.45, 390.65),
    },
    'Mn': {
        'Mn 403.08 nm': (403.00, 403.20),
        'Mn 403.31 nm': (403.25, 403.40),
        'Mn 475.40 nm': (475.00, 475.65),
        'Mn 478.34 nm': (478.00, 478.55),
        'Mn 403.44 nm': (403.20, 403.55),
    },
    'Mg': {
        'Mg 279.55 nm': (279.30, 279.65),
        'Mg 285.21 nm': (285.00, 285.30),
        'Mg 383.23 nm': (383.10, 383.30),
        'Mg 383.83 nm': (383.70, 383.90),
        'Mg 518.36 nm': (518.20, 518.45),
    },
    'Fe': {
        'Fe 371.99 nm': (371.90, 372.05),
        'Fe 373.71 nm': (373.65, 373.80),
        'Fe 374.83 nm': (374.75, 374.95),
        'Fe 375.82 nm': (375.75, 375.95),
    },
    'Zn': {
        'Zn 213.86 nm': (213.70, 213.95),
        'Zn 330.29 nm': (330.10, 330.45),
        'Zn 334.50 nm': (334.00, 334.65),
        'Zn 468.01 nm': (467.90, 468.30),
        'Zn 472.22 nm': (472.00, 472.40),
        'Zn 481.05 nm': (480.95, 481.30),
    },
    'Al': {
        'Al 308.21 nm': (308.00, 308.35),
        'Al 309.28 nm': (309.00, 309.40),
        'Al 394.40 nm': (394.00, 394.60),
        'Al 396.15 nm': (396.00, 396.40),
    },
    'Ni': {
        'Ni 341.35 nm': (341.00, 341.55),
        'Ni 345.29 nm': (345.00, 345.45),
        'Ni 351.98 nm': (351.80, 352.00),
        'Ni 361.94 nm': (361.80, 362.00),
    },
    'Pb': {
        'Pb 261.42 nm': (261.30, 261.45),
    },
}

def extract_peak_intensities(df, peak_defs=None):
    """Extract the maximum intensity of every sample at every defined peak.

    Args:
        df: DataFrame with a "wavelength" column plus one intensity column
            per sample.
        peak_defs: Optional mapping of element -> {peak name: (min, max)}
            giving the inclusive wavelength window of each peak. Defaults to
            the module-level ``peak_definitions``.

    Returns:
        dict mapping each element to a DataFrame whose rows are the sample
        names and whose columns are that element's peaks. A peak whose window
        contains no wavelength samples is filled with NaN.
    """
    if peak_defs is None:
        peak_defs = peak_definitions

    sample_names = [col for col in df.columns if col != "wavelength"]
    wavelengths = df["wavelength"]

    peak_max_dict = {}
    for element, peaks in peak_defs.items():
        peak_columns = {}
        for peak_name, (peak_min, peak_max) in peaks.items():
            # The window mask depends only on the wavelength axis, so build it
            # once per peak instead of once per sample.
            in_window = (wavelengths >= peak_min) & (wavelengths <= peak_max)
            if in_window.any():
                peak_columns[peak_name] = [
                    df.loc[in_window, sample].max() for sample in sample_names
                ]
            else:
                # No wavelength falls inside this window.
                peak_columns[peak_name] = [np.nan] * len(sample_names)
        peak_max_dict[element] = pd.DataFrame(peak_columns, index=sample_names)

    return peak_max_dict

def combine_peak_max_df(peak_max_dict):
    """Merge per-element peak tables into one wide DataFrame.

    Every column is renamed to "<element>_<original column>" and the tables
    are joined side by side in the iteration order of ``peak_max_dict``. The
    input DataFrames are left unmodified.

    Returns:
        The combined DataFrame, or None when ``peak_max_dict`` is empty.
    """
    prefixed = [
        table.set_axis([f"{element}_{col}" for col in table.columns], axis=1)
        for element, table in peak_max_dict.items()
    ]
    if not prefixed:
        return None

    merged = prefixed[0]
    for table in prefixed[1:]:
        merged = pd.concat([merged, table], axis=1)
    return merged

# Run the extraction on SAMPLE_Averaged_full_df (no smoothing), then flatten
# the per-element tables into one wide table with the sample names moved from
# the index into a 'Sample' column.
peak_max_dict = extract_peak_intensities(SAMPLE_Averaged_full_df)
combined_peak_df = (
    combine_peak_max_df(peak_max_dict)
    .reset_index()
    .rename(columns={'index': 'Sample'})
)

# Preview the first rows
combined_peak_df.head()