# Automatic Analysis of XAS *In-Situ* Data w. Measured Standards
Notebook showing an example workflow used for automatic analysis of XAS *in-situ* data when standards for both unreduced precursors and reduced metal foils have been measured on the same instrument.

# Imports
Here the required packages and functions are imported.

Whether plots are interactive is also changed here. 

In [1]:
# Functions written for the analysis of XAS data
from autoXAS.data import *
from autoXAS.LCA import *
from autoXAS.plotting import *
from autoXAS.continuous_analysis import *

%matplotlib inline

# Global variables

In [25]:
# Name of the synchrotron the data was measured at
synchrotron = 'ESRF'

# Boolean flags
Here the values of boolean flags (True/False) that occur throughout the notebook can be changed.

In [26]:
# Decide if transmission (True) or absorption (False) data should be used for normalization and analysis
use_transmission = False
# Decide if subtraction of the pre-edge fit should be used for normalization
use_preedge = True
# Decide if plots should be interactive (True) or static (False)
interactive = True

# Analysis function

In [None]:
def _load_and_process(
    path: str,
    synchrotron: str,
    file_selection_condition: str,
    negated_condition: bool,
    **load_kwargs,
) -> pd.DataFrame:
    """Load the XAS data found in one folder and run the initial processing on it.

    Args:
        path (str): Path to the folder containing the measured data.
        synchrotron (str): Name of the synchrotron the data was measured at.
        file_selection_condition (str): Pattern to match with filenames.
        negated_condition (bool): Whether filenames matching the pattern are excluded (True) or included (False).
        **load_kwargs: Extra keyword arguments forwarded unchanged to load_xas_data.

    Returns:
        pd.DataFrame: The output of processing_df for the loaded data.
    """
    df_loaded = load_xas_data(
        path,
        synchrotron=synchrotron,
        file_selection_condition=file_selection_condition,
        negated_condition=negated_condition,
        **load_kwargs,
    )
    return processing_df(df_loaded, synchrotron=synchrotron)


def in_situ_analysis_standards(
    data_paths: Union[str, list[str]],
    synchrotron: str,
    edge_correction_energies: dict,
    df_foils: pd.DataFrame,
    df_precursors: pd.DataFrame,
    metal: str,
    precursor_suffix: str,
    x_axis: str='Measurement',
    file_selection_condition: str='*',
    negated_condition: bool=False,
    use_preedge: bool=True,
    use_transmission: bool=False,
    interactive: bool=False,
    with_uncertainty: bool=True,
) -> None:
    """Run the entire analysis pipeline for in-situ data with measured standards.

    The data is loaded and processed (datasets from several folders are combined),
    normalized, plotted, fitted with linear combination analysis (LCA), and the
    LCA results are plotted.

    Args:
        data_paths (Union[str, list[str]]): Path or list of paths to folders containing the measured data.
        synchrotron (str): Name of the synchrotron the data was measured at.
        edge_correction_energies (dict): Energy shifts for all relevant edges.
        df_foils (pd.DataFrame): Normalized measurements of the reduced metal edges.
        df_precursors (pd.DataFrame): Normalized measurements of the unreduced precursors.
        metal (str): The metal to use for plotting.
        precursor_suffix (str): The precursor to use for plotting.
        x_axis (str, optional): The column to use for the x-axis when plotting. Defaults to 'Measurement'.
        file_selection_condition (str, optional): Pattern to match with filenames. Defaults to '*'.
        negated_condition (bool, optional): Whether the filenames matching the pattern are excluded (True) or included (False). Defaults to False.
        use_preedge (bool, optional): Boolean flag controlling if the pre-edge fit is subtracted during normalization. Defaults to True.
        use_transmission (bool, optional): Boolean flag deciding if absorption (False) or transmission (True) signal is used. Defaults to False.
        interactive (bool, optional): Boolean flag deciding if plots are interactive or static. Defaults to False.
        with_uncertainty (bool, optional): Boolean flag deciding if the uncertainties are plotted. Defaults to True.

    Raises:
        ValueError: If data_paths is an empty list.

    Returns:
        None
    """
    # Clears the notebook cell output when this function is called.
    clear_output(wait=True)

    # Treat a single path and a list of paths uniformly.
    paths = list(data_paths) if isinstance(data_paths, list) else [data_paths]
    if not paths:
        raise ValueError('data_paths must contain at least one path.')

    # Read the in-situ data
    if len(paths) > 1:
        # Several folders: load each one without verbose output, then combine them.
        list_of_datasets = [
            _load_and_process(
                path,
                synchrotron,
                file_selection_condition,
                negated_condition,
                verbose=False,
            )
            for path in paths
        ]
        df_data = combine_datasets(list_of_datasets)
    else:
        # Single folder: keep load_xas_data's default verbosity.
        df_data = _load_and_process(
            paths[0],
            synchrotron,
            file_selection_condition,
            negated_condition,
        )

    # Normalization of the data
    # NOTE(review): the return value is not used, so normalize_data presumably
    # updates df_data in place -- confirm.
    normalize_data(
        df_data,
        edge_correction_energies,
        subtract_preedge=use_preedge,
        transmission=use_transmission,
    )
    # Plotting of normalized data
    plot_data(
        df_data,
        metal=metal,
        foils=df_foils,
        precursors=df_precursors,
        precursor_suffix=precursor_suffix,
        interactive=interactive,
    )
    # Linear combination analysis (LCA)
    df_results = linear_combination_analysis(df_data, df_foils, df_precursors)
    # Plot temperature curve
    plot_temperatures(
        df_results,
        with_uncertainty=with_uncertainty,
        interactive=interactive,
    )
    # Plot LCA over time for single edge
    # NOTE(review): elsewhere in this notebook plot_LCA_change is called with
    # product=/precursor= keywords instead of metal=/precursor_suffix= --
    # confirm which keywords the function actually accepts.
    plot_LCA_change(
        df_results,
        metal=metal,
        precursor_suffix=precursor_suffix,
        x_axis=x_axis,
        with_uncertainty=with_uncertainty,
        interactive=interactive,
    )
    # Plot LCA over time for all edges
    plot_reduction_comparison(
        df_results,
        precursor_type='all',
        x_axis=x_axis,
        with_uncertainty=with_uncertainty,
        interactive=interactive,
    )

# Standards and preprocessing
Here the measured standards (metal foils and precursors) are loaded and preprocessed. 

This section only needs to be run once, as it applies to all experiments measured on the same instrument.

## Metal foils

In [27]:
# Specify data location
folder_metal_foils = '../Data/ESRF_BM31/Standards/Standards/'

# Load data, using the synchrotron selected under "Global variables"
df_foils = load_xas_data(
    folder_metal_foils,
    synchrotron=synchrotron,
    file_selection_condition='mono', # It will look for files with this in the filename
    negated_condition=True, # Files containing the above substring will be ignored(True)/loaded(False).
)

# Initial data processing
df_foils = processing_df(df_foils, synchrotron=synchrotron)

# Specify the measurements to use when averaging.
# This can be given as either a list or a range.
# range(1,2) selects only measurement 1; change this to fit the number of repeat measurements for the standards
measurements_to_average = range(1,2)

# Create dataframe with the reference spectra for reduced metals
df_foils = average_measurements(df_foils, measurements_to_average)

Loading data: 100%|██████████| 6/6 [00:00<00:00, 20.12it/s, Currently loading Rufoil.dat]


### Edge energy corrections
The energy shifts of the different edges are systematic errors from the instrument. Therefore the shift is consistent across measurements and we can correct the measured data using the theoretical edge energies.

In [28]:
# Absorption edge evaluated for each metal foil
edge_by_metal = {
    'Pd': 'K',
    'Ag': 'K',
    'Rh': 'K',
    'Ru': 'K',
    'Mn': 'K',
    'Mo': 'K',
    'Ir': 'L3',
    'Pt': 'L3',
}

# Calculate the edge energy shift at each edge, keyed by metal
edge_correction_energies = {
    foil_metal: calc_edge_correction(
        df_foils,
        metal=foil_metal,
        edge=foil_edge,
        transmission=use_transmission,
    )
    for foil_metal, foil_edge in edge_by_metal.items()
}

### Normalization
Normalization includes correcting the energy shifts, subtraction by the minimum measured value and division by a fit to the post-edge data. A fit to the pre-edge data can also be used to subtract from the data, but can sometimes lead to overcorrections. 

The pre- and post-edge fits can be visually inspected using the "plot_non_normalized_xas()" function with the optional arguments "pre_edge=True" and "post_edge=True". 

All normalization of data **must** use the same normalization procedure!

In [29]:
# Normalization of the metal foil standards, using the edge shifts and flags set above
# NOTE(review): the result is not assigned, so this presumably updates df_foils in place -- confirm
normalize_data(
    df_foils, 
    edge_correction_energies, 
    subtract_preedge=use_preedge, 
    transmission=use_transmission,
)
# Show the first rows as a quick check of the result
df_foils.head()

Unnamed: 0,Filename,Experiment,Measurement,ZapEnergy,MonEx,xmap_roi00,Ion1,Metal,Precursor,Energy,Temperature,Absorption,Transmission,Relative Time,Energy_Corrected,Normalized,pre_edge,post_edge
0,Agfoil.dat,Agfoil,1,25.400257,1838.0,1642.0,47334.0,Ag,Avg,25400.257812,25.777052,0.893362,-3.248551,0,25392.158203,0.003653,-0.151765,57.481833
1,Agfoil.dat,Agfoil,1,25.40056,1856.0,1549.0,47827.0,Ag,Avg,25400.560547,25.777052,0.83459,-3.249167,0,25392.460938,0.002536,-0.146117,57.481149
2,Agfoil.dat,Agfoil,1,25.401297,1854.0,1587.0,47723.0,Ag,Avg,25401.296875,25.777052,0.855987,-3.248068,0,25393.197266,0.002669,-0.132379,57.479485
3,Agfoil.dat,Agfoil,1,25.40201,1855.0,1650.0,47774.0,Ag,Avg,25402.009766,25.777052,0.889488,-3.248597,0,25393.910156,0.003021,-0.119078,57.477875
4,Agfoil.dat,Agfoil,1,25.402702,1855.0,1670.0,47787.0,Ag,Avg,25402.703125,25.777052,0.90027,-3.248869,0,25394.603516,0.002984,-0.106142,57.476307


### Plotting
It is always a good idea to visually inspect the data to see if it behaves as it should.

In [30]:
# Inspect the non-normalized 'Pdfoil' data together with the pre- and post-edge fits
plot_non_normalized_xas(df_foils, 'Pdfoil', pre_edge=True, post_edge=True, transmission=use_transmission, interactive=interactive)

## Precursors

In [31]:
# Specify data location
folder_precursor_standards = '../Data/ESRF_BM31/wheel/wheel/'

# Load data, using the synchrotron selected under "Global variables"
df_precursors = load_xas_data(
    folder_precursor_standards,
    synchrotron=synchrotron,
    file_selection_condition='mono', # It will look for files with this in the filename
    negated_condition=True, # Files containing the above substring will be ignored(True)/loaded(False).
)

# Initial data processing
df_precursors = processing_df(df_precursors, synchrotron=synchrotron)

# Specify the measurements to use when averaging.
# This can be given as either a list or a range.
# range(1,2) selects only measurement 1
measurements_to_average = range(1,2)

# Create dataframe with the reference spectra for the unreduced precursors
df_precursors = average_measurements(df_precursors, measurements_to_average)

Loading data: 100%|██████████| 8/8 [00:00<00:00, 20.26it/s, Currently loading Ruacac.dat]


Incomplete measurement detected!
Not all edges were measured 3 times, but only 2 times.
Incomplete measurements will be removed unless keep_incomplete="True".

Incomplete measurements were removed!


### Normalization

In [32]:
# Normalization of the precursor standards, with the same settings as used for the foils
# NOTE(review): the result is not assigned, so this presumably updates df_precursors in place -- confirm
normalize_data(
    df_precursors, 
    edge_correction_energies, 
    subtract_preedge=use_preedge, 
    transmission=use_transmission
)
# Show the first rows as a quick check of the result
df_precursors.head()

Unnamed: 0,Filename,Experiment,Measurement,ZapEnergy,MonEx,xmap_roi00,Ion1,Metal,Precursor,Energy,Temperature,Absorption,Transmission,Relative Time,Energy_Corrected,Normalized,pre_edge,post_edge
0,Agacac.dat,Agacac,1,25.400257,15827.0,2483.0,38828.0,Ag,Avg,25400.257812,1100.0,0.156884,-0.897424,0,25392.158203,0.003475,-0.028298,8.114892
1,Agacac.dat,Agacac,1,25.40056,15951.0,2585.0,39134.0,Ag,Avg,25400.560547,1100.0,0.162059,-0.89747,0,25392.460938,0.004003,-0.027416,8.114993
2,Agacac.dat,Agacac,1,25.401253,15932.0,2544.0,39093.0,Ag,Avg,25401.251953,1100.0,0.159679,-0.897614,0,25393.152344,0.003464,-0.025402,8.115225
3,Agacac.dat,Agacac,1,25.402031,15940.0,2685.0,39104.0,Ag,Avg,25402.03125,1100.0,0.168444,-0.897393,0,25393.931641,0.004263,-0.023132,8.115487
4,Agacac.dat,Agacac,1,25.402941,15921.0,2678.0,39048.0,Ag,Avg,25402.941406,1100.0,0.168206,-0.897153,0,25394.841797,0.003909,-0.020481,8.115793


### Plotting
It is always a good idea to visually inspect the data to see if it behaves as it should.

In [33]:
# Inspect the non-normalized 'Pdacac' data together with the pre- and post-edge fits
plot_non_normalized_xas(df_precursors, 'Pdacac', pre_edge=True, post_edge=True, transmission=use_transmission, interactive=interactive)

# Experiments
Here the measured data from different experiments are loaded, preprocessed and analysed. 

This section needs to be run every time a new experiment is analysed.

## Preprocessing

### Single dataset
Use either this section or *Stitching together datasets*.

In [34]:
# # Specify data location
# folder_XAS_data = '../Data/ESRF_BM31/IrPtPdRuRh_XAS/IrPtPdRuRh_XAS/XAS1/'

# # Load data
# df_data = load_xas_data(
#     folder_XAS_data, 
#     synchrotron='ESRF', 
#     file_selection_condition='mono', 
#     negated_condition=True
# )

# # Initial data processing
# df_data = processing_df(df_data, synchrotron='ESRF')

### Stitching together datasets
Use either this section or *Single dataset*.

In [35]:
# Specify all data locations
list_of_folders = [
    '../Data/ESRF_BM31/IrPtPdRuRh_XAS/IrPtPdRuRh_XAS/XAS1/',
    '../Data/ESRF_BM31/IrPtPdRuRh_XAS/IrPtPdRuRh_XAS/XAS2/',
    '../Data/ESRF_BM31/IrPtPdRuRh_XAS/IrPtPdRuRh_XAS/XAS3/',
    '../Data/ESRF_BM31/IrPtPdRuRh_XAS/IrPtPdRuRh_XAS/XAS4/',
]

# Create empty list to hold all datasets
list_of_datasets = []

# Load every folder, using the synchrotron selected under "Global variables"
for folder in list_of_folders:
    df_data = load_xas_data(
        folder,
        synchrotron=synchrotron,
        file_selection_condition='mono', # It will look for files with this in the filename
        negated_condition=True, # Files containing the above substring will be ignored(True)/loaded(False).
        verbose=False,
    )

    # Initial data processing
    df_data = processing_df(df_data, synchrotron=synchrotron)

    # Append to list of datasets
    list_of_datasets.append(df_data)

# Combine the datasets into the single dataframe used by the rest of the notebook
df_data = combine_datasets(list_of_datasets)

Loading data: 100%|██████████| 5/5 [00:00<00:00, 24.63it/s, Currently loading Ru.dat]
Loading data: 100%|██████████| 5/5 [00:00<00:00, 15.82it/s, Currently loading Ru.dat]
Loading data: 100%|██████████| 5/5 [00:00<00:00, 26.66it/s, Currently loading Ru.dat]
Loading data: 100%|██████████| 5/5 [00:00<00:00, 12.41it/s, Currently loading Ru.dat]


### Normalization

In [36]:
# Normalization of the in-situ data, with the same settings as used for the standards
# NOTE(review): the result is not assigned, so this presumably updates df_data in place -- confirm
normalize_data(
    df_data, 
    edge_correction_energies, 
    subtract_preedge=use_preedge, 
    transmission=use_transmission
)
# Show the first rows as a quick check of the result
df_data.head()

Unnamed: 0,index,Filename,Experiment,Measurement,ZapEnergy,MonEx,xmap_roi00,Ion1,Metal,Precursor,Energy,Temperature,Absorption,Transmission,Relative Time,Energy_Corrected,Normalized,pre_edge,post_edge
0,0,Ir.dat,Ir,1,11.170101,83329.0,389.0,44793.0,Ir,,11170.101562,39.459183,0.004668,0.620745,0,11166.945312,0.008543,-0.001677,0.224033
1,1,Ir.dat,Ir,1,11.170307,83551.0,377.0,44911.0,Ir,,11170.307617,39.459183,0.004512,0.620774,0,11167.151367,0.007512,-0.001599,0.223956
2,2,Ir.dat,Ir,1,11.170659,83612.0,384.0,44955.0,Ir,,11170.65918,39.459183,0.004593,0.620525,0,11167.50293,0.007287,-0.001466,0.223823
3,3,Ir.dat,Ir,1,11.171026,83536.0,369.0,44922.0,Ir,,11171.026367,39.459183,0.004417,0.62035,0,11167.870117,0.0059,-0.001327,0.223684
4,4,Ir.dat,Ir,1,11.171415,83525.0,409.0,44919.0,Ir,,11171.415039,39.459183,0.004897,0.620285,0,11168.258789,0.007387,-0.00118,0.223537


#### Saving results as .csv file

In [37]:
# save_data(df_data, filename='Normalized_XAS_data.csv')

## Data inspection
It is always a good idea to visually inspect the data to see if it behaves as it should.

In [38]:
# Plot the normalized data for 'Pd', passing the foil and precursor standards along
plot_data(df_data, 'Pd', foils=df_foils, precursors=df_precursors, precursor_suffix=None, interactive=interactive)

## Linear combination analysis
This section performs linear combination analysis (LCA) of every combination of two-component systems consisting of 1 metal foil and 1 precursor (with the same metal). 

The estimated uncertainties of the dependent parameter behave unreliably when the independent parameter is approximately zero. In the column "StdCorrected" this is handled by using the same uncertainty for both parameters.

In [39]:
# Run the LCA with the metal foils as products and the precursor standards as precursors
df_results = linear_combination_analysis(
    data = df_data, 
    products = df_foils, 
    precursors = df_precursors,
)
# Show the first rows of the fit results
df_results.head()

LCA progress: 100%|██████████| 7/7 [00:04<00:00,  1.54it/s, Analysing Ruacac + Rufoil]


Unnamed: 0,Experiment,Metal,Product,Intermediate,Precursor,Precursor Type,Measurement,Temperature,Temperature Average,Temperature Std,Parameter,Value,StdErr,StdCorrected,Energy Range,Basis Function
0,Iracac + Irfoil,Ir,Irfoil,,Iracac,acac,1,41.438393,41.440155,1.34725,product_weight,1.665335e-16,0.026984,0.026984,"[11166.9453125, 11167.1513671875, 11167.502929...","[0.002917575675627365, 0.0027507154410406913, ..."
1,Iracac + Irfoil,Ir,Irfoil,,Iracac,acac,1,41.438393,41.440155,1.34725,precursor_weight,1.0,603616.362051,0.026984,"[11166.9453125, 11167.1513671875, 11167.502929...","[0.0008645473564250575, 0.0009945414898842803,..."
2,Iracac + Irfoil,Ir,Irfoil,,Iracac,acac,2,46.650097,43.886864,1.941293,product_weight,0.0,0.007014,0.007014,"[11166.927734375, 11167.130859375, 11167.51074...","[0.0029253702728686192, 0.0027683322287115706,..."
3,Iracac + Irfoil,Ir,Irfoil,,Iracac,acac,2,46.650097,43.886864,1.941293,precursor_weight,1.0,0.007014,0.007014,"[11166.927734375, 11167.130859375, 11167.51074...","[0.0008455077577204543, 0.0010655208983069797,..."
4,Iracac + Irfoil,Ir,Irfoil,,Iracac,acac,3,47.809467,47.003166,2.54706,product_weight,0.0,0.002652,0.002652,"[11166.982421875, 11167.1474609375, 11167.5068...","[0.0028958442156626976, 0.002754071019644668, ..."


#### Saving results as .csv file

In [40]:
# save_data(df_results, filename='LCA_results.csv')

### Results plotting

#### Temperature curves

In [41]:
# Plot the temperature curve from the LCA results, including uncertainties
plot_temperatures(
    df_results, 
    with_uncertainty=True, 
    interactive=interactive
)

#### Waterfall plots


In [42]:
# Waterfall plot of the in-situ data for the 'Pd' experiment
# NOTE(review): vmin/vmax presumably set the limits of the color scale -- confirm
plot_insitu_waterfall(
    df_data, 
    experiment='Pd', 
    # lines=[5,33,109],
    vmin=0.8, 
    vmax=1.3, 
    y_axis='Measurement',
    time_unit='m',
    interactive=interactive,
    homogenize_measurements=True,
)

In [43]:
# Plot of the in-situ data for the 'Pd' experiment with measurement 1 as the reference
# NOTE(review): presumably shows the change relative to the reference measurement -- confirm
plot_insitu_change(
    df_data, 
    experiment='Pd', 
    reference_measurement=1,
    # lines=[5,348],
    vmin=-0.25, 
    vmax=0.25, 
    y_axis='Measurement',
    time_unit='m',
    interactive=interactive,
    homogenize_measurements=True,
)

#### Plot of a single LCA fit
Plot showing the measurement that is being fitted, the contributions from the components and the residual.

In [44]:
# Plot the LCA fit of measurement 4 for the 'Pdacac + Pdfoil' experiment
plot_LCA(
    df_results, 
    df_data, 
    experiment='Pdacac + Pdfoil', 
    measurement=4, 
    interactive=interactive
)

#### Plot of LCA component weights over time
Plot showing how the weight of each component changes over time.

In [45]:
# Plot the component weights of the 'Pdfoil' / 'Pdacac' fit against the measurement number
plot_LCA_change(df_results, product='Pdfoil', precursor='Pdacac', x_axis='Measurement', with_uncertainty=True, interactive=interactive)

#### Comparison of reduction times of different metals
Plot showing the weight of the metal foil component over time for the different metal species in the sample. 

In [46]:
# Compare the weight of the metal foil component across the metal species, for all precursor types
plot_reduction_comparison(
    df_results, 
    precursor_type='all', 
    x_axis='Measurement', 
    with_uncertainty=True, 
    interactive=interactive
)