# Performance Metrics for Mass Spectrometry Data Analysis

This notebook demonstrates how to calculate key performance metrics from mass spectrometry raw data acquired on Thermo and Bruker instruments. The metrics include injection times, scan counts, and Total Ion Chromatograms (TIC).

## Outline

1. **Thermo Raw File Analysis** - Using AlphaRaw to extract metrics from Thermo .raw files
2. **Bruker timsTOF Analysis** - Using AlphaTims to process Bruker .d files

For each:
- **Performance Metrics Calculation** - Median injection times, scan counts, and TIC values
- **TIC Visualization** - Critical for spray stability assessment


In [None]:
# Uncomment and run if packages need to be installed

# %pip install plotly
# %pip install alpharaw
# %pip install nbformat

In [None]:
%reload_ext autoreload
%autoreload 2

import alphatims
import alphatims.bruker
import alphatims.utils
import numpy as np
import pandas as pd
import plotly.express as px
from alpharaw import thermo

In [None]:
# Based on https://github.com/MannLabs/alpharaw/blob/main/docs/tutorials/ms_methods.ipynb


def _tic_for_spectrum_df(
    spectrum_df: pd.DataFrame, peak_df: pd.DataFrame
) -> pd.DataFrame:
    """Calculate the TIC for each spectrum in the spectrum_df.

    The TIC of a spectrum is the sum of the ``intensity`` values of the
    ``peak_df`` rows at positions ``peak_start_idx`` (inclusive) to
    ``peak_stop_idx`` (exclusive).

    Parameters
    ----------
    spectrum_df : pd.DataFrame
        Spectrum data for specific MS level. Must provide the columns
        ``spec_idx``, ``peak_start_idx`` and ``peak_stop_idx``.
    peak_df : pd.DataFrame
        Peak data. Must provide the column ``intensity``.

    Returns
    -------
    pd.DataFrame
        DataFrame with one row per spectrum and the columns ``spec_idx`` and
        ``tic``. Both columns are present even when ``spectrum_df`` is empty.

    """
    # Select the intensity column once instead of slicing the whole peak table
    # (all columns) again for every spectrum.
    intensities = peak_df["intensity"]

    tic_records = [
        {
            "spec_idx": spec_idx,
            "tic": intensities.iloc[peak_start_idx:peak_stop_idx].sum(),
        }
        for spec_idx, peak_start_idx, peak_stop_idx in zip(
            spectrum_df["spec_idx"],
            spectrum_df["peak_start_idx"],
            spectrum_df["peak_stop_idx"],
        )
    ]

    # Naming the columns explicitly keeps the output schema stable when there
    # are no spectra; otherwise the result would have no columns at all and a
    # later ``["tic"]`` lookup would raise a KeyError.
    return pd.DataFrame(tic_records, columns=["spec_idx", "tic"])


def calculate_thermo_metrics(
    raw_file: thermo.ThermoRawData,
) -> tuple[dict, pd.DataFrame]:
    """Calculate performance metrics and compile TIC data for Thermo raw file.

    For every distinct value in ``raw_file.spectrum_df["ms_level"]`` the
    following metrics are stored under the key ``f"ms_level_{ms_level}"``:

    - ``median_injection_time``: median of the ``injection_time`` column
    - ``ms_scans``: number of distinct ``precursor_mz`` values
    - ``median_tic``: median of the per-spectrum TIC

    NOTE(review): ``ms_scans`` counts distinct precursor m/z values, not the
    number of spectrum rows - confirm that this is the intended definition.

    Parameters
    ----------
    raw_file : thermo.ThermoRawData
        Loaded Thermo raw file object. Its ``spectrum_df`` must contain the
        columns ``ms_level``, ``injection_time`` and ``precursor_mz`` in
        addition to the columns required by ``_tic_for_spectrum_df``.

    Returns
    -------
    tuple[dict, pd.DataFrame]
        (dict_ms_metrics, combined_tic_df) containing metrics and TIC data.
        ``combined_tic_df`` has the columns ``spec_idx``, ``tic`` and
        ``ms_level`` (labelled ``"MS<level>"``). If the file contains no
        spectra, the metric dictionaries are empty and ``combined_tic_df`` is
        an empty DataFrame with those three columns.

    """
    spectrum_df = raw_file.spectrum_df
    dict_ms_metrics = {"median_injection_time": {}, "ms_scans": {}, "median_tic": {}}
    tic_frames = []

    for ms_level in spectrum_df["ms_level"].unique():
        level_key = f"ms_level_{ms_level}"
        level_data = spectrum_df[spectrum_df["ms_level"] == ms_level]
        tic_df = _tic_for_spectrum_df(level_data, raw_file.peak_df)

        dict_ms_metrics["median_injection_time"][level_key] = np.median(
            level_data["injection_time"]
        )
        dict_ms_metrics["ms_scans"][level_key] = level_data["precursor_mz"].nunique()
        dict_ms_metrics["median_tic"][level_key] = np.median(tic_df["tic"])

        # Label the rows so that all MS levels can share one long-format table.
        tic_df["ms_level"] = f"MS{ms_level}"
        tic_frames.append(tic_df)

    if not tic_frames:
        # pd.concat raises a ValueError when given nothing to concatenate, so
        # return an empty table with the expected columns instead.
        return dict_ms_metrics, pd.DataFrame(columns=["spec_idx", "tic", "ms_level"])

    combined_tic_df = pd.concat(tic_frames, ignore_index=True)

    return dict_ms_metrics, combined_tic_df


def _filter_bruker_data(
    data: alphatims.bruker.TimsTOF,
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Split the Bruker frame table into MS1 and MS2 chromatogram tables.

    Rows of ``data.frames`` with ``MsMsType == 0`` go into the MS1 table, all
    other rows into the MS2 table. Each table keeps the columns ``Time``,
    ``SummedIntensities`` and ``MaxIntensity`` and gains an ``RT`` column
    holding ``Time / 60``.

    Parameters
    ----------
    data : alphatims.bruker.TimsTOF
        Loaded Bruker timsTOF data

    Returns
    -------
    tuple[pd.DataFrame, pd.DataFrame]
        (chrom_ms1, chrom_ms2) DataFrames with chromatographic data

    """
    kept_columns = ["Time", "SummedIntensities", "MaxIntensity"]

    chromatograms = []
    # First selection: MS1 frames, second selection: MS2 frames.
    for frame_selection in ("MsMsType == 0", "MsMsType != 0"):
        chromatogram = data.frames.query(frame_selection)[kept_columns]
        # Retention time in minutes (Time is in seconds) reads better in plots.
        chromatogram["RT"] = chromatogram["Time"] / 60
        chromatograms.append(chromatogram)

    chrom_ms1, chrom_ms2 = chromatograms
    return chrom_ms1, chrom_ms2


def calculate_bruker_metrics(
    data: alphatims.bruker.TimsTOF,
) -> tuple[dict, pd.DataFrame]:
    """Derive performance metrics and a combined TIC table from Bruker data.

    Parameters
    ----------
    data : alphatims.bruker.TimsTOF
        Loaded Bruker timsTOF data

    Returns
    -------
    tuple[dict, pd.DataFrame]
        (dict_ms_metrics, combined_tic_df).

        ``dict_ms_metrics`` holds ``ms_scans`` (for ``ms_level_2``: the number
        of distinct values in ``data.precursor_indices``) and ``median_tic``
        (median of ``SummedIntensities`` for ``ms_level_1`` and
        ``ms_level_2``).

        ``combined_tic_df`` stacks the MS1 and MS2 frames with an ``ms_level``
        label. Its ``tic`` column is the renamed ``SummedIntensities`` column
        and its ``spec_idx`` column is the renamed ``Time`` column, so it
        contains frame times rather than running indices.

    """
    distinct_precursor_count = len(set(data.precursor_indices))

    chrom_ms1, chrom_ms2 = _filter_bruker_data(data)

    dict_ms_metrics = {
        "ms_scans": {"ms_level_2": distinct_precursor_count},
        "median_tic": {
            "ms_level_1": np.median(chrom_ms1["SummedIntensities"]),
            "ms_level_2": np.median(chrom_ms2["SummedIntensities"]),
        },
    }

    # Tag each table with its MS level before stacking them into one table.
    labelled_chromatograms = []
    for level_label, chromatogram in (("MS1", chrom_ms1), ("MS2", chrom_ms2)):
        chromatogram["ms_level"] = level_label
        labelled_chromatograms.append(chromatogram)

    # Use the column names expected by plot_tic_combined.
    plot_column_names = {"Time": "spec_idx", "SummedIntensities": "tic"}
    combined_tic_df = pd.concat(labelled_chromatograms, ignore_index=True).rename(
        columns=plot_column_names
    )

    return dict_ms_metrics, combined_tic_df


def plot_tic_combined(combined_tic_df: pd.DataFrame) -> None:
    """Draw the TIC traces of all MS levels in a single line plot.

    One line is drawn per distinct ``ms_level`` value, with ``spec_idx`` on
    the x-axis and ``tic`` on the y-axis.

    Parameters
    ----------
    combined_tic_df : pd.DataFrame
        DataFrame with columns 'spec_idx', 'tic', and 'ms_level'

    Returns
    -------
    None
        The figure is shown, nothing is returned

    """
    axis_labels = {"spec_idx": "Spectrum Index", "tic": "TIC Intensity"}
    tic_figure = px.line(
        data_frame=combined_tic_df,
        x="spec_idx",
        y="tic",
        color="ms_level",
        title="Total Ion Chromatogram (TIC)",
        labels=axis_labels,
    )
    tic_figure.show()

## 1. Thermo Raw File Analysis

In [None]:
# TODO: Replace the path below with the path to your own Thermo .raw file
file = "/Users/patriciaskowronek/Documents/documentation_agent_few_shot_examples/raw_files_alphaKraken_metrics/20250701_OA1_Evo14_16p3min__TiHe_ADIAMA_HeLa_5ng_F-40_01.raw"
# Additional items requested from the reader. calculate_thermo_metrics reads
# spectrum_df["injection_time"], so "injection_time" has to be requested here
# (presumably each item becomes a column of spectrum_df - confirm in alpharaw).
auxiliary_items = ["injection_time", "faims_cv"]
raw_file = thermo.ThermoRawData(auxiliary_items=auxiliary_items)
raw_file.load_raw(file)

In [None]:
# Compute the per-MS-level metrics and the combined TIC table for the Thermo file
dict_ms_metrics, combined_tic_df = calculate_thermo_metrics(raw_file)
print(dict_ms_metrics)

# Always inspect this plot: the TIC trace is the key check for spray stability.
plot_tic_combined(combined_tic_df)

## 2. Bruker timsTOF Analysis

In [None]:
# Set the alphatims thread count to 4 (adjust to your machine if needed)
alphatims.utils.set_threads(4)

# TODO: Replace the path below with the path to your own Bruker .d folder
bruker_d_folder_name = "/Users/patriciaskowronek/Documents/documentation_agent_few_shot_examples/raw_files_alphaKraken_metrics/20250623_TIMS02_EVO05_PaSk_DIAMA_HeLa_200ng_44min_S1-A4_1_21403.d"
data = alphatims.bruker.TimsTOF(bruker_d_folder_name)

In [None]:
# Compute the metrics and the combined MS1/MS2 TIC table for the Bruker data
dict_ms_metrics, combined_tic_df = calculate_bruker_metrics(data)
print(dict_ms_metrics)

# Always inspect this plot: the TIC trace is the key check for spray stability.
# Note: for Bruker data the x-axis values come from the frames' "Time" column.
plot_tic_combined(combined_tic_df)