# Figure 2: MS2/Identification visualization

## Library imports

In [1]:
import pandas as pd
import numpy as np

import plotly.graph_objects as go

import utils
import alphatims.bruker

## Fig 2 A: MS2 DDA spectra 

For this figure we used the [PXD012867 PRIDE project](https://www.ebi.ac.uk/pride/archive/projects/PXD012867) as example data.

In [None]:
# Thermo RAW file of the PXD012867 example project. The path is relative and uses the
# same '../Data/' root as the other inputs in this notebook ('.../Data/' would point at a
# directory literally named '...').
ms2_dda_path = '../Data/PXD012867_yeast_project/20190124_QX3_JuSc_SA_JS7_1_wt_4h_1.raw'

In [None]:
# Read the RAW file through the project helper. The result is indexed by string keys
# such as 'scan_list_ms2' and 'rt_list_ms2' when the MS2 table is built further down.
ms2_dda_data = utils.load_thermo_raw(ms2_dda_path)

In [21]:
# this plotting function is taken from the AlphaViz package (https://github.com/MannLabs/alphaviz) and modified
def plot_mass_spectra(
    mz_values: np.ndarray,
    intensity_values: np.ndarray,
    title: str,
    spectrum_color: str = 'grey',
    template: str = "plotly_white",
    spectrum_line_width: float = 1.5,
    height: int = 520
):
    """Draw a mass spectrum as a stick plot: one vertical line per peak.

    Parameters
    ----------
    mz_values : np.ndarray
        m/z position of every peak (x axis).
    intensity_values : np.ndarray
        Intensity of every peak (y axis), paired positionally with ``mz_values``.
        If the two inputs differ in length, the extra entries are ignored for the
        vertical lines.
    title : str
        Text shown centered above the plot.
    spectrum_color : str
        Color used for both the markers and the vertical lines.
    template : str
        Name of the Plotly layout template.
    spectrum_line_width : float
        Width of the vertical peak lines.
    height : int
        Figure height passed to the Plotly layout.

    Returns
    -------
    plotly.graph_objects.Figure
        Figure holding one marker trace (for hover information) and one layout
        shape per peak.
    """
    fig = go.Figure()

    # Tiny markers at the peak tops carry the hover text; the visible "sticks"
    # are added as layout shapes below.
    fig.add_trace(
        go.Scatter(
            x=mz_values,
            y=intensity_values,
            mode='markers',
            marker=dict(color=spectrum_color, size=1),
            hovertemplate='<b>m/z:</b> %{x};<br><b>Intensity:</b> %{y}.',
            name='',
            showlegend=False
        )
    )

    # One vertical line from the baseline to the peak intensity per (m/z, intensity)
    # pair. Iterating the pairs directly is positional, so it also works for inputs
    # whose [] lookup is label-based (e.g. a pandas Series with a non-default index).
    peak_lines = [
        dict(
            type='line',
            xref='x',
            yref='y',
            x0=mz,
            y0=0,
            x1=mz,
            y1=intensity,
            line=dict(
                color=spectrum_color,
                width=spectrum_line_width
            )
        )
        for mz, intensity in zip(mz_values, intensity_values)
    ]

    fig.update_layout(
        template=template,
        shapes=peak_lines,
        xaxis=dict(
            title='m/z, Th',
        ),
        yaxis=dict(
            title='Intensity',
        ),
        legend=dict(
            orientation="h",
            x=1,
            xanchor="right",
            yanchor="bottom",
            y=1.01
        ),
        hovermode="closest",
        height=height,
        title=dict(
            text=title,
            font=dict(
                size=16,
            ),
            x=0.5,
            xanchor='center',
            yanchor='top'
        ),
    )

    return fig

In [None]:
# Assemble one table row per MS2 scan. Each (column, key) pair names the output column
# and the key of the loaded raw-file data it is filled from; 'order' is the constant 'ms2'.
df_ms2 = pd.DataFrame({
    **{
        column: ms2_dda_data[raw_key]
        for column, raw_key in (
            ('scan', 'scan_list_ms2'),
            ('RT', 'rt_list_ms2'),
            ('prec_mono_mz', 'mono_mzs2'),
            ('prec_charge', 'charge_ms2'),
            ('mz_values', 'mass_list_ms2'),
            ('intensity_values', 'int_list_ms2'),
        )
    },
    'order': 'ms2',
})
# Preview the first scans with RT above 50 to pick an example spectrum.
df_ms2.loc[df_ms2['RT'] > 50].head()

In [None]:
# let's plot the MS2 spectra acquired at 50.0019 min for the precursor m/z = 590.2973
# The scan number is named once and the table is filtered once; the selected row is then
# reused for the peaks and for the precursor details shown in the title.
dda_scan = 61885
dda_spectrum = df_ms2[df_ms2.scan == dda_scan]
plot_mass_spectra(
    mz_values=dda_spectrum.mz_values.values[0],
    intensity_values=dda_spectrum.intensity_values.values[0],
    title=f"MS2 DDA spectrum for precursor m/z={dda_spectrum.prec_mono_mz.values[0]} and charge={dda_spectrum.prec_charge.values[0]}."
).show(config=utils.config)

## Fig 2 B: MS2 DIA spectra 

For this figure we used a Bruker raw file from the [Project PXD017703 from ProteomeXchange](https://www.ebi.ac.uk/pride/archive/projects/PXD017703) as example data.

To read the raw TIMS-TOF data we use the recently published [AlphaTims package](https://github.com/MannLabs/alphatims).

In [2]:
# Relative path to the Bruker .d acquisition of the PXD017703 example project.
bruker_file_path = '../Data/PXD017703_diaPASEF/20200428_Evosep_60SPD_SG06-16_MLHeLa_200ng_py8_S3-A6_1_2452.d'

In [3]:
# Open the acquisition with AlphaTims. The resulting object is sliced with [...] and
# passed to the plotting helpers in the cells below.
bruker_raw_data = alphatims.bruker.TimsTOF(bruker_file_path)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 11872/11872 [00:20<00:00, 570.97it/s]


In [None]:
# Pull the MS2 DIA spectrum of frame 6005 (10.564 min) for the 400-425 Th m/z window.
# Index positions used here: frame number, everything along the second dimension,
# and the 400-425 float slice along the third dimension.
peptide_data = bruker_raw_data[6005, slice(None), slice(400.0, 425.0)]
peptide_data.head()

In [None]:
# Stick plot of all peaks extracted for the selected frame and m/z window.
plot_mass_spectra(
    peptide_data["mz_values"].values,
    peptide_data["intensity_values"].values,
    title='MS2 DIA spectrum for the RT = 10.564 min and m/z window = 400-425 Th.',
).show(config=utils.config)

## Fig 2 C: Phospho MS2 spectra 

For this figure we used the [PXD010697 PRIDE project](https://www.ebi.ac.uk/pride/archive/projects/PXD010697) as example data.

In [17]:
# Relative path to the RAW file of the PXD010697 example project.
phospho_path = '../Data/PXD010697_circadian_clock/20170123_Qep6_ChRo_SA_collab_SYN_CT_phospho_1.raw'

In [18]:
# Read the RAW file through the project helper. The result is indexed by string keys
# such as 'scan_list_ms2' and 'rt_list_ms2' when the MS2 table is built in the next cell.
phospho_data = utils.load_thermo_raw(phospho_path)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 54493/54493 [01:58<00:00, 460.62it/s]


In [19]:
# Assemble one table row per MS2 scan of the phospho run. Each (column, key) pair names
# the output column and the key of the loaded raw-file data it is filled from; 'order'
# is the constant 'ms2'.
df_ms2_phospho = pd.DataFrame({
    **{
        column: phospho_data[raw_key]
        for column, raw_key in (
            ('scan', 'scan_list_ms2'),
            ('RT', 'rt_list_ms2'),
            ('prec_mono_mz', 'mono_mzs2'),
            ('prec_charge', 'charge_ms2'),
            ('mz_values', 'mass_list_ms2'),
            ('intensity_values', 'int_list_ms2'),
        )
    },
    'order': 'ms2',
})
# Preview the first scans with RT above 50 to pick an example spectrum.
df_ms2_phospho.loc[df_ms2_phospho['RT'] > 50].head()

Unnamed: 0,scan,RT,prec_mono_mz,prec_charge,mz_values,intensity_values,order
16055,20544,50.001946,473.2245,2,"[101.01766967773438, 102.05557250976562, 110.0...","[32731, 2690, 8449, 1218, 2323, 34334, 22946, ...",ms2
16056,20545,50.00448,691.008118,3,"[101.07112121582031, 102.05569458007812, 110.0...","[1445, 2576, 2548, 1807, 2375, 5065, 2100, 183...",ms2
16057,20546,50.006332,695.341797,3,"[101.07183837890625, 102.05586242675781, 110.0...","[12392, 18667, 8993, 1861, 1604, 4179, 1997, 1...",ms2
16058,20547,50.008876,946.916992,2,"[101.07193756103516, 102.05623626708984, 103.2...","[4184, 1701, 950, 1233, 5056, 2468, 1291, 4600...",ms2
16059,20548,50.011451,1298.466431,3,"[110.99449920654297, 112.05134582519531, 126.0...","[1422, 1988, 51974, 2238, 2081, 23566, 295747,...",ms2


In [22]:
# let's plot the MS2 spectra acquired at 50.001946 min for the precursor m/z = 473.224500
scan = 20544
# Filter the table once; the selected row is reused for the peaks and for the
# precursor details shown in the title.
phospho_spectrum = df_ms2_phospho[df_ms2_phospho.scan == scan]
plot_mass_spectra(
    mz_values=phospho_spectrum.mz_values.values[0],
    intensity_values=phospho_spectrum.intensity_values.values[0],
    title=f"MS2 DDA spectrum for precursor m/z={phospho_spectrum.prec_mono_mz.values[0]} and charge={phospho_spectrum.prec_charge.values[0]}."
).show(config=utils.config)

## Fig 2 G: Precursor/fragments elution profile 

For this figure we used a Bruker raw file from the [Project PXD017703 from ProteomeXchange](https://www.ebi.ac.uk/pride/archive/projects/PXD017703) as example data.

To read the raw TIMS-TOF data we use the recently published [AlphaTims package](https://github.com/MannLabs/alphatims).

An example is the following peptide:

> the information about the peptide was taken from the pyprophet_export_60SPD.tsv file.

* sequence: "IIIPEIQK"
* charge: 2
* m/z: 477.3051, Th
* im: 0.86, 1/K0
* rt: 703.802, seconds

![image](https://i.gyazo.com/a66e3676aa75dff9834d4acee7538b94.png)

In [4]:
# Example peptide used for the elution-profile figure. Units follow the list above:
# mz in Th, im in 1/K0, rt in seconds. 'fragments' maps each ion label to its m/z.
peptide_info = dict(
    sequence="IIIPEIQK",
    charge=2,
    mz=477.3051,
    im=0.86,
    rt=703.802,
    fragments=dict(
        # y ions, longest to shortest
        y8=953.60304,
        y7=840.51897,
        y6=727.43491,
        y5=614.35085,
        y4=517.29808,
        y3=388.25549,
        y2=275.17143,
        y1=147.11285,
        # b ions, shortest to longest
        b1=114.09139,
        b2=227.17545,
        b3=340.25951,
        b4=437.31228,
        b5=566.35487,
        b6=679.43893,
        b7=807.49751,
        b8=935.59247,
    ),
)

In [5]:
# this plotting function is taken from the AlphaViz package (https://github.com/MannLabs/alphaviz) and modified

def plot_line(
    timstof_data,
    selected_indices,
    label: str,
    x_axis_label: str,
    y_axis_label: str = "intensity",
    remove_zeros: bool = False,
    trim: bool = True,
):
    """Build a Plotly line trace of summed intensities along one data dimension.

    Parameters
    ----------
    timstof_data
        Object providing ``bin_intensities(indices, [dimension])`` and the
        ``mz_values`` / ``mobility_values`` / ``rt_values`` arrays
        (an ``alphatims.bruker.TimsTOF`` instance in this notebook).
    selected_indices
        Indices of the data points to bin, as passed on to ``bin_intensities``.
    label : str
        Name of the trace (shown in the legend).
    x_axis_label : str
        Dimension to project onto: ``"mz"``, ``"rt"`` or ``"mobility"``.
    y_axis_label : str
        Must be one of ``"mz"``, ``"rt"``, ``"mobility"``, ``"intensity"``.
        It is only validated; the y values are always the binned intensities.
    remove_zeros : bool
        If True, keep only the bins with non-zero intensity.
    trim : bool
        Used only when ``remove_zeros`` is False: cut the leading and trailing
        zero-intensity bins, keeping one bin of padding on each side.

    Returns
    -------
    plotly.graph_objects.Scatter
        Line trace with the bin positions on x and the intensities on y.

    Raises
    ------
    KeyError
        If ``x_axis_label`` is not a plottable dimension or ``y_axis_label``
        is not a known axis name.
    """
    axis_titles = {
        "mz": "m/z, Th",
        "rt": "RT, min",
        "mobility": "Inversed IM, V·s·cm\u207B\u00B2",
        "intensity": "Intensity",
    }
    # Attribute of timstof_data that holds the bin positions for each x dimension.
    x_dimensions = {
        "mz": "mz_values",
        "rt": "rt_values",
        "mobility": "mobility_values",
    }
    if y_axis_label not in axis_titles:
        raise KeyError(y_axis_label)
    x_title = axis_titles[x_axis_label]
    x_dimension = x_dimensions[x_axis_label]

    intensities = timstof_data.bin_intensities(selected_indices, [x_dimension])
    x_ticks = getattr(timstof_data, x_dimension)
    if x_dimension == "rt_values":
        # Divide by 60 to match the "RT, min" axis title
        # (presumably rt_values is stored in seconds — confirm against AlphaTims).
        x_ticks = x_ticks / 60

    non_zeros = np.flatnonzero(intensities)
    if len(non_zeros) == 0:
        # Nothing was measured in the selection: return an empty trace.
        x_ticks = np.empty(0, dtype=x_ticks.dtype)
        intensities = np.empty(0, dtype=intensities.dtype)
    elif remove_zeros:
        x_ticks = x_ticks[non_zeros]
        intensities = intensities[non_zeros]
    elif trim:
        # Keep one zero-intensity bin before the first and after the last signal bin.
        start = max(0, non_zeros[0] - 1)
        end = non_zeros[-1] + 2
        x_ticks = x_ticks[start: end]
        intensities = intensities[start: end]

    return go.Scatter(
        x=x_ticks,
        y=intensities,
        mode='lines',
        # Every point carries the x-axis title so the hover template can show it.
        text=[x_title] * len(x_ticks),
        hovertemplate='<b>%{text}:</b> %{x};<br><b>Intensity:</b> %{y}.',
        name=label
    )

In [14]:
# this plotting function is taken from the AlphaViz package (https://github.com/MannLabs/alphaviz) and modified

def plot_elution_profile(
    timstof_data,
    peptide_info,
    mz_tol: float = 50,
    rt_tol: float = 30,
    im_tol: float = 0.05,
    x_axis_label: str = "rt",
    y_axis_label: str = "intensity",
    title: str = "",
    width: int = 900,
    height: int = 400
):
    """Plot the elution profile of a precursor together with its fragments.

    Parameters
    ----------
    timstof_data
        Raw data object that is sliced with
        ``[rt, im, quad, mz, 'raw']`` keys and handed to ``plot_line``
        (an ``alphatims.bruker.TimsTOF`` instance in this notebook).
    peptide_info : dict
        Needs the keys ``'rt'``, ``'im'``, ``'mz'`` and ``'fragments'``
        (a mapping of fragment label to fragment m/z).
    mz_tol : float
        m/z tolerance in ppm, applied to the precursor and every fragment.
    rt_tol : float
        Half-width of the retention-time window, in the unit of ``peptide_info['rt']``.
    im_tol : float
        Half-width of the ion-mobility window, in the unit of ``peptide_info['im']``.
    x_axis_label, y_axis_label : str
        Used only as the axis titles of the figure; the traces are always
        drawn along the retention-time dimension.
    title : str
        Text shown centered above the plot.
    width, height : int
        Figure size passed to the Plotly layout.

    Returns
    -------
    plotly.graph_objects.Figure
        One trace for the precursor plus one trace per fragment that has
        at least one data point in the selected window.
    """
    fig = go.Figure()

    ppm_factor = 1 + mz_tol / 10**6

    rt_slice = slice(peptide_info['rt'] - rt_tol, peptide_info['rt'] + rt_tol)
    im_slice = slice(peptide_info['im'] - im_tol, peptide_info['im'] + im_tol)
    prec_mz_slice = slice(peptide_info['mz'] / ppm_factor, peptide_info['mz'] * ppm_factor)

    # create an elution profile for the precursor
    # (third key 0: presumably selects unfragmented/MS1 signal — confirm against AlphaTims)
    precursor_indices = timstof_data[
        rt_slice,
        im_slice,
        0,
        prec_mz_slice,
        'raw'
    ]
    fig.add_trace(
        plot_line(timstof_data, precursor_indices, x_axis_label='rt', remove_zeros=True, label='precursor')
    )

    # create elution profiles for the all fragments: here the precursor m/z window is the
    # third key and the fragment m/z window is the fourth
    for frag, frag_mz in peptide_info['fragments'].items():
        fragment_data_indices = timstof_data[
            rt_slice,
            im_slice,
            prec_mz_slice,
            slice(frag_mz / ppm_factor, frag_mz * ppm_factor),
            'raw'
        ]
        # Skip fragments without any signal so the legend lists only observed ions.
        if len(fragment_data_indices) > 0:
            fig.add_trace(
                plot_line(timstof_data, fragment_data_indices, x_axis_label='rt', remove_zeros=True, label=frag)
            )

    fig.update_layout(
        title=dict(
            text=title,
            font=dict(
                size=16,
            ),
            x=0.5,
            xanchor='center',
            yanchor='top'
        ),
        xaxis=dict(
            # title.font replaces the deprecated 'titlefont' attribute.
            title=dict(
                text=x_axis_label,
                font=dict(size=14),
            ),
            tickmode='auto',
            tickfont=dict(size=14),
        ),
        yaxis=dict(
            title=y_axis_label
        ),
        template="plotly_white",
        width=width,
        height=height,
        hovermode="x unified",
        showlegend=True
    )
    return fig

In [16]:
# Render the elution profile of the example peptide with the default tolerances.
plot_elution_profile(
    timstof_data=bruker_raw_data,
    peptide_info=peptide_info,
    title='Precursor/fragments elution profile',
).show(config=utils.config)