# Figure 2: MS2/Identification visualization

## Library imports

In [54]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import MinMaxScaler

import plotly.graph_objects as go

import utils
import alphatims.bruker

## Fig 2 A: MS2 DDA spectra 

For this figure we used the [PXD012867 PRIDE project](https://www.ebi.ac.uk/pride/archive/projects/PXD012867) as example data.

In [55]:
# Thermo raw file of the PXD012867 yeast project. The path uses the same '../Data' root
# as the other data paths in this notebook (the original '.../Data' pointed to a folder named '...').
ms2_dda_path = '../Data/PXD012867_yeast_project/20190124_QX3_JuSc_SA_JS7_1_wt_4h_1.raw'

In [58]:
# NOTE(review): RawFileReader is not referenced directly in the visible cells; presumably the import
# is kept so that a broken pyrawfilereader installation fails early - confirm, or move it to the import cell.
from pyrawfilereader import RawFileReader
# Load the Thermo raw file with the project helper; later cells read keys such as
# 'scan_list_ms2', 'rt_list_ms2' and 'mass_list_ms2' from the returned object.
ms2_dda_data = utils.load_thermo_raw(ms2_dda_path)

ImportError: DLL load failed while importing clr: Not enough memory resources are available to process this command.

In [50]:
# Start from the mass dictionary provided by the project helper and overwrite selected entries.
mass_dict = utils.get_mass_dict()

# Plain one-letter amino acid keys.
# NOTE(review): the values look like monoisotopic residue masses in Da - confirm against utils.get_mass_dict().
mass_dict["A"] = 71.0371138
mass_dict["C"] = 103.0091845
mass_dict["D"] = 115.0269431
mass_dict["E"] = 129.0425931
mass_dict["F"] = 147.0684139
mass_dict["G"] = 57.02146373
mass_dict["H"] = 137.0589119
mass_dict["I"] = 113.084064
mass_dict["K"] = 128.094963
mass_dict["L"] = 113.084064
mass_dict["M"] = 131.0404846
mass_dict["N"] = 114.0429275
mass_dict["P"] = 97.05276386
mass_dict["Q"] = 128.0585775
mass_dict["R"] = 156.101111
mass_dict["S"] = 87.03202843
mass_dict["T"] = 101.0476785
mass_dict["U"] = 150.9536333957
mass_dict["V"] = 99.06841392
mass_dict["W"] = 186.079313
mass_dict["Y"] = 163.0633286
# Prefixed keys for modified residues.
# NOTE(review): presumably 'c' = carbamidomethyl, 'ox' = oxidation and 'a' = acetyl
# (each 'a*' value is the plain residue value + ~42.0106) - confirm the naming convention.
mass_dict["cC"] = 160.03064823
mass_dict["oxM"] = 147.03539923000002
mass_dict["aA"] = 113.04767849000001
mass_dict["aC"] = 145.01974919
mass_dict["aD"] = 157.03750779
mass_dict["aE"] = 171.05315779
mass_dict["aF"] = 189.07897859
mass_dict["aG"] = 99.03202842
mass_dict["aH"] = 179.06947659
mass_dict["aI"] = 155.09462869
mass_dict["aK"] = 170.10552769
mass_dict["aL"] = 155.09462869
mass_dict[

In [None]:
# Fragment masses and fragment type codes of the example peptide, computed by the project helper
# from the per-residue masses in mass_dict.
frag_masses, frag_type = utils.get_fragmass(parsed_pep=list('AAAITSDILEALGR'), mass_dict=mass_dict)

In [None]:
# Label every fragment: positive type codes become "b<n>", all other codes become "y<n>" (sign flipped).
dict(
    (f"b{ion_code}" if ion_code > 0 else f"y{-ion_code}", ion_mass)
    for ion_code, ion_mass in zip(frag_type, frag_masses)
)

In [114]:
# this plotting function is taken from the AlphaViz package (https://github.com/MannLabs/alphaviz) and modified
def plot_mass_spectra(
    data: pd.DataFrame,
    title: str,
    predicted: tuple = (),
    spectrum_color: str = 'grey',
    b_ion_color: str = 'blue',
    y_ion_color: str = 'red',
    template: str = "plotly_white",
    spectrum_line_width: float = 1.5,
    height: int = 520,
    predicted_color: str = 'lightblue'
):
    """Plot a mass spectrum as vertical sticks, optionally together with a predicted spectrum.

    Every peak is drawn as a vertical line (layout shape) from 0 to its intensity. Small
    markers at the peak tops carry the hover information.

    Parameters
    ----------
    data : pd.DataFrame
        Peaks to plot. Must contain the columns 'mz_values', 'intensity_values' and 'ions'.
        'ions' holds the annotation of each peak: '' for not annotated peaks, labels
        containing 'b' are drawn in `b_ion_color`, labels containing 'y' in `y_ion_color`.
    title : str
        Title of the figure.
    predicted : tuple
        Optional (mz_values, intensity_values, ion_labels) of a predicted spectrum. The values
        are plotted as given, so pass negative intensities to obtain a mirrored plot.
    spectrum_color : str
        Color of the not annotated peaks.
    b_ion_color : str
        Color of the peaks annotated as b-ions.
    y_ion_color : str
        Color of the peaks annotated as y-ions.
    template : str
        Plotly layout template.
    spectrum_line_width : float
        Width of the vertical peak lines.
    height : int
        Height of the figure in pixels.
    predicted_color : str
        Color of the predicted peaks.

    Returns
    -------
    plotly.graph_objects.Figure
        The spectrum figure.
    """
    hover_template = '<b>m/z:</b> %{x};<br><b>Intensity:</b> %{y};<br><b>Ion:</b> %{hovertext}.'

    def _ion_color(ion_label):
        # Same precedence as the original: 'b' is checked before 'y'.
        if not isinstance(ion_label, str):
            # e.g. NaN in the annotation column: treat as not annotated
            return spectrum_color
        if 'b' in ion_label:
            return b_ion_color
        if 'y' in ion_label:
            return y_ion_color
        return spectrum_color

    def _hover_markers(mz, intensity, labels, color, size):
        # Markers on the peak tops; they only exist to provide the hover box.
        return go.Scatter(
            x=mz,
            y=intensity,
            mode='markers',
            marker=dict(color=color, size=size),
            hovertext=labels,
            hovertemplate=hover_template,
            name='',
            showlegend=False
        )

    def _stick(mz, intensity, color):
        # One vertical line from the baseline to the peak intensity.
        return dict(
            type='line',
            xref='x',
            yref='y',
            x0=mz,
            y0=0,
            x1=mz,
            y1=intensity,
            line=dict(color=color, width=spectrum_line_width)
        )

    fig = go.Figure()

    # not annotated peaks (x, y and hovertext now use the same selection)
    unannotated = data[data.ions == '']
    fig.add_trace(
        _hover_markers(
            unannotated.mz_values, unannotated.intensity_values, unannotated.ions, spectrum_color, 1
        )
    )
    # y-ions
    data_y_ions = data[data.ions.str.contains('y', na=False)]
    fig.add_trace(
        _hover_markers(
            data_y_ions.mz_values, data_y_ions.intensity_values, data_y_ions.ions, y_ion_color, 1
        )
    )
    # b-ions
    data_b_ions = data[data.ions.str.contains('b', na=False)]
    fig.add_trace(
        _hover_markers(
            data_b_ions.mz_values, data_b_ions.intensity_values, data_b_ions.ions, b_ion_color, 1
        )
    )

    # Use the 'shapes' attribute from the layout to draw the vertical lines.
    # Iterating over the column values (not over index labels) also works for a non-unique index.
    shapes = [
        _stick(mz, intensity, _ion_color(ion_label))
        for mz, intensity, ion_label in zip(data.mz_values, data.intensity_values, data.ions)
    ]

    if predicted:
        predicted_mz, predicted_intensity, predicted_labels = predicted[0], predicted[1], predicted[2]
        fig.add_trace(
            _hover_markers(predicted_mz, predicted_intensity, predicted_labels, predicted_color, 3)
        )
        shapes.extend(
            _stick(mz, intensity, predicted_color)
            for mz, intensity in zip(predicted_mz, predicted_intensity)
        )

    fig.update_layout(
        template=template,
        xaxis=dict(
            title='m/z, Th',
            mirror=True
        ),
        yaxis=dict(
            # with a predicted spectrum the y-axis is labelled as relative intensity
            title='Relative intensity, %' if predicted else 'Intensity',
        ),
        legend=dict(
            orientation="h",
            x=1,
            xanchor="right",
            yanchor="bottom",
            y=1.01
        ),
        hovermode="closest",
        height=height,
        title=dict(
            text=title,
            font=dict(
                size=16,
            ),
            x=0.5,
            xanchor='center',
            yanchor='top'
        ),
        shapes=shapes,
    )
    return fig

In [None]:
# Gather the MS2 information of the DDA run in one table and preview the scans with RT > 50.
df_ms2 = pd.DataFrame(
    dict(
        scan=ms2_dda_data['scan_list_ms2'],
        RT=ms2_dda_data['rt_list_ms2'],
        prec_mono_mz=ms2_dda_data['mono_mzs2'],
        prec_charge=ms2_dda_data['charge_ms2'],
        mz_values=ms2_dda_data['mass_list_ms2'],
        intensity_values=ms2_dda_data['int_list_ms2'],
        order='ms2',
    )
)
df_ms2.loc[df_ms2.RT > 50].head()

In [None]:
# let's plot the MS2 spectra acquired at 50.0019 min for the precursor m/z = 590.2973
# plot_mass_spectra takes one DataFrame with the columns 'mz_values', 'intensity_values' and 'ions',
# so the peak arrays of the selected scan are wrapped into such a frame ('' = no ion annotation).
dda_scan = df_ms2[df_ms2.scan == 61885]
dda_spectrum = pd.DataFrame({
    'mz_values': dda_scan.mz_values.values[0],
    'intensity_values': dda_scan.intensity_values.values[0],
    'ions': ''
})
plot_mass_spectra(
    data=dda_spectrum,
    title=f"MS2 DDA spectrum for precursor m/z={dda_scan.prec_mono_mz.values[0]} and charge={dda_scan.prec_charge.values[0]}."
).show(config=utils.config)

## Fig 2 B: MS2 DIA spectra 

For this figure we used the Bruker raw file from the [Project PXD017703 from ProteomeXchange](https://www.ebi.ac.uk/pride/archive/projects/PXD017703) as example data.

To read the raw TIMS-TOF data we use a recently published [AlphaTims package](https://github.com/MannLabs/alphatims).

In [2]:
# Bruker .d folder of the diaPASEF run; it is opened with alphatims.bruker.TimsTOF in the next cell.
bruker_file_path = '../Data/PXD017703_diaPASEF/20200428_Evosep_60SPD_SG06-16_MLHeLa_200ng_py8_S3-A6_1_2452.d'

In [7]:
# Open the raw TIMS-TOF data; the resulting object is sliced in the cells below (Fig 2 B and Fig 2 G).
bruker_raw_data = alphatims.bruker.TimsTOF(bruker_file_path)

100%|███████████████████████████████████████████████████████████████████████████| 11872/11872 [00:19<00:00, 611.17it/s]


In [105]:
# extract the MS2 DIA spectrum from the frame 7000 (rt = 740.577 s, i.e. 12.343 min) and the m/z window: 925-950 Th
peptide_data = bruker_raw_data[7000, :, slice(925., 950.)]
print(peptide_data.shape)
# 'ions' holds the fragment annotation of each peak; an empty string means "not annotated"
peptide_data['ions'] = ''
peptide_data.head()

(11487, 12)


Unnamed: 0,raw_indices,frame_indices,scan_indices,precursor_indices,push_indices,tof_indices,rt_values,mobility_values,quad_low_mz_values,quad_high_mz_values,mz_values,intensity_values,ions
0,383964266,7000,33,6,6433033,110090,740.576886,1.601114,925.0,950.0,338.377137,9,
1,383964267,7000,35,6,6433035,272188,740.576886,1.598886,925.0,950.0,969.000476,9,
2,383964268,7000,35,6,6433035,331062,740.576886,1.598886,925.0,950.0,1278.324787,9,
3,383964269,7000,36,6,6433036,229418,740.576886,1.597771,925.0,950.0,771.114018,9,
4,383964270,7000,36,6,6433036,328786,740.576886,1.597771,925.0,950.0,1265.571739,9,


Here we should be able to annotate the peptide TLASENIPSLPPGGELASK with the charge 2, m/z=941.0018, rt=740.551.

In [106]:
# Theoretical fragments of the peptide that should be present in the selected DIA window.
frag_masses, frag_type = utils.get_fragmass(
    parsed_pep=list('TLASENIPSLPPGGELASK'),
    mass_dict=mass_dict,
)
# Positive type codes are labelled "b<n>", all other codes "y<n>" (sign flipped).
predicted_ions = dict(
    (f"b{ion_code}" if ion_code > 0 else f"y{-ion_code}", ion_mass)
    for ion_code, ion_mass in zip(frag_type, frag_masses)
)

In [109]:
mz_tol = 10  # ppm
# Annotate every measured peak that lies within the tolerance window of a predicted fragment.
# A peak matching several fragments keeps the label of the last matching fragment.
for ion_label, ion_mass in predicted_ions.items():
    lower_bound = ion_mass / (1 + mz_tol / 10**6)
    upper_bound = ion_mass * (1 + mz_tol / 10**6)
    within_tolerance = peptide_data.mz_values.between(lower_bound, upper_bound)
    peptide_data.loc[within_tolerance, 'ions'] = ion_label

In [110]:
# Which annotations were assigned? '' stands for the peaks without a matching fragment.
peptide_data.ions.unique()

array(['', 'b9', 'b11', 'b14', 'y7', 'b7', 'y12', 'b6', 'y3', 'y14'],
      dtype=object)

In [118]:
# The plotted frame 7000 has rt_values = 740.577 s, i.e. 12.343 min (see the peptide_data preview above),
# so the title reports that retention time.
plot_mass_spectra(
    data=peptide_data.sort_values('ions'),
    title='MS2 DIA spectrum for the RT = 12.343 min and m/z window = 925-950 Th with the peptide TLASENIPSLPPGGELASK.'
)

## Fig 2 C: Phospho MS2 spectra 

For this figure we used the [PXD010697 PRIDE project](https://www.ebi.ac.uk/pride/archive/projects/PXD010697) as example data.

In [None]:
# Thermo raw file of the phospho-enriched sample from the PXD010697 project.
phospho_path = '../Data/PXD010697_circadian_clock/20170123_Qep6_ChRo_SA_collab_SYN_CT_phospho_1.raw'

In [None]:
# Load the raw file with the project helper; the next cell reads its '*_ms2' entries.
phospho_data = utils.load_thermo_raw(phospho_path)

In [None]:
# Gather the MS2 information of the phospho run in one table and preview the scans with RT > 50.
df_ms2_phospho = pd.DataFrame(
    dict(
        scan=phospho_data['scan_list_ms2'],
        RT=phospho_data['rt_list_ms2'],
        prec_mono_mz=phospho_data['mono_mzs2'],
        prec_charge=phospho_data['charge_ms2'],
        mz_values=phospho_data['mass_list_ms2'],
        intensity_values=phospho_data['int_list_ms2'],
        order='ms2',
    )
)
df_ms2_phospho.loc[df_ms2_phospho.RT > 50].head()

To look at the MS2 spectra of the phosphorylated precursor, we took an example from the `peptides.txt` and `Phospho (STY)Sites.txt` MQ output files.
For this we'd like to look at the peptide `AASSEAAPHHQPPPESR`:
- with scan number = 7901
- from protein group #47 with the leading protein Q62407 (Striated muscle-specific serine/threonine-protein kinase)
- id = 606
- 2 possible phosphosites with the localization probabilities: AAS(0.968)S(0.032)EAAPHHQPPPESR
- charge = 3 
- mass = 1767.8289

In [None]:
scan = 7901
# plot_mass_spectra takes one DataFrame with the columns 'mz_values', 'intensity_values' and 'ions',
# so the peak arrays of the selected scan are wrapped into such a frame ('' = no ion annotation).
phospho_scan = df_ms2_phospho[df_ms2_phospho.scan == scan]
phospho_spectrum = pd.DataFrame({
    'mz_values': phospho_scan.mz_values.values[0],
    'intensity_values': phospho_scan.intensity_values.values[0],
    'ions': ''
})
plot_mass_spectra(
    data=phospho_spectrum,
    title=f"MS2 DDA spectrum for precursor m/z={phospho_scan.prec_mono_mz.values[0]} and charge={phospho_scan.prec_charge.values[0]}."
).show(config=utils.config)

## Fig 2 F: Mirrored spectra 

For this figure we used the raw file from the [Project from ProteomeXchange](https://www.ebi.ac.uk/pride/archive/projects/) as example data.

In [None]:
# NOTE(review): absolute, machine-specific path - unlike the other data paths in this notebook,
# which are relative to '../Data'. It has to be adapted before the notebook can run elsewhere.
mirrored_path = "D:/Holoviz_project/Data_test_case/Cancer_adenoma_for_testing/20191028_QX6_LiSc_FFPE_Covaris_Experiment4_7B.raw"

In [None]:
# Load the raw file with the project helper; the next cell reads its '*_ms2' entries.
mirrored_data = utils.load_thermo_raw(mirrored_path)

In [None]:
# Gather the MS2 information of the run used for the mirrored plot in one table.
df_ms2_mirrored = pd.DataFrame(
    dict(
        scan=mirrored_data['scan_list_ms2'],
        RT=mirrored_data['rt_list_ms2'],
        prec_mono_mz=mirrored_data['mono_mzs2'],
        prec_charge=mirrored_data['charge_ms2'],
        mz_values=mirrored_data['mass_list_ms2'],
        intensity_values=mirrored_data['int_list_ms2'],
        order='ms2',
    )
)

Let's extract the information for the identified peptide **AAAITSDILEALGR** of **Periostin** protein with 
* m/z = 700.8901978, 
* charge = 2,
* CE = 27.

In [None]:
# Properties of the identified peptide; 'sequence', 'CE' and 'charge' are written to the
# prediction input file below, and 'sequence' is used to filter the predicted library.
peptide = {
    'sequence': 'AAAITSDILEALGR',
    'm/z': 700.8901978,
    'charge': 2,
    'CE': 27
}

In [None]:
# Look up the MS2 scan of the identified peptide AAAITSDILEALGR; the scan number is taken from evidence.txt.
# `scan` is reused by the following cells to select the experimental spectrum.
scan = 117845
df_ms2_mirrored[df_ms2_mirrored.scan == scan]

The mirrored predicted MS2 spectrum was generated with the [Prosit_2020_intensity_hcd](https://doi.org/10.1038/s41592-019-0426-7) model, using the platform https://www.proteomicsdb.org/prosit/ for submitting the job. For this, the peptide was saved into a .csv file with the amino acid sequence, CE and charge.

In [None]:
# Write the single-row input table for the prediction job: sequence, collision energy and charge.
pd.DataFrame(
    {
        'modified_sequence': [peptide['sequence']],
        'collision_energy': [peptide['CE']],
        'precursor_charge': [peptide['charge']],
    }
).to_csv('prediction.csv', index=False)

When you get the predicted file, you can upload it here and extract the predicted fragment ions and their intensities.

In [None]:
# NOTE(review): the library is read from the PXD012867 folder although the peptide comes from
# another raw file - confirm that this is the intended location of the prediction output.
path_to_prediction = '../Data/PXD012867_yeast_project/myPrositLib.csv'
predicted_df = pd.read_csv(
    path_to_prediction, 
    usecols=['RelativeIntensity', 'FragmentMz', 'StrippedPeptide', 'FragmentNumber', 'FragmentType', 'FragmentCharge']
)
# Keep only the fragments of the selected peptide that have a non-zero predicted intensity.
# .copy() makes the selection an independent frame, so the column edits below do not act on a
# slice of the full library (no SettingWithCopyWarning, well-defined result).
is_selected_fragment = (predicted_df.StrippedPeptide == peptide['sequence']) & (predicted_df.RelativeIntensity > 0)
predicted_df = predicted_df[is_selected_fragment].copy()
# Scale to percent and flip the sign, so the predicted peaks point downwards in the mirrored plot.
predicted_df['RelativeIntensity'] = predicted_df['RelativeIntensity'] * -100
# Ion label such as "y5_+1": fragment type, fragment number and fragment charge.
# Built column-wise, which also works when no fragment passes the filter.
predicted_df['ions'] = (
    predicted_df['FragmentType'].astype(str)
    + predicted_df['FragmentNumber'].astype(str)
    + '_+'
    + predicted_df['FragmentCharge'].astype(str)
)
predicted_df.head()

To visualize the mirrored plot, we need to get the Relative abundance(%) of the ions instead of Absolute intensity. For this let's use the MinMaxScaler from Sklearn.

In [None]:
# Peak arrays of the selected scan; the intensities are turned into a single column for the scaler.
mz_values = df_ms2_mirrored.loc[df_ms2_mirrored.scan == scan, 'mz_values'].values[0]
intensity_values = df_ms2_mirrored.loc[df_ms2_mirrored.scan == scan, 'intensity_values'].values[0].reshape(-1, 1)
# Min-max scaling maps the intensities to [0, 1]; flatten again and express them in percent.
relat_intensity_values = MinMaxScaler().fit_transform(intensity_values).ravel() * 100

In [None]:
# plot_mass_spectra takes one DataFrame with the columns 'mz_values', 'intensity_values' and 'ions',
# so the experimental peaks are wrapped into such a frame ('' = no ion annotation).
mirrored_spectrum = pd.DataFrame({
    'mz_values': mz_values,
    'intensity_values': relat_intensity_values,
    'ions': ''
})
plot_mass_spectra(
    data=mirrored_spectrum,
    predicted=(predicted_df.FragmentMz, predicted_df.RelativeIntensity, predicted_df.ions),
    title=f"Experimental and predicted spectrum for peptide AAAITSDILEALGR with m/z={df_ms2_mirrored[df_ms2_mirrored.scan == scan].prec_mono_mz.values[0]} and charge={df_ms2_mirrored[df_ms2_mirrored.scan == scan].prec_charge.values[0]}."
).show(config=utils.config)

## Fig 2 G: Precursor/fragments elution profile 

For this figure we used the Bruker raw file from the [Project PXD017703 from ProteomeXchange](https://www.ebi.ac.uk/pride/archive/projects/PXD017703) as example data.

To read the raw TIMS-TOF data we use a recently published [AlphaTims package](https://github.com/MannLabs/alphatims).

An example is the following peptide:

> the information about the peptide was taken from the pyprophet_export_60SPD.tsv file.

* sequence: "IIIPEIQK"
* charge: 2
* m/z: 477.3051, Th
* im: 0.86, 1/K0
* rt: 703.802, seconds

![image](https://i.gyazo.com/a66e3676aa75dff9834d4acee7538b94.png)

In [None]:
# Peptide used for the elution profile: m/z in Th, ion mobility in 1/K0, retention time in seconds.
peptide_info = dict(
    sequence="IIIPEIQK",
    charge=2,
    mz=477.3051,
    im=0.86,
    rt=703.802,
)

In [None]:
# get_fragmass lives in the project's utils module (it is never imported as a bare name in this notebook),
# so it is called as utils.get_fragmass like in the other cells.
frag_masses, frag_type = utils.get_fragmass(parsed_pep=list(peptide_info['sequence']), mass_dict=mass_dict)
# Positive type codes are labelled "b<n>", all other codes "y<n>" (sign flipped).
peptide_info['fragments'] = {(f"b{key}" if key>0 else f"y{-key}"):value for key,value in zip(frag_type, frag_masses)}

In [None]:
# this plotting function is taken from the AlphaViz package (https://github.com/MannLabs/alphaviz) and modified

def plot_line(
    timstof_data,
    selected_indices,
    label: str,
    x_axis_label: str,
    y_axis_label: str = "intensity",
    remove_zeros: bool = False,
    trim: bool = True,
):
    """Build a line trace with the summed intensities of the selected data points along one dimension.

    Parameters
    ----------
    timstof_data
        Data object providing `bin_intensities` and the `mz_values`, `rt_values` and
        `mobility_values` arrays.
    selected_indices
        Indices of the data points that are binned.
    label : str
        Name of the trace (shown in the legend).
    x_axis_label : str
        Dimension of the x-axis: "mz", "rt" or "mobility".
    y_axis_label : str
        Key of the y-axis label; it is only validated against the known axis keys.
    remove_zeros : bool
        If True, keep only the bins with a non-zero intensity.
    trim : bool
        If True (and `remove_zeros` is False), cut the zero-intensity bins at both ends,
        keeping one bin before the first and one after the last non-zero bin.

    Returns
    -------
    plotly.graph_objects.Scatter
        The line trace.
    """
    display_names = {
        "mz": "m/z, Th",
        "rt": "RT, min",
        "mobility": "Inversed IM, V·s·cm\u207B\u00B2",
        "intensity": "Intensity",
    }
    column_by_display_name = {
        'm/z, Th': "mz_values",
        'RT, min': "rt_values",
        'Inversed IM, V·s·cm\u207B\u00B2': "mobility_values",
    }
    x_axis_label = display_names[x_axis_label]
    # looked up to reject unknown keys, exactly like the x-axis key above
    y_axis_label = display_names[y_axis_label]
    x_dimension = column_by_display_name[x_axis_label]

    intensities = timstof_data.bin_intensities(selected_indices, [x_dimension])

    # tick values of the selected dimension; retention times are converted from seconds to minutes
    x_ticks = getattr(timstof_data, x_dimension)
    if x_dimension == "rt_values":
        x_ticks = x_ticks / 60

    filled_bins = np.flatnonzero(intensities)
    if len(filled_bins) == 0:
        # nothing was selected: return an empty trace with matching dtypes
        x_ticks = np.empty(0, dtype=x_ticks.dtype)
        intensities = np.empty(0, dtype=intensities.dtype)
    elif remove_zeros:
        x_ticks = x_ticks[filled_bins]
        intensities = intensities[filled_bins]
    elif trim:
        first_bin = max(0, filled_bins[0] - 1)
        stop_bin = filled_bins[-1] + 2
        x_ticks = x_ticks[first_bin: stop_bin]
        intensities = intensities[first_bin: stop_bin]

    # every point carries the axis label, which the hover template prints in front of the x value
    hover_labels = [x_axis_label] * len(x_ticks)
    return go.Scatter(
        x=x_ticks,
        y=intensities,
        mode='lines',
        text=hover_labels,
        hovertemplate='<b>%{text}:</b> %{x};<br><b>Intensity:</b> %{y}.',
        name=label
    )

In [None]:
# this plotting function is taken from the AlphaViz package (https://github.com/MannLabs/alphaviz) and modified

def plot_elution_profile(
    timstof_data,
    peptide_info,
    mz_tol: float = 50,
    rt_tol: float = 30,
    im_tol: float = 0.05,
    x_axis_label: str = "rt",
    y_axis_label: str = "intensity",
    title: str = "",
    width: int = 900,
    height: int = 400
):
    """Plot the elution profiles of a precursor and of its fragments.

    Parameters
    ----------
    timstof_data
        Raw data object that is sliced by retention time, ion mobility, quadrupole m/z
        and TOF m/z, and that is passed on to `plot_line`.
        NOTE(review): expected to be an alphatims.bruker.TimsTOF object - confirm.
    peptide_info : dict
        Peptide description with the keys 'rt', 'im', 'mz' and 'fragments'
        ('fragments' maps an ion label to its m/z).
    mz_tol : float
        m/z tolerance in ppm, used for the precursor and for the fragments.
    rt_tol : float
        Retention time tolerance, in the same unit as peptide_info['rt'].
    im_tol : float
        Ion mobility tolerance, in the same unit as peptide_info['im'].
    x_axis_label : str
        Title of the x-axis.
    y_axis_label : str
        Title of the y-axis.
    title : str
        Title of the figure.
    width : int
        Width of the figure in pixels.
    height : int
        Height of the figure in pixels.

    Returns
    -------
    plotly.graph_objects.Figure
        Figure with one line for the precursor and one line for every fragment with signal.
    """
    fig = go.Figure()

    ppm_factor = 1 + mz_tol / 10**6
    rt_slice = slice(peptide_info['rt'] - rt_tol, peptide_info['rt'] + rt_tol)
    im_slice = slice(peptide_info['im'] - im_tol, peptide_info['im'] + im_tol)
    prec_mz_slice = slice(peptide_info['mz'] / ppm_factor, peptide_info['mz'] * ppm_factor)

    # create an elution profile for the precursor
    # NOTE(review): the third index 0 presumably selects the MS1 (unfragmented) data - confirm.
    precursor_indices = timstof_data[
        rt_slice,
        im_slice,
        0,
        prec_mz_slice,
        'raw'
    ]
    fig.add_trace(
        plot_line(timstof_data, precursor_indices, x_axis_label='rt', remove_zeros=True, label='precursor')
    )

    # create elution profiles for all fragments; the data passed by the caller is used throughout
    # (the original read the notebook-global bruker_raw_data here instead of the timstof_data argument)
    for frag, frag_mz in peptide_info['fragments'].items():
        fragment_data_indices = timstof_data[
            rt_slice,
            im_slice,
            prec_mz_slice,
            slice(frag_mz / ppm_factor, frag_mz * ppm_factor),
            'raw'
        ]
        # fragments without any signal in the selected region are skipped
        if len(fragment_data_indices) > 0:
            fig.add_trace(
                plot_line(timstof_data, fragment_data_indices, x_axis_label='rt', remove_zeros=True, label=frag)
            )

    fig.update_layout(
        title=dict(
            text=title,
            font=dict(
                size=16,
            ),
            x=0.5,
            xanchor='center',
            yanchor='top'
        ),
        xaxis=dict(
            # title.font replaces the deprecated 'titlefont' attribute of the axis
            title=dict(
                text=x_axis_label,
                font=dict(size=14),
            ),
            tickmode='auto',
            tickfont_size=14,
        ),
        yaxis=dict(
            title=y_axis_label
        ),
        template="plotly_white",
        width=width,
        height=height,
        hovermode="x unified",
        showlegend=True
    )
    return fig

In [None]:
# Elution profile of the precursor and fragments of the peptide in peptide_info, with the default tolerances.
plot_elution_profile(
    bruker_raw_data, 
    peptide_info,
    title='Precursor/fragments elution profile'
).show(config=utils.config)