In [1]:
# (Re)load IPython's autoreload extension and switch it to mode 2 so that
# edits to imported modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [2]:
import os

from alpharaw.thermo import ThermoRawData

# Path of the Thermo RAW file to load. The default is the original author's
# local file; set the ALPHARAW_RAW_FILE environment variable to point the
# notebook at a copy on another machine without editing this cell.
raw_file = os.environ.get(
    "ALPHARAW_RAW_FILE",
    "/Users/wenfengzeng/data/multinorch/20240203_Ecl1_Evo08_11p5min_TiHe_SA_H032_E14_B5.raw",
)

# Request the "multinotch" auxiliary item in addition to the standard
# spectrum columns (it shows up as the `multinotch` column of `spectrum_df`).
raw_data = ThermoRawData(
    process_count=1,
    auxiliary_items=["multinotch"],
)
raw_data.import_raw(raw_file)

# Display the per-spectrum table as the cell output.
raw_data.spectrum_df

Unnamed: 0,spec_idx,peak_start_idx,peak_stop_idx,rt,precursor_mz,precursor_charge,isolation_lower_mz,isolation_upper_mz,ms_level,nce,multinotch
0,0,0,2864,4.108146,452.239471,0,452.039471,452.439471,2,30.0,"[[452.03947143256664, 452.4394714385271], [457..."
1,1,2864,5606,4.116757,452.239471,0,452.039471,452.439471,2,30.0,"[[452.03947143256664, 452.4394714385271], [457..."
2,2,5606,8357,4.125367,452.239471,0,452.039471,452.439471,2,30.0,"[[452.03947143256664, 452.4394714385271], [457..."
3,3,8357,11260,4.133977,452.239471,0,452.039471,452.439471,2,30.0,"[[452.03947143256664, 452.4394714385271], [457..."
4,4,11260,13770,4.142586,452.239471,0,452.039471,452.439471,2,30.0,"[[452.03947143256664, 452.4394714385271], [457..."
...,...,...,...,...,...,...,...,...,...,...,...
665,665,1541227,1542980,10.599350,770.464478,0,770.264478,770.664478,2,30.0,"[[770.2644775360823, 770.6644775420427], [775...."
666,666,1542980,1544857,10.608726,770.464478,0,770.264478,770.664478,2,30.0,"[[770.2644775360823, 770.6644775420427], [775...."
667,667,1544857,1546730,10.618007,770.464478,0,770.264478,770.664478,2,30.0,"[[770.2644775360823, 770.6644775420427], [775...."
668,668,1546730,1548591,10.627298,770.464478,0,770.264478,770.664478,2,30.0,"[[770.2644775360823, 770.6644775420427], [775...."


In [3]:
# Short aliases for the per-spectrum table and the flat peak table; the helper
# functions and cells below take these two frames as their first arguments.
spec_df, peak_df = raw_data.spectrum_df, raw_data.peak_df

In [4]:
import pandas as pd
import alphabase.peptide.precursor as precursur_func

# One-row PSM table describing the peptide to inspect: unmodified MGALGLEGR,
# charge 2. `rt` is in minutes (it is multiplied by 60 further down to obtain
# seconds).
psm_df = pd.DataFrame(
    dict(
        sequence=["MGALGLEGR"],
        charge=[2],
        mods=[""],
        mod_sites=[""],
        rt=[4.3],
    )
)

# The return value of the first call is discarded, so `psm_df` is expected to
# be updated in place; the table displayed by the last call carries the extra
# columns nAA, precursor_mz, i_0..i_5 and mono_isotope_idx.
precursur_func.calc_precursor_mz(psm_df)
precursur_func.calc_precursor_isotope(psm_df)

Unnamed: 0,sequence,charge,mods,mod_sites,rt,nAA,precursor_mz,i_0,i_1,i_2,i_3,i_4,i_5,mono_isotope_idx
0,MGALGLEGR,2,,,4.3,9,452.23947,0.590296,0.273583,0.101809,0.027392,0.005874,0.001045,0


In [5]:
from alpharaw.viz.psm_plot import PSM_Plot
from alpharaw.viz.xic_plot import XIC_Plot
from alpharaw.viz.psm_plot import plot_multi_spectra, plot_multi_psms
from alpharaw.viz.df_utils import make_psm_plot_df_for_peptide, make_query_plot_df_for_peptide
import numpy as np

# Shared plotter instances used by the helper functions defined in this cell
# (`xic_plotter` for XIC views, `psm_plotter` for annotated spectra).
xic_plotter, psm_plotter = XIC_Plot(), PSM_Plot()

def plot_xic(
    spec_df, peak_df,
    query_masses, rt_sec,
    query_ion_names,
    precursor_mz,
    ms1_mz_tol = 20,
    title="",
):
    """Plot XICs for a set of query masses via the shared `xic_plotter`.

    Args:
        spec_df: Spectrum table forwarded to `xic_plotter.plot_query_masses`.
        peak_df: Peak table forwarded to `xic_plotter.plot_query_masses`.
        query_masses: Masses to trace, forwarded unchanged.
        rt_sec: Query retention time, passed on as `query_rt_sec`.
        query_ion_names: Labels for the query masses.
        precursor_mz: Precursor m/z. When it is greater than 0 a window of
            +/- `ms1_mz_tol` ppm around it is passed on; otherwise both
            bounds are set to -1.0.
        ms1_mz_tol: Relative half-width of the precursor window in parts per
            million (it is scaled by 1e-6).
        title: Plot title.

    Returns:
        Whatever `xic_plotter.plot_query_masses` returns.
    """
    has_precursor = precursor_mz > 0
    # NOTE(review): -1.0 presumably tells the plotter that no precursor window
    # applies - confirm against XIC_Plot.plot_query_masses.
    lower_bound = precursor_mz*(1-ms1_mz_tol*1e-6) if has_precursor else -1.0
    upper_bound = precursor_mz*(1+ms1_mz_tol*1e-6) if has_precursor else -1.0

    plot_kwargs = dict(
        query_ion_names=query_ion_names,
        query_rt_sec=rt_sec,
        precursor_left_mz=lower_bound,
        precursor_right_mz=upper_bound,
        query_intensities=None,
        title=title,
    )
    return xic_plotter.plot_query_masses(
        spec_df, peak_df, query_masses, **plot_kwargs
    )

def plot_psm_xic(
    spec_df, peak_df,
    sequence, mods, mod_sites, charge,
    rt_sec, include_isotopes=False
):
    """Build the query table for one peptide and plot its XICs.

    The query table is created with `make_query_plot_df_for_peptide`
    (`ms_level=2`, precursor isotopes included when `include_isotopes` is
    true) and handed to `xic_plotter.plot`; the plot title is the first value
    of the table's `modified_sequence` column.

    Returns:
        A tuple `(plot_df, figure)`: the query table and whatever
        `xic_plotter.plot` returns.
    """
    query_df = make_query_plot_df_for_peptide(
        sequence, mods, mod_sites, charge, rt_sec,
        ms_level=2,
        include_precursor_isotopes=include_isotopes,
    )
    xic_figure = xic_plotter.plot(
        spec_df, peak_df, query_df,
        title=query_df.modified_sequence.values[0],
    )
    return query_df, xic_figure

def plot_one_psm(
    peak_df,
    sequence, mods, mod_sites, charge,
):
    """Plot one spectrum annotated for the given peptide.

    Args:
        peak_df: Peaks of a single spectrum; its `mz` and `intensity` columns
            are read.
        sequence, mods, mod_sites, charge: Peptide description forwarded to
            `make_psm_plot_df_for_peptide`.

    Returns:
        Whatever `psm_plotter.plot` returns. Unmatched peaks are drawn as
        well (`plot_unmatched_peaks=True`).
    """
    mz_values = peak_df.mz.values
    intensity_values = peak_df.intensity.values
    annotated_df = make_psm_plot_df_for_peptide(
        mz_values, intensity_values,
        sequence, mods, mod_sites, charge
    )
    # Title the figure with the first entry of the modified_sequence column.
    figure_title = annotated_df.modified_sequence.values[0]
    return psm_plotter.plot(
        annotated_df, sequence, figure_title,
        plot_unmatched_peaks=True
    )

def plot_psms(
    peak_df_list,
    sequence, mods, mod_sites, charge,
    plot_height=800
):
    """Plot several spectra annotated for the same peptide.

    Args:
        peak_df_list: One peak table per spectrum; the `mz` and `intensity`
            columns of each are read.
        sequence, mods, mod_sites, charge: Peptide description forwarded to
            `plot_multi_psms`.
        plot_height: Forwarded to `plot_multi_psms` as `plot_height`.

    Returns:
        Whatever `plot_multi_psms` returns.
    """
    def _column_arrays(column):
        # One raw array per spectrum for the requested peak column.
        return [getattr(one_df, column).values for one_df in peak_df_list]

    mz_arrays = _column_arrays("mz")
    intensity_arrays = _column_arrays("intensity")
    return plot_multi_psms(
        mz_arrays, intensity_arrays,
        sequence, mods, mod_sites, charge,
        plot_height=plot_height
    )

def plot_matches(
    peak_df_list,
    query_masses, 
    ion_names,
    plot_height = 800
):
    """Plot several spectra, cropped around the query masses, with matches.

    Each spectrum is restricted to peaks whose m/z lies within
    [`query_masses.min()` - 3, `query_masses.max()` + 3] before plotting.
    The per-query tolerance passed to `plot_multi_spectra` is 10 ppm of each
    query mass (`query_masses*10*1e-6`).

    Args:
        peak_df_list: One peak table per spectrum (`mz`, `intensity` columns).
        query_masses: Query masses; must support `.min()`, `.max()` and
            element-wise multiplication (e.g. a numpy array).
        ion_names: Labels passed on as `query_ion_names`.
        plot_height: Forwarded to `plot_multi_spectra` as `plot_height`.

    Returns:
        Whatever `plot_multi_spectra` returns.
    """
    def _crop(one_peak_df):
        # Keep only peaks inside the padded query-mass range.
        mzs = one_peak_df.mz.values
        in_window = (mzs>=query_masses.min()-3)&(mzs<=query_masses.max()+3)
        return mzs[in_window], one_peak_df.intensity.values[in_window]

    cropped = [_crop(one_peak_df) for one_peak_df in peak_df_list]
    spec_masses_list = [mzs for mzs, _ in cropped]
    spec_intens_list = [intens for _, intens in cropped]

    return plot_multi_spectra(
        spec_masses_list, spec_intens_list,
        query_masses, query_ion_names=ion_names,
        query_mass_tols=query_masses*10*1e-6,
        title="", plot_height=plot_height
    )

def find_neibor_spec_idxes(
    spec_df, rt_sec, 
    precursor_mz, n
):
    """Find up to `n` spectra around a retention time that isolate a precursor.

    The search starts at the position where `rt_sec/60` would be inserted
    into `spec_df.rt` (so `spec_df.rt` is expected to be sorted ascending and
    expressed in minutes) and walks outwards. A spectrum qualifies when
    `isolation_lower_mz <= precursor_mz <= isolation_upper_mz`.

    Up to `n - n//2` spectra are taken at or before the start position and up
    to `n//2` after it, so an odd `n` puts the extra spectrum on the left.

    Args:
        spec_df: Spectrum table with `rt`, `isolation_lower_mz` and
            `isolation_upper_mz` columns.
        rt_sec: Query retention time in seconds.
        precursor_mz: Precursor m/z that must lie inside the isolation window.
        n: Maximum number of spectrum indices to return.

    Returns:
        Sorted list of positional row indices into `spec_df`. It may hold
        fewer than `n` entries (or be empty) when not enough spectra qualify.
    """
    n_spec = len(spec_df)
    lower_mzs = spec_df.isolation_lower_mz.values
    upper_mzs = spec_df.isolation_upper_mz.values

    def _isolates_precursor(i):
        return lower_mzs[i] <= precursor_mz and upper_mzs[i] >= precursor_mz

    spec_idx = int(np.searchsorted(spec_df.rt.values, rt_sec/60))

    # Quotas for each side; equivalent to the split described above.
    n_left = n - n//2
    n_right = n//2

    left_idxes = []
    # searchsorted returns len(spec_df) when the query RT lies after the last
    # spectrum (and 0 for an empty table); clamp so the backward scan never
    # indexes past the end of the arrays.
    for i in range(min(spec_idx, n_spec - 1), -1, -1):
        if len(left_idxes) >= n_left: break
        if _isolates_precursor(i):
            left_idxes.append(i)

    right_idxes = []
    for i in range(spec_idx+1, n_spec):
        if len(right_idxes) >= n_right: break
        if _isolates_precursor(i):
            right_idxes.append(i)

    return sorted(left_idxes+right_idxes)

def get_peak_df_list(
    spec_df, peak_df, 
    rt_sec, precursor_mz,
    n = 1
):
    """Return the peak tables of the spectra nearest to a PSM.

    The spectra are chosen by `find_neibor_spec_idxes(spec_df, rt_sec,
    precursor_mz, n)`; for each chosen row, the slice
    `peak_df.iloc[peak_start_idx:peak_stop_idx]` is returned.

    Args:
        spec_df: Spectrum table with `peak_start_idx` / `peak_stop_idx`
            columns (plus the columns `find_neibor_spec_idxes` reads).
        peak_df: Peak table that the start/stop positions index into.
        rt_sec: Query retention time in seconds.
        precursor_mz: Precursor m/z used to select spectra.
        n: Maximum number of spectra to return.

    Returns:
        List of `peak_df` slices, one per selected spectrum, in ascending
        spectrum order.
    """
    def _peaks_of(row):
        # Half-open positional range of this spectrum's peaks in peak_df.
        first = spec_df.peak_start_idx.values[row]
        last = spec_df.peak_stop_idx.values[row]
        return peak_df.iloc[first:last,:]

    neighbor_rows = find_neibor_spec_idxes(
        spec_df, rt_sec, precursor_mz, n
    )
    return [_peaks_of(row) for row in neighbor_rows]

OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


In [6]:
# Unpack the fields of one PSM row into plain variables for the cells below.
i_psm = 0
psm_seq = psm_df["sequence"].values[i_psm]
psm_mods = psm_df["mods"].values[i_psm]
psm_sites = psm_df["mod_sites"].values[i_psm]
psm_charge = psm_df["charge"].values[i_psm]
psm_mz = psm_df["precursor_mz"].values[i_psm]
# `rt` is stored in minutes; the plotting helpers take seconds.
psm_rt_sec = psm_df["rt"].values[i_psm]*60
psm_rt_sec

258.0

In [7]:
# XIC view for the selected PSM, including precursor isotopes; keep the query
# table (`plot_df`) because the next cell reuses it.
plot_df, fig = plot_psm_xic(
    spec_df,
    peak_df,
    sequence=psm_seq,
    mods=psm_mods,
    mod_sites=psm_sites,
    charge=psm_charge,
    rt_sec=psm_rt_sec,
    include_isotopes=True,
)

In [8]:
# Show the query table with the `peak_area` column computed by the XIC plotter.
xic_plotter.get_peak_area(spec_df, peak_df, plot_df)

Unnamed: 0,mz,type,loss_type,charge,number,fragment_site,ion_name,sequence,mods,mod_sites,...,rt_sec,precursor_mz,precursor_i_0,precursor_i_1,precursor_i_2,precursor_i_3,precursor_i_4,precursor_i_5,precursor_mono_idx,peak_area
0,132.04776,98,0.0,1.0,1.0,0,b1+,MGALGLEGR,,,...,258.0,452.23947,0.590296,0.273583,0.101809,0.027392,0.005874,0.001045,0,17355.93
1,772.431152,121,0.0,1.0,8.0,0,y8+,MGALGLEGR,,,...,258.0,452.23947,0.590296,0.273583,0.101809,0.027392,0.005874,0.001045,0,963119.5
2,386.719238,121,0.0,2.0,8.0,0,y8++,MGALGLEGR,,,...,258.0,452.23947,0.590296,0.273583,0.101809,0.027392,0.005874,0.001045,0,903.0115
3,189.069229,98,0.0,1.0,2.0,1,b2+,MGALGLEGR,,,...,258.0,452.23947,0.590296,0.273583,0.101809,0.027392,0.005874,0.001045,0,43971740.0
4,715.409729,121,0.0,1.0,7.0,1,y7+,MGALGLEGR,,,...,258.0,452.23947,0.590296,0.273583,0.101809,0.027392,0.005874,0.001045,0,267663.5
5,358.208496,121,0.0,2.0,7.0,1,y7++,MGALGLEGR,,,...,258.0,452.23947,0.590296,0.273583,0.101809,0.027392,0.005874,0.001045,0,12957.4
6,260.106354,98,0.0,1.0,3.0,2,b3+,MGALGLEGR,,,...,258.0,452.23947,0.590296,0.273583,0.101809,0.027392,0.005874,0.001045,0,43988160.0
7,130.556808,98,0.0,2.0,3.0,2,b3++,MGALGLEGR,,,...,258.0,452.23947,0.590296,0.273583,0.101809,0.027392,0.005874,0.001045,0,0.0
8,644.37262,121,0.0,1.0,6.0,2,y6+,MGALGLEGR,,,...,258.0,452.23947,0.590296,0.273583,0.101809,0.027392,0.005874,0.001045,0,589820.0
9,322.689941,121,0.0,2.0,6.0,2,y6++,MGALGLEGR,,,...,258.0,452.23947,0.590296,0.273583,0.101809,0.027392,0.005874,0.001045,0,34604.77


In [9]:
# Take the single matching spectrum selected around the PSM's retention time
# (n=1) and plot it annotated for the peptide.
one_peak_df = get_peak_df_list(
    spec_df, peak_df,
    rt_sec=psm_rt_sec,
    precursor_mz=psm_mz,
    n=1,
)[0]
fig = plot_one_psm(
    peak_df=one_peak_df,
    sequence=psm_seq,
    mods=psm_mods,
    mod_sites=psm_sites,
    charge=psm_charge,
)

In [10]:
# Plot up to three neighbouring spectra that isolate the PSM's precursor,
# all annotated for the same peptide.
fig = plot_psms(
    peak_df_list=get_peak_df_list(
        spec_df, peak_df,
        rt_sec=psm_rt_sec,
        precursor_mz=psm_mz,
        n=3,
    ),
    sequence=psm_seq,
    mods=psm_mods,
    mod_sites=psm_sites,
    charge=psm_charge,
    plot_height=800,
)