In [None]:
#default_exp io.psm_reader.dia_search_reader

As far as we know, all DIA search engines produce tabular output files similar to MaxQuant's, so all DIA readers here inherit from `MaxQuantReader`.

In [None]:
#export
import pandas as pd
import numpy as np

from alphabase.io.psm_reader.psm_reader import (
    psm_reader_provider
)

from alphabase.io.psm_reader.maxquant_reader import (
    MaxQuantReader
)

class SpectronautReader(MaxQuantReader):
    """Reader for Spectronaut-style tab-separated library/report files.

    The same class is also registered for OpenSWATH files, whose modified
    sequence lives in a differently named column; the column actually
    present is detected when a file is loaded.
    """
    def __init__(self,
        *,
        column_mapping:dict = None,
        modification_mapping:dict = None,
        fdr = 0.01,
        keep_decoy = False,
        mod_sep = '[]',
        underscore_for_ncterm=True,
        fixed_C57 = False,
        mod_seq_column=[
            'ModifiedPeptide',
            'ModifiedSequence',
            'FullUniModPeptideName',
        ],
        csv_sep = '\t',
        **kwargs,
    ):
        """
        Parameters
        ----------
        column_mapping, modification_mapping, fdr, keep_decoy, mod_sep,
        underscore_for_ncterm, fixed_C57
            Forwarded unchanged to `MaxQuantReader.__init__`.
        mod_seq_column : str or iterable of str
            Candidate names of the modified-sequence column, in priority
            order. The first one found in the loaded table is used.
        csv_sep : str
            Field separator passed to `pandas.read_csv`.
        **kwargs
            Accepted for call compatibility; not used by this class.
        """
        super().__init__(
            column_mapping=column_mapping,
            modification_mapping=modification_mapping,
            fdr=fdr, keep_decoy=keep_decoy,
            mod_sep=mod_sep,
            underscore_for_ncterm=underscore_for_ncterm,
            fixed_C57=fixed_C57
        )
        self.csv_sep = csv_sep

        # Keep a private copy: the default is a single list object shared
        # by every call, so storing it directly would let a mutation made
        # through one reader leak into all readers created afterwards.
        # A bare string is treated as one column name, not as characters.
        if isinstance(mod_seq_column, str):
            self._mod_seq_columns = [mod_seq_column]
        else:
            self._mod_seq_columns = list(mod_seq_column)
        # Fallback used when none of the candidates is found in a file.
        self.mod_seq_column = 'ModifiedPeptide'

    def _find_mod_seq_column(self, df):
        """Set `self.mod_seq_column` to the first candidate present in `df`.

        If no candidate column exists, `self.mod_seq_column` is left as is.
        """
        for mod_seq_col in self._mod_seq_columns:
            if mod_seq_col in df.columns:
                self.mod_seq_column = mod_seq_col
                break

    def _init_column_mapping(self):
        """Define the default mapping from output column names to file columns.

        Each value is either one column name or a list of alternative names.
        NOTE(review): how alternatives are resolved is decided by the base
        class, which is not visible here.
        """
        self.column_mapping = {
            'sequence': ['StrippedPeptide','PeptideSequence'],
            'charge': 'PrecursorCharge',
            'rt': ['RT','iRT','Tr_recalibrated','RetentionTime'],
            'rt_norm': 'rt_norm',
            'ccs': 'CCS',
            'mobility': ['Mobility','IonMobility'],
            'precursor_mz': 'mz',
            'proteins': 'Protein Name',
            'uniprot_ids': ['UniProtIds','UniProtID'],
            'genes': 'Genes',
        }

    def _load_file(self, filename):
        """Read the table, keep one row per precursor and add `rt_norm`.

        Parameters
        ----------
        filename : str or file-like
            Anything `pandas.read_csv` accepts.

        Returns
        -------
        pandas.DataFrame
            De-duplicated table with a fresh 0..n-1 index. When one of the
            mapped RT columns exists, an `rt_norm` column holding the
            min-max scaled retention time is added.
        """
        df = pd.read_csv(filename, sep=self.csv_sep)
        self._find_mod_seq_column(df)

        # The files carry one row per fragment; a precursor is identified by
        # modified sequence + charge (and by run when that column exists).
        dedup_keys = [self.mod_seq_column, 'PrecursorCharge']
        if 'ReferenceRun' in df.columns:
            dedup_keys.insert(0, 'ReferenceRun')
        df = df.drop_duplicates(dedup_keys).reset_index(drop=True)

        # A user-supplied mapping may give 'rt' as a single name (iterating
        # it directly would walk its characters) or omit it altogether.
        rt_candidates = self.column_mapping.get('rt') or []
        if isinstance(rt_candidates, str):
            rt_candidates = [rt_candidates]

        for rt_col in rt_candidates:
            if rt_col not in df.columns: continue
            min_rt = df[rt_col].min()
            rt_span = df[rt_col].max() - min_rt
            shifted = df[rt_col] - min_rt
            # With identical RT values the span is 0 and the division would
            # yield 0/0 = NaN; `shifted` is already 0 for those rows.
            df['rt_norm'] = shifted / rt_span if rt_span != 0 else shifted
            break
        return df

class DiannReader(SpectronautReader):
    """Reader for DIA-NN style tab-separated reports.

    Differs from the parent reader in its column names, its default
    modification delimiters and in loading the table without any
    load-time post-processing.
    """
    def __init__(self,
        *,
        column_mapping:dict = None,
        modification_mapping:dict = None,
        fdr = 0.01,
        keep_decoy = False,
        mod_sep = '()',
        underscore_for_ncterm=False,
        fixed_C57 = False,
        csv_sep = '\t',
        **kwargs,
    ):
        """Forward the reader options to the parent class.

        `**kwargs` is accepted for call compatibility and is not used.
        """
        parent_options = dict(
            column_mapping=column_mapping,
            modification_mapping=modification_mapping,
            fdr=fdr,
            keep_decoy=keep_decoy,
            mod_sep=mod_sep,
            underscore_for_ncterm=underscore_for_ncterm,
            fixed_C57=fixed_C57,
            csv_sep=csv_sep,
        )
        super().__init__(**parent_options)
        # The modified-sequence column has a fixed name for this reader.
        self.mod_seq_column = 'Modified.Sequence'

    def _init_column_mapping(self):
        """Define the default mapping from output column names to file columns."""
        self.column_mapping = dict(
            raw_name='Run',
            sequence='Stripped.Sequence',
            charge='Precursor.Charge',
            rt=['RT','iRT','Tr_recalibrated','RetentionTime'],
            rt_norm='rt_norm',
            ccs='CCS',
            precursor_mz='PrecursorMz',
            mobility=['IM','IonMobility'],
            proteins='Protein.Names',
            uniprot_ids='Protein.Ids',
            genes='Genes',
            spec_idx='MS2.Scan',
        )

    def _load_file(self, filename):
        """Read the report with `pandas.read_csv` and return it unchanged.

        Unlike the parent's loader, no rows are de-duplicated and no
        `rt_norm` column is computed at this stage.
        """
        return pd.read_csv(filename, sep=self.csv_sep)

# Make the DIA readers available through the shared provider.
# OpenSWATH files are handled by the Spectronaut reader class.
psm_reader_provider.register_reader('spectronaut', SpectronautReader)
psm_reader_provider.register_reader('openswath', SpectronautReader)
psm_reader_provider.register_reader('diann', DiannReader)

In [None]:
from io import StringIO
tsv = StringIO('''ReferenceRun	PrecursorCharge	Workflow	IntModifiedPeptide	CV	AllowForNormalization	ModifiedPeptide	StrippedPeptide	iRT	IonMobility	iRTSourceSpecific	BGSInferenceId	IsProteotypic	IntLabeledPeptide	LabeledPeptide	PrecursorMz	ReferenceRunQvalue	ReferenceRunMS1Response	FragmentLossType	FragmentNumber	FragmentType	FragmentCharge	FragmentMz	RelativeIntensity	ExcludeFromAssay	Database	ProteinGroups	UniProtIds	Protein Name	ProteinDescription	Organisms	OrganismId	Genes	Protein Existence	Sequence Version	FASTAName
202106018_TIMS03_EVO03_PaSk_SA_HeLa_EGF_Phospho_100ug_test_S4-A1_1_25843	2		_ALVAT[+80]PGK_		True	_ALVAT[Phospho (STY)]PGK_	ALVATPGK	-5.032703	0.758	-5.032703	P19338	False	_ALVAT[+80]PGK_	_ALVAT[Phospho (STY)]PGK_	418.717511324722	0	10352	noloss	3	y	1	301.187031733932	53.1991	False	sp	P19338	P19338	NUCL_HUMAN	Nucleolin	Homo sapiens		NCL	1	3	MCT_human_UP000005640_9606
202106018_TIMS03_EVO03_PaSk_SA_HeLa_EGF_Phospho_100ug_test_S4-A1_1_25843	2		_ALVAT[+80]PGK_		True	_ALVAT[Phospho (STY)]PGK_	ALVATPGK	-5.032703	0.758	-5.032703	P19338	False	_ALVAT[+80]PGK_	_ALVAT[Phospho (STY)]PGK_	418.717511324722	0	10352	H3PO4	4	y	1	384.224142529733	26.31595	False	sp	P19338	P19338	NUCL_HUMAN	Nucleolin	Homo sapiens		NCL	1	3	MCT_human_UP000005640_9606
202106018_TIMS03_EVO03_PaSk_SA_HeLa_EGF_Phospho_100ug_test_S4-A1_1_25843	2		_TLT[+80]PCPLR_		True	_TLT[Phospho (STY)]PC[Carbamidomethyl (C)]PLR_	TLTPCPLR	27.71659	0.818	27.71659	Q5T200	False	_TLT[+80]PPLR_	_TLT[Phospho (STY)]PPLR_	439.230785875227	0.000138389150379226	23117	noloss	3	b	1	396.153027901512	6.3264	False	sp	Q5T200	Q5T200	ZC3HD_HUMAN	Zinc finger CCCH domain-containing protein 13	Homo sapiens		ZC3H13	1	1	MCT_human_UP000005640_9606
202106018_TIMS03_EVO03_PaSk_SA_HeLa_EGF_Phospho_100ug_test_S4-A1_1_25843	2		_TLT[+80]PCPLR_		True	_TLT[Phospho (STY)]PC[Carbamidomethyl (C)]PLR_	TLTPCPLR	27.71659	0.818	27.71659	Q5T200	False	_TLT[+80]PPLR_	_TLT[Phospho (STY)]PPLR_	439.230785875227	0.000138389150379226	23117	noloss	3	y	1	385.255780000092	29.70625	False	sp	Q5T200	Q5T200	ZC3HD_HUMAN	Zinc finger CCCH domain-containing protein 13	Homo sapiens		ZC3H13	1	1	MCT_human_UP000005640_9606
202106018_TIMS03_EVO03_PaSk_SA_HeLa_EGF_Phospho_library25_S4-C1_1_25867	2		_LFVT[+80]PPEGSSR_		True	_[Acetyl (Protein N-term)]LFVS[Phospho (STY)]PPEGSSR_	LFVSPPEGSSR	38.05031	0.917	38.05031	Q14244;Q14244-6;Q14244-7	False	_LFVT[+80]PPEGSSR_	_LFVT[Phospho (STY)]PPEGSSR_	635.297385373987	0	14164	H3PO4	4	b	1	443.265279065723	12.24525	False	sp	Q14244;Q14244-6;Q14244-7	Q14244;Q14244-6;Q14244-7	MAP7_HUMAN	Ensconsin;Isoform of Q14244, Isoform 6 of Ensconsin;Isoform of Q14244, Isoform 7 of Ensconsin	Homo sapiens		MAP7	1;;	1;;	MCT_human_UP000005640_9606;MCT_human2_UP000005640_9606_additional;MCT_human2_UP000005640_9606_additional
202106018_TIMS03_EVO03_PaSk_SA_HeLa_EGF_Phospho_library25_S4-C1_1_25867	2		_LFVT[+80]PPEGSSR_		True	_[Acetyl (Protein N-term)]LFVS[Phospho (STY)]PPEGSSR_	LFVSPPEGSSR	38.05031	0.917	38.05031	Q14244;Q14244-6;Q14244-7	False	_LFVT[+80]PPEGSSR_	_LFVT[Phospho (STY)]PPEGSSR_	635.297385373987	0	14164	noloss	6	y	1	632.299829640042	46.07855	False	sp	Q14244;Q14244-6;Q14244-7	Q14244;Q14244-6;Q14244-7	MAP7_HUMAN	Ensconsin;Isoform of Q14244, Isoform 6 of Ensconsin;Isoform of Q14244, Isoform 7 of Ensconsin	Homo sapiens		MAP7	1;;	1;;	MCT_human_UP000005640_9606;MCT_human2_UP000005640_9606_additional;MCT_human2_UP000005640_9606_additional
202106018_TIMS03_EVO03_PaSk_SA_HeLa_EGF_Phospho_library25_S4-C1_1_25867	2		_LFVT[+80]PPEGSSR_		True	_[Acetyl (Protein N-term)]LFVS[Phospho (STY)]PPEGSSR_	LFVSPPEGSSR	38.05031	0.917	38.05031	Q14244;Q14244-6;Q14244-7	False	_LFVT[+80]PPEGSSR_	_LFVT[Phospho (STY)]PPEGSSR_	635.297385373987	0	14164	noloss	7	y	1	729.352593488892	100	False	sp	Q14244;Q14244-6;Q14244-7	Q14244;Q14244-6;Q14244-7	MAP7_HUMAN	Ensconsin;Isoform of Q14244, Isoform 6 of Ensconsin;Isoform of Q14244, Isoform 7 of Ensconsin	Homo sapiens		MAP7	1;;	1;;	MCT_human_UP000005640_9606;MCT_human2_UP000005640_9606_additional;MCT_human2_UP000005640_9606_additional
''')


# Fetch the reader registered under 'spectronaut' and feed it the in-memory TSV.
spn_reader = psm_reader_provider.get_reader('spectronaut')
spn_reader.import_file(tsv)
# Last expression of the cell: display the resulting PSM table.
spn_reader.psm_df

Unnamed: 0,sequence,charge,rt,rt_norm,mobility,proteins,uniprot_ids,genes,nAA,mods,mod_sites,precursor_mz,ccs
0,ALVATPGK,2,-5.032703,0.0,0.758,NUCL_HUMAN,P19338,NCL,8,Phospho@T,5,418.717512,308.612143
1,TLTPCPLR,2,27.71659,0.760144,0.818,ZC3HD_HUMAN,Q5T200,ZC3H13,8,Phospho@T;Carbamidomethyl@C,3;5,519.24611,331.995851
2,LFVSPPEGSSR,2,38.05031,1.0,0.917,MAP7_HUMAN,Q14244;Q14244-6;Q14244-7,MAP7,11,Acetyl@Protein N-term;Phospho@S,0;4,649.294843,371.196438


In [None]:
#hide
# The fragment-level rows above must collapse to three precursors with the
# expected modification names and sites.
spn_psm_df = spn_reader.psm_df
assert len(spn_psm_df) == 3
assert spn_psm_df.mods.tolist() == [
    'Phospho@T',
    'Phospho@T;Carbamidomethyl@C',
    'Acetyl@Protein N-term;Phospho@S',
]
assert spn_psm_df.mod_sites.tolist() == ['5', '3;5', '0;4']

In [None]:
from io import StringIO
tsv = StringIO('''PrecursorMz	ProductMz	Tr_recalibrated	transition_name	CE	LibraryIntensity	transition_group_id	decoy	PeptideSequence	ProteinName	Annotation	FullUniModPeptideName	PrecursorCharge	GroupLabel	UniprotID	FragmentType	FragmentCharge	FragmentSeriesNumber
685.732240417	886.020494795	59.0	255_AAAAAAAAAASGAAIPPLIPPRR_3	-1	5257.9	13_AAAAAAAAAASGAAIPPLIPPRR_3	0	AAAAAAAAAASGAAIPPLIPPRR	1/O14654	y19^2/0.002	AAAAAAAAAASGAAIPPLIPPRR	3	light	1/O14654	y	2	19
514.550999438	473.303261576	59.2	268_AAAAAAAAAASGAAIPPLIPPRR_4	-1	10000.0	14_AAAAAAAAAASGAAIPPLIPPRR_4	0	AAAAAAAAAASGAAIPPLIPPRR	1/O14654	y8^2/0.002	AAAAAAAAAASGAAIPPLIPPRR	4	light	1/O14654	y	2	8
514.550999438	629.39313922	59.2	276_AAAAAAAAAASGAAIPPLIPPRR_4	-1	5923.1	14_AAAAAAAAAASGAAIPPLIPPRR_4	0	AAAAAAAAAASGAAIPPLIPPRR	1/O14654	y12^2/0.001	AAAAAAAAAASGAAIPPLIPPRR	4	light	1/O14654	y	2	12
514.550999438	672.909153425	59.2	279_AAAAAAAAAASGAAIPPLIPPRR_4	-1	5249.8	14_AAAAAAAAAASGAAIPPLIPPRR_4	0	AAAAAAAAAASGAAIPPLIPPRR	1/O14654	y13^2/0.001	AAAAAAAAAASGAAIPPLIPPRR	4	light	1/O14654	y	2	13
514.550999438	356.19284545	59.2	262_AAAAAAAAAASGAAIPPLIPPRR_4	-1	5233.6	14_AAAAAAAAAASGAAIPPLIPPRR_4	0	AAAAAAAAAASGAAIPPLIPPRR	1/O14654	b5/0.001,b10^2/0.001,m6:10/0.001	AAAAAAAAAASGAAIPPLIPPRR	4	light	1/O14654	b	1	5
514.550999438	498.26707303	59.2	269_AAAAAAAAAASGAAIPPLIPPRR_4	-1	4976.0	14_AAAAAAAAAASGAAIPPLIPPRR_4	0	AAAAAAAAAASGAAIPPLIPPRR	1/O14654	b7/0.001,m4:10/0.001	AAAAAAAAAASGAAIPPLIPPRR	4	light	1/O14654	b	1	7
514.550999438	427.22995924	59.2	265_AAAAAAAAAASGAAIPPLIPPRR_4	-1	4859.4	14_AAAAAAAAAASGAAIPPLIPPRR_4	0	AAAAAAAAAASGAAIPPLIPPRR	1/O14654	b6/0.002,m5:10/0.002	AAAAAAAAAASGAAIPPLIPPRR	4	light	1/O14654	b	1	6
728.201724416	356.19284545	101.8	292_AAAAAAAAAASGAAIPPLIPPRRVITLYQC(UniMod:4)FSVSQR_5	-1	10000.0	15_AAAAAAAAAASGAAIPPLIPPRRVITLYQC(UniMod:4)FSVSQR_5	0	AAAAAAAAAASGAAIPPLIPPRRVITLYQCFSVSQR	1/O14654	b5/0.003,b10^2/0.003,m6:10/0.003	AAAAAAAAAASGAAIPPLIPPRRVITLYQC(UniMod:4)FSVSQR	5	light	1/O14654	b	1	5
728.201724416	576.310000482	101.8	297_AAAAAAAAAASGAAIPPLIPPRRVITLYQC(UniMod:4)FSVSQR_5	-1	7611.0	15_AAAAAAAAAASGAAIPPLIPPRRVITLYQC(UniMod:4)FSVSQR_5	0	AAAAAAAAAASGAAIPPLIPPRRVITLYQCFSVSQR	1/O14654	y5/0.002	AAAAAAAAAASGAAIPPLIPPRRVITLYQC(UniMod:4)FSVSQR	5	light	1/O14654	y	1	5
728.201724416	427.22995924	101.8	293_AAAAAAAAAASGAAIPPLIPPRRVITLYQC(UniMod:4)FSVSQR_5	-1	6805.1	15_AAAAAAAAAASGAAIPPLIPPRRVITLYQC(UniMod:4)FSVSQR_5	0	AAAAAAAAAASGAAIPPLIPPRRVITLYQCFSVSQR	1/O14654	b6/-0.002,m5:10/-0.002	AAAAAAAAAASGAAIPPLIPPRRVITLYQC(UniMod:4)FSVSQR	5	light	1/O14654	b	1	6
728.201724416	569.30418682	101.8	296_AAAAAAAAAASGAAIPPLIPPRRVITLYQC(UniMod:4)FSVSQR_5	-1	6312.7	15_AAAAAAAAAASGAAIPPLIPPRRVITLYQC(UniMod:4)FSVSQR_5	0	AAAAAAAAAASGAAIPPLIPPRRVITLYQCFSVSQR	1/O14654	b8/0.009,m3:10/0.009	AAAAAAAAAASGAAIPPLIPPRRVITLYQC(UniMod:4)FSVSQR	5	light	1/O14654	b	1	8
''')


# 'openswath' is registered with the same reader class as 'spectronaut'.
osw_reader = psm_reader_provider.get_reader('openswath')
osw_reader.import_file(tsv)
# Last expression of the cell: display the resulting PSM table.
osw_reader.psm_df

Unnamed: 0,sequence,charge,rt,rt_norm,nAA,mods,mod_sites
0,AAAAAAAAAASGAAIPPLIPPRR,3,59.0,0.0,23,,
1,AAAAAAAAAASGAAIPPLIPPRR,4,59.2,0.004673,23,,
2,AAAAAAAAAASGAAIPPLIPPRRVITLYQCFSVSQR,5,101.8,1.0,36,,


In [None]:
from io import StringIO
tsv = StringIO('''File.Name	Run	Protein.Group	Protein.Ids	Protein.Names	Genes	PG.Quantity	PG.Normalised	PG.MaxLFQ	Genes.Quantity	Genes.Normalised	Genes.MaxLFQ	Genes.MaxLFQ.Unique	Modified.Sequence	Stripped.Sequence	Precursor.Id	Precursor.Charge	Q.Value	Global.Q.Value	Protein.Q.Value	PG.Q.Value	Global.PG.Q.Value	GG.Q.Value	Translated.Q.Value	Proteotypic	Precursor.Quantity	Precursor.Normalised	Precursor.Translated	Quantity.Quality	RT	RT.Start	RT.Stop	iRT	Predicted.RT	Predicted.iRT	Lib.Q.Value	Ms1.Profile.Corr	Ms1.Area	Evidence	Spectrum.Similarity	Mass.Evidence	CScore	Decoy.Evidence	Decoy.CScore	Fragment.Quant.Raw	Fragment.Quant.Corrected	Fragment.Correlations	MS2.Scan	IM	iIM	Predicted.IM	Predicted.iIM
F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A2_1_22636.d	20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A2_1_22636	Q9UH36	Q9UH36		SRRD	3296.49	3428.89	3428.89	3296.49	3428.89	3428.89	3428.89	(UniMod:1)AAAAAAALESWQAAAPR	AAAAAAALESWQAAAPR	(UniMod:1)AAAAAAALESWQAAAPR2	2	3.99074e-05	1.96448e-05	0.000159821	0.000159821	0.000146135	0.000161212	0	1	3296.49	3428.89	3296.49	0.852479	19.9208	19.8731	19.9685	123.9	19.8266	128.292	0	0.960106	5308.05	1.96902	0.683134	0.362287	0.999997	1.23691	3.43242e-05	1212.01;2178.03;1390.01;1020.01;714.008;778.008;	1212.01;1351.73;887.591;432.92;216.728;732.751;	0.956668;0.757581;0.670497;0.592489;0.47072;0.855203;	30053	1.19708	1.19328	1.19453	1.19469
F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A8_1_22642.d	20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A8_1_22642	Q9UH36	Q9UH36		SRRD	2365	2334.05	2334.05	2365	2334.05	2334.05	2334.05	(UniMod:1)AAAAAAALESWQAAAPR	AAAAAAALESWQAAAPR	(UniMod:1)AAAAAAALESWQAAAPR2	2	0.000184434	1.96448e-05	0.000596659	0.000596659	0.000146135	0.000604961	0	1	2365	2334.05	2365	0.922581	19.905	19.8573	19.9527	123.9	19.782	128.535	0	0.940191	4594.04	1.31068	0.758988	0	0.995505	0.28633	2.12584e-06	1209.02;1210.02;1414.02;1051.01;236.003;130.002;	1209.02;1109.89;732.154;735.384;0;46.0967;	0.919244;0.937624;0.436748;0.639369;0.296736;0.647924;	30029	1.195	1.19328	1.19381	1.19339
F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B2_1_22648.d	20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B2_1_22648	Q9UH36	Q9UH36		SRRD	1664.51	1635.46	1635.47	1664.51	1635.46	1635.47	1635.47	(UniMod:1)AAAAAAALESWQAAAPR	AAAAAAALESWQAAAPR	(UniMod:1)AAAAAAALESWQAAAPR2	2	0.000185123	1.96448e-05	0.000307409	0.000307409	0.000146135	0.000311332	0	1	1664.51	1635.46	1664.51	0.811147	19.8893	19.8416	19.937	123.9	19.7567	128.896	0	0.458773	6614.06	1.7503	0.491071	0.00111683	0.997286	1.92753	2.80543e-05	744.01;1708.02;1630.02;1475.02;0;533.006;	322.907;808.594;577.15;536.033;0;533.006;	0.760181;0.764072;0.542005;0.415779;0;0.913438;	30005	1.19409	1.19328	1.19323	1.19308
F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B8_1_22654.d	20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B8_1_22654	Q9UH36	Q9UH36		SRRD	3369.91	3343.38	3343.38	3369.91	3343.38	3343.38	3343.38	(UniMod:1)AAAAAAALESWQAAAPR	AAAAAAALESWQAAAPR	(UniMod:1)AAAAAAALESWQAAAPR2	2	0.000153377	1.96448e-05	0.000298151	0.000298151	0.000146135	0.000302297	0	1	3369.91	3343.38	3369.91	0.798104	19.872	19.8243	19.9196	123.9	19.7347	128.576	0	0.892774	5026.05	1.01465	0.704953	0	0.996593	0.476378	7.76581e-06	1654.02;1286.02;1894.02;993.011;778.008;1190.01;	1638.42;1286.02;1293.97;466.705;292.465;445.475;	0.854908;0.860012;0.708773;0.584142;0.716032;0.410465;	29981	1.19136	1.19328	1.19169	1.1919
F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A2_1_22636.d	20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A2_1_22636	P51608-2	P51608-2		MECP2	1585.53	1649.21	7673.34	1585.53	1649.21	7673.34	7673.34	AAAAAAAPSGGGGGGEEERLEEK	AAAAAAAPSGGGGGGEEERLEEK	(UniMod:1)AAAAAAAPSGGGGGGEEERLEEK3	3	0.00014185	5.69066e-05	0.000159821	0.000159821	0.000146135	0.000161212	0	1	861.509	896.11	861.509	0.530747	7.34894	7.30125	7.39665	14.6582	7.35635	14.5905	0	0.85024	2010.02	1.27844	0.470734	0.117495	0.998266	0.848349	0.0613877	905.008;1021.01;1179.01;419.003;753.005;638.006;	422.004;0;439.505;419.003;325.502;119.251;	0.626628;-0.441015;0.438684;0.800641;0.794403;0.458813;	11077	1.01	1.01225	1.01075	1.01099
F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A8_1_22642.d	20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A8_1_22642	P51608-2	P51608-2		MECP2	16870.2	16649.4	7963.26	16870.2	16649.4	7963.26	7963.26	AAAAAAAPSGGGGGGEEERLEEK	AAAAAAAPSGGGGGGEEERLEEK	(UniMod:1)AAAAAAAPSGGGGGGEEERLEEK3	3	0.000200767	5.69066e-05	0.000152765	0.000152765	0.000146135	0.000154631	0	1	1832.12	1808.14	1832.12	0.438822	7.3176	7.26989	7.36527	14.6582	7.29706	14.8141	0	0.814702	3622.03	0.571375	0.278903	0.971334	0.994097	0.779006	0.0590956	1430.01;946.009;804.008;260.002;710.006;232.002;	886.115;946.009;0;0;431.102;0;	-0.0551893;0.901557;0.0979035;-0.326533;0.334642;-0.120337;	11029	1.01417	1.01225	1.01136	1.01447
F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B2_1_22648.d	20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B2_1_22648	P51608-2	P51608-2		MECP2	16242.7	15959.3	13129.8	16242.7	15959.3	13129.8	13129.8	AAAAAAAPSGGGGGGEEERLEEK	AAAAAAAPSGGGGGGEEERLEEK	(UniMod:1)AAAAAAAPSGGGGGGEEERLEEK3	3	6.98405e-05	5.69066e-05	0.000168492	0.000168492	0.000146135	0.000169578	0	1	2765.24	2716.99	2765.24	0.619188	7.28562	7.23794	7.33338	14.6582	7.22243	15.2233	0	0.859167	2180.02	1.50997	0.43755	0.191245	0.999939	0.420023	0.00548723	1807.01;1018.01;1230.01;554.005;1216.01;276.002;	954.327;1006.89;804.021;554.005;0;143.651;	0.483734;0.907585;0.418797;0.61368;-0.523993;0.70046;	10981	1.015	1.01225	1.01115	1.01558
F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B8_1_22654.d	20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B8_1_22654	P51608-2	P51608-2		MECP2	20914.7	20750	16106	20914.7	20750	16106	16106	AAAAAAAPSGGGGGGEEERLEEK	AAAAAAAPSGGGGGGEEERLEEK	(UniMod:1)AAAAAAAPSGGGGGGEEERLEEK3	3	0.000603914	5.69066e-05	0.000153516	0.000153516	0.000146135	0.000155304	0	1	4556.58	4520.72	4556.58	0.620251	7.26825	7.22055	7.31601	14.6582	7.18207	15.448	0	0.79218	3193.03	0.113147	0.344593	0.144439	0.971834	1.05178	0.112962	2855.02;1760.01;830.007;116.001;564.005;751.006;	2266.46;1760.01;530.111;0;0;317.258;	0.430169;0.867218;0.612985;-0.310664;-0.386197;0.286451;	10957	1.01208	1.01225	1.00965	1.01449
F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A2_1_22636.d	20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A2_1_22636	Q96P70	Q96P70		IPO9	155722	161976	160062	155722	161976	160062	160062	(UniMod:1)AAAAAAGAASGLPGPVAQGLK	AAAAAAGAASGLPGPVAQGLK	(UniMod:1)AAAAAAGAASGLPGPVAQGLK2	2	3.99074e-05	1.96448e-05	0.000159821	0.000159821	0.000146135	0.000161212	0	1	44791.6	46590.6	44791.6	0.903543	14.709	14.6614	14.7567	77.7384	14.7374	77.4968	0	0.962703	395646	1.16168	0.790083	0	0.999999	0.491003	0.00391533	17738.2;18223.2;16218.2;17747.2;14484.2;12569.2;	17738.2;18223.2;16150;16115.2;14484.2;12569.2;	0.879361;0.89314;0.807683;0.73629;0.863152;0.984215;	22187	1.225	1.23344	1.23458	1.22263
F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A8_1_22642.d	20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A8_1_22642	Q96P70	Q96P70		IPO9	172360	170104	155889	172360	170104	155889	155889	(UniMod:1)AAAAAAGAASGLPGPVAQGLK	AAAAAAGAASGLPGPVAQGLK	(UniMod:1)AAAAAAGAASGLPGPVAQGLK2	2	4.98766e-05	1.96448e-05	0.000152765	0.000152765	0.000146135	0.000154631	0	1	44700.5	44115.4	44700.5	0.615196	14.6456	14.5979	14.6933	77.7384	14.6419	77.8394	0	0.9107	553292	0.747052	0.814842	0	0.999996	0.830219	0.0331339	19349.2;20154.2;20586.2;20040.3;13620.2;12105.1;	19349.2;19014.1;19329.7;18862.6;13404.2;11947.1;	0.862894;0.42962;0.293099;0.655948;0.517625;0.323501;	22091	1.22042	1.23344	1.23392	1.21891
F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B2_1_22648.d	20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B2_1_22648	Q96P70	Q96P70		IPO9	153712	151030	152845	153712	151030	152845	152845	(UniMod:1)AAAAAAGAASGLPGPVAQGLK	AAAAAAGAASGLPGPVAQGLK	(UniMod:1)AAAAAAGAASGLPGPVAQGLK2	2	6.08421e-05	1.96448e-05	0.000168492	0.000168492	0.000146135	0.000169578	0	1	40439.4	39733.8	40439.4	0.833327	14.6301	14.5824	14.6777	77.7384	14.5747	78.2527	0	0.989019	1.0768e+06	1.68843	0.759575	0	0.999999	0.674737	0.0631384	17597.2;19526.2;16647.2;16204.2;12871.2;11069.2;	17377.7;19526.2;16647.2;16204.2;12871.2;10190.5;	0.828262;0.979726;0.827511;0.9628;0.90466;0.751867;	22067	1.22708	1.23344	1.23332	1.2261
F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B8_1_22654.d	20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B8_1_22654	Q96P70	Q96P70		IPO9	147008	145850	159209	147008	145850	159209	159209	(UniMod:1)AAAAAAGAASGLPGPVAQGLK	AAAAAAGAASGLPGPVAQGLK	(UniMod:1)AAAAAAGAASGLPGPVAQGLK2	2	4.42595e-05	1.96448e-05	0.000153516	0.000153516	0.000146135	0.000155304	0	1	41861.6	41532.1	41861.6	0.752921	14.549	14.5013	14.5967	77.7384	14.5155	78.0748	0	0.877873	939172	1.59762	0.821027	0	0.999997	0.397952	1.22212e-05	18143.2;19574.2;17444.2;17956.2;11427.2;13025.2;	17409.2;18432.7;16706.7;10820.4;11427.2;13025.2;	0.625292;0.399297;0.717726;0.321047;0.864595;0.825535;	21947	1.2275	1.23344	1.23199	1.2281
F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A2_1_22636.d	20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A2_1_22636	P28482	P28482		MAPK1	72652.7	75570.7	78604.9	72652.7	75570.7	78604.9	78604.9	(UniMod:1)AAAAAAGAGPEM(UniMod:35)VR	AAAAAAGAGPEMVR	(UniMod:1)AAAAAAGAGPEM(UniMod:35)VR2	2	0.00693727	0.000834654	0.000159821	0.000159821	0.000146135	0.000161212	0	1	192.198	199.917	192.198	0.0197094	7.4249	7.37721	7.4726	15.9025	7.49813	15.2482	0	0.754191	8842.06	1.19725	0.470393	0.086229	0.843331	2.80548	0.384248	897.008;1032.01;279.002;377.003;440.004;286.003;	0;137.786;54.4117;0;89.692;286.003;	-0.20379;-0.0679789;0.241761;-0.388501;-0.0459688;0.973644;	11191	1.01917	1.0262	1.02479	1.02031
F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A8_1_22642.d	20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A8_1_22642	P28482	P28482		MAPK1	69911.3	68996.2	63388.2	69911.3	68996.2	63388.2	63388.2	(UniMod:1)AAAAAAGAGPEM(UniMod:35)VR	AAAAAAGAGPEMVR	(UniMod:1)AAAAAAGAGPEM(UniMod:35)VR2	2	0.00122498	0.000834654	0.000152765	0.000152765	0.000146135	0.000154631	0	1	1572.67	1552.08	1572.67	0.906427	7.45711	7.40943	7.50482	15.9025	7.43922	16.0749	0	0.371998	5937.05	0.30888	0.510876	0.72688	0.95182	1.96259	0.65474	1320.01;838.009;638.006;827.009;562.005;339.003;	1320.01;252.656;0;213.073;330.325;0;	0.976001;0.542934;0.346963;0.38014;0.442774;-0.259898;	11239	1.01773	1.0262	1.02509	1.01834
''')
# Fetch the reader registered under 'diann' and feed it the in-memory TSV.
diann_reader = psm_reader_provider.get_reader('diann')
diann_reader.import_file(tsv)
# Last expression of the cell: display the resulting PSM table.
diann_reader.psm_df

Unnamed: 0,raw_name,sequence,charge,rt,mobility,proteins,uniprot_ids,genes,spec_idx,nAA,mods,mod_sites,rt_norm,precursor_mz,ccs
0,20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_sp...,AAAAAAALESWQAAAPR,2,19.9208,1.19708,,Q9UH36,SRRD,30053,17,Acetyl@Protein N-term,0,1.0,834.428635,483.435307
1,20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_sp...,AAAAAAALESWQAAAPR,2,19.905,1.195,,Q9UH36,SRRD,30029,17,Acetyl@Protein N-term,0,0.999207,834.428635,482.595308
2,20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_sp...,AAAAAAALESWQAAAPR,2,19.8893,1.19409,,Q9UH36,SRRD,30005,17,Acetyl@Protein N-term,0,0.998419,834.428635,482.227809
3,20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_sp...,AAAAAAALESWQAAAPR,2,19.872,1.19136,,Q9UH36,SRRD,29981,17,Acetyl@Protein N-term,0,0.99755,834.428635,481.125311
4,20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_sp...,AAAAAAAPSGGGGGGEEERLEEK,3,7.34894,1.01,,P51608-2,MECP2,11077,23,,,0.368908,695.66629,610.81364
5,20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_sp...,AAAAAAAPSGGGGGGEEERLEEK,3,7.3176,1.01417,,P51608-2,MECP2,11029,23,,,0.367335,695.66629,613.335514
6,20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_sp...,AAAAAAAPSGGGGGGEEERLEEK,3,7.28562,1.015,,P51608-2,MECP2,10981,23,,,0.365729,695.66629,613.83747
7,20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_sp...,AAAAAAAPSGGGGGGEEERLEEK,3,7.26825,1.01208,,P51608-2,MECP2,10957,23,,,0.364857,695.66629,612.071553
8,20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_sp...,AAAAAAGAASGLPGPVAQGLK,2,14.709,1.225,,Q96P70,IPO9,22187,21,Acetyl@Protein N-term,0,0.738374,895.9916,494.430146
9,20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_sp...,AAAAAAGAASGLPGPVAQGLK,2,14.6456,1.22042,,Q96P70,IPO9,22091,21,Acetyl@Protein N-term,0,0.735191,895.9916,492.581583


In [None]:
# Sanity checks on the DIA-NN import: all 14 rows are kept, a ccs column is
# present, and the modification strings carry the expected names.
diann_psm_df = diann_reader.psm_df
has_nterm_acetyl = diann_psm_df.mods.str.contains('Acetyl@Protein N-term')
assert 'ccs' in diann_psm_df.columns
assert len(diann_psm_df) == 14
assert has_nterm_acetyl.sum() == 10
assert (~has_nterm_acetyl).sum() == 4
assert diann_psm_df.mods.str.contains('Oxidation@M').sum() == 2