In [None]:
#default_exp reader.maxquant_reader

In [None]:
#export
import pandas as pd
from alphadeep.reader.psm_reader import PSMReaderBase

class MaxQuantReader(PSMReaderBase):
    """PSM reader for MaxQuant tab-separated result tables.

    On construction the reader registers MaxQuant's modification tokens
    (both the long form such as ``M(Oxidation (M))`` and the short form
    such as ``M(ox)``) in ``self.modification_convert_dict``, mapping each
    to a ``'Name@site'`` label.
    """

    # MaxQuant modification token -> 'Name@site' label. Insertion order
    # matters only in that it mirrors the order the entries are registered.
    _MAXQUANT_MODIFICATIONS = {
        '(Acetyl (Protein N-term))': 'Acetyl@Protein N-term',
        'C(Carbamidomethyl (C))': 'Carbamidomethyl@C',
        'M(Oxidation (M))': 'Oxidation@M',
        'S(Phospho (S))': 'Phospho@S',
        'T(Phospho (T))': 'Phospho@T',
        'Y(Phospho (Y))': 'Phospho@Y',
        'S(Phospho (ST))': 'Phospho@S',
        'T(Phospho (ST))': 'Phospho@T',
        'S(Phospho (STY))': 'Phospho@S',
        'T(Phospho (STY))': 'Phospho@T',
        'Y(Phospho (STY))': 'Phospho@Y',
        'K(GlyGly (K))': 'GlyGly@K',
        '(ac)': 'Acetyl@Protein N-term',
        'M(ox)': 'Oxidation@M',
        'S(ph)': 'Phospho@S',
        'T(ph)': 'Phospho@T',
        'Y(ph)': 'Phospho@Y',
        'K(gl)': 'GlyGly@K',
    }

    def __init__(self,
        frag_types=None,
        max_frag_charge=2
    ):
        """Initialise the base reader and register MaxQuant modifications.

        Args:
            frag_types: Fragment type names forwarded to ``PSMReaderBase``.
                ``None`` (the default) selects
                ``['b', 'y', 'b-modloss', 'y-modloss']``.
            max_frag_charge: Maximum fragment charge forwarded to
                ``PSMReaderBase``.
        """
        # Build the default list per call: a list literal in the signature
        # would be one shared object handed to every reader instance.
        if frag_types is None:
            frag_types = ['b', 'y', 'b-modloss', 'y-modloss']
        super().__init__(frag_types, max_frag_charge)

        for mq_token, mod_label in self._MAXQUANT_MODIFICATIONS.items():
            self.modification_convert_dict[mq_token] = mod_label

    def _load_file(self, filename) -> pd.DataFrame:
        """Read a MaxQuant table and return it as a PSM dataframe.

        The file is parsed as tab-separated text. Rows whose ``Reverse``
        column is ``'+'`` and rows without a ``Retention time`` are dropped
        before conversion.

        Args:
            filename: Path or buffer accepted by ``pandas.read_csv``.

        Returns:
            A dataframe with the columns ``peptide``, ``modinfo``,
            ``charge``, ``rt``, ``scan``, ``mobility``, ``raw_name``,
            ``score``, ``protein`` and ``gene``. The index is whatever
            survives the row filters; it is not reset.

        Raises:
            KeyError: If a required MaxQuant column is missing.
        """
        df = pd.read_csv(filename, sep='\t')
        is_target = df['Reverse'] != '+'
        has_rt = df['Retention time'].notna()
        df = df[is_target & has_rt]

        psm_df = pd.DataFrame()
        psm_df['peptide'] = df['Sequence']
        # NOTE(review): parse_mq is not defined in the visible part of this
        # module; it is expected to be available at module level.
        psm_df['modinfo'] = df['Modified sequence'].apply(parse_mq)
        psm_df['charge'] = df['Charge']
        psm_df['rt'] = df['Retention time']
        if 'Scan number' in df.columns:
            # msms.txt
            psm_df['scan'] = df['Scan number']
        else:
            # evidence.txt
            psm_df['scan'] = df['MS/MS scan number']
        if 'K0' in df.columns:
            # Mobility is stored as the reciprocal of the K0 column.
            psm_df['mobility'] = 1/df['K0']
        else:
            psm_df['mobility'] = pd.NA
        psm_df['raw_name'] = df['Raw file']
        psm_df['score'] = df['Score']
        psm_df['protein'] = df['Proteins']
        # The gene column's capitalisation differs between tables.
        if 'Gene Names' in df.columns:
            psm_df['gene'] = df['Gene Names']
        elif 'Gene names' in df.columns:
            psm_df['gene'] = df['Gene names']
        else:
            psm_df['gene'] = ''

        # Rows with modinfo '-1' are discarded (presumably parse_mq's marker
        # for a modification it could not convert -- TODO confirm).
        # The explicit copy makes the following column assignment operate on
        # an independent frame instead of a filtered slice, which avoids
        # pandas' SettingWithCopyWarning.
        psm_df = psm_df[psm_df['modinfo'] != '-1'].copy()
        # NOTE(review): check_invalid_aa is not defined in the visible part
        # of this module; rows it maps to an empty string are removed below.
        psm_df['peptide'] = psm_df['peptide'].apply(check_invalid_aa)
        psm_df = psm_df[psm_df['peptide'].str.len() > 0]
        return psm_df
