# MaxQuantOutput

In [1]:
from pathlib import Path

import ipywidgets as w
import pandas as pd

import vaep.io as io
import vaep.io.mq as mq

from config import FOLDER_MQ_TXT_DATA

# Collect the sub-folders below the configured MaxQuant txt data folder
# (depth 1, root folder excluded) and offer them in a dropdown widget.
folders = io.search_subfolders(
    path=FOLDER_MQ_TXT_DATA,
    depth=1,
    exclude_root=True,
)
w_folder = w.Dropdown(description='Select a folder', options=folders)
w_folder

Dropdown(description='Select a folder', options=(WindowsPath('data/mq_out/20190611_QX3_LiSc_MA_Hela_500ng_LC15…

## MaxQuantOutput class

Instead of handling the files manually in a MQ folder, e.g. like

In [2]:
# List the entries of the folder selected in the dropdown. The empty query
# presumably acts as a "match everything" filter: the output below also
# contains non-txt entries such as 'tables.pdf'.
all_files = io.search_files(path=w_folder.value, query='')
all_files

PathsList(files=['.ipynb_checkpoints', 'allPeptides.txt', 'evidence.txt', 'matchedFeatures.txt', 'modificationSpecificPeptides.txt', 'ms3Scans.txt', 'msms.txt', 'msmsScans.txt', 'mzRange.txt', 'parameters.txt', 'peptides.txt', 'proteinGroups.txt', 'summary.txt', 'tables.pdf'], folder=WindowsPath('data/mq_out/20190611_QX3_LiSc_MA_Hela_500ng_LC15'))

files can just be accessed using the `MaxQuantOutput` class.

In [3]:
# Wrap the folder currently selected in the dropdown widget.
mq_output = mq.MaxQuantOutput(w_folder.value)
mq_output

MaxQuantOutput(WindowsPath('data/mq_out/20190611_QX3_LiSc_MA_Hela_500ng_LC15'))

This lists the files in the current folder for you (calling `search_files`):

In [4]:
# File names found in the folder. In the output below, '.ipynb_checkpoints'
# and 'tables.pdf' (both part of `all_files`) are not included.
mq_output.files

['allPeptides.txt',
 'evidence.txt',
 'matchedFeatures.txt',
 'modificationSpecificPeptides.txt',
 'ms3Scans.txt',
 'msms.txt',
 'msmsScans.txt',
 'mzRange.txt',
 'parameters.txt',
 'peptides.txt',
 'proteinGroups.txt',
 'summary.txt']

It also extends the class attributes on initialization with the expected files (statically):

In [5]:
# Presumably the attribute names that exist before the per-file attributes
# are added (the output below lists no file names).
# NOTE: the attribute is spelled `_inital_attritubutes` by the class itself,
# so the spelling has to be used as is.
mq_output._inital_attritubutes

['NAME_FILE_MAP',
 'dump_intensity',
 'find_attribute',
 'get_files',
 'get_list_of_attributes',
 'load',
 'register_file']

In [6]:
# Names available on the instance: the initial attributes shown above plus
# one attribute per file and a few more (e.g. `folder`, `paths`), as listed
# in the output below.
mq_output.get_list_of_attributes()

['NAME_FILE_MAP',
 'OxidationSites',
 '_inital_attritubutes',
 'allPeptides',
 'dump_intensity',
 'evidence',
 'files',
 'find_attribute',
 'folder',
 'get_files',
 'get_list_of_attributes',
 'load',
 'matchedFeatures',
 'modificationSpecificPeptides',
 'ms3Scans',
 'msms',
 'msmsScans',
 'mzRange',
 'parameters',
 'paths',
 'peptides',
 'proteinGroups',
 'register_file',
 'summary']

In [7]:
# TODO: deleting an attribute is not possible yet. Would `__getitem__` be a
# better alternative for the lookup?
# del mq_output.OxidationMSites

In [8]:
# Map the stem of each file (its name without the suffix) to the file name.
{
    Path(file_name).stem: file_name
    for file_name in mq_output.files
}

{'allPeptides': 'allPeptides.txt',
 'evidence': 'evidence.txt',
 'matchedFeatures': 'matchedFeatures.txt',
 'modificationSpecificPeptides': 'modificationSpecificPeptides.txt',
 'ms3Scans': 'ms3Scans.txt',
 'msms': 'msms.txt',
 'msmsScans': 'msmsScans.txt',
 'mzRange': 'mzRange.txt',
 'parameters': 'parameters.txt',
 'peptides': 'peptides.txt',
 'proteinGroups': 'proteinGroups.txt',
 'summary': 'summary.txt'}

In [9]:
# Attribute access returns the peptides table (presumably read from
# `peptides.txt`); in the output below it is indexed by `Sequence`.
mq_output.peptides

Unnamed: 0_level_0,N-term cleavage window,C-term cleavage window,Amino acid before,First amino acid,Second amino acid,Second last amino acid,Last amino acid,Amino acid after,A Count,R Count,...,Potential contaminant,id,Protein group IDs,Mod. peptide IDs,Evidence IDs,MS/MS IDs,Best MS/MS,Oxidation (M) site IDs,Taxonomy IDs,MS/MS Count
Sequence,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAAAAAAAAAAATGTEAGPGTAGGSENGSEVAAQPAGLSGPAEVGPGAVGER,______________________________,LSGPAEVGPGAVGERTPRKKEPPRASPPGG,K,A,A,E,R,T,19,1,...,,0,1774,0,0,0,0.0,,9606,1
AAAAAAAAAVSR,TTSSRVLRGGRDRGRAAAAAAAAAVSRRRK,RGRAAAAAAAAAVSRRRKAEYPRRRRSSPS,R,A,A,S,R,R,9,1,...,,1,231,1,1,1,1.0,,9606,1
AAAAAAAGDSDSWDADAFSVEDPVRK,______________________________,SWDADAFSVEDPVRKVGGGGTAGGDRWEGE,M,A,A,R,K,V,9,1,...,,2,1877,2,2,2,2.0,,9606,1
AAAAAAALQAK,TILRQARNHKLRVDKAAAAAAALQAKSDEK,RVDKAAAAAAALQAKSDEKAAVAGKKPVVG,K,A,A,A,K,S,8,0,...,,3,2461,3,3,3;4,4.0,,9606,2
AAAAAAGAASGLPGPVAQGLK,______________________________,GAASGLPGPVAQGLKEALVDTLTGILSPVQ,M,A,A,L,K,E,9,0,...,,4,4162,4,4;5,5;6,6.0,,9606,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YYTSASGDEMVSLK,HEDSQNRKKLSELLRYYTSASGDEMVSLKD,RYYTSASGDEMVSLKDYCTRMKENQKHIYY,R,Y,Y,L,K,D,1,0,...,,38783,2077,40966;40967,49202;49203;49204;49205;49206;49207;49208;49209,54670;54671;54672;54673;54674;54675;54676;5467...,54679.0,1311,9606,10
YYTVFDRDNNR,PSGPLWILGDVFIGRYYTVFDRDNNRVGFA,FIGRYYTVFDRDNNRVGFAEAARL______,R,Y,Y,N,R,V,0,2,...,,38784,379,40968,49210,54680,54680.0,,9606,1
YYVLNALK,GQPVKVRVSYQKLLKYYVLNALKHRPPKAQ,SYQKLLKYYVLNALKHRPPKAQKKRYLFRS,K,Y,Y,L,K,H,1,0,...,,38785,3521,40969,49211,54681,54681.0,,9606,1
YYVTIIDAPGHR,GITIDISLWKFETSKYYVTIIDAPGHRDFI,TSKYYVTIIDAPGHRDFIKNMITGTSQADC,K,Y,Y,H,R,D,1,1,...,,38786,2873,40970,49212;49213;49214,54682;54683;54684;54685;54686,54683.0,,9606,5


### Dynamic Attribute lookup

Idea: try to use `__getattr__`, and maybe `__setattr__`?

This version offers fewer inspection possibilities, as the attributes are only set when they are looked up.

In [10]:
# Rebinds `mq_output`: the cells below use the dynamic variant of the class
# for the same selected folder.
mq_output = mq.MaxQuantOutputDynamic(w_folder.value)
mq_output

MaxQuantOutputDynamic(WindowsPath('data/mq_out/20190611_QX3_LiSc_MA_Hela_500ng_LC15'))

In [11]:
# Keys under which the files can be looked up as attributes; in the output
# below these are the file names without the `.txt` suffix.
mq_output.file_keys

['allPeptides',
 'evidence',
 'matchedFeatures',
 'modificationSpecificPeptides',
 'ms3Scans',
 'msms',
 'msmsScans',
 'mzRange',
 'parameters',
 'peptides',
 'proteinGroups',
 'summary']

In [12]:
# Same lookup as with the static class; the output shows the same table.
mq_output.peptides

Unnamed: 0_level_0,N-term cleavage window,C-term cleavage window,Amino acid before,First amino acid,Second amino acid,Second last amino acid,Last amino acid,Amino acid after,A Count,R Count,...,Potential contaminant,id,Protein group IDs,Mod. peptide IDs,Evidence IDs,MS/MS IDs,Best MS/MS,Oxidation (M) site IDs,Taxonomy IDs,MS/MS Count
Sequence,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAAAAAAAAAAATGTEAGPGTAGGSENGSEVAAQPAGLSGPAEVGPGAVGER,______________________________,LSGPAEVGPGAVGERTPRKKEPPRASPPGG,K,A,A,E,R,T,19,1,...,,0,1774,0,0,0,0.0,,9606,1
AAAAAAAAAVSR,TTSSRVLRGGRDRGRAAAAAAAAAVSRRRK,RGRAAAAAAAAAVSRRRKAEYPRRRRSSPS,R,A,A,S,R,R,9,1,...,,1,231,1,1,1,1.0,,9606,1
AAAAAAAGDSDSWDADAFSVEDPVRK,______________________________,SWDADAFSVEDPVRKVGGGGTAGGDRWEGE,M,A,A,R,K,V,9,1,...,,2,1877,2,2,2,2.0,,9606,1
AAAAAAALQAK,TILRQARNHKLRVDKAAAAAAALQAKSDEK,RVDKAAAAAAALQAKSDEKAAVAGKKPVVG,K,A,A,A,K,S,8,0,...,,3,2461,3,3,3;4,4.0,,9606,2
AAAAAAGAASGLPGPVAQGLK,______________________________,GAASGLPGPVAQGLKEALVDTLTGILSPVQ,M,A,A,L,K,E,9,0,...,,4,4162,4,4;5,5;6,6.0,,9606,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YYTSASGDEMVSLK,HEDSQNRKKLSELLRYYTSASGDEMVSLKD,RYYTSASGDEMVSLKDYCTRMKENQKHIYY,R,Y,Y,L,K,D,1,0,...,,38783,2077,40966;40967,49202;49203;49204;49205;49206;49207;49208;49209,54670;54671;54672;54673;54674;54675;54676;5467...,54679.0,1311,9606,10
YYTVFDRDNNR,PSGPLWILGDVFIGRYYTVFDRDNNRVGFA,FIGRYYTVFDRDNNRVGFAEAARL______,R,Y,Y,N,R,V,0,2,...,,38784,379,40968,49210,54680,54680.0,,9606,1
YYVLNALK,GQPVKVRVSYQKLLKYYVLNALKHRPPKAQ,SYQKLLKYYVLNALKHRPPKAQKKRYLFRS,K,Y,Y,L,K,H,1,0,...,,38785,3521,40969,49211,54681,54681.0,,9606,1
YYVTIIDAPGHR,GITIDISLWKFETSKYYVTIIDAPGHRDFI,TSKYYVTIIDAPGHRDFIKNMITGTSQADC,K,Y,Y,H,R,D,1,1,...,,38786,2873,40970,49212;49213;49214,54682;54683;54684;54685;54686,54683.0,,9606,5


In [13]:
# Looking up a key without a matching file raises an AttributeError; print
# its message (which lists the valid keys) instead of showing a traceback.
try:
    mq_output.peptides_
except AttributeError as err:
    print(*err.args)

No such file: peptides_.txt: Choose one of the following:
allPeptides, evidence, matchedFeatures, modificationSpecificPeptides, ms3Scans, msms, msmsScans, mzRange, parameters, peptides, proteinGroups, summary


In [14]:
# Compared to the static class, the output below additionally lists
# `_peptides`, `file_keys` and `name_file_map`.
mq_output.get_list_of_attributes()

['NAME_FILE_MAP',
 'OxidationSites',
 '_inital_attritubutes',
 '_peptides',
 'allPeptides',
 'dump_intensity',
 'evidence',
 'file_keys',
 'files',
 'find_attribute',
 'folder',
 'get_files',
 'get_list_of_attributes',
 'load',
 'matchedFeatures',
 'modificationSpecificPeptides',
 'ms3Scans',
 'msms',
 'msmsScans',
 'mzRange',
 'name_file_map',
 'parameters',
 'paths',
 'peptides',
 'proteinGroups',
 'register_file',
 'summary']

## Files

### evidence.txt

> Some columns raise a warning when `evidence` is accessed.

In [15]:
# Show every column of the wide evidence table. `pandas` is imported in the
# import cell at the top of the notebook instead of in the middle of it.
# NOTE: the display option is set globally, so it also affects all
# following cells.
pd.set_option('display.max_columns', len(mq_output.evidence.columns))
mq_output.evidence.head()

  return cls.find_attribute(f'_{filename}')


Unnamed: 0_level_0,Length,Modifications,Modified sequence,Oxidation (M) Probabilities,Oxidation (M) Score Diffs,Acetyl (Protein N-term),Oxidation (M),Missed cleavages,Proteins,Leading proteins,Leading razor protein,Gene names,Protein names,Type,Raw file,MS/MS m/z,Charge,m/z,Mass,Uncalibrated - Calibrated m/z [ppm],Uncalibrated - Calibrated m/z [Da],Mass error [ppm],Mass error [Da],Uncalibrated mass error [ppm],Uncalibrated mass error [Da],Max intensity m/z 0,Retention time,Retention length,Calibrated retention time,Calibrated retention time start,Calibrated retention time finish,Retention time calibration,Match time difference,Match m/z difference,Match q-value,Match score,Number of data points,Number of scans,Number of isotopic peaks,PIF,Fraction of total spectrum,Base peak fraction,PEP,MS/MS count,MS/MS scan number,Score,Delta score,Combinatorics,Intensity,Reverse,Potential contaminant,id,Protein group IDs,Peptide ID,Mod. peptide ID,MS/MS IDs,Best MS/MS,Oxidation (M) site IDs,Taxonomy IDs
Sequence,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1
AAAAAAAAAAAATGTEAGPGTAGGSENGSEVAAQPAGLSGPAEVGPGAVGER,52,Unmodified,_AAAAAAAAAAAATGTEAGPGTAGGSENGSEVAAQPAGLSGPAEVG...,,,0,0,0,R4GMQ1;O60341;O60341-2,R4GMQ1,R4GMQ1,KDM1A,Lysine-specific histone demethylase 1A,MULTI-MSMS,20190611_QX3_LiSc_MA_Hela_500ng_LC15,1101.8,4,1101.3,4401.1,2.4,0.0,0.2,0.0,2.6,0.0,1101.8,85.6,0.3,85.6,85.5,85.8,0.0,,,,,84.0,22.0,5.0,0,0,0,0.0,1,106834,102.7,91.3,1,57739000.0,,,0,1774,0,0,0,0,,9606
AAAAAAAAAVSR,12,Unmodified,_AAAAAAAAAVSR_,,,0,0,0,A0A0A6YYC7;Q96JP5-2;Q96JP5,A0A0A6YYC7,A0A0A6YYC7,ZFP91-CNTF;ZFP91,E3 ubiquitin-protein ligase ZFP91,MULTI-MSMS,20190611_QX3_LiSc_MA_Hela_500ng_LC15,500.8,2,500.8,999.5,1.4,0.0,0.5,0.0,2.0,0.0,500.8,25.6,0.2,25.6,25.4,25.6,0.0,,,,,37.0,16.0,3.0,0,0,0,0.0,1,30184,68.6,46.9,1,87575000.0,,,1,231,1,1,1,1,,9606
AAAAAAAGDSDSWDADAFSVEDPVRK,26,Acetyl (Protein N-term),_(Acetyl (Protein N-term))AAAAAAAGDSDSWDADAFSV...,,,1,0,1,O75822;O75822-3;O75822-2,O75822,O75822,EIF3J,Eukaryotic translation initiation factor 3 sub...,MULTI-MSMS,20190611_QX3_LiSc_MA_Hela_500ng_LC15,879.4,3,879.1,2634.2,2.2,0.0,-0.8,-0.0,1.4,0.0,879.4,95.0,0.6,95.0,94.7,95.3,0.0,,,,,157.0,47.0,5.0,0,0,0,0.0,1,118493,157.9,144.3,1,442780000.0,,,2,1877,2,2,2,2,,9606
AAAAAAALQAK,11,Unmodified,_AAAAAAALQAK_,,,0,0,0,P36578;H3BM89;H3BU31,P36578,P36578,RPL4,60S ribosomal protein L4,MULTI-MSMS,20190611_QX3_LiSc_MA_Hela_500ng_LC15,479.3,2,478.8,955.5,1.9,0.0,-0.3,-0.0,1.6,0.0,478.8,26.7,0.6,26.7,26.5,27.1,0.0,,,,,163.0,46.0,5.0,0,0,0,0.0,2,31655,144.4,106.8,1,3166700000.0,,,3,2461,3,3,3;4,4,,9606
AAAAAAGAASGLPGPVAQGLK,21,Acetyl (Protein N-term),_(Acetyl (Protein N-term))AAAAAAGAASGLPGPVAQGLK_,,,1,0,0,Q96P70,Q96P70,Q96P70,IPO9,Importin-9,MULTI-MSMS,20190611_QX3_LiSc_MA_Hela_500ng_LC15,598.0,3,597.7,1790.0,2.4,0.0,-0.7,-0.0,1.8,0.0,597.7,96.9,0.5,96.9,96.6,97.1,0.0,,,,,85.0,34.0,3.0,0,0,0,0.0,1,120706,46.6,36.8,1,40166000.0,,,4,4162,4,4,5,5,,9606


In [16]:
# Pick three columns of the evidence table by position and display the
# second of them.
# NOTE(review): the displayed column ('Peptide ID') has dtype int64, i.e. it
# is not of mixed type. If the positions 50, 53 and 58 were taken from a
# pandas DtypeWarning, they presumably also count the index column
# (`Sequence`) and would have to be shifted by one here -- TODO confirm.
mixed_dtype_columns = mq_output.evidence.columns[[50, 53, 58]]
mq_output.evidence.loc[:, mixed_dtype_columns].loc[:, mixed_dtype_columns[1]]

Sequence
AAAAAAAAAAAATGTEAGPGTAGGSENGSEVAAQPAGLSGPAEVGPGAVGER        0
AAAAAAAAAVSR                                                1
AAAAAAAGDSDSWDADAFSVEDPVRK                                  2
AAAAAAALQAK                                                 3
AAAAAAGAASGLPGPVAQGLK                                       4
                                                        ...  
YYVLNALK                                               38,785
YYVTIIDAPGHR                                           38,786
YYVTIIDAPGHR                                           38,786
YYVTIIDAPGHR                                           38,786
YYYIPQYK                                               38,787
Name: Peptide ID, Length: 49216, dtype: int64