# MaxQuant (MQ) Output-Files

Files compared:
1. `Summary.txt`
2. `mqpar.xml`
3. `peptides.txt`
4. `proteins.txt`

There are many more files; several of them seem to be available multiple times in different formats.

In [None]:
import os
import sys
import logging
from pathlib import Path
import random

import pandas as pd
import ipywidgets as widgets

sys.path.append('/home/jovyan/work/vaep/')
from vaep.io.mq import MaxQuantOutputDynamic

from src.logging import setup_logger_w_file
from src.data_objects import MqAllSummaries 

##################
##### CONFIG #####
##################
from config import FOLDER_MQ_TXT_DATA, FOLDER_PROCESSED

from config import FOLDER_DATA # project folder for storing the data
print(f"Search Raw-Files on path: {FOLDER_MQ_TXT_DATA}")

##################
### Logging ######
##################

# Drop whatever handlers are currently attached to the root logger
# (in a notebook session these come from Jupyter).
root_logger = logging.getLogger()
root_logger.handlers = []

# Project logger 'vaep', configured by the project helper with a file name base.
logger = setup_logger_w_file(logging.getLogger('vaep'),
                             fname_base='log_00_mq_aggregate_summaries')

# Record which handlers are active, one per line.
logger.info('Start with handlers: \n'
            + "\n".join(f"- {handler!r}" for handler in logger.handlers))

In [None]:
# All entries found directly below the configured MaxQuant txt-data folder.
folders = list(Path(FOLDER_MQ_TXT_DATA).iterdir())
# Dropdown to pick one entry; it receives its own copy of the list.
w_file = widgets.Dropdown(options=list(folders), description='View files')
w_file

In [None]:
# Build the MaxQuant output object for the entry currently selected in the
# `w_file` dropdown; re-run this cell after changing the selection.
mq_output = MaxQuantOutputDynamic(w_file.value)
mq_output

In [None]:
# Tell the user where results are written (absolute path of FOLDER_PROCESSED).
print(
    "Results will be saved in subfolders in\n"
    f"\t{FOLDER_PROCESSED.absolute()}\n"
    "using the name of the specified input-folder per default. Change to your liking."
)
# Uncomment and set to redirect the output location:
# FOLDER_PROCESSED = Path('')

> Go to the block you are interested in!

### Summaries Data

In [None]:
%%time
# Create the summaries container and hand it every entry in `folders`.
# NOTE(review): presumably this reads each folder's summary file and may skip
# already-known samples ("new" in the method name) — confirm in src.data_objects.
mq_all_summaries = MqAllSummaries()
mq_all_summaries.load_new_samples(folders=folders)

In [None]:
# Report folders flagged as empty by the loader and append them to a log file.
# The attribute is bound to a local name first: the bare name `empty_folders`
# was never defined in this notebook and raised NameError on a fresh kernel.
empty_folders = mq_all_summaries.empty_folders
if empty_folders:
    print(empty_folders)
    with open('log_empty_folder.txt', 'a') as f:
        # One entry per line. Formatting through an f-string also covers
        # non-str entries (e.g. Path objects), which writelines() rejects.
        # NOTE(review): assumes `empty_folders` is an iterable of folder
        # names/paths — confirm against MqAllSummaries.
        f.writelines(f"{folder}\n" for folder in empty_folders)
print(f"In total processed: {len(mq_all_summaries):5}")

In [None]:
# Let pandas render every column of the summaries frame instead of truncating.
pd.set_option("display.max_columns", len(mq_all_summaries.df.columns))

In [None]:
# Overview of the summaries frame: columns, non-null counts and dtypes.
mq_all_summaries.df.info()

In [None]:
# assert df is mq_all_summaries.df , "Object identity changed"# same object

- SIL - MS2 based on precursor which was a set of peaks
- PEAK - MS2 scan based on a single peak on precursor spectrum
- ISO - isotopic pattern detection


In [None]:
class col_summary:
    """Names of the summary columns selected from `mq_all_summaries.df`."""
    # Column label 'MS'.
    # NOTE(review): presumably the number of MS spectra per run — confirm.
    MS = 'MS'
    # Column label 'MS/MS Identified'; used as the filter column for the slider.
    MS2 =  'MS/MS Identified'

df = mq_all_summaries.df
if df is not None:
    # Restrict the frame to the two columns named in `col_summary`.
    MS_spectra = df[[col_summary.MS, col_summary.MS2]]

    def compute_summary(threshold_ms2_identified):
        """Show descriptive statistics for rows above the given threshold.

        Parameters
        ----------
        threshold_ms2_identified
            Rows are kept when their 'MS/MS Identified' value is strictly
            greater than this value.
        """
        above_threshold = MS_spectra[col_summary.MS2].gt(threshold_ms2_identified)
        display(MS_spectra[above_threshold].describe())

    # Slider from 0 up to the largest observed 'MS/MS Identified' value.
    w_ions_range = widgets.IntSlider(
        value=10000.0,
        min=.0,
        max=MS_spectra[col_summary.MS2].max(),
    )
    display(widgets.interactive(compute_summary, threshold_ms2_identified=w_ions_range))