# MaxQuant (MQ) Output-Files

Files compared:
1. `Summary.txt`
2. `mqpar.xml`
3. `peptides.txt`
4. `proteins.txt`

There are many more files; several of them seem to be available multiple times in different formats.

In [None]:
# easiest way to add custom packages on erda.dk
# import sys
# sys.path.append('/home/jovyan/work/vaep/')

In [None]:
import os
from pathlib import Path
import random

import pandas as pd
import ipywidgets as widgets

from vaep.io import search_files, search_subfolders, PathsList, dump_json
from vaep.io.mq import MaxQuantOutputDynamic
from vaep.io.mq import ExtractFromPeptidesTxt
import vaep.io.mq as mq

import src
import src.file_utils as file_io
from src.file_utils import check_for_key
from src.file_utils import process_files
from src.file_utils import load_summary, load_mqpar_xml

##################
##### CONFIG #####
##################
from config import FOLDER_MQ_TXT_DATA, FOLDER_PROCESSED
from config import FOLDER_KEY  # defines how filenames are parsed for use as indices

from config import FOLDER_DATA # project folder for storing the data
print(f"Search Raw-Files on path: {FOLDER_MQ_TXT_DATA}")

In [None]:
import logging
from datetime import datetime

# Remove the handlers the Jupyter notebook attached to the root logger, so
# that records are not emitted a second time through the root logger.
root_logger = logging.getLogger()
# root_logger.setLevel(logging.ERROR)
for handler in list(root_logger.handlers):
    root_logger.removeHandler(handler)

# logger = logging.getLogger(mq_output.folder.stem)
logger = logging.getLogger('vaep')
logger.setLevel(logging.INFO)

# Detach AND close handlers left over from a previous execution of this cell.
# Only dropping the references would keep the old log file open.
for handler in list(logger.handlers):
    logger.removeHandler(handler)
    handler.close()

# Console output
c_handler = logging.StreamHandler()
c_handler.setLevel(logging.INFO)

# File output: one log file per minute in which the cell was executed
date_log_file = "{:%y%m%d_%H%M}".format(datetime.now())
f_handler = logging.FileHandler(f"log_01_maxquant_file_processing_{date_log_file}.txt")
f_handler.setLevel(logging.INFO)

# Same record layout for console and file
c_format = logging.Formatter(
    '%(name)s - %(levelname)-8s %(message)s ')

c_handler.setFormatter(c_format)
f_handler.setFormatter(c_format)

logger.addHandler(c_handler)
logger.addHandler(f_handler)

In [None]:
import multiprocessing

# os.cpu_count() returns None if the number of CPUs cannot be determined.
n_cpus = os.cpu_count() or 1

# Use at most 8 worker processes and leave one CPU for different things,
# but never ask for fewer than one worker (Pool rejects processes < 1).
setup_multiprocessing = {'processes': max(1, min(8, n_cpus - 1))}


def f(x):
    """Return the square of ``x`` (toy workload for the pool check below)."""
    return x*x


# Quick check that a worker pool can be started with the chosen settings.
# The guard keeps the check from running if this code is imported as a module
# instead of being executed as the main program (as it is in a notebook).
if __name__ == '__main__':
    with multiprocessing.Pool(**setup_multiprocessing) as p:
        print(p.map(f, range(100)))

In [None]:
# Path object for the MaxQuant txt-data location configured in `config`.
folders_mq_txt_data = Path(FOLDER_MQ_TXT_DATA)

In [None]:
# Every entry found directly inside the MaxQuant txt-data location.
folders = list(Path(FOLDER_MQ_TXT_DATA).iterdir())

# Dropdown to pick one of the entries; it receives its own copy of the list.
w_file = widgets.Dropdown(
    description='View files',
    options=list(folders),
)
w_file

In [None]:
# Wrap the entry currently selected in the dropdown above.
# Ending the cell with the bare name displays the object's representation.
mq_output = MaxQuantOutputDynamic(w_file.value)
mq_output

In [None]:
# Tell the user where results are going to be written.
message = (
    f"Results will be saved in subfolders in\n\t{str(FOLDER_PROCESSED.absolute())}"
    "\nusing the name of the specified input-folder per default. Change to your liking."
)
print(message)
# FOLDER_PROCESSED = Path('')

> Go to the block you are interested in!

### Summaries Data

In [None]:
# Create the summaries container and load the samples of all folders found above.
# NOTE(review): `workers=8` is hard-coded although `setup_multiprocessing`
# computes a machine-dependent process count earlier - confirm whether the
# two should agree.
from src.data_objects import MqAllSummaries    
mq_all_summaries = MqAllSummaries()
mq_all_summaries.load_new_samples(folders=folders, workers=8)

In [None]:
# Report the folders flagged as empty and append them to a log file.
# The name `empty_folders` was previously used without being defined, so it
# is bound explicitly to the attribute that is tested here.
empty_folders = mq_all_summaries.empty_folders
if empty_folders:
    print(empty_folders)
    with open('log_empty_folder.txt', 'a') as log_file:
        # writelines() does not add line separators: write one entry per line.
        log_file.writelines(f"{folder}\n" for folder in empty_folders)
print(f"In total processed: {len(mq_all_summaries):5}")

In [None]:
# Let pandas display as many columns as the summaries table has.
n_summary_columns = len(mq_all_summaries.df.columns)
pd.options.display.max_columns = n_summary_columns

In [None]:
# Overview of the loaded summaries table (presumably a pandas DataFrame).
mq_all_summaries.df.info()

In [None]:
# assert df is mq_all_summaries.df , "Object identity changed"# same object

- SIL - MS2 based on precursor which was a set of peaks
- PEAK - MS2 scan based on a single peak on precursor spectrum
- ISO - isotopic pattern detection


In [None]:
class col_summary:
    """Names of the summary-table columns that are used in this section."""

    # column selected together with MS2 for the spectra overview
    MS = 'MS'
    # column the interactive threshold is applied to
    MS2 = 'MS/MS Identified'

df = mq_all_summaries.df
if df is not None:
    # Only the two columns named in `col_summary` are needed here.
    MS_spectra = df[[col_summary.MS, col_summary.MS2]]

    def compute_summary(threshold_ms2_identified):
        """Display descriptive statistics of the rows above a threshold.

        Parameters
        ----------
        threshold_ms2_identified : int
            Only rows whose 'MS/MS Identified' value is strictly greater
            than this threshold are described.
        """
        mask = MS_spectra[col_summary.MS2] > threshold_ms2_identified
        display(MS_spectra.loc[mask].describe())

    # IntSlider is an integer widget: pass integers explicitly instead of
    # relying on the implicit coercion of floats / NumPy scalars.
    w_ions_range = widgets.IntSlider(
        value=0,
        min=0,
        max=int(MS_spectra[col_summary.MS2].max()),
    )
    display(widgets.interactive(compute_summary, threshold_ms2_identified=w_ions_range))