# MaxQuant (MQ) Output-Files

Files compared:
1. `Summary.txt`
2. `mqpar.xml`
3. `peptides.txt`
4. `proteins.txt`

There are many more files; several of them seem to be available multiple times in different formats.

In [None]:
import os
import sys
import logging
from pathlib import Path
import random

import pandas as pd
import ipywidgets as widgets

sys.path.append('/home/jovyan/work/vaep/')
from vaep.io.mq import MaxQuantOutputDynamic
from vaep import io_images

from src.logging import setup_logger_w_file
from src.data_objects import MqAllSummaries 

##################
##### CONFIG #####
##################
import config
from config import FOLDER_MQ_TXT_DATA, FOLDER_PROCESSED

from config import FOLDER_DATA # project folder for storing the data
print(f"Search Raw-Files on path: {FOLDER_MQ_TXT_DATA}")

##################
### Logging ######
##################

# Delete the handler(s) the Jupyter notebook registered on the root logger
root_logger = logging.getLogger()
root_logger.handlers = []

# Configure the 'vaep' logger through the project helper
logger = setup_logger_w_file(
    logging.getLogger('vaep'),
    fname_base='log_00_mq_aggregate_summaries',
)

# Record which handlers are active on the configured logger
handler_listing = "\n".join(f"- {repr(handler)}" for handler in logger.handlers)
logger.info('Start with handlers: \n' + handler_listing)

In [None]:
# Collect every sub-directory of FOLDER_MQ_TXT_DATA and offer them in a dropdown
folders = [path for path in Path(FOLDER_MQ_TXT_DATA).iterdir() if path.is_dir()]
w_file = widgets.Dropdown(options=list(folders), description='View files')
w_file

In [None]:
# Wrap the folder currently selected in the dropdown
selected_folder = w_file.value
mq_output = MaxQuantOutputDynamic(selected_folder)
mq_output

In [None]:
# Tell the user where results end up; uncomment the last line to redirect them
processed_root = FOLDER_PROCESSED.absolute()
print(f"Results will be saved in subfolders in\n\t{str(processed_root)}"
      "\nusing the name of the specified input-folder per default. Change to your liking.")
# FOLDER_PROCESSED = Path('')

> Go to the block you are interested in!

### Summaries Data

In [None]:
%%time
# NOTE: the %%time cell magic has to stay on the first line of the cell; it reports the cell's run time.
# Hand all folders found above to MqAllSummaries (imported from src.data_objects).
mq_all_summaries = MqAllSummaries()
mq_all_summaries.load_new_samples(folders=folders)

In [None]:
# Report the folders recorded in `mq_all_summaries.empty_folders`
# (presumably folders without usable MaxQuant output -- confirm in src.data_objects).
# The attribute is bound to a local name first: the bare name `empty_folders`
# was never defined in this notebook and raised a NameError.
empty_folders = mq_all_summaries.empty_folders
if empty_folders:
    print(empty_folders)
    # Append one entry per line. `writelines` adds no separators itself and
    # only accepts strings, so every entry is formatted explicitly.
    with open('log_empty_folder.txt', 'a') as f:
        f.writelines(f"{folder}\n" for folder in empty_folders)
print(f"In total processed: {len(mq_all_summaries):5}")

In [None]:
# Let pandas display every column of the summaries table
pd.set_option("display.max_columns", len(mq_all_summaries.df.columns))

In [None]:
# Print the overview produced by `info()` for the summaries table
mq_all_summaries.df.info()

- SIL - MS2 based on a precursor which was a set of peaks
- PEAK - MS2 scan based on a single peak in the precursor spectrum
- ISO - isotopic pattern detection


In [None]:
class col_summary:
    """Names of the summary columns used in this notebook."""
    MS = 'MS'
    MS2 = 'MS/MS Identified'


df = mq_all_summaries.df
if df is not None:
    # Restrict the table to the two spectra-count columns
    MS_spectra = df[[col_summary.MS, col_summary.MS2]]

    def compute_summary(threshold_ms2_identified):
        """Display descriptive statistics of the rows whose 'MS/MS Identified'
        value is strictly greater than ``threshold_ms2_identified``."""
        above_threshold = MS_spectra[col_summary.MS2] > threshold_ms2_identified
        display(MS_spectra.loc[above_threshold].describe())

    # The slider runs from 0 up to the largest observed 'MS/MS Identified' value
    w_ions_range = widgets.IntSlider(
        value=10000.0,
        min=.0,
        max=MS_spectra[col_summary.MS2].max(),
    )
    display(widgets.interactive(compute_summary, threshold_ms2_identified=w_ions_range))

In [None]:
# Mean and standard deviation, including folders with 0 identified peptides
ms2_identified = MS_spectra[col_summary.MS2]
ms2_identified.mean(), ms2_identified.std()

In [None]:
def calc_cutoff(threshold=1):
    """Display mean and standard deviation of the 'MS/MS Identified' values
    that are greater than or equal to ``threshold``."""
    ms2_identified = MS_spectra[col_summary.MS2]
    selected = ms2_identified.loc[ms2_identified >= threshold]
    display(f"Threshold selected (inclusive): {threshold} ")
    display(f"mean: {selected.mean():.2f}, std-dev: {selected.std():.2f}")


# calc_cutoff()
# The slider runs from 0 up to the largest observed 'MS/MS Identified' value
w_threshold = widgets.IntSlider(
    value=10000.0,
    min=.0,
    max=MS_spectra[col_summary.MS2].max(),
)
display(widgets.interactive(calc_cutoff, threshold=w_threshold))

In [None]:
import matplotlib.pyplot as plt

# Plot settings
ylim_hist = (0, 600)
xlim_dens = (0, 70_000)
threshold_m2_identified = 15_000

# 2x2 grid: histograms in the top row, density plots in the bottom row.
# The x-axis is shared, so the plotting order below is kept fixed.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(20, 20), sharex=True)

# Left column: all samples
ms2_all = mq_all_summaries.df[col_summary.MS2]

ax = axes[0, 0]
ax = ms2_all.plot(
    kind='hist',
    bins=50,
    title="Histogram including samples with zero identified peptides",
    grid=True,
    ax=ax,
    ylim=ylim_hist,
)
ax = axes[1, 0]
_ = ms2_all.astype(float).plot.kde(
    ax=ax,
    title="Density plot including samples with zero identified peptides.",
    xlim=xlim_dens,
)

# Right column: only samples at or above the threshold
mask = mq_all_summaries.df[col_summary.MS2] >= threshold_m2_identified
ms2_above = mq_all_summaries.df.loc[mask, col_summary.MS2]

ax = axes[0, 1]
ax = ms2_above.plot(
    kind='hist',
    bins=40,
    title=f"Histogram including samples with {threshold_m2_identified:,d} and more identified peptides",
    grid=True,
    ax=ax,
    ylim=ylim_hist,
)
ax = axes[1, 1]
_ = ms2_above.astype(float).plot.kde(
    ax=ax,
    title=f"Density plot including samples with {threshold_m2_identified:,d} and more identified peptides.",
    xlim=xlim_dens,
)

io_images._savefig(fig, name='distribution_peptides_in_samples', folder=config.FIGUREFOLDER)