In [1]:
# Enable inline plotting and populate the interactive namespace from numpy and matplotlib.
# NOTE(review): no cell visible in this notebook uses those names -- confirm before
# replacing this with explicit imports.
%pylab inline

Populating the interactive namespace from numpy and matplotlib


# Using MaxquantRunner() in a Jupyter notebook

There are issues with JupyterLab 3 finding the commands when they are installed in a different conda environment than `base`. A workaround is to provide the absolute path to `mono` and `MaxQuantCmd.exe` or use a notebook server that does not have that problem. For example, VSCode can run the code without problems. 

The following cells should print the version of `mono` and the path to `maxquant` in the `lrg` conda environment.

In [2]:
# Print the version banner of `mono` as resolved inside the conda environment `lrg`
! conda run -n lrg mono --version

Mono JIT compiler version 6.12.0.90 (tarball Fri Mar  5 04:37:13 UTC 2021)
Copyright (C) 2002-2014 Novell, Inc, Xamarin Inc and Contributors. www.mono-project.com
	TLS:           __thread
	SIGSEGV:       altstack
	Notifications: epoll
	Architecture:  amd64
	Disabled:      none
	Misc:          softdebug 
	Interpreter:   yes
	LLVM:          supported, not enabled.
	Suspend:       hybrid
	GC:            sgen (concurrent by default)



In [3]:
# Print the path of the `maxquant` executable as resolved inside the conda environment `lrg`
! conda run -n lrg which maxquant

/home/swacker/miniconda3/envs/lrg/bin/maxquant



Resolving these commands from within a notebook does not work in all versions of JupyterLab. Until this is fixed, you can try running the notebook in VSCode instead.

### Import `MaxquantRunner`

In [4]:
from pathlib import Path as P
from lrg_omics.proteomics import MaxquantRunner

#### Signature and documentation

In [5]:
# Show the constructor signature and documentation of the runner class
MaxquantRunner?

[0;31mInit signature:[0m
[0mMaxquantRunner[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mfasta_file[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmqpar_file[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmaxquantcmd[0m[0;34m=[0m[0;34m'maxquant'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mrun_dir[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mout_dir[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0madd_raw_name_to_outdir[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0madd_uuid_to_rundir[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0msbatch_cmds[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mcleanup[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mverbose[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0moutput_dir[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0

In [8]:
# Look the method up on the class rather than on the `mq` instance: `mq` is only
# created further down, so `mq.run?` fails when the notebook is run top to bottom
# on a fresh kernel.
MaxquantRunner.run?

[0;31mSignature:[0m
[0mmq[0m[0;34m.[0m[0mrun[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mraw_file[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mcold_run[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mrerun[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0msubmit[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mrun[0m[0;34m=[0m[0;32mTrue[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mwith_time[0m[0;34m=[0m[0;32mTrue[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Executes MaxQuant run or only prepares output and run directories.
ARGS
----
raw_file: str|pathlib.Path, path to a proteomics.raw file
cold_run: bool, default=False
    * True: do not execute, only return the commands
    * False: 
rerun: bool, default=False
    * True: execute even if output-dir is already present, and replace results
    * False: ommit run, if output-dir exists
submit:

# Example usage

In [6]:
# Base directory of the test data (relative path)
PATH = P('../tests/data/')

# RAW file to process (absolute path on the original author's machine; adjust locally)
fn_raw = '/data/proteomics_storage/testing/SA010-R1-blank-200425-R2.raw'

# mqpar.xml template
fn_mqp = PATH/'maxquant'/'tmt11'/'mqpar'/'mqpar.xml'

# FASTA file
fn_faa = PATH/'fasta'/'minimal.faa'

# Examples of MaxQuant run commands. Each alternative has its own name so that
# neither is silently overwritten by the other.
## using mono with absolute paths to `mono` and `MaxQuantCmd.exe`
CMD_MONO = '/home/swacker/miniconda3/envs/lrg/bin/mono /home/swacker/miniconda3/envs/lrg/bin/MaxQuantCmd.exe'
## using MaxQuant installed with conda
CMD_CONDA = 'maxquant'

# Command used by the following cells; switch to CMD_MONO if `maxquant` is not found
cmd = CMD_CONDA


In [6]:
# Build the runner from the FASTA file, the mqpar.xml template and the MaxQuant command
mq = MaxquantRunner(
    fasta_file=fn_faa,
    mqpar_file=fn_mqp,
    maxquantcmd=cmd,
)

# run=False: MaxQuant itself is not executed; the folders are prepared and the
# shell commands are returned instead.
# rerun=True: per the docstring, do not skip when the output directory already exists.
cmds = mq.run(raw_file=fn_raw, rerun=True, run=False)

In [7]:
# Display the shell command string returned by mq.run(...) in the previous cell
cmds

'cd /home/swacker/workspace/lrg-omics/notebooks/run; sleep 10; /usr/bin/time -o /home/swacker/workspace/lrg-omics/notebooks/run/time.txt -f "%E"  maxquant /home/swacker/workspace/lrg-omics/notebooks/run/mqpar.xml 1>maxquant.out 2>maxquant.err; if [ ! -d /home/swacker/workspace/lrg-omics/notebooks/run/combined ]; then mkdir /home/swacker/workspace/lrg-omics/notebooks/run/combined ; fi; if [ ! -d /home/swacker/workspace/lrg-omics/notebooks/run/combined/txt ]; then mkdir /home/swacker/workspace/lrg-omics/notebooks/run/combined/txt ; fi; cp time.txt maxquant.err maxquant.out /home/swacker/workspace/lrg-omics/notebooks/run/mqpar.xml /home/swacker/workspace/lrg-omics/notebooks/run/combined/txt/; mv /home/swacker/workspace/lrg-omics/notebooks/run/combined/txt/* /home/swacker/workspace/lrg-omics/notebooks/out'