# Notebook to run through all the steps in the pipeline

Very useful for debugging, since each step runs in its own cell and its output can be inspected.

In [None]:
# Load IPython's autoreload extension. Mode 2 reloads all modules before each
# cell is executed, so edits to the pipeline source take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import rpy2.robjects as robjects
import multiprocessing
from experiments.pipelines.pipeline_rpy2 import Rpy2Pipeline

## Load analysis and project

In [None]:
# Ids of the project and analysis that the following cells look up.
project_id = 5
analysis_id = 6

In [None]:
# Fetch the project by id; the lookup yields a pair of which only the first
# element (the object) is used.
# NOTE(review): assuming Project is a Django model, get_or_create inserts a
# new row when no project has this id instead of raising -- confirm that is
# wanted in a debugging run.
project, _project_created = Project.objects.get_or_create(id=project_id)

In [None]:
# Fetch the analysis by id; the lookup yields a pair of which only the first
# element (the object) is used.
# NOTE(review): assuming Analysis is a Django model, get_or_create inserts a
# new row when no analysis has this id instead of raising -- confirm that is
# wanted in a debugging run.
analysis, _analysis_created = Analysis.objects.get_or_create(id=analysis_id)

In [None]:
# Show the status currently stored on the analysis before overwriting it.
# The call form of print works on both Python 2 and Python 3.
print(analysis.status)
# Set the status to 'Ready' and persist the change.
# NOTE(review): presumably the pipeline expects this state before a run -- confirm.
analysis.status = 'Ready'
analysis.save()

## Create pipeline object

In [None]:
# Build the rpy2-based pipeline from the analysis and project objects.
pipeline = Rpy2Pipeline(analysis, project)

In [None]:
# Shortcut: bind the pipeline to the name `self`, which every later cell uses
# to call the pipeline's methods and read its attributes.
self = pipeline

In [None]:
# Pull each parameter group out of the pipeline's parameter set, in the same
# order as the names on the left-hand side.
(xcms_params,
 mzmatch_params,
 peakml_params,
 mzmatch_outputs,
 mzmatch_filters) = [
    self.get_value(self.pimp_params, _param_key)
    for _param_key in (
        'xcms.params',
        'mzmatch.params',
        'peakml.params',
        'mzmatch.outputs',
        'mzmatch.filters',
    )
]

# Number of CPUs reported by the machine; passed to create_peakml later on.
n_slaves = multiprocessing.cpu_count()

Here we process only the positive-mode data.

In [None]:
# Polarity handled by this run; passed to the pipeline calls in later cells.
polarity = 'positive'

In [None]:
# Look up the R function 'Pimp.getFormattedMzmatchOutputs' through rpy2.
format_mzmatch_outputs = robjects.r['Pimp.getFormattedMzmatchOutputs']
# Call it with the analysis id, the polarity and the raw mzmatch output settings.
formatted_mzmatch_outputs = format_mzmatch_outputs(self.analysis.id, polarity, mzmatch_outputs)
# Obtain the per-polarity and combined directories used by the later steps.
# NOTE(review): the method name suggests the directories are created here -- confirm.
polarity_dir, combined_dir = self.create_input_directories(polarity, formatted_mzmatch_outputs)

### Peak detection and RT correction

In [None]:
# Show the metadata file entry for the selected polarity.
# The call form of print works on both Python 2 and Python 3.
print(self.metadata.files[polarity])

In [None]:
# Run the peak detection / RT correction step for this polarity, using the
# parameter groups loaded earlier and n_slaves (the CPU count).
# NOTE(review): presumably n_slaves is the number of parallel workers -- confirm.
self.create_peakml(polarity, polarity_dir, xcms_params, mzmatch_params,
                   peakml_params, mzmatch_outputs, n_slaves)

### Matching

Generate groups for matching

In [None]:
# Generate the groups used for matching. Each entry is later unpacked as
# (group_label, index, description, files, abspath).
# NOTE(review): the name suggests groups without files are dropped -- confirm.
non_empty = self.generate_combinations(polarity, combined_dir)

In [None]:
# List every group, one per line, with its five fields separated by spaces.
# %-formatting reproduces the space-separated output of the old multi-argument
# print statement while also being valid on Python 3.
for group_label, index, description, files, abspath in non_empty:
    print('%s %s %s %s %s' % (group_label, index, description, files, abspath))

Match peaks across samples in groups

In [None]:
# Match peaks across the samples of each group; the result is kept in
# out_files and fed to the filtering step.
out_files = self.generate_peaksets(polarity_dir, combined_dir, non_empty, mzmatch_params)

In [None]:
# Show the result of the group matching step.
# The call form of print works on both Python 2 and Python 3.
print(out_files)

Filter each group

In [None]:
# Filter each group's peakset; out_files is rebound to the filtered result.
out_files = self.filter_peaksets(out_files, mzmatch_params)

In [None]:
# Show the result of the per-group filtering step.
# The call form of print works on both Python 2 and Python 3.
print(out_files)

Final combine

In [None]:
# Final combine: merge the per-group results into a single result, out_file.
out_file = self.combine_final(out_files, mzmatch_params, formatted_mzmatch_outputs)

In [None]:
# Show the result of the final combine step.
# The call form of print works on both Python 2 and Python 3.
print(out_file)

Final filter

In [None]:
# Final filter: apply the mzmatch filters to the combined result; out_file is
# rebound to the filtered result.
out_file = self.filter_final(out_file, mzmatch_filters, mzmatch_params, formatted_mzmatch_outputs)

In [None]:
# Show the result of the final filter step.
# The call form of print works on both Python 2 and Python 3.
print(out_file)

### Gap filling

In [None]:
# Gap filling: out_file is rebound to the result of the gap-filling step.
out_file = self.gap_filling(out_file, peakml_params, formatted_mzmatch_outputs)

In [None]:
# Show the result of the gap-filling step.
# The call form of print works on both Python 2 and Python 3.
print(out_file)

### Related peaks

In [None]:
# Related peaks: the step returns two values, the updated out_file and a
# separate basepeak_file.
out_file, basepeak_file = self.related_peaks(out_file, mzmatch_params, formatted_mzmatch_outputs)

In [None]:
# Show both results of the related-peaks step, one per line.
# The call form of print works on both Python 2 and Python 3.
print(out_file)
print(basepeak_file)

### Identification

In [None]:
# Identification: run the identify step on the final result using the
# databases held on the pipeline (self.r_dbs).
databases = self.r_dbs
raw_data = self.identify(polarity, out_file, databases, non_empty, mzmatch_params, formatted_mzmatch_outputs)