# Jupyter Notebook GenoRobotics Full Pipeline

## Imports

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os

from lib.consensus.consensus import run_consensus
from lib.identification.identification import run_identification



## Define Your File and Folder Paths

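- Set the `input_fastq_filename` variable to the name of the FASTQ file you want to process; the file is read from the `assets/input` directory.
  
- Modify the `output_base_dir` variable to point to the directory where you want the output files to be saved; a subfolder named after the input file is created there.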

In [3]:
# FASTQ file to analyse; it is looked up under assets/input.
input_fastq_filename = "rbcL_Qiagen_tomato_5000.fastq"
# File name without its extension, reused below to name the per-input output folder.
base_name = os.path.splitext(input_fastq_filename)[0]
input_fastq_path = os.path.join("assets", "input", input_fastq_filename)

# Each input gets its own folder: <output_base_dir>/<base_name>.
output_base_dir = os.path.join("assets", "output")
output_dir = os.path.join(output_base_dir, base_name)
# exist_ok=True keeps the cell re-runnable when the folder is already there.
os.makedirs(output_dir, exist_ok=True)

## Run Preprocessing (Optional)

In [3]:
# Placeholder for the optional preprocessing step. The import cell of this notebook
# brings in only run_consensus and run_identification, so the call stays disabled.
# TODO(review): import a preprocessing entry point before enabling it.
# preprocessing()

## Run Consensus Sequence Generation

Select which consensus sequence generation method you want to use by setting the `consensus_method` argument of `run_consensus` to either:

- "80_20_best_sequence"

- "80_20_longest_sequence"

In [4]:
# Consensus method handed to run_consensus. Choose one of:
# - "80_20_best_sequence"
# - "80_20_longest_sequence"
consensus_method = "80_20_best_sequence"

# If you're on Windows and have to use WSL (Windows Subsystem for Linux), set wsl to True.
# The flag is forwarded unchanged to run_consensus.
wsl = True

# base_name and input_fastq_path are defined in the file/folder path cell above.
run_consensus(
    input_name=base_name,
    input_fastq_path=input_fastq_path,
    consensus_method=consensus_method,
    wsl=wsl,
)

Running consensus pipeline...
Running consensus pipeline with 80_20_longest_sequence method...
Minimap2 alignment took 0.05 seconds.
Total Racon iterations took 14.85 seconds.
Total time taken for the pipeline: 14.91 seconds.


## Run Identification of Consensus Sequence

In [90]:
# Choose the database matching the gene you are trying to identify:
# matK, rbcL, psbA-trnH or ITS.
# NOTE(review): None is passed through as-is; presumably run_identification then
# falls back to a default or searches every database — confirm in lib.identification.
db = None

# base_name is the input file name without its extension (set in the path cell above).
run_identification(base_name, db)