In [1]:
using Revise

In [2]:
include("../src/ProcessSequence.jl")

Main.ProcessSequence

In [None]:
# Project setup: read `project.json` from the project root and derive the
# sample name and the input/output directories used by the cells below.
using JSON: parse

# The project root is the parent of `@__DIR__`.
project_dir = dirname(@__DIR__)

project_json = read(joinpath(project_dir, "project.json"), String) |> parse

# The sample is identified by the "cell_line" entry of the project settings.
sample_name = project_json["cell_line"]

# `input` and `output` both sit directly under the project root.
input_dir, output_dir = joinpath.(project_dir, ("input", "output"))

# Per-sample input directory: `<project>/input/<sample_name>`.
sample_dir = joinpath(input_dir, sample_name)

## Find raw reads

In [None]:
# Look up the read files under `sample_dir`; the result is reused by the
# quality-check and concatenation cells below.
# NOTE(review): presumably a collection of FASTQ file paths — confirm against
# ProcessSequence.find_reads.
read_file_paths = ProcessSequence.find_reads(sample_dir)

## Run FastQC

In [None]:
# Quality-check the reads found above, writing into a per-sample directory
# under `output_dir` and using the job count from the project settings.
# NOTE(review): the directory name ends in "_raw_clean"; confirm that the
# "_clean" suffix is intended for the raw reads.
let reads = Tuple(read_file_paths),
    report_dir = joinpath(output_dir, string("check_sequence_", sample_name, "_raw", "_clean")),
    n_job = project_json["n_job"]

    ProcessSequence.check_sequence(reads, report_dir, n_job)
end

## Run MultiQC

Not necessary if the FASTQ files came from the same lane and the same sequencing run. The more complex the production of the reads, the more useful MultiQC is for identifying potential lane or batch biases.

In [None]:
# Run the bias check for this sample against `output_dir`.
# NOTE(review): presumably aggregates the reports written there by the
# earlier check_sequence call — confirm in ProcessSequence.check_sequence_bias.
ProcessSequence.check_sequence_bias(sample_name, output_dir)

## Concatenate reads of same strand

In [None]:
# Concatenate the reads found earlier for this sample.
# NOTE(review): presumably writes `<sample_name>_R1.fastq.gz` and
# `<sample_name>_R2.fastq.gz` into `<input_dir>/<sample_name>_cat`, which the
# next cell looks for — confirm in ProcessSequence.concatenate_reads.
ProcessSequence.concatenate_reads(read_file_paths, sample_name, input_dir)

## Run FastQC

In [None]:
# Quality-check the concatenated reads, but only when the `<sample_name>_cat`
# input directory exists; otherwise just report that there is nothing to do.
sample_cat_input_dir = joinpath(input_dir, string(sample_name, "_cat"))

sample_cat_output_dir = joinpath(output_dir, string("check_sequence_", sample_name, "_cat"))

if !ispath(sample_cat_input_dir)

    println("Reads weren't concatenated, no need to run FASTQC again.")

else

    ProcessSequence.check_sequence(
        # One concatenated file per read direction, passed as a 2-tuple of paths.
        map(("_R1.fastq.gz", "_R2.fastq.gz")) do suffix
            joinpath(sample_cat_input_dir, string(sample_name, suffix))
        end,
        sample_cat_output_dir,
        project_json["n_job"],
    )

end