# Periodically look for new files, move to target directory, run pileline

In [20]:
import os
import sys

import numpy as np
import time

## User Inputs

In [29]:
source_dir = "available_data"  # this is data that has been generated
target_dir = "data/ont/uk/2019-03-28/"
pf_ref_path = "data/resources/plasmodb/39/PlasmoDB-39_Pfalciparum3D7_Genome.fasta"
wait_time = 120  # in seconds

## Prepare Folder Structure

In [30]:
fastq_dir = os.path.join(target_dir, "fastq")
trimmed_dir = os.path.join(target_dir, "fastq_trimmed")

if not os.path.isdir(fastq_dir):
    os.mkdir(fastq_dir)
if not os.path.isdir(trimmed_dir):
    os.mkdir(trimmed_dir)

## Run Program

In [None]:
seen_files = set() # To start, we have seen no files.
while True:
    print("Checking for new files...")
    current_files = set(os.listdir(source_dir))
    new_files = current_files.difference(seen_files)
    if len(new_files) > 0:
        seen_files.update(new_files)  # Add new files to those seen, as they are about to be processed.
        print("Discovered %d new files." % len(new_files))
        print("Beginning processing.")
        print("--------------------------------------------------------------------------------")
        for i, file in enumerate(new_files):
            ID, ext = os.path.splitext(file)
            print("File", i)
            print("  Name:", file)
            print("  ID:", ID)
            print("  Copying to /fastq directory...")
            cmd = "cp %s %s" % (os.path.join(source_dir, file), fastq_dir)
            print("  ", cmd)
            os.system(cmd)
            print("  Done.")
            print("")
            print("  Beginning adapter trimming...")
            trimmed_output_dir = os.path.join(trimmed_dir, ID)
            if not os.path.isdir(trimmed_output_dir):
                os.mkdir(trimmed_output_dir)
            print("  Output dir:", trimmed_output_dir)
            cmd = "porechop -i %s -b %s" % (os.path.join(fastq_dir, file), trimmed_output_dir)
            print("  ", cmd)
            os.system(cmd)
            print("  Done.")
            print("")
            print("  Mapping to P.f.")
            barcode_files = [f for f in os.listdir(trimmed_output_dir) if ".fastq" in f]
            n_barcodes = len(barcode_files)
            print("  Number of barcodes discovered:", n_barcodes)
            if n_barcodes > 0:
                for j, barcode in enumerate(barcode_files):
                    print("    Mapping Barcode ", j)
                    print("    Name:", barcode)
                    barcode_fastq = os.path.join(trimmed_output_dir, barcode)
                    barcode_sam = os.path.join(target_dir, barcode.replace("fastq", "sam"))
                    cmd = 	"minimap2 -ax map-ont %s %s >> %s" % (pf_ref_path, 
                                                                  barcode_fastq,
                                                                  barcode_sam)
                    os.system(cmd)
                    print("    Done.")
                    print("")
                    print("")
            else:
                print("No barcodes discoverd.")
    else:
        print("No new files found, waiting...")
        time.sleep(wait_time)

Checking for new files...
Discovered 2 new files.
Beginning processing.
--------------------------------------------------------------------------------
File 0
  Name: FAK50713_a671f1021fbd9d88b93239249358a9bb33ca4ae5_16.fastq
  ID: FAK50713_a671f1021fbd9d88b93239249358a9bb33ca4ae5_16
  Copying to /fastq directory...
   cp available_data/FAK50713_a671f1021fbd9d88b93239249358a9bb33ca4ae5_16.fastq data/ont/uk/2019-03-28/fastq
  Done.

  Beginning adapter trimming...
  Output dir: data/ont/uk/2019-03-28/fastq_trimmed/FAK50713_a671f1021fbd9d88b93239249358a9bb33ca4ae5_16
   porechop -i data/ont/uk/2019-03-28/fastq/FAK50713_a671f1021fbd9d88b93239249358a9bb33ca4ae5_16.fastq -b data/ont/uk/2019-03-28/fastq_trimmed/FAK50713_a671f1021fbd9d88b93239249358a9bb33ca4ae5_16
  Done.

  Mapping to P.f.
  Number of barcodes discovered: 13
    Mapping Barcode  0
    Name: BC09.fastq
    Done.


    Mapping Barcode  1
    Name: BC10.fastq
    Done.


    Mapping Barcode  2
    Name: BC12.fastq
    Done.




    Done.


    Mapping Barcode  1
    Name: BC10.fastq
    Done.


    Mapping Barcode  2
    Name: BC12.fastq
    Done.


    Mapping Barcode  3
    Name: BC08.fastq
    Done.


    Mapping Barcode  4
    Name: BC11.fastq
    Done.


    Mapping Barcode  5
    Name: BC04.fastq
    Done.


    Mapping Barcode  6
    Name: BC06.fastq
    Done.


    Mapping Barcode  7
    Name: BC02.fastq
    Done.


    Mapping Barcode  8
    Name: BC07.fastq
    Done.


    Mapping Barcode  9
    Name: BC05.fastq
    Done.


    Mapping Barcode  10
    Name: BC01.fastq
    Done.


    Mapping Barcode  11
    Name: none.fastq
    Done.


    Mapping Barcode  12
    Name: BC03.fastq
    Done.


File 1
  Name: FAK50713_a671f1021fbd9d88b93239249358a9bb33ca4ae5_12.fastq
  ID: FAK50713_a671f1021fbd9d88b93239249358a9bb33ca4ae5_12
  Copying to /fastq directory...
   cp available_data/FAK50713_a671f1021fbd9d88b93239249358a9bb33ca4ae5_12.fastq data/ont/uk/2019-03-28/fastq
  Done.

  Beginning adapter trimming.

    Done.


    Mapping Barcode  10
    Name: BC01.fastq
    Done.


    Mapping Barcode  11
    Name: none.fastq
    Done.


    Mapping Barcode  12
    Name: BC03.fastq
    Done.


File 7
  Name: FAK50713_a671f1021fbd9d88b93239249358a9bb33ca4ae5_15.fastq
  ID: FAK50713_a671f1021fbd9d88b93239249358a9bb33ca4ae5_15
  Copying to /fastq directory...
   cp available_data/FAK50713_a671f1021fbd9d88b93239249358a9bb33ca4ae5_15.fastq data/ont/uk/2019-03-28/fastq
  Done.

  Beginning adapter trimming...
  Output dir: data/ont/uk/2019-03-28/fastq_trimmed/FAK50713_a671f1021fbd9d88b93239249358a9bb33ca4ae5_15
   porechop -i data/ont/uk/2019-03-28/fastq/FAK50713_a671f1021fbd9d88b93239249358a9bb33ca4ae5_15.fastq -b data/ont/uk/2019-03-28/fastq_trimmed/FAK50713_a671f1021fbd9d88b93239249358a9bb33ca4ae5_15
  Done.

  Mapping to P.f.
  Number of barcodes discovered: 13
    Mapping Barcode  0
    Name: BC09.fastq
    Done.


    Mapping Barcode  1
    Name: BC10.fastq
    Done.


    Mapping Barcode  2
   