# Trim Primers

Given a single-sequence FASTQ file and a primer name prefix, writes a new `<file>.trimmed` FASTQ file containing only the sequence between the two primer hits. The original file is left unchanged.

## Setup

In [1]:
# Enable IPython's autoreload extension in mode 1, which reloads only modules
# registered with %aimport, so edits to RCUtils are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 1
%aimport RCUtils

In [20]:
from Bio import Align
from Bio import SeqIO
import matplotlib_inline.backend_inline
import os
import pandas as pd
import RCUtils
import random
import glob

# Load the primer set used by getPrimers() below from qPCRPrimers.fasta.
# NOTE(review): display=True presumably prints the per-primer summary seen in
# the cell output -- confirm against RCUtils.readPrimers.
qPCRprimers = RCUtils.readPrimers("qPCRPrimers.fasta", display=True)

def getPrimers(name):
    """Return the loaded qPCR primers whose name starts with ``name``.

    The result is a list rather than a lazy ``filter`` object so that it can
    be iterated more than once, e.g. when the same primer set is reused for
    several FASTQ files. A one-shot iterator would be silently empty on the
    second pass.

    Args:
        name: prefix matched against each primer's ``name`` attribute.

    Returns:
        A list of the matching entries of ``qPCRprimers`` in their original
        order (empty if nothing matches).
    """
    return [p for p in qPCRprimers if p.name.startswith(name)]

from pathlib import Path

def trimAllPrimers(pathGlob,primers):
    """Run trimPrimers on every file matching ``pathGlob``.

    Args:
        pathGlob: glob pattern selecting the FASTQ files to process.
        primers: iterable of primers handed to trimPrimers for each file.
    """
    # Materialize once: a one-shot iterable (such as a filter object) would be
    # exhausted by the first file, leaving every later file with no primers.
    primerList = list(primers)
    # glob.glob returns matches in arbitrary order; sort so that the printed
    # report is reproducible from run to run.
    for path in sorted(glob.glob(pathGlob)):
        trimPrimers(path, primerList)

def trimPrimers(fastQPath, primers):
    """Write the region between two primer hits of a FASTQ read to a new file.

    Reads the record in ``fastQPath``, locates primer hits with
    RCUtils.computePrimerHits, and prints a short report. When exactly two
    hits are found and the first ends no later than the second starts, the
    slice between them is written to ``<fastQPath>.trimmed``. The input file
    itself is not modified; on either error condition nothing is written.

    Args:
        fastQPath: path of a single-record FASTQ file (str or path-like).
        primers: primers to search for in the record.
    """
    record = SeqIO.read(fastQPath, "fastq")
    hits = RCUtils.computePrimerHits(record, primers)
    print()
    print(f"{fastQPath}, len={len(record.seq)}")
    for hit in hits:
        print(f" {hit.primer.name} {hit.start}-{hit.end} ({hit.mr*100:.0f}%)")

    if len(hits) != 2:
        print("  ERROR: didn't get 2 primer hits")
        return

    # The region to keep runs from the end of the first hit to the start of
    # the second; if the first hit ends past that point there is no such region.
    # NOTE(review): presumably computePrimerHits returns hits ordered by
    # position -- confirm in RCUtils.
    start = hits[0].end
    end = hits[1].start
    if start > end:
        print("  ERROR: hits in wrong order")
        return

    trimmed = record[start:end]
    print(f" trimmed to {start}-{end} len={len(trimmed.seq)}")
    # Format instead of "+" so pathlib.Path inputs work as well as str; for a
    # str input the resulting name is unchanged.
    SeqIO.write(trimmed, f"{fastQPath}.trimmed", "fastq")
    

# Trim two sample reads using every primer whose name starts with "ENTrc".
trimPrimers("myseqs/S28-RVA-23.fastq", getPrimers("ENTrc"))
trimPrimers("myseqs/S44-RVA-56.fastq", getPrimers("ENTrc"))

Reading primers: qPCRPrimers.fasta
  ENTng-f (2 variations)
  ENTng-r
  ENTng-p (8 variations)
  ENTrc-f1
  ENTrc-f2
  ENTrc-r
  HRVma-f
  HRVma-r
  HRVma-p
  HRVkaV-fo (2 variations)
  HRVkaV-fi
  HRVkaV-r (768 variations)
  HRVka5-f
  HRVka5-ro
  HRVka5-ri
Read 791 primers

myseqs/S28-RVA-23.fastq, len=348
  ERROR: didn't get 2 primer hits

myseqs/S44-RVA-56.fastq, len=350
  ERROR: didn't get 2 primer hits
