# Assay Binding analysis
Analyze how well my PCR primers and probes match the sequences I've found in my samples.

## Initialization, configuration and utility functions

In [7]:
from Bio.Seq import Seq
from Bio import SeqIO
from Bio import Align
import RCUtils

# Pick up edits to RCUtils without restarting the kernel. Autoreload mode 1
# reloads only the modules registered with %aimport.
%load_ext autoreload
%autoreload 1
%aimport RCUtils

# Load the primer/probe set shared by the rest of the notebook. With display=True
# the primer names are listed in the cell output as they are read.
primers = RCUtils.readPrimers("RespiCovPrimers.fasta", display=True)

# Local aligner used to inspect imperfect primer hits.
# Scoring: +1 per matching base, 0 per mismatch, -1 per gap position.
aligner = Align.PairwiseAligner(mode='local', match_score=1, mismatch_score=0, gap_score=-1)

def printSeqBinding(path, format="fastq"):
    """Print how each primer binds to the single sequence stored in a file.

    Reads one record from `path`, finds primer hits with
    RCUtils.computePrimerHits (overlapping hits allowed) and prints one summary
    line per hit, ordered by primer id. For every imperfect hit (match ratio
    below 1) the primer is locally re-aligned against the record; any primer
    bases left outside the alignment are reported, followed by the alignment.

    "first"/"start" and "last"/"end" in the printed messages refer to the
    orientation of the target sequence, i.e. the left and right side of the
    printed alignment.

    Args:
        path: Path of the sequence file to read.
        format: Bio.SeqIO format name of the file (default "fastq").

    Raises:
        ValueError: Raised by SeqIO.read when the file does not contain
            exactly one record.
    """
    # TODO: Try to print a semi-global alignment. Can use global with end_gap_score=0
    record = SeqIO.read(path, format)
    hits = RCUtils.computePrimerHits(record, primers, allowOverlaps=True)
    for hit in sorted(hits, key=lambda h: h.primer.id):
        primerLen = len(hit.primer.seq)
        print("%s len=%d match=%d%% [%d:%d]" % (hit.primer.description, primerLen, 100*hit.mr, hit.start, hit.end))
        if hit.mr >= 1:
            # Perfect match: nothing further to explain.
            continue

        a = aligner.align(record.seq, hit.primer.seq, strand="-" if hit.rev else "+")[0]
        targetStart, targetEnd = a.coordinates[0][0], a.coordinates[0][-1]
        queryStart, queryEnd = a.coordinates[1][0], a.coordinates[1][-1]

        # Count the primer bases left outside the local alignment on each side.
        # For reverse-strand alignments Biopython reports query coordinates on
        # the forward strand, so they run from high to low; measuring from the
        # wrong end would report (nearly) the whole primer as unaligned.
        if queryStart <= queryEnd:
            unalignedBefore = queryStart
            unalignedAfter = primerLen - queryEnd
        else:
            unalignedBefore = primerLen - queryStart
            unalignedAfter = queryEnd

        if unalignedBefore > 0:
            if targetStart == 0:
                # Alignment begins at the first target base: the primer overhangs it.
                print("  Primer falls %d bases off the start of the sequence" % unalignedBefore)
            else:
                print("  Primer mismatch in first %d bases" % unalignedBefore)
        if unalignedAfter > 0:
            if targetEnd == len(record):
                # Alignment reaches the last target base: the primer overhangs it.
                print("  Primer falls %d bases off the end of the sequence" % unalignedAfter)
            else:
                print("  Primer mismatch in the last %d bases" % unalignedAfter)
        print(a)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Reading primers: RespiCovPrimers.fasta
  Bordetella pertussis 1
  Bordetella pertussis 2
  Chlamydophila pneumoniae 1
  Chlamydophila pneumoniae 2
  Epstein-Barr virus 1
  Epstein-Barr virus 2
  Hantaan virus 1
  Hantaan virus 2
  Hantaan virus 3
  Hantaan virus 4
  Hendra virus 5
  Hendra virus 6
  Herpes simplex virus 1
  Herpes simplex virus 2
  Human adenovirus 1
  Human adenovirus 2
  Human adenovirus 3
  Human adenovirus 4
  Human adenovirus 5
  Human adenovirus 6
  Human adenovirus 7
  Human adenovirus 8
  Human adenovirus 9
  Human adenovirus 10
  Human adenovirus 11
  Human adenovirus 12
  Human adenovirus 13
  Human adenovirus 14
  Human bocavirus 1
  Human bocavirus 2
  Human coronavirus 229E 1
  Human coronavirus 229E 2
  Human coronavirus HKU1 1
  Human coronavirus HKU1 2
  Human coronavirus NL63 1
  Human coronavirus NL63 2
  Human coronavirus OC43 1
  Human coronavirus OC43 2
  Human 

# SRC1 - SARS-CoV-2

In [8]:
# This sample is stored as FASTA, so override the default "fastq" format.
printSeqBinding("myseqs/SRC1-SC2.fasta", format="fasta")

SARS coronavirus 1/2 1 len=20 match=100% [15825:15845]
SARS coronavirus 1/2 2 len=22 match=100% [16177:16199]
SARS coronavirus 1/2 3 len=21 match=100% [16832:16853]
SARS coronavirus 1/2 4 len=22 match=100% [17073:17095]
SARS coronavirus 1/2 5 len=20 match=100% [29324:29344]
SARS coronavirus 1/2 6 len=20 match=100% [29667:29687]
