In [107]:
import re
from pydna.utils import rc
from pydna.dseq import Dseq
from pydna.dseqrecord import Dseqrecord
from abc import ABC, abstractmethod

class USER2():
    """Regex-based finder for USER sites in a sequence string.

    Two six-character patterns are searched on the same string: five
    A/C/G/T bases followed by U (group ``watson``) and U followed by five
    A/C/G/T bases (group ``crick``).
    """

    # Length of the recognition site; needed to extend circular sequences.
    size = 6

    def __init__(self):
        # Each alternative is wrapped in a lookahead so that overlapping
        # sites are all reported by finditer().
        self.compsite = re.compile(
            "(?=(?P<watson>[ACGT]{5}U))|(?=(?P<crick>U[ACGT]{5}))", re.UNICODE
        )

    # The annotation is a string so the class can be defined without
    # importing Dseq.
    def search(self, dna: "Dseq", linear=True):
        """
        Search function for USER enzyme.

        Parameters
        ----------
        dna : Dseq
            Sequence to search for USER sites. It is converted with ``str()``
            and upper-cased before matching.
        linear : bool
            If True, the search is performed on the input sequence.
            If False, the sequence is treated as circular: its first
            ``size - 1`` characters are appended so that sites spanning the
            origin are found as well.

        Returns
        -------
        list
            Start positions (0-based) of the USER target sites, in the
            order in which they occur.
        """
        dna = str(dna).upper()
        if not linear:
            # A site of length `size` that starts before the origin needs at
            # most `size - 1` characters from the beginning of the sequence.
            dna = dna + dna[: self.size - 1]
        # The named groups live inside zero-width lookaheads, so the start
        # of the match is also the start of whichever group matched.
        return [mobj.start() for mobj in self.compsite.finditer(dna)]


class CGAAfinder():
    """Regex-based finder for the motif CGAA and its reverse complement TTCG.

    Both motifs are searched on the same string; CGAA is captured by the
    group ``watson`` and TTCG by the group ``crick``.
    """

    # Length of the motif; needed to extend circular sequences.
    size = 4

    def __init__(self):
        # Each alternative is wrapped in a lookahead so that overlapping
        # motifs are all reported by finditer().
        self.compsite = re.compile(
            "(?=(?P<watson>CGAA))|(?=(?P<crick>TTCG))", re.UNICODE
        )

    # The annotation is a string so the class can be defined without
    # importing Dseq.
    def search(self, dna: "Dseq", linear=True):
        """
        Search function for the CGAA / TTCG motif.

        Parameters
        ----------
        dna : Dseq
            Sequence to search. It is converted with ``str()`` and
            upper-cased before matching.
        linear : bool
            If True, the search is performed on the input sequence.
            If False, the sequence is treated as circular: its first
            ``size - 1`` characters are appended so that motifs spanning the
            origin are found as well.

        Returns
        -------
        list
            Start positions (0-based) of every CGAA or TTCG occurrence, in
            the order in which they occur.
        """
        dna = str(dna).upper()
        if not linear:
            # A motif of length `size` that starts before the origin needs at
            # most `size - 1` characters from the beginning of the sequence.
            dna = dna + dna[: self.size - 1]
        # The named groups live inside zero-width lookaheads, so the start
        # of the match is also the start of whichever group matched.
        return [mobj.start() for mobj in self.compsite.finditer(dna)]


class USER():
    """Description of the USER enzyme that can be handed to ``Dseq.cut``.

    The class attributes describe the site (five A/C/G/T bases followed by
    a U) and ``search`` reports cut positions for both strands of a
    double-stranded sequence.
    """

    pattern = "([ACGT]{5}U)"
    site = "NNNNNU"
    size = 6
    fst5 = 7  # First 5' cut
    fst3 = None  # (there are no cuts in complementary strand)
    ovhg = fst5 - 1

    def __init__(self):
        lookahead = "(?=(?P<USER>[ACGT]{5}U))"
        self.compsite = re.compile(lookahead, re.UNICODE)

    def search(self, dna, linear=True):
        """Collect cut positions for the USER sites found on ``dna``.

        ``dna`` has to expose its two strands as the strings ``dna.watson``
        and ``dna.crick``. Every hit is also printed (an offset followed by
        the matched text). ``linear`` is accepted but not used yet.

        Returns a list of integers: watson hits first, then crick hits.
        """
        # TODO: Deal with circular DNA
        positions = []

        # Watson strand: offset is the match start shifted by fst5.
        for hit in re.finditer("[ACGT]{5}U", dna.watson):
            begin = hit.start()
            print(begin, hit.group())
            positions.append(begin + self.fst5)

        # Crick strand: offset is the match end shifted by fst5 - 1.
        for hit in re.finditer("U[ACGT]{5}", dna.crick):
            stop = hit.end()
            print(stop, hit.group())
            positions.append(stop + self.fst5 - 1)

        return positions

    def __repr__(self):
        label = f"USER({self.site})"
        return label

    def __str__(self):
        label = f"USER({self.site})"
        return label

In [95]:
# Test duplex with a single U, located on the watson strand.
target = Dseq("AAAAAUACGA", crick="TCGTATTTTT", ovhg=0)
# Each reported position is the regex match start plus USER.fst5.
USER().search(target)


0 AAAAAU


[7]

In [108]:
# Cut the duplex with the custom USER object; the line printed before the
# fragments is the debug output of USER.search().
target.cut(USER())

0 AAAAAU


(Dseq(-6)
 AAAAAU,
 Dseq(-10)
       ACGA
 TTTTTATGCT)

In [109]:
# Second test duplex: one U on the watson strand and one on the crick strand.
target2 = Dseq("AAAAAUACGAACGATC", crick="GATCGUTCGTATTTTT", ovhg=0)
# The watson hit is reported first, then the crick hit.
USER().search(target2)

0 AAAAAU
11 UTCGTA


[7, 17]

In [110]:
# Cut the two-U duplex with the custom USER object.
# NOTE(review): the crick position returned by search() is larger than the
# sequence length — confirm that the resulting fragments are the intended ones.
target2.cut(USER())

0 AAAAAU
11 UTCGTA


(Dseq(-6)
 AAAAAU,
 Dseq(-16)
       ACGAACGATC
 TTTTTATGCT,
 Dseq(-6)
       
 UGCTAG)

In [30]:
from pydna.crispr import cas9

# Reference example: pydna's own cas9 cutter, for comparison with USER.
# The target contains the guide sequence directly followed by CGG.
sgrna = "TTTTTTTTTTTTTTTTTAAA"
crispr_target = "TTTTTTTTTTTTTTTTTTTTTAAACGGTTT"
cas_object = cas9(sgrna)
# search() is given a plain string here.
cas_object.search(crispr_target)



[22]

In [32]:
# Show the cut sites that Dseq derives from the cas9 object.
Dseq(crispr_target).get_cutsites(cas_object)

[((21, 0), cas9(TTT..AAA))]

In [26]:
# Cut the target with the cas9 object and display the resulting fragments.
Dseq(crispr_target).cut(cas_object)

(Dseq(-21)
 TTTTTTTTTTTTTTTTTTTTT
 AAAAAAAAAAAAAAAAAAAAA,
 Dseq(-9)
 AAACGGTTT
 TTTGCCAAA)

In [None]:
from Bio.Restriction import SalI
from Bio.Seq import Seq
from pydna.dseq import Dseq

# Disabled example call of a Biopython enzyme's search() on a Seq:
#SalI.search(Seq("ACGATGCTAGGTCGACGTGAGCATCAGCTGAC"))
# Dump the class attributes of SalI — presumably to see which attributes a
# custom cutter such as USER has to provide (TODO confirm).
print(SalI.__dict__)

