In [None]:
import re
from pydna.utils import rc
from pydna.dseq import Dseq
from pydna.dseqrecord import Dseqrecord
from abc import ABC, abstractmethod

class USER2():
    """Find USER-style motifs (five A/C/G/T bases next to a U) in a sequence.

    The compiled pattern has two alternatives, both wrapped in look-aheads so
    that overlapping sites are all reported:

    * group ``watson``: ``[ACGT]{5}U``
    * group ``crick``:  ``U[ACGT]{5}``
    """

    # Length of the motif. Needed to know how many bases must be appended to a
    # circular sequence so that sites spanning the origin can be matched.
    size = 6

    def __init__(self):
        self.compsite = re.compile(
            "(?=(?P<watson>[ACGT]{5}U))|(?=(?P<crick>U[ACGT]{5}))", re.UNICODE
        )

    def search(self, dna: "Dseq", linear=True):
        """
        Return the start positions of all motif occurrences.

        Parameters
        ----------
        dna : Dseq
            Sequence to search. It is converted with ``str()`` and upper-cased,
            so a lowercase ``u`` is matched as well.
        linear : bool
            If True, only the sequence itself is searched.
            If False, the sequence is treated as circular: its first
            ``size - 1`` bases are appended so that motifs spanning the
            origin are found too.

        Returns
        -------
        list
            0-based start indices of the matches (either alternative), in
            increasing order. All indices are smaller than ``len(dna)``.
        """
        dna = str(dna).upper()
        if not linear:
            # A motif starting at the last base needs size - 1 more bases from
            # the beginning of the sequence. Slicing from index 1 would skip
            # the first base and fabricate sites that do not exist.
            dna = dna + dna[: self.size - 1]
        # Each match is a zero-width look-ahead, so the match start is the
        # start of whichever named group matched.
        return [mobj.start() for mobj in self.compsite.finditer(dna)]


class CGAAfinder():
    """Find the tetranucleotide ``CGAA`` and its reverse complement ``TTCG``.

    The compiled pattern has two look-ahead alternatives (groups ``watson``
    for ``CGAA`` and ``crick`` for ``TTCG``), so overlapping occurrences are
    all reported.
    """

    # Length of the motif. Needed to know how many bases must be appended to a
    # circular sequence so that motifs spanning the origin can be matched.
    size = 4

    def __init__(self):
        self.compsite = re.compile(
            "(?=(?P<watson>CGAA))|(?=(?P<crick>TTCG))", re.UNICODE
        )

    def search(self, dna: "Dseq", linear=True):
        """
        Return the start positions of all ``CGAA`` / ``TTCG`` occurrences.

        Parameters
        ----------
        dna : Dseq
            Sequence to search. It is converted with ``str()`` and upper-cased
            before matching.
        linear : bool
            If True, only the sequence itself is searched.
            If False, the sequence is treated as circular: its first
            ``size - 1`` bases are appended so that motifs spanning the
            origin are found too.

        Returns
        -------
        list
            0-based start indices of the matches (either orientation), in
            increasing order. All indices are smaller than ``len(dna)``.
        """
        dna = str(dna).upper()
        if not linear:
            # A motif starting at the last base needs size - 1 more bases from
            # the beginning of the sequence. Slicing from index 1 would skip
            # the first base and miss (or fabricate) origin-spanning motifs.
            dna = dna + dna[: self.size - 1]
        # Each match is a zero-width look-ahead, so the match start is the
        # start of whichever named group matched.
        return [mobj.start() for mobj in self.compsite.finditer(dna)]


class USER():
    """Enzyme-like object for USER digestion, searched via ``watson``/``crick``.

    The class attributes follow the names Biopython uses on restriction
    enzymes (``size``, ``fst5``, ``fst3``, ``site``, ``ovhg``).
    NOTE(review): presumably these are what ``Dseq.cut`` reads — confirm.
    """

    pattern = "([ACGT]{5}U)"
    size = 6
    fst5 = 7  # First 5' cut
    fst3 = None  # (there are no cuts in complementary strand)
    site = "NNNNNU"
    ovhg = fst5 - 1

    def __init__(self):
        # NOTE(review): compiled here but not used by search() below.
        self.compsite = re.compile(
            "(?=(?P<USER>[ACGT]{5}U))", re.UNICODE
        )

    def search(self, dna, linear=True):
        """Return cut positions for every site on ``dna.watson`` and ``dna.crick``.

        For each ``[ACGT]{5}U`` hit on ``dna.watson`` the reported value is the
        hit start plus ``fst5``. For each ``U[ACGT]{5}`` hit on ``dna.crick``
        it is the hit end plus ``fst5 - 1``. Every hit is also printed (anchor
        position and matched text). ``linear`` is accepted but not used.
        """
        # TODO: Deal with circular DNA
        # (strand attribute, motif, match method giving the anchor, extra shift)
        strand_rules = (
            ("watson", "[ACGT]{5}U", "start", 0),
            ("crick", "U[ACGT]{5}", "end", -1),
        )
        cut_positions = []
        for strand_name, motif, anchor_method, shift in strand_rules:
            for hit in re.finditer(motif, getattr(dna, strand_name)):
                anchor = getattr(hit, anchor_method)()
                print(anchor, hit.group())
                cut_positions.append(anchor + self.fst5 + shift)
        return cut_positions

    def __repr__(self):
        return f"USER({self.site})"

    def __str__(self):
        return f"USER({self.site})"

In [None]:
# Minimal test duplex: a single U on the watson strand, none on the crick strand.
target = Dseq("AAAAAUACGA", crick="TCGTATTTTT", ovhg=0)
# Expect one cut position (watson hit at index 0 plus fst5).
USER().search(target)


0 AAAAAU


[7]

In [None]:
# Digest the test duplex with the USER class defined above. The debug line in
# the recorded output ("0 AAAAAU") is printed by USER.search during the cut.
target.cut(USER())

0 AAAAAU


(Dseq(-6)
 AAAAAU,
 Dseq(-10)
       ACGA
 TTTTTATGCT)

In [None]:
# Second test duplex with one U on each strand (index 5 on watson and on crick).
target2 = Dseq("AAAAAUACGAACGATC", crick="GATCGUTCGTATTTTT", ovhg=0)
# Expect two cut positions, the watson hit first and then the crick hit.
USER().search(target2)

0 AAAAAU
11 UTCGTA


[7, 17]

In [None]:
# Digest the two-U duplex; the recorded output shows three fragments.
target2.cut(USER())

0 AAAAAU
11 UTCGTA


(Dseq(-6)
 AAAAAU,
 Dseq(-16)
       ACGAACGATC
 TTTTTATGCT,
 Dseq(-6)
       
 UGCTAG)

In [None]:
from Bio.Restriction import SalI
from Bio.Seq import Seq
from pydna.dseq import Dseq

# Dump the class attributes Biopython stores on a restriction enzyme (SalI).
# NOTE(review): presumably used as a reference for which attributes (size,
# fst5, fst3, ovhg, site, compsite, ...) an enzyme-like class needs — confirm.
#SalI.search(Seq("ACGATGCTAGGTCGACGTGAGCATCAGCTGAC"))
print(SalI.__dict__)



{'charac': (1, -1, None, None, 'GTCGAC'), 'compsite': re.compile('(?=(?P<SalI>GTCGAC))'), 'dna': None, 'freq': 4096.0, 'fst3': -1, 'fst5': 1, 'id': 1588, 'inact_temp': 65, 'opt_temp': 37, 'ovhg': -4, 'ovhgseq': 'TCGA', 'results': None, 'scd3': None, 'scd5': None, 'site': 'GTCGAC', 'size': 6, 'substrat': 'DNA', 'suppl': ('B', 'C', 'I', 'J', 'K', 'M', 'N', 'O', 'Q', 'R', 'V', 'X'), 'uri': 'https://identifiers.org/rebase:1588', '__module__': 'Bio.Restriction.Restriction', '__doc__': None}


In [None]:
# Build a staggered duplex by hand from its two-line drawing. The recorded repr
# is Dseq(-76) with a single-stranded stretch at each end.
# NOTE(review): presumably the expected product of the USER digestions tried in
# the following cells — confirm.
result = Dseq.from_representation(
        """\
          GTCGACTTAGATCTCACAGGCTTTTTTCAAGaCGGCCTTGAATTCAGTCATTTGGATCCGGCCGATCTTT
    TTGCTACAGCTGAATCTAGAGTGTCCGAAAAAAGTTCTGCCGGAACTTAAGTCAGTAAACCTAGGCCGGC
    """
    )
result

Dseq(-76)
      GTCG..GCCGATCTTT
TTGCTACAGC..CGGC      

In [None]:
from pydna.user import USER as us

# Blunt duplex with one uracil (written as lowercase u) near the 5' end of each
# strand: "AACGAu" on the top line and "uAGAAA" at the right end of the bottom line.
a = Dseq.from_representation(
        """\
    AACGAuGTCGACTTAGATCTCACAGGCTTTTTTCAAGaCGGCCTTGAATTCAGTCATTTGGATCCGGCCGATCTTT
    TTGCTACAGCTGAATCTAGAGTGTCCGAAAAAAGTTCTGCCGGAACTTAAGTCAGTAAACCTAGGCCGGCuAGAAA
    """
    )

#a.get_cutsites(us())
# Cut with the library's USER class and keep the second fragment.
# NOTE(review): the recorded result is Dseq(-75), one base shorter than the
# hand-built Dseq(-76) in the earlier cell (right end "GATCTT" vs "GATCTTT") —
# looks like an off-by-one on the crick-side cut; confirm.
a.cut(us())[1]

7 AACGAU
76 AAAGAU rev


Dseq(-75)
      GTCG..GGCCGATCTT
TTGCTACAGC..CCGG      

In [None]:
from pydna.user import ssUSER

# Cut sites reported by ssUSER for `a`; the recorded output lists a single site
# at (6, 6). NOTE(review): presumably ssUSER only looks at the top strand — the
# later cells search Dseq(a.crick) separately for the other strand; confirm.
a.get_cutsites(ssUSER())



[((6, 6), ssUSER(NNNNNU))]

### Approach with ssUSER

In [None]:
# Top strand: take the first ssUSER site and keep everything from the cut onwards.
top_site = a.get_cutsites(ssUSER())[0]
watson_cut = top_site[0][0]
watson_user = a[watson_cut:]

# Bottom strand: wrap the crick string in its own Dseq and repeat the same steps.
crick_strand = Dseq(a.crick)
bottom_site = crick_strand.get_cutsites(ssUSER())[0]
crick_cut = bottom_site[0][0]
crick_user = crick_strand[crick_cut:]

# Reassemble the two trimmed strands, offsetting by the watson cut position.
result = Dseq(str(watson_user), crick=str(crick_user), ovhg=watson_cut)
result

Dseq(-76)
      GTCG..GCCGATCTTT
TTGCTACAGC..CGGC      

### Approach following Bjorn's notes

In [None]:
from pydna.tm import tm_product

results = []
# Duplex with two uracils (lowercase u) on each strand, so that several
# watson-cut / crick-cut combinations are possible.
b = Dseq.from_representation(
    """\
    AACGAuGTCGACTTAGATCuCACAGGCTTTTTTCAAGaCGGCCTTGAATTCAGTCATTTGGATCCGGCCGATCTTT
    TTGCTACAGCTGAATCTAGaGTGTCCGAAAAAAGTTCuGCCGGAACTTAAGTCAGTAAACCTAGGCCGGCuAGAAA
    """
)
# The crick strand as its own Dseq, built once and reused for every combination.
b_crick = Dseq(b.crick)

# Iterate through all Us in the forward strand
for wsite in b.get_cutsites(ssUSER()):
    wcut = wsite[0][0]  # Get position
    # Slice `b` itself: `a` from the earlier cell is a different sequence
    # (it has no internal U), so using it here mixed two inputs.
    watson_user = b[wcut:]  # Get the sequence from the cutsite

    # Iterate through all Us in the reverse strand
    for csite in b_crick.get_cutsites(ssUSER()):
        ccut = csite[0][0]  # Get position
        crick_user = b_crick[ccut:]  # Get the sequence from the cutsite
        result = Dseq(str(watson_user), crick=str(crick_user), ovhg=wcut)
        print(repr(result))

        # Calculate the Tm of the double stranded portion
        ds_portion = b[wcut:-ccut]
        tm = tm_product(ds_portion)
        print(ds_portion, tm)
        results.append((result, tm))

# Most stable (highest Tm) product first.
stability_sorted = sorted(results, key=lambda x: x[1], reverse=True)
stability_sorted



Dseq(-76)
      GTCG..GCCGATCTTT
TTGCTACAGC..CGGC      
GTCGACTTAGATCTCACAGGCTTTTTTCAAGaCGGCCTTGAATTCAGTCATTTGGATCCGGCCG 69.2154020719779
Dseq(-76)
      GTCG..CAAGaCGG..CTTT
TTGCTACAGC..GTTC          
GTCGACTTAGATCTCACAGGCTTTTTTCAAG 55.32225691068758
Dseq(-76)
          CACA..GCCGATCTTT
TTGC..TAGaGTGT..CGGC      
CACAGGCTTTTTTCAAGaCGGCCTTGAATTCAGTCATTTGGATCCGGCCG 66.9029020719779
Dseq(-76)
          CACA..CAAGaCGG..CTTT
TTGC..TAGaGTGT..GTTC          
CACAGGCTTTTTTCAAG 37.079372660213195
[(Dseq(-76)
      GTCG..GCCGATCTTT
TTGCTACAGC..CGGC      , 69.2154020719779), (Dseq(-76)
          CACA..GCCGATCTTT
TTGC..TAGaGTGT..CGGC      , 66.9029020719779), (Dseq(-76)
      GTCG..CAAGaCGG..CTTT
TTGCTACAGC..GTTC          , 55.32225691068758), (Dseq(-76)
          CACA..CAAGaCGG..CTTT
TTGC..TAGaGTGT..GTTC          , 37.079372660213195)]


In [None]:
# Check of the ovhg sign convention: with ovhg=-4 the recorded repr draws the
# two-base crick strand under the last two bases of the six-base watson strand.
Dseq("TTTTTT", crick="AA", ovhg=-4)

Dseq(-6)
TTTTTT
    AA