# DNA weaver

#### 
https://github.com/Edinburgh-Genome-Foundry/DnaWeaver/blob/master/examples/10kb_from_vendor_or_oligos.py

In [2]:
! pip install dnaweaver



In [21]:
import dnaweaver as dw
import time

cheap_dna_offer = dw.CommercialDnaOffer(
    name="CheapDNA.",
    sequence_constraints=[
        dw.NoPatternConstraint(enzyme="BsaI"),
        dw.SequenceLengthConstraint(max_length=4000),
        dw.GcContentConstraint(min_gc=0.3, max_gc=0.7)
    ],
    pricing=dw.PerBasepairPricing(0.10),
)

oligo_dna_offer = dw.CommercialDnaOffer(
    name="OliGoo",
    sequence_constraints=[
        dw.GcContentConstraint(min_gc=0.3, max_gc=0.7),
        dw.SequenceLengthConstraint(max_length=100),
    ],
    pricing=dw.PerBasepairPricing(0.07),
    memoize=True
)

oligo_assembly_station = dw.DnaAssemblyStation(
    name="Oligo Assembly Station",
    assembly_method=dw.OligoAssemblyMethod(
        overhang_selector=dw.TmSegmentSelector(
            min_size=15, max_size=25, min_tm=50, max_tm=70
        ),
        min_segment_length=40,
        max_segment_length=200,
        sequence_constraints=[dw.SequenceLengthConstraint(max_length=1500)],
        duration=8,
        cost=30,
    ),
    supplier=oligo_dna_offer,
    coarse_grain=20,
    a_star_factor="auto",
    memoize=True,
)

assembly_station = dw.DnaAssemblyStation(
    name="Gibson Assembly Station",
    assembly_method=dw.GibsonAssemblyMethod(
        overhang_selector=dw.TmSegmentSelector(min_tm=55, max_tm=70),
        min_segment_length=80,
        max_segment_length=120,
    ),
    supplier=[cheap_dna_offer, oligo_assembly_station],
    logger="bar",
    coarse_grain=100,
    fine_grain=10,
    a_star_factor="auto",
)
print("Looking for the best assembly plan...")
t0 = time.time()
sequence = "ATGACGACGCCGTGGTCGGGAGAGTTAATTGTGGATTTATTAAATCAGCGTCCTCGCGGACAAGTCGTGTTAGATGCTGCTAATGGGGTGGATTATGAGCTGCGCTCTCAAGAGACCGGACCAATTGACATCGACGCACACGGACCCGCTCGCTGCCTGACCGGAACGGGCTCCCGTACGACGCGTTGGATTCGCCAACATCAGCGCAATACTCGCGTTTTGGGCCTGTCGCATGaccctacagttgctgacgcgcgcgaattgttacgtttaggggctttggGCCTTTTACCTAAAAATGCCGAGCCAGCGGAAATCTTCCAAGCGTTAGATCAAGCTGCCGCCGACCACATTCACACGAATGACCTTGTGCGCGACGCACCCCGTACCCCCGCTCTTCCTGAAGAAGAGGAGGCAGAACGCGTCAACGCTTTGCCAGACCAGGGCGCGGCGCGCACGGGCGCTTTGGGAGCCCAAGGTTGGCCCCACCATACTCCTGACCAACCGGCGCGCTTAGGT"
quote = assembly_station.get_quote(sequence, with_assembly_plan=True)
print(quote.assembly_step_summary())
print("Finished in %.01d seconds" % (time.time() - t0))

Looking for the best assembly plan...


                                                                                                                                                                                       

Ordering plan (Gibson Assembly Station)::
  0-100: From CheapDNA. - price 11.10 - lead_time 0.0
  100-210: From Oligo Assembly Station - price 40.43 - lead_time 8.0
  210-320: From CheapDNA. - price 13.10 - lead_time 0.0
  320-430: From CheapDNA. - price 13.10 - lead_time 0.0
  430-519: From CheapDNA. - price 9.90 - lead_time 0.0
Price:87.63, total lead_time:8.0
Finished in 0 seconds




In [22]:
assembly_plan_report = quote.to_assembly_plan_report()
assembly_plan_report.write_full_report("report.zip")

  ax.figure.savefig(target.open("wb"), format="pdf", bbox_inches="tight")
  ax.figure.savefig(target.open("wb"), format="pdf", bbox_inches="tight")
  ax.figure.savefig(target.open("wb"), format="pdf", bbox_inches="tight")
  ax.figure.savefig(target.open("wb"), format="pdf", bbox_inches="tight")


In [27]:
len(assembly_plan_report.plan)

11

In [35]:
print(assembly_plan_report.plan.sequence)

ATGACGACGCCGTGGTCGGGAGAGTTAATTGTGGATTTATTAAATCAGCGTCCTCGCGGACAAGTCGTGTTAGATGCTGCTAATGGGGTGGATTATGAGCTGCGCTCTCAAGAGACCGGACCAATTGACATCGACGCACACGGACCCGCTCGCTGCCTGACCGGAACGGGCTCCCGTACGACGCGTTGGATTCGCCAACATCAGCGCAATACTCGCGTTTTGGGCCTGTCGCATGaccctacagttgctgacgcgcgcgaattgttacgtttaggggctttggGCCTTTTACCTAAAAATGCCGAGCCAGCGGAAATCTTCCAAGCGTTAGATCAAGCTGCCGCCGACCACATTCACACGAATGACCTTGTGCGCGACGCACCCCGTACCCCCGCTCTTCCTGAAGAAGAGGAGGCAGAACGCGTCAACGCTTTGCCAGACCAGGGCGCGGCGCGCACGGGCGCTTTGGGAGCCCAAGGTTGGCCCCACCATACTCCTGACCAACCGGCGCGCTTAGGT


In [38]:
for oligo in assembly_plan_report.plan.assembly_plan:
    if oligo.accepted:
        print(oligo.id, oligo.sequence)
        print(oligo.metadata)

S_00000 ATGACGACGCCGTGGTCGGGAGAGTTAATTGTGGATTTATTAAATCAGCGTCCTCGCGGACAAGTCGTGTTAGATGCTGCTAATGGGGTGGATTATGAGCTGCGCTCTCAA
{'via': ['Gibson Assembly Station comparator']}
S_00003 GATTATGAGCTGCGCTCTCAAGAGACCGGACCAATTGACATCGACGCACACGGACCCGCTCGCTGCCTGACCGGAACGGGCTCCCGTACGACGCGTTGGATTCGCCAACATCAGCGCAATACTCGCGTTTT
{'via': ['Gibson Assembly Station comparator', 'Gibson Assembly Station comparator']}
S_00004 TCAGCGCAATACTCGCGTTTTGGGCCTGTCGCATGaccctacagttgctgacgcgcgcgaattgttacgtttaggggctttggGCCTTTTACCTAAAAATGCCGAGCCAGCGGAAATCTTCCAAGCGTTAG
{'via': ['Gibson Assembly Station comparator']}
S_00005 GCGGAAATCTTCCAAGCGTTAGATCAAGCTGCCGCCGACCACATTCACACGAATGACCTTGTGCGCGACGCACCCCGTACCCCCGCTCTTCCTGAAGAAGAGGAGGCAGAACGCGTCAACGCTTTGCCAGA
{'via': ['Gibson Assembly Station comparator']}
S_00006 CGCGTCAACGCTTTGCCAGACCAGGGCGCGGCGCGCACGGGCGCTTTGGGAGCCCAAGGTTGGCCCCACCATACTCCTGACCAACCGGCGCGCTTAGGT
{'via': ['Gibson Assembly Station comparator']}


In [None]:
### Other installs

brew install glib
 brew install blast
brew install python pango libffi

In [16]:
!  pip install pdf_reports



In [14]:
! pip install dnachisel
! pip install 'dnachisel[reports]'

Collecting pdf-reports (from dnachisel[reports])
  Downloading pdf_reports-0.3.7-py3-none-any.whl.metadata (2.1 kB)
Collecting sequenticon (from dnachisel[reports])
  Downloading sequenticon-0.1.5-py3-none-any.whl.metadata (2.0 kB)
Collecting pypugjs (from pdf-reports->dnachisel[reports])
  Downloading pypugjs-5.11.0-py2.py3-none-any.whl.metadata (7.4 kB)
Collecting Markdown (from pdf-reports->dnachisel[reports])
  Using cached Markdown-3.7-py3-none-any.whl.metadata (7.0 kB)
Collecting backports.functools-lru-cache (from pdf-reports->dnachisel[reports])
  Downloading backports.functools_lru_cache-2.0.0-py2.py3-none-any.whl.metadata (3.5 kB)
Collecting pydenticon (from sequenticon->dnachisel[reports])
  Downloading pydenticon-0.3.1.tar.gz (26 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting snapgene-reader (from sequenticon->dnachisel[reports])
  Downloading snapgene_reader-0.1.21-py3-none-any.whl.metadata (1.6 kB)
Collecting xmltodict (from snapgene-reader->sequentic

In [8]:
from dnachisel import *

# DEFINE THE OPTIMIZATION PROBLEM
seq = 'MASPQLQMALDAFKTMGEKMAQAGNDVKALRAVMEEMSGFPSAGETKCTPVNAGGVPAEWISGPGAAD'
seq = reverse_translate(seq)
problem = DnaOptimizationProblem(
    sequence=seq,
    constraints=[
        AvoidPattern("BsaI_site"),
        EnforceGCContent(mini=0.3, maxi=0.7, window=50),
    ],
    objectives=[CodonOptimize(species='e_coli', location=(500, 1400))]
)


In [2]:
problem.optimize_with_report(target="dna_chisel_report.zip")


Solving constraints


constraint:   0%|                                                                                                        | 0/2 [00:00<?, ?it/s, now=AvoidPattern[0-10000](pat...]
location:   0%|                                                                                                                                  | 0/3 [00:00<?, ?it/s, now=None][A
location:   0%|                                                                                                                          | 0/3 [00:00<?, ?it/s, now=4574-4580(-)][A
                                                                                                                                                                                 [A
location:   0%|                                                                                                                          | 0/4 [00:00<?, ?it/s, now=4574-4580(-)][A
location:   0%|                                                                                   

Now optimizing the sequence


objective:   0%|                                                                                                         | 0/1 [00:00<?, ?it/s, now=MaximizeCAI[500-1400](e_c...]
location:   0%|                                                                                                                           | 0/200 [00:00<?, ?it/s, now=2043-2099][A
location:   0%|                                                                                                                             | 0/200 [00:00<?, ?it/s, now=500-503][A
                                                                                                                                                                                 [A
location:   0%|                                                                                                                             | 0/200 [00:00<?, ?it/s, now=500-503][A
                                                                                                  

Success! Generating report.




(True, 'Optimization successful.', None)

In [9]:
# SOLVE THE CONSTRAINTS, OPTIMIZE WITH RESPECT TO THE OBJECTIVE

problem.resolve_constraints()
problem.optimize()

# PRINT SUMMARIES TO CHECK THAT CONSTRAINTS PASS

print(problem.constraints_text_summary())
print(problem.objectives_text_summary())

# GET THE FINAL SEQUENCE (AS STRING OR ANNOTATED BIOPYTHON RECORDS)

final_sequence = problem.sequence  # string
final_record = problem.to_record(with_sequence_edits=True)

constraint:   0%|                                                                                                        | 0/2 [00:00<?, ?it/s, now=AvoidPattern[0-10000](pat...]
location:   0%|                                                                                                                                  | 0/7 [00:00<?, ?it/s, now=None][A
location:   0%|                                                                                                                          | 0/7 [00:00<?, ?it/s, now=1114-1120(+)][A
                                                                                                                                                                                 [A
location:   0%|                                                                                                                          | 0/1 [00:00<?, ?it/s, now=1114-1120(+)][A
location:   0%|                                                                                   

===> SUCCESS - all constraints evaluations pass
✔PASS ┍ AvoidPattern[0-10000](pattern:BsaI(GGTCTC))
      │ Passed. Pattern not found !
✔PASS ┍ EnforceGCContent[0-10000](mini:0.30, maxi:0.70, window:50)
      │ Passed !
✔PASS ┍ EnforceTranslation[500-1400(+)]
      │ Enforced by nucleotides restrictions


===> TOTAL OBJECTIVES SCORE:     -4.70
     -4.70 ┍ MaximizeCAI[500-1400](e_coli) 
           │ Codon opt. on window 500-1400 scored -4.70E+00






In [10]:
final_sequence

'GTACGATCCATCGCCCCTGTTAACATTACGGAATTCAACAGGTTCTCCGGGCCGAACAAAAATGCGGAATGAGGAATAGATAGGCTATGCACGGCCTCCTAGAGCTTCTTGGTTCTTCCTCCATCCTACCCTAGCTGTTGCACGTATAGACATCCGCGATAGTCGGCACGAGCTTACCTAGTGATGGACATTCCTCACAGCATACGCTAGAAATCAGACTCATTCATTACGTTCGATCACCTACTCTGATCGAAGTGAATGTGGGAGGAGGTCACGAATGCCCCCAATATCCTACCGCATAGAGATAGGTTGAGATACGAACATACCTGGCGAGTAATGCGATAGTTGAAGGCAGAGAGGACCGCACACCGCCGTCGCGGTGTGGGTGCTGAAACATCCGTCAGGGTGGACCTTCGGAAGAAACTTACGTTCTGCCTGTTGCGTCAGTCGTCGGCAAGTCGTTTAGTACATAGGGACACGGCAAGCCTACGCCTCTCCAGGATTTTGGCATGCATCGCGTGGGCCCGAAACGCATTACCACCGTGCTGTCTCGCAGCGCACCGTGCACTCTGGGCGAAAAACAGGCGCTGCTGACCAAATGGCGCTGCCAGCTGAGCCATGCGCGTCGCCATCCGCGTTTTAGCACCGGCAGCGGCCGTCGCAAATAACTGTATGGCCCGAAAGGCCGCCGCCAGCATAGCCAGGTGGGCACCGATTGCTGCTAAATTTAATTTCCGGTGTTTGCGCTGGATGTGATGGGCCTGCCGGCGTGGGAAAACAACGGCACCAGCAGCGTGTATAGCGTGCGCCTGCATGCGCTGTAACCGAGCCATACCGATAGCTTTGCGCGCCTGCGCATTGAACTGCAGGTGACCGGCGTGACCACCGGCTATGGCCGCTATGCGAGCACCAGCCTGCTGACCCTGACCTATCTGGGCGAAGCGATTGAAGTGGTGCTGCTGATGAAAGAAGGCAAAAGCATTGATGAACATTGCGCGA

In [4]:
! pip install PyGObject

Collecting PyGObject
  Downloading pygobject-3.50.0.tar.gz (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25lerror
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mpip subprocess to install build dependencies[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m [31m[79 lines of output][0m
  [31m   [0m Collecting meson-python>=0.12.1
  [31m   [0m   Downloading meson_python-0.17.1-py3-none-any.whl.metadata (4.1 kB)
  [31m   [0m Collecting pycairo>=1.16
  [31m   [0m   Downloading pycairo-1.27.0.tar.gz (661 kB)
  [31m   [0m [?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/661.5 kB[0m [31m?[0m eta [36m-:--:--[0m
  [31m   [0m [2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m661.5/661.5 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
  [31m   [0m [?25h  Inst

In [112]:
import dnaweaver as dw
import time
import dnachisel as dnachisel 
from sciutil import SciUtil
from Bio.Seq import Seq
from difflib import SequenceMatcher
import primer3

u = SciUtil()

from primer3 import calcHairpin, calcHomodimer

def check_secondary_structure(sequence):
    """
    Check secondary structures like hairpins and homodimers in a given primer sequence.
    
    Args:
        sequence (str): The DNA sequence of the primer to analyze.
        
    Returns:
        dict: Results for hairpin and homodimer properties.
    """
    # Check for hairpin structure
    hairpin_result = calcHairpin(sequence)
    hairpin_info = {
        "hairpin_found": hairpin_result.structure_found,
        "hairpin_tm": hairpin_result.tm,
        "hairpin_dg": hairpin_result.dg,
        "hairpin_dh": hairpin_result.dh,
        "hairpin_ds": hairpin_result.ds,
    }

    # Check for homodimer structure
    homodimer_result = calcHomodimer(sequence)
    homodimer_info = {
        "homodimer_found": homodimer_result.structure_found,
        "homodimer_tm": homodimer_result.tm,
        "homodimer_dg": homodimer_result.dg,
        "homodimer_dh": homodimer_result.dh,
        "homodimer_ds": homodimer_result.ds,
    }

    # Combine results
    return {"hairpin": hairpin_info, "homodimer": homodimer_info}

def build_oligos(seq_id: str, sequence: str, output_directory: str, min_gc=0.3, max_gc=0.7, min_tm=55, max_tm=70, min_segment_length=40, max_segment_length=100, max_length=1500):
    """ Use DNAweaver to build oligos """
    # Here we use a comercial supplier but don't actually care. 
    cheap_dna_offer = dw.CommercialDnaOffer(
        name="CheapDNA.",
        sequence_constraints=[
            dw.NoPatternConstraint(enzyme="BsaI"),
            dw.SequenceLengthConstraint(max_length=4000),
            dw.GcContentConstraint(min_gc=min_gc, max_gc=max_gc)
        ],
        pricing=dw.PerBasepairPricing(0.10),
    )

    oligo_dna_offer = dw.CommercialDnaOffer(
        name="OliGoo",
        sequence_constraints=[
            dw.GcContentConstraint(min_gc=min_gc, max_gc=max_gc),
            dw.SequenceLengthConstraint(max_length=4000),
        ],
        pricing=dw.PerBasepairPricing(0.07),
        memoize=True
    )

    oligo_assembly_station = dw.DnaAssemblyStation(
        name="Oligo Assembly Station",
        assembly_method=dw.OligoAssemblyMethod(
            overhang_selector=dw.TmSegmentSelector(
                min_size=15, max_size=25, min_tm=min_tm, max_tm=max_tm
            ),
            min_segment_length=min_segment_length,
            max_segment_length=max_segment_length,
            sequence_constraints=[dw.SequenceLengthConstraint(max_length=4000)],
            duration=8,
            cost=30,
        ),
        supplier=oligo_dna_offer,
        coarse_grain=20,
        a_star_factor="auto",
        memoize=True,
    )

    assembly_station = dw.DnaAssemblyStation(
        name="Gibson Assembly Station",
        assembly_method=dw.GibsonAssemblyMethod(
            overhang_selector=dw.TmSegmentSelector(min_tm=min_tm, max_tm=max_tm),
            min_segment_length=min_segment_length,
            max_segment_length=max_segment_length + 20, # add a bit of a buffer
        ),
        supplier=[cheap_dna_offer, oligo_assembly_station],
        logger="bar",
        coarse_grain=100,
        fine_grain=10,
        a_star_factor="auto",
    )
    
    print("Looking for the best assembly plan...")
    t0 = time.time()
    quote = assembly_station.get_quote(sequence, with_assembly_plan=True)
    assembly_plan_report = quote.to_assembly_plan_report()
    #assembly_plan_report.write_full_report(f"{output_directory}/oligo_assembly_plan_{seq_id}.zip")
    original_sequence = assembly_plan_report.plan.sequence
    # Then get the sequence 
    rows = []
    for oligo in assembly_plan_report.plan.assembly_plan:
        # If this was chosen then choose it
        if oligo.accepted:
            rows.append([oligo.id, oligo.sequence, original_sequence])
    return rows

def get_oligos(df, protein_column, id_column, output_directory, forward_primer: str, reverse_primer: str, min_overlap=10, min_gc=0.3, 
               max_gc=0.7, min_tm=55, max_tm=70, min_segment_length=40, max_segment_length=100, max_length=1500):
    """ Get the oligos for a dataframe """
    rows = []   
    for seq_id, protein_sequence in df[[id_column, protein_column]].values:
        # Add on the primers that the user has provided
        optimzed_sequence = codon_optimize(protein_sequence, min_gc, max_gc)
        if optimzed_sequence[:3] != "ATG":
            u.dp([f"Warning: {seq_id} does not start with a methionine. ", optimzed_sequence[:3]])
            if 'ATG' not in forward_primer:
                u.warn_p([f"Warning: {seq_id} does not start with a methionine. AND you don't have a methonine in your primer!!", forward_primer])
                print("We expect the primer to be in 5 to 3 prime direction.")
        # ALso check the end and or the reverse primer check for the three ones
        if optimzed_sequence[-3:] != "TAA" and optimzed_sequence[-3:] != "TGA" and optimzed_sequence[-3:] != "TAG":
            u.dp([f"Warning: {seq_id} does not end with a stop codon. ", optimzed_sequence[-3:]])
            if 'TAA' not in reverse_primer and "TGA" not in reverse_primer and "TAG" not in reverse_primer:
                u.warn_p([f"Warning: {seq_id} does not end with a stop codon. AND you don't have a stop codon in your primer!!", reverse_primer])
                print("We expect the primer to be in 5 to 3 prime direction.")
        print(optimzed_sequence)
        codon_optimized_sequence = forward_primer + optimzed_sequence + reverse_primer
        # Check now some simple things like that there is 
        oligos = build_oligos(seq_id, codon_optimized_sequence, output_directory, min_gc, max_gc, min_tm, max_tm, min_segment_length, max_segment_length, max_length)
        prev_oligo = None
        for i, oligo in enumerate(oligos):
            seq = oligo[1]
            # CHeck that there is an overlap with the previous sequence and that it is not too short
            # Also make sure we swap the directions of the oligos so they automatically anneal
            # Also assert that the start is a methionine (and if not warn it... )
            primer_overlap = None
            primer_tm = None
            primer_len = None
            homodimer_tm = None
            hairpin_tm = None
            if prev_oligo:
                # Get the overlap with the previous sequence
                match = SequenceMatcher(None, prev_oligo, seq).find_longest_match()
                primer_overlap = prev_oligo[match.a:match.a + match.size]
                # Analyze the primer sequence
                results = check_secondary_structure(primer_overlap)
                homodimer_tm = results['homodimer']['homodimer_tm']
                hairpin_tm = results['hairpin']['hairpin_tm']
                primer_tm = primer3.bindings.calcTm(primer_overlap)
                primer_len = len(primer_overlap)
            prev_oligo = seq
            if i % 2 == 0:
                seq = str(Seq(seq).reverse_complement())
            oligo_tm = primer3.bindings.calcTm(seq)
            rows.append([seq_id, oligo[0], seq, len(seq), oligo_tm, primer_overlap, primer_tm, primer_len, homodimer_tm, hairpin_tm, oligo[2]])
    oligo_df = pd.DataFrame(rows, columns=["id", "oligo_id", "oligo_sequence", "oligo_length", "oligo_tm", "primer_overlap_with_previous", "overlap_tm_5prime", "overlap_length", 
                                            "overlap_homodimer_tm", "overlap_hairpin_tm", "original_sequence"])
    return oligo_df

def codon_optimize(protein_sequence: str, min_gc=0.3, max_gc=0.7):
    """ Codon optimize the protein sequence using DNA chisel: https://github.com/Edinburgh-Genome-Foundry/DnaChisel"""
    seq = dnachisel.reverse_translate(protein_sequence)
    problem = dnachisel.DnaOptimizationProblem(
        sequence=seq,
        constraints=[
            AvoidPattern("BsaI_site"),
            EnforceGCContent(mini=min_gc, maxi=max_gc, window=50),
        ],
        objectives=[CodonOptimize(species='e_coli', location=(0, len(seq)))]
    )
    # SOLVE THE CONSTRAINTS, OPTIMIZE WITH RESPECT TO THE OBJECTIVE
    problem.resolve_constraints()
    problem.optimize()

    # PRINT SUMMARIES TO CHECK THAT CONSTRAINTS PASS
    print(problem.constraints_text_summary())
    print(problem.objectives_text_summary())

    # GET THE FINAL SEQUENCE (AS STRING OR ANNOTATED BIOPYTHON RECORDS)
    final_sequence = problem.sequence  # string
    final_record = problem.to_record(with_sequence_edits=True)
    return final_sequence



In [115]:
import pandas as pd

df = pd.DataFrame()
df['id'] = ['Seq1', 'Seq2']
min_gc = 0.25
max_gc = 0.65
min_tm = 10
max_tm = 100
min_segment_length = 40
max_segment_length = 100
max_length = 500

df['seqs'] = ['MRTSLLVAALGLALAAALPGGAPLAQPDPEATMDRSLLQRQDLPYRFSAVDLDSVDGQRHYRLWLGRPLQAPPAAGYPVVWMLDGNAAVGALDESTLRRLADGDAPLLVAIGYRTPLRIDRAGRTFDYTPASPGQADQRDPLNGLPSGGADAFLDLLRDGMRPAVAAQAPLDTARQTLWGHSYGGLLVLHALFTRPGEFARYAAASPSLWWRDGAILGERAGLEQRLRGKRAELLLWRGSAEPASPRGSLKAEPGQAMARLVDDLRRVAGLTLDFQPLDGLGHGETLGASLRLLLARPAVERQR', 
             'MQLFDLPLDQLQTYKPEKTAPKDFSEFWKLSLEELAKVQAEPDLQPVDYPADGVKVYRLTYKSFGNARITGWYAVPDKEGPHPAIVKYHGYNASYDGEIHEMVNWALHGYATFGMLVRGQQSSEDTSISPHGHALGWMTKGILDKDTYYYRGVYLDAVRALEVISSFDEVDETRIGVTGGSQGGGLTIAAAALSDIPKAAVADYPYLSNFERAIDVALEQPYLEINSFFRRNGSPETEVQAMKTLSYFDIMNLADRVKVPVLMSIGLIDKVTPPSTVFAAYNHLETKKELKVYRYFGHEYIPAFQTEKLAFFKQHLKG']
oligo_df = get_oligos(df, 'seqs', 'id', '.', 'cccctctagaaataattttgtttaactttaagaaggagatatacat', 'CTCGAGCACCACCACCACCACCACTGAgatccggctgctaacaaag', 
                     min_gc=min_gc, max_gc=max_gc, min_tm=min_tm, max_tm=max_tm, min_segment_length=min_segment_length, max_segment_length=max_segment_length)
oligo_df

objective:   0%|                                                  | 0/1 [00:00<?, ?it/s, now=MaximizeCAI[0-912](e_coli...]
location:   0%|                                                                         | 0/238 [00:00<?, ?it/s, now=None][A
location:   0%|                                                                          | 0/238 [00:00<?, ?it/s, now=3-6][A
                                                                                                                          [A

===> SUCCESS - all constraints evaluations pass
✔PASS ┍ AvoidPattern[0-912](pattern:BsaI(GGTCTC))
      │ Passed. Pattern not found !
✔PASS ┍ EnforceGCContent[0-912](mini:0.25, maxi:0.65, window:50)
      │ Passed !


===> TOTAL OBJECTIVES SCORE:         0
✔        0 ┍ MaximizeCAI[0-912](e_coli) 
           │ Codon opt. on window 0-912 scored -0.00E+00


[94m--------------------------------------------------------------------------------[0m
[94m--------------------------------------------------------------------------------[0m
ATGCGCACCTATTTTTTTGTGGCGGCGTTTGGCTTTGCGTTTGCGGCGGATTTTCCGGTGGTGGATCCGTTTGCGCAGCCGGATCATGAAGCGACCATGGATCGCTATTTTTTTCAGCGCCAGGATTTTCCGTATCGCTTTTATGCGGTGGATTTTGATTATGTGGATGGCCAGCGCCATTATCGCTTTTGGTTTGGCCGCCCGTTTCAGGCGCATCCGGCGGATGGCTATCCGGATGATTGGATGTTTGATGGCAACGCGGCGGTGGGCGCGTTTGATGAATATACCTTTCGCCGCTTTGCGGATGGCGATGCGCCGTTTTTTGTGGCGATTGGCTATCGCACCCCGTTTCGCATTGATCGCGCGGGCCTGACCTTTGATTATACCCCGGCGTATCCGGGCCATGATGATCAGCGCGATCCGTTTAACGGCTTTCCGTATGGCGGCGCGGATGCGTTTTTTG

  return THERMO_ANALYSIS.calcTm(seq)
  return THERMO_ANALYSIS.calcHairpin(seq, output_structure).check_exc()
  return THERMO_ANALYSIS.calcHomodimer(seq, output_structure).check_exc()
constraint:   0%|                                                 | 0/2 [00:00<?, ?it/s, now=AvoidPattern[0-954](patte...]
location:   0%|                                                                           | 0/8 [00:00<?, ?it/s, now=None][A
location:   0%|                                                                           | 0/8 [00:00<?, ?it/s, now=0-99][A
objective:   0%|                                                  | 0/1 [00:00<?, ?it/s, now=MaximizeCAI[0-954](e_coli...][A
location:   0%|                                                                         | 0/186 [00:00<?, ?it/s, now=0-99][A
location:   0%|                                                                          | 0/186 [00:00<?, ?it/s, now=3-6][A
                                                                

===> SUCCESS - all constraints evaluations pass
✔PASS ┍ AvoidPattern[0-954](pattern:BsaI(GGTCTC))
      │ Passed. Pattern not found !
✔PASS ┍ EnforceGCContent[0-954](mini:0.25, maxi:0.65, window:50)
      │ Passed !


===> TOTAL OBJECTIVES SCORE:         0
✔        0 ┍ MaximizeCAI[0-954](e_coli) 
           │ Codon opt. on window 0-954 scored -0.00E+00


[94m--------------------------------------------------------------------------------[0m
[94m--------------------------------------------------------------------------------[0m
ATGCTGCTGTTTGATTTTCCGTTTGATCAGTTTCAGACCTATAAACCGGAAAAAGCGGCGCCGAAAGATTTTTGGGAATTTTGGAAATGCTATTTTGAAGAATTTGCGAAAGTGCAGTATGAACCGGATTTTCAGCCGGTGGATTATCCGGCGGATGGCGTGAAAGTGTATCGCTTTACCTATAAATATTTTGGCAACGCGCGCATTGCGGGCTGGTATGCGGTGCCGGATAAAGAAGTGCCGCATCCGGCGATTGTGAAATATCATGGCTATAACGCGTATTATGATGGCGAAATTCATGAAATGGTGAACTGGGCGTTTCATGGCTATGCGACCTTTGGCATGTTTGTGCGCGGCCAGCAGTATTATGAAGATACCTATATTTATCCGCATGGCCATGCGTTTGGCTGGATGACCAAAGGCATTTTTGATAAAGATACCGATCATTTTCGCGGCGTGTATT

  return THERMO_ANALYSIS.calcTm(seq)
  return THERMO_ANALYSIS.calcHairpin(seq, output_structure).check_exc()
  return THERMO_ANALYSIS.calcHomodimer(seq, output_structure).check_exc()


Unnamed: 0,id,oligo_id,oligo_sequence,oligo_length,oligo_tm,primer_overlap_with_previous,overlap_tm_5prime,overlap_length,overlap_homodimer_tm,overlap_hairpin_tm,original_sequence
0,Seq1,S_00000,CAAACGGATCCACCACCGGAAAATCCGCCGCAAACGCAAAGCCAAA...,119,74.669187,,,,,,cccctctagaaataattttgtttaactttaagaaggagatatacat...
1,Seq1,S_00001,CGGTGGTGGATCCGTTTGCGCAGCCGGATCATGAAGCGACCATGGA...,128,82.149159,CGGTGGTGGATCCGTTTG,56.343909,18.0,7.388261,41.578087,cccctctagaaataattttgtttaactttaagaaggagatatacat...
2,Seq1,S_00002,GCGAAAGGTATATTCATCAAACGCGCCCACCGCCGCGTTGCCATCA...,129,85.231564,GATGGCCAGCGCCATTAT,55.811457,18.0,17.815718,68.998303,cccctctagaaataattttgtttaactttaagaaggagatatacat...
3,Seq1,S_00003,TGATGAATATACCTTTCGCCGCTTTGCGGATGGCGATGCGCCGTTT...,128,85.057453,TGATGAATATACCTTTCGC,47.775584,19.0,-64.31656,0.0,cccctctagaaataattttgtttaactttaagaaggagatatacat...
4,Seq1,S_00004,AAACGGATCATGCGCATCCACCGCCGGGCGCATGCCATCGCGAAAA...,128,85.70374,CCCCGGCGTATCCGGGCC,65.660649,18.0,30.462576,75.699579,cccctctagaaataattttgtttaactttaagaaggagatatacat...
5,Seq1,S_00005,GATGCGCATGATCCGTTTGATAACGCGCTGCAGACCTTTTGGGGCC...,128,82.795446,GATGCGCATGATCCGTTT,54.144649,18.0,30.763247,0.0,cccctctagaaataattttgtttaactttaagaaggagatatacat...
6,Seq1,S_00006,ACGCCGGTTCCGCATAGCCGCGCCAAAAAAAAAATTCCGCGCGTTT...,128,85.057453,TGCGGCGGATTATCCGTA,56.451446,18.0,30.590793,59.177368,cccctctagaaataattttgtttaactttaagaaggagatatacat...
7,Seq1,S_00007,GCTATGCGGAACCGGCGTATCCGCGCGGCTATTTTAAAGCGGAACC...,128,83.764878,GCTATGCGGAACCGGCGT,61.126936,18.0,15.010537,40.18831,cccctctagaaataattttgtttaactttaagaaggagatatacat...
8,Seq1,S_00008,TTCCACCGCCGGGCGCGCAAAAAAAAAGCGAAAATACGCGCCAAAG...,78,81.2655,CAGCCGTTTGATGGCTTT,54.16683,18.0,10.864156,58.661393,cccctctagaaataattttgtttaactttaagaaggagatatacat...
9,Seq1,S_00009,GCGCGCCCGGCGGTGGAACGCCAGCGCCTCGAGCACCACCACCACC...,73,80.600609,GCGCGCCCGGCGGTGGAA,69.523553,18.0,31.975298,63.355769,cccctctagaaataattttgtttaactttaagaaggagatatacat...


In [111]:
import pandas as pd
import random
import numpy 
SEED = 128
random.seed(SEED)
numpy.random.seed(SEED)  

df = pd.DataFrame()
df['id'] = ['Seq1', 'Seq2']
min_gc = 0.25
max_gc = 0.75
min_tm = 10
max_tm = 100
min_segment_length = 40
max_segment_length = 100
max_length = 500

df['seqs'] = ['MRTSLLVAALGLALAAALPGGAPLAQPDPEATMDRSLLQRQDLPYRFSAVDLDSVDGQRHYRLWLGRPLQAPPAAGYPVVWMLDGNAAVGALDESTLRRLADGDAPLLVAIGYRTPLRIDRAGRTFDYTPASPGQADQRDPLNGLPSGGADAFLDLLRDGMRPAVAAQAPLDTARQTLWGHSYGGLLVLHALFTRPGEFARYAAASPSLWWRDGAILGERAGLEQRLRGKRAELLLWRGSAEPASPRGSLKAEPGQAMARLVDDLRRVAGLTLDFQPLDGLGHGETLGASLRLLLARPAVERQR', 
             'MQLFDLPLDQLQTYKPEKTAPKDFSEFWKLSLEELAKVQAEPDLQPVDYPADGVKVYRLTYKSFGNARITGWYAVPDKEGPHPAIVKYHGYNASYDGEIHEMVNWALHGYATFGMLVRGQQSSEDTSISPHGHALGWMTKGILDKDTYYYRGVYLDAVRALEVISSFDEVDETRIGVTGGSQGGGLTIAAAALSDIPKAAVADYPYLSNFERAIDVALEQPYLEINSFFRRNGSPETEVQAMKTLSYFDIMNLADRVKVPVLMSIGLIDKVTPPSTVFAAYNHLETKKELKVYRYFGHEYIPAFQTEKLAFFKQHLKG']
oligo_df = get_oligos(df, 'seqs', 'id', '.', 'cccctctagaaataattttgtttaactttaagaaggagatatacat', 'CTCGAGCACCACCACCACCACCACTGAgatccggctgctaacaaag', 
                     min_gc=min_gc, max_gc=max_gc, min_tm=min_tm, max_tm=max_tm, min_segment_length=min_segment_length, max_segment_length=max_segment_length)
oligo_df

objective:   0%|                                                  | 0/1 [00:00<?, ?it/s, now=MaximizeCAI[0-912](e_coli...]
location:   0%|                                                                         | 0/238 [00:00<?, ?it/s, now=None][A
location:   0%|                                                                          | 0/238 [00:00<?, ?it/s, now=3-6][A
                                                                                                                          [A

===> SUCCESS - all constraints evaluations pass
✔PASS ┍ AvoidPattern[0-912](pattern:BsaI(GGTCTC))
      │ Passed. Pattern not found !
✔PASS ┍ EnforceGCContent[0-912](mini:0.25, maxi:0.75, window:50)
      │ Passed !


===> TOTAL OBJECTIVES SCORE:         0
✔        0 ┍ MaximizeCAI[0-912](e_coli) 
           │ Codon opt. on window 0-912 scored -0.00E+00


[94m--------------------------------------------------------------------------------[0m
[94m--------------------------------------------------------------------------------[0m
[93m--------------------------------------------------------------------------------[0m
[93m--------------------------------------------------------------------------------[0m
We expect the primer to be in 5 to 3 prime direction.
ATGCGCACCTATTTTTTTGTGGCGGCGTTTGGCTTTGCGTTTGCGGCGGCGTTTCCGGGCGGCGCGCCGTTTGCGCAGCATGATCCGGAAGCGACCATGGATCGCTATTTTTTTCAGCGCCAGGATTTTCCGTATCGCTTTTATGCGGTGGATTTTGATTATGTGGATGGCCAGCGCCATTATCGCTTTTGGTTTGGCCGCCCGTTTCAGGCGCCGCCGGCGGCGGGCT

  return THERMO_ANALYSIS.calcTm(seq)
  return THERMO_ANALYSIS.calcHairpin(seq, output_structure).check_exc()
  return THERMO_ANALYSIS.calcHomodimer(seq, output_structure).check_exc()
constraint:   0%|                                                 | 0/2 [00:00<?, ?it/s, now=AvoidPattern[0-954](patte...]
location:   0%|                                                                           | 0/8 [00:00<?, ?it/s, now=None][A
location:   0%|                                                                           | 0/8 [00:00<?, ?it/s, now=0-99][A
objective:   0%|                                                  | 0/1 [00:00<?, ?it/s, now=MaximizeCAI[0-954](e_coli...][A
location:   0%|                                                                         | 0/194 [00:00<?, ?it/s, now=0-99][A
location:   0%|                                                                          | 0/194 [00:00<?, ?it/s, now=3-6][A
                                                                

===> SUCCESS - all constraints evaluations pass
✔PASS ┍ AvoidPattern[0-954](pattern:BsaI(GGTCTC))
      │ Passed. Pattern not found !
✔PASS ┍ EnforceGCContent[0-954](mini:0.25, maxi:0.75, window:50)
      │ Passed !


===> TOTAL OBJECTIVES SCORE:         0
✔        0 ┍ MaximizeCAI[0-954](e_coli) 
           │ Codon opt. on window 0-954 scored -0.00E+00


[94m--------------------------------------------------------------------------------[0m
[94m--------------------------------------------------------------------------------[0m
[93m--------------------------------------------------------------------------------[0m
[93m--------------------------------------------------------------------------------[0m
We expect the primer to be in 5 to 3 prime direction.
ATGCAGTTTTTTGATTTTCCGTTTGATCCGCTGCAGACCTATAAACCGGAAGAAACCGCGCCGAAAGAATTTTATGAATTTGGCAAATGGTATTGGGAAGAATTTGCGAAAGTGCAGGCGGAACCGGATTTTCAGCCGGTGGATTATCCGGCGGATGGCGTGAAAGTGTATCGCGTGACCTATAAATATTTTGGCAACGCGCGCACCACCGGCTGGTATGCGGTGCCGG

  return THERMO_ANALYSIS.calcTm(seq)
  return THERMO_ANALYSIS.calcHairpin(seq, output_structure).check_exc()
  return THERMO_ANALYSIS.calcHomodimer(seq, output_structure).check_exc()


Unnamed: 0,id,oligo_id,oligo_sequence,oligo_length,oligo_tm,primer_overlap_with_previous,overlap_tm_5prime,overlap_length,overlap_homodimer_tm,overlap_hairpin_tm,original_sequence
0,Seq1,S_00000,CAAACGGCGCGCCGCCCGGAAACGCCGCCGCAAACGCAAAGCCAAA...,119,76.754686,,,,,,cccctctagaaataattttgtttaactttaagaaggagatatacat...
1,Seq1,S_00001,CGGGCGGCGCGCCGTTTGCGCAGCATGATCCGGAAGCGACCATGGA...,128,83.441734,CGGGCGGCGCGCCGTTTG,68.658228,18.0,56.01162,68.396815,cccctctagaaataattttgtttaactttaagaaggagatatacat...
2,Seq1,S_00002,GCGAAAGGTATATTCATCAAACGCGCCCACCGCCGCGTTGCCATCA...,129,86.834758,GATGGCCAGCGCCATTAT,55.811457,18.0,17.815718,68.998303,cccctctagaaataattttgtttaactttaagaaggagatatacat...
3,Seq1,S_00003,TGATGAATATACCTTTCGCCGCTTTGCGGATGGCGATGCGCCGTTT...,128,85.380596,TGATGAATATACCTTTCGC,47.775584,19.0,-64.31656,0.0,cccctctagaaataattttgtttaactttaagaaggagatatacat...
4,Seq1,S_00004,AAACGGCGCCTGCGCCGCCACCGCCGGGCGCATGCCATCGCGAAAA...,128,88.28889,CCCCGGCGTATCCGGGCC,65.660649,18.0,30.462576,75.699579,cccctctagaaataattttgtttaactttaagaaggagatatacat...
5,Seq1,S_00005,GCGGCGCAGGCGCCGTTTGATAACGCGCTGCAGACCTTTTGGGGCC...,128,85.057453,GCGGCGCAGGCGCCGTTT,68.094164,18.0,53.361828,83.411766,cccctctagaaataattttgtttaactttaagaaggagatatacat...
6,Seq1,S_00006,ACGCCGGTTCCGCATAGCCGCGCCAAAAAAAAAATTCCGCGCGTTT...,128,86.026884,TGCGGCGGCGTATCCGTA,62.137239,18.0,32.349272,58.411563,cccctctagaaataattttgtttaactttaagaaggagatatacat...
7,Seq1,S_00007,GCTATGCGGAACCGGCGTATCCGCGCGGCTATTTTAAAGCGGAACC...,128,85.057453,GCTATGCGGAACCGGCGT,61.126936,18.0,15.010537,40.18831,cccctctagaaataattttgtttaactttaagaaggagatatacat...
8,Seq1,S_00008,TTCCACCGCCGGGCGCGCAAAAAAAAAGCGAAAATACGCGCCAAAG...,78,81.2655,CAGCCGTTTGATGGCTTT,54.16683,18.0,10.864156,58.661393,cccctctagaaataattttgtttaactttaagaaggagatatacat...
9,Seq1,S_00009,GCGCGCCCGGCGGTGGAACGCCAGCGCCTCGAGCACCACCACCACC...,73,80.600609,GCGCGCCCGGCGGTGGAA,69.523553,18.0,31.975298,63.355769,cccctctagaaataattttgtttaactttaagaaggagatatacat...
