# Integration of basicsynbio and DNA Chisel

This notebook explores the integration of DNA Chisel into basicsynbio for linker design purposes.

## Aims and objectives for cell/s below

- [x] Try out DNA Chisel with easy to implement constraints.
- [x] Make a Bowtie 2 file for sequences that will be present in the basicsynbio PartLinkerCollections.
- [x] Run DNA Chisel to generate the backbone linker for the addgene collection.

In [8]:
import basicsynbio as bsb
from basicsynbio.main import DEFAULT_ANNOTATIONS
from basicsynbio.cam.main import seqrecord_hexdigest
from Bio import (
    Entrez,
    SeqIO
)
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.SeqUtils import MeltingTemp as mt
from dataclasses import dataclass
from dnachisel import (
    AvoidChanges,
    AvoidMatches,
    AvoidPattern,
    DnaOptimizationProblem,
    EnforceGCContent,
    EnforceMeltingTemperature,
    EnzymeSitePattern,
    random_dna_sequence,
)
from pathlib import Path
import pickle


In [3]:
# Design a base linker sequence: a seeded random core flanked by two fixed
# scars, then let DNA Chisel resolve the constraints declared below.
UPSTREAM_SCAR = "GGCTCG"
DOWNSTREAM_SCAR = "GTCC"

# Geometry of the region between the scars. Named here so the slices and the
# constraint locations below are all derived from a single set of values.
RANDOM_REGION_LENGTH = 45  # bases of random DNA placed between the two scars
OVERHANG_OFFSET = 12  # bases between the end of the upstream scar and the overhang start
OVERHANG_LENGTH = 21  # length of the region whose melting temperature is constrained
RESTRICTION_ENZYMES = ("EcoRI", "SpeI", "XbaI", "PstI", "BsaI", "BsmBI")

overhang_start = len(UPSTREAM_SCAR) + OVERHANG_OFFSET
overhang_indicies = (overhang_start, overhang_start + OVERHANG_LENGTH)

# The fixed seed keeps the starting sequence reproducible between runs.
linker_base_sequence = (
    UPSTREAM_SCAR
    + random_dna_sequence(RANDOM_REGION_LENGTH, seed=123)
    + DOWNSTREAM_SCAR
)
assert len(linker_base_sequence[overhang_indicies[0]:overhang_indicies[1]]) == OVERHANG_LENGTH

constraints = [
    EnforceMeltingTemperature(mini=50, maxi=65, location=overhang_indicies),
    # One AvoidPattern per restriction site, in the order listed above.
    *(AvoidPattern(EnzymeSitePattern(enzyme)) for enzyme in RESTRICTION_ENZYMES),
    AvoidPattern("TTGACA"), # E.coli sig70 -35 site
    AvoidPattern("TATAAT"), # E.coli sig70 -10 site
    AvoidPattern("TTGNNNNNNNNNNNNNNNNNNNNTATNNT"), # E.coli sig70 promoter weak consensus,
    AvoidPattern("TGGCACGNNNNTTGC"), # E.coli sig54 promoter consensus
    AvoidPattern("GAACTNNNNNNNNNNNNNNNNGTCNNA"), # E.coli sig24 promoter consensus
    AvoidPattern("AAAGA"), # RBS
    AvoidPattern("AGGAGG"), # Shine-Dalgarno sequence or 2xArg bad codon
    AvoidPattern("ATG"), # Start codon
    AvoidPattern("TTATNCACA"), # DnaA binding sites
    AvoidPattern("TGTGANNNNNNTCACANT"), # CAP binding sites
    AvoidPattern("NGCTNAGCN"), # IS10 insertion site
    AvoidPattern("GGGNNNNNCCC"), # IS231 insertion site
    AvoidPattern("(G{3,}[ATGC]{1,7}){3,}G{3,}"), # G-quadruplex
    AvoidPattern("GGGG"), # G-quadruplex
    # Freeze both scars. Locations are (start, end, strand) and are derived
    # from the scar constants so they stay correct if a scar is changed.
    AvoidChanges(location=(0, len(UPSTREAM_SCAR), 1)),
    AvoidChanges(
        location=(
            len(linker_base_sequence) - len(DOWNSTREAM_SCAR),
            len(linker_base_sequence),
            1,
        )
    ),
]
problem = DnaOptimizationProblem(
    sequence=linker_base_sequence,
    constraints=constraints
)
problem.resolve_constraints()
# Report the constraint status, then the sequence before and after optimisation.
print(problem.constraints_text_summary())
print(linker_base_sequence)
print(problem.sequence)

                                                                                    

===> SUCCESS - all constraints evaluations pass
✔PASS ┍ EnforceMeltingTemperature[18-39]
      │ Tm = 51.9
✔PASS ┍ AvoidPattern[0-55](pattern:EcoRI(GAATTC))
      │ Passed. Pattern not found !
✔PASS ┍ AvoidPattern[0-55](pattern:SpeI(ACTAGT))
      │ Passed. Pattern not found !
✔PASS ┍ AvoidPattern[0-55](pattern:XbaI(TCTAGA))
      │ Passed. Pattern not found !
✔PASS ┍ AvoidPattern[0-55](pattern:PstI(CTGCAG))
      │ Passed. Pattern not found !
✔PASS ┍ AvoidPattern[0-55](pattern:BsaI(GGTCTC))
      │ Passed. Pattern not found !
✔PASS ┍ AvoidPattern[0-55](pattern:BsmBI(CGTCTC))
      │ Passed. Pattern not found !
✔PASS ┍ AvoidPattern[0-55](pattern:TTGACA)
      │ Passed. Pattern not found !
✔PASS ┍ AvoidPattern[0-55](pattern:TATAAT)
      │ Passed. Pattern not found !
✔PASS ┍ AvoidPattern[0-55](pattern:TTGNNNNNNNNNNNNNNNNNNNNTATNNT)
      │ Passed. Pattern not found !
✔PASS ┍ AvoidPattern[0-55](pattern:TGGCACGNNNNTTGC)
      │ Passed. Pattern not found !
✔PASS ┍ AvoidPattern[0-55](patter



In [4]:
# Pool every part and linker from the selected basicsynbio collections
parts_linkers_collections = (
    bsb.BASIC_BIOLEGIO_LINKERS["v0.1"].values(),
    bsb.BASIC_PROMOTER_PARTS["v0.2"].values(),
    bsb.BASIC_SEVA_PARTS["v0.1"].values()
)
core_parts_linkers = [
    part_linker
    for part_linker_collection in parts_linkers_collections
    for part_linker in part_linker_collection
]

# Fetch the E.coli MG1655 genome sequence from NCBI (needs network access)
Entrez.email = "hainesm6@gmail.com"
with Entrez.efetch(db="Nucleotide", id="NZ_LR881938.1", rettype="fasta", retmode="text") as handle:
    mg1655 = SeqIO.read(handle, "fasta")
seqs = [*core_parts_linkers, mg1655]

# The "sequences" directory sits next to the current working directory;
# path_to_seqs is reused by later cells.
path_to_seqs = Path.cwd().parent / "sequences"
homology_fasta_path = (
    path_to_seqs / "alternative_formats" / "fasta" / "basic_homology_sequences.fa"
)
bsb.export_sequences_to_file(seqs, homology_fasta_path, "fasta")


In [5]:
# Re-run the optimisation with an extra constraint that avoids matches against
# a Bowtie index. The extended list is built as a NEW object: the original
# `constraints += [...]` mutated the list defined in the earlier cell, so every
# re-run of this cell stacked one more AvoidMatches constraint onto it.
# NOTE(review): the index is presumably built from basic_homology_sequences.fa
# exported in the previous cell - confirm how and when it is generated.
bowtie_index_path = (
    path_to_seqs
    / "alternative_formats"
    / "bowtie_indexes"
    / "2021-07-22_basic_homology"
    / "basic_homology"
)
homology_constraints = [
    *constraints,
    AvoidMatches(15, bowtie_index=bowtie_index_path, mismatches=1),
]
problem = DnaOptimizationProblem(
    sequence=linker_base_sequence,
    constraints=homology_constraints
)
problem.resolve_constraints()

# Both scars must come through the optimisation untouched.
assert problem.sequence.startswith(UPSTREAM_SCAR)
assert problem.sequence.endswith(DOWNSTREAM_SCAR)

print(problem.constraints_text_summary())
print(f"{'Initial sequence': <20} {linker_base_sequence}")
print(f"{'Optimised sequence': <20} {problem.sequence}")
print(f"{'Overhang sequence': <20} {problem.sequence[overhang_indicies[0]:overhang_indicies[1]]}")

                                                                                             

===> SUCCESS - all constraints evaluations pass
✔PASS ┍ EnforceMeltingTemperature[18-39]
      │ Tm = 50.2
✔PASS ┍ AvoidPattern[0-55](pattern:EcoRI(GAATTC))
      │ Passed. Pattern not found !
✔PASS ┍ AvoidPattern[0-55](pattern:SpeI(ACTAGT))
      │ Passed. Pattern not found !
✔PASS ┍ AvoidPattern[0-55](pattern:XbaI(TCTAGA))
      │ Passed. Pattern not found !
✔PASS ┍ AvoidPattern[0-55](pattern:PstI(CTGCAG))
      │ Passed. Pattern not found !
✔PASS ┍ AvoidPattern[0-55](pattern:BsaI(GGTCTC))
      │ Passed. Pattern not found !
✔PASS ┍ AvoidPattern[0-55](pattern:BsmBI(CGTCTC))
      │ Passed. Pattern not found !
✔PASS ┍ AvoidPattern[0-55](pattern:TTGACA)
      │ Passed. Pattern not found !
✔PASS ┍ AvoidPattern[0-55](pattern:TATAAT)
      │ Passed. Pattern not found !
✔PASS ┍ AvoidPattern[0-55](pattern:TTGNNNNNNNNNNNNNNNNNNNNTATNNT)
      │ Passed. Pattern not found !
✔PASS ┍ AvoidPattern[0-55](pattern:TGGCACGNNNNTTGC)
      │ Passed. Pattern not found !
✔PASS ┍ AvoidPattern[0-55](patter



## Aims and objectives for the cell/s below

- Evaluate the generated linker sequence:
  - [x] What is the melting temperature of the overlap region?
- [x] Generate and export long and adapter linker oligonucleotides for both prefix and suffix linkers.
- [ ] Pickle SEVA-BB1 BasicLinker enabling access by other modules.


In [6]:
# Nearest-neighbour Tm estimate for the optimised overlap region, using the
# salt correction method and ion concentrations passed below.
overlap_sequence = problem.sequence[overhang_indicies[0]:overhang_indicies[1]]
overlap_tm = mt.Tm_NN(overlap_sequence, saltcorr=1, Na=0, K=50, Mg=10, Tris=20)
print(f"Estimated melting temperature of the overlap region is {overlap_tm}")


Estimated melting temperature of the overlap region is 58.96818949127026


In [9]:
# Build the SEVA-BB1 backbone linker from the optimised sequence. The overhang
# bounds are derived from the scar length declared on BasicLinker itself.
bb_overhang_start = len(bsb.BasicLinker.UPSTREAM_SCAR) + 12
bb_linker = bsb.BasicLinker(
    seq=Seq(problem.sequence),
    id="foobar",  # placeholder; overwritten with the hexdigest just below
    name="SEVA-BB1",
    description="BASIC backbone linker for assembling SEVA AbR casettes and SEVA Oris.",
    overhang_indicies=(bb_overhang_start, bb_overhang_start + 21),
)
bb_linker.id = seqrecord_hexdigest(bb_linker)

# Copy of the default annotations with a linear topology (not used further in this cell)
LINEAR_ANNOTATIONS = DEFAULT_ANNOTATIONS.copy()
LINEAR_ANNOTATIONS["topology"] = "linear"

# Write the linker oligos as a tab-separated file, then pickle the linker object.
alternative_formats_dir = path_to_seqs / "alternative_formats"
linker_oligo_records = bb_linker.linker_oligos.all_oligo_seqrecs()
SeqIO.write(
    linker_oligo_records,
    alternative_formats_dir / "tsv" / "SEVA-BB1-LINKER.tsv",
    "tab",
)
with open(alternative_formats_dir / "pickles" / "SEVA-BB1", "wb") as pickle_handle:
    pickle.dump(bb_linker, pickle_handle)