# BASIC promoter and CDS genbank files

## Aims and objectives

for relevant promoters and CDSs:
- [x] Generate FASTA sequences.
- [x] Upload to Benchling.
- [x] Make Benchling feature libraries and auto-annotate.
- [x] Export annotated genbank files.
- [x] Assemble each into BASIC_SEVA_18, remove color annotations and add meta-data.
- [x] Consolidate parts into a single genbank file in parts_linkers sub-package.
- [x] Generate dict for users.
- [x] Generate v0.2 BASIC_PROMOTER_PARTS gb file containing only the original 60 promoters.
- [ ] Generate v0.3 BASIC_PROMOTER_PARTS and v0.2 BASIC_CDS_PARTS which are not in a storage vector.


In [8]:
from Bio import SeqIO
from pathlib import Path

# Folder holding the miscellaneous BASIC sequence files, resolved relative to
# the parent of the current working directory.
DIR_MISC_BASIC = Path.cwd().parent.joinpath("sequences", "genbank_files", "misc_BASIC")
# Convert the initial promoter GenBank records to FASTA. The records are handed
# to SeqIO.write as an iterator; the final bare expression is left as-is so the
# cell still displays SeqIO.write's return value.
promoters = SeqIO.parse(DIR_MISC_BASIC / "initial_BASIC_promoters.gb", "genbank")
SeqIO.write(promoters, DIR_MISC_BASIC / "initial_BASIC_promoters.fasta", "fasta")

63

## Objectives for cell below

Complete promoters according to the overall aims/objectives.

In [11]:
import basicsynbio as bsb
from basicsynbio.utils import (
    only_label_feature,
    all_feature_values
)
import re


def filter_list_w_re(regular_expression, mylist: list):
    """Lazily yield the entries of ``mylist`` that match a regular expression.

    The pattern is applied with ``match``, so it is anchored at the start of
    each entry. An iterator (not a list) is returned, which lets callers grab
    the first hit with ``next``.
    """
    pattern = re.compile(regular_expression)
    return (entry for entry in mylist if pattern.match(entry))


def _first_feature_value(pattern, feature_values, element, part_id):
    """Return the first feature value matching ``pattern``.

    Raises:
        RuntimeError: if no value matches; the message names the missing
            ``element`` and the part it was expected in.
    """
    found = next(filter_list_w_re(pattern, feature_values), None)
    if found is None:
        raise RuntimeError(
            f"No {element} feature matching '{pattern}' was found in {part_id}."
        )
    return found


def assign_promoter_elements(promoters):
    """Assign promoter elements as attributes.

    For every part in ``promoters`` the part is passed through
    ``only_label_feature`` and then given four attributes:

    * ``all_feature_values`` -- the result of ``all_feature_values(part)``.
    * ``terminator`` -- first feature value matching ``Terminator.*``.
    * ``promoter`` -- first feature value matching ``J23.*``; if there is
      none, the id from the fixed list below that is present among the
      feature values.
    * ``riboj`` -- first feature value matching ``Ribo.*``.

    Yields:
        The processed part with the attributes above set.

    Raises:
        RuntimeError: if the terminator, promoter or RiboJ element cannot be
            identified. Without an explicit error, a missing terminator/RiboJ
            surfaced as "generator raised StopIteration" and a missing
            promoter left ``promoter.promoter`` unset until a later
            AttributeError.
    """
    # Promoter ids looked up by exact membership when no J23 value is present.
    known_promoter_ids = (
        "Phlf", "CymR", "TetR", "VanR", "LuxR", "CinR", "LacI", "AraC",
        "BetI", "Ttg", "SaITTC", "T7_100", "T7_52", "T7_50", "T7_25",
    )
    for promoter in promoters:
        promoter = only_label_feature(promoter)
        promoter.all_feature_values = all_feature_values(promoter)
        feature_values = promoter.all_feature_values
        promoter.terminator = _first_feature_value(
            "Terminator.*", feature_values, "terminator", promoter.id
        )
        j23_match = next(filter_list_w_re("J23.*", feature_values), None)
        if j23_match is not None:
            promoter.promoter = j23_match
        else:
            present_ids = [
                promoter_id
                for promoter_id in known_promoter_ids
                if promoter_id in feature_values
            ]
            if not present_ids:
                raise RuntimeError(
                    f"No promoter feature (J23.* or one of {known_promoter_ids}) "
                    f"was found in {promoter.id}."
                )
            # The original loop had no break, so the last listed id that is
            # present wins; keep that choice.
            promoter.promoter = present_ids[-1]
        promoter.riboj = _first_feature_value(
            "Ribo.*", feature_values, "RiboJ", promoter.id
        )
        yield promoter


def process_stored_promoters(promoters):
    """Yield each promoter as a part assembled into BASIC_SEVA_18.

    Each promoter is combined with the v0.1 LMP and LMS BioLegio linkers and
    the v0.1 "18" SEVA part in a ``BasicAssembly``; the assembly is then
    returned as a part whose name and description are built from the
    promoter's ``id``, ``terminator``, ``promoter`` and ``riboj`` attributes.
    """
    for promoter in promoters:
        lmp_linker = bsb.BASIC_BIOLEGIO_LINKERS["v0.1"]["LMP"]
        lms_linker = bsb.BASIC_BIOLEGIO_LINKERS["v0.1"]["LMS"]
        backbone = bsb.BASIC_SEVA_PARTS["v0.1"]["18"]
        assembly = bsb.BasicAssembly(
            promoter.id, lmp_linker, promoter, lms_linker, backbone
        )
        part_name = f"{promoter.id}_{promoter.terminator}_{promoter.promoter}_{promoter.riboj}"
        part_description = f"{promoter.id} stored in BASIC_SEVA_18. The BASIC insulated promoter {promoter.id}, contains a {promoter.promoter} promoter with an upstream {promoter.terminator} and a downstream {promoter.riboj}"
        yield assembly.return_part(
            id=promoter.id,
            name=part_name,
            description=part_description,
        )


def process_unstored_promoters(promoters):
    """Yield each promoter as a stand-alone part, not assembled into a vector.

    The promoter is converted with ``bsb.seqrec2part`` and the resulting part
    receives a name and description built from the promoter's ``id``,
    ``terminator``, ``promoter`` and ``riboj`` attributes.
    """
    for promoter in promoters:
        standalone = bsb.seqrec2part(promoter)
        part_name = f"{promoter.id}_{promoter.terminator}_{promoter.promoter}_{promoter.riboj}"
        standalone.name = part_name
        part_description = f"The BASIC insulated promoter {promoter.id}, contains a {promoter.promoter} promoter with an upstream {promoter.terminator} and a downstream {promoter.riboj}"
        standalone.description = part_description
        yield standalone


def bpromoter_num(seqrec):
    """Return the integer that follows the first three characters of ``seqrec.id``.

    Used as a sort key so that records are ordered by number rather than by
    the text of their id.
    """
    numeric_suffix = seqrec.id[3:]
    return int(numeric_suffix)


# Import the annotated promoters and attach their element attributes. The
# result is materialised as a list because it is consumed twice below.
annotated_promoters = bsb.import_parts(DIR_MISC_BASIC / "annotated_BASIC_promoters.gb", "genbank")
assigned_promoters = [*assign_promoter_elements(annotated_promoters)]

# Promoters assembled into the storage vector, ordered by promoter number.
stored_promoters = sorted(process_stored_promoters(assigned_promoters), key=bpromoter_num)
DIR_PARTS_LINKERS = Path.cwd().parent.joinpath("basicsynbio", "parts_linkers")
# Export left disabled, kept as a record of how the v01 file was written.
# bsb.export_sequences_to_file(
#     processed_promoters,
#     DIR_PARTS_LINKERS / "BASIC_promoter_collection_v01.gb"
# )

# The same promoters without a storage vector, ordered by promoter number.
unstored_promoters = sorted(process_unstored_promoters(assigned_promoters), key=bpromoter_num)

## Objectives for cell/s below

- Make gb file for `BASIC_PROMOTER_COLLECTION["v0.2"]`.
- Make gb file for `BASIC_PROMOTER_COLLECTION["v0.3"]`.

In [12]:
# v0.2: the stored promoters without their final three entries; the assertion
# checks that exactly 60 remain.
stored_promoters_v02 = stored_promoters[:-3]
assert len(stored_promoters_v02) == 60
# Export left disabled.
# NOTE(review): `processed_promoters_v02` is not defined in the cells shown
# here -- presumably `stored_promoters_v02`; confirm before re-enabling.
# bsb.export_sequences_to_file(
#     processed_promoters_v02,
#     DIR_PARTS_LINKERS / "BASIC_promoter_collection_v02.gb"
# )
# v0.3: the unstored promoters, trimmed the same way and written to file.
# NOTE(review): this rebinds `unstored_promoters`, so running this cell a
# second time trims three more entries and the assertion below fails.
unstored_promoters = unstored_promoters[:-3]
assert len(unstored_promoters) == 60
bsb.export_sequences_to_file(
    unstored_promoters,
    DIR_PARTS_LINKERS / "BASIC_promoter_collection_v03.gb"
)



## Objectives for cell below

Complete CDSs according to the overall aims and objectives.

In [16]:
from basicsynbio.utils import _easy_seqrec, feature_from_qualifier


class CDSPart:
    """A CDS located in a parent GenBank file, exposed as BASIC parts.

    Attributes:
        id: Identifier of the CDS; also the value looked up in the ``gene``
            qualifier of the parent record's features.
        handle_to_parent: Path/handle of the parent GenBank file.
        part: The parent file imported with ``bsb.import_part``.
        cds_part: Part built by ``make_part`` from the extracted CDS sequence.
    """

    def __init__(self, id, handle_to_parent):
        """Import the parent GenBank file and build the CDS part from it."""
        self.id = id
        self.handle_to_parent = handle_to_parent
        self.part = bsb.import_part(self.handle_to_parent, "genbank")
        self.cds_part = self.make_part()

    def make_part(self):
        """Extract the CDS feature from the parent part and return it as a part.

        The feature whose ``gene`` qualifier equals ``[self.id]`` is extracted
        from the parent sequence, wrapped in a new record annotated as a CDS
        labelled with ``self.id`` and converted with
        ``bsb.seqrec2part(..., add_i_seqs=True)``.

        Raises:
            KeyError: if the feature lookup raises KeyError. Previously the
                error was only printed and execution continued into an
                UnboundLocalError on the missing sequence.
        """
        try:
            cds_feature = feature_from_qualifier(self.part, "gene", [self.id])
        except KeyError as err:
            # KeyError is the exception the original handler expected from
            # feature_from_qualifier -- TODO confirm against basicsynbio.utils.
            raise KeyError(
                f"{self.id} was not found in the corresponding BasicPart."
            ) from err
        cds_seq = cds_feature.extract(self.part.seq)
        cds_seqrec = _easy_seqrec(
            str(cds_seq),
            self.id,
            annotation_type="CDS",
            label=[self.id],
        )
        return bsb.seqrec2part(cds_seqrec, add_i_seqs=True)

    def stored_part(self, backbone=bsb.BASIC_SEVA_PARTS["v0.1"]["18"]):
        """Return the CDS part assembled into ``backbone``.

        The assembly combines ``backbone``, the v0.1 LMP linker,
        ``self.cds_part`` and the v0.1 LMS linker. The default backbone is the
        v0.1 "18" SEVA part, looked up once when the class is defined.
        """
        cds_assembly = bsb.BasicAssembly(
            self.id,
            backbone,
            bsb.BASIC_BIOLEGIO_LINKERS["v0.1"]["LMP"],
            self.cds_part,
            bsb.BASIC_BIOLEGIO_LINKERS["v0.1"]["LMS"],
        )
        return cds_assembly.return_part(
            id=self.id,
            description=f"{self.id} stored in {backbone.name}",
            name=f"BASIC_{self.id}_CDS",
        )

    def unstored_part(self):
        """Return the CDS as a part that is not stored in a vector.

        Built from ``self.part.basic_slice()`` (the parent part, not
        ``self.cds_part``), converted with ``add_i_seqs=True`` and then given
        this CDS's id, a ``BASIC_<id>_CDS`` name and an empty description.
        """
        unstored_part = bsb.seqrec2part(
            self.part.basic_slice(),
            add_i_seqs=True,
        )
        unstored_part.id = self.id
        unstored_part.name = f"BASIC_{self.id}_CDS"
        unstored_part.description = ""
        return unstored_part


# (CDS id, parent GenBank file name) for every CDS in the collection.
cds_sources = (
    ("sfGFP", "BASIC_sfGFP_ORF.1.gb"),
    ("mCherry", "BASIC_mCherry_ORF.1.gb"),
    ("mTagBFP2", "BASIC_mTagBFP2_ORF.1.gb"),
)
cdss = [
    CDSPart(cds_id, DIR_MISC_BASIC / file_name)
    for cds_id, file_name in cds_sources
]
# Export of the stored CDS parts, left disabled.
# bsb.export_sequences_to_file(
#     (cds.stored_part() for cds in cdss),
#     DIR_PARTS_LINKERS / "BASIC_CDS_collection_v01.gb"
# )
# Write the CDS parts that are not stored in a vector.
bsb.export_sequences_to_file(
    (cds_entry.unstored_part() for cds_entry in cdss),
    DIR_PARTS_LINKERS / "BASIC_CDS_collection_v02.gb"
)

