# Features for BASIC SEVA plasmids

## Aims and objectives

- [x] Alter constructs with p15A ori to contain that given by SEVA. Initial files containing this ori have a "t" insertion.
- [x] Remove features from [BASIC_SEVA genbank files](/sequences/genbank_files/BASIC_SEVA_collection).
- [x] Auto-annotate genbank files using [benchling feature library](/csv_xlsx_files/BASIC_SEVA_benchling_misc_features_library.csv).
- [ ] Remove color annotations and add description.
- [ ] Upload genbank files to JBEI-ICE.
- [ ] Generate a dictionary linking Part ID numbers to SEVA nomenclature.

In [2]:
from Bio import SeqIO, SeqUtils
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq
from pathlib import Path
from basicsynbio.main import DEFAULT_ANNOTATIONS
from basicsynbio.utils import MARKER_DICT, ORI_DICT
import csv
import pandas as pd


In [3]:
benchling_features = pd.read_csv(Path.cwd().parent / "csv_xlsx_files" / "BASIC_SEVA_benchling_misc_features_library.csv", index_col=0)
p15a_str = benchling_features.loc["SEVA_p15A", "Feature"]
seva_p15a = SeqRecord(Seq(p15a_str), id="SEVA_p15A")
error_p15a = "CTAGAAATATTTTATCTGATTAATAAGATGATCTTCTTGAGATCGTTTTGGTCTGCGCGTAATCTCTTGCTCTGAAAACGAAAAAACCGCCTTGCAGGGCGGTTTTTCGAAGGTTCTCTGAGCTACCAACTCTTTGAACCGAGGTAACTGGCTTGGAGGAGCGCAGTCACCAAAACTTGTCCTTTCAGTTTAGCCTTAACCGGCGCATGACTTCAAGACTAACTCCTCTAAATCAATTACCAGTGGCTGCTGCCAGTGGTGCTTTTGCATGTCTTTCCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGACTGAACGGGGGGTTCGTGCATACAGTCCAGCTTGGAGCGAACTGCCTACCCGGAACTGAGTGTCAGGCGTGGAATGAGACAAACGCGGCCATAACAGCGGAATGACACCGGTAAACCGAAAGGCAGGAACAGGAGAGCGCACGAGGGAGCCGCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCACTGATTTGAGCGTCAGATTTCGTGATGCTTGTCAGGGGGGCGGAGCCTATGGAAAAACGGCTTTTGCCGCGGCCCTCTCACTTCCCTGTTAAGTATCTTCCTGGCATCTTCCAGGAAATCTCCGCCCCGTTCGTAAGCCATTTCCGCTCGCCGCAGTCGAACGACCGAGCGTAGCGAGTCAGTGAGCGAGGAAGCGGAATATATCC"

In [4]:
def remove_features(bseva_seqrec, id=None):
    """Removes features from bseva_seqrec."""
    bseva_seqrec.features = []
    bseva_seqrec.annotations = DEFAULT_ANNOTATIONS
    if not id:
        bseva_seqrec.id = "BASIC_" + bseva_seqrec.id
    else:
        bseva_seqrec.id = "BASIC_" + id
    bseva_seqrec.name = bseva_seqrec.id
    return bseva_seqrec


BSEVA_DIR_INIT = Path.cwd().parent / "sequences" / "genbank_files" / "BASIC_SEVA_collection" / "initial_files"
bseva_seqrecs = []
for gb_file in (BSEVA_DIR_INIT).iterdir():
    if "BASIC_SEVA_no_annotations.gb" not in gb_file.name:
        seqrec = SeqIO.read(gb_file, "genbank")
        if  int(gb_file.name[-4]) == 6:
            error_p15_loc = SeqUtils.nt_search(str(seqrec.seq), error_p15a)
            new_seqrec = seqrec[:error_p15_loc[1]] + seva_p15a + seqrec[error_p15_loc[1] + len(error_p15a):]
            bseva_seqrecs.append(remove_features(new_seqrec, id=seqrec.id))
        else:
            bseva_seqrecs.append(remove_features(seqrec))
SeqIO.write(bseva_seqrecs, BSEVA_DIR_INIT / "BASIC_SEVA_no_annotations.gb", "genbank")

48

## Objective for cell below

Remove color annotations and add description

In [5]:
def bseva_description(seqrec):
    """Generate string describing BASIC_SEVA collection member."""
    return f"BASIC SEVA vector containing {MARKER_DICT[seqrec.id[-2]]} resistance marker and {ORI_DICT[seqrec.id[-1]]} origin of replication."


def bseva_num(seqrec):
    """:return: number of the bseva backbone."""
    return int(seqrec.id[11:])


seqrecs = list(SeqIO.parse(BSEVA_DIR_INIT.parent / "BASIC_SEVA_benchling.gb", "genbank"))
for seqrec in seqrecs:
    seqrec.description = bseva_description(seqrec)
    for feature in seqrec.features:
        feature.qualifiers = {"label": feature.qualifiers["label"]}
seqrecs.sort(key=bseva_num)
path_to_collections = Path.cwd().parent / "basicsynbio" / "parts_linkers" 
SeqIO.write(seqrecs, path_to_collections / "BASIC_SEVA_collection_v01.gb", "genbank")

48

## Objectives for cell below

Generate a csv file for submission of genbank files to JBEI-ICE.

In [6]:
def jbei_ice_record(seqrec, 
pi="Paul Freemont",
creator = "Matthew C Haines",
creator_email = "mh2210@ic.ac.uk",
status = "In Progress",
biosafety = "Level 1",
host = "E. coli",
circular = "true"
):
    """Accepts a seqrec and metainformation and returns a list of elements corresponding to a record in a JBEI-ICE csv submission file."""
    return [pi, "", "", "", biosafety, seqrec.id, "", "", seqrec.description, "", "", "", status, creator, creator_email, circular, "", "", host, ORI_DICT[seqrec.id[-1]], MARKER_DICT[seqrec.id[-2]], "", seqrec.id + ".gb", ""]


jbei_ice_dir = Path.cwd().parent / "jbei-ice_submissions"
with open(jbei_ice_dir / "jbei-ice_plasmid_csv_upload.csv", "r", newline="") as template_file:
    with open(jbei_ice_dir / "2020-07-13_jbei-ice_submission" / "2020-07-13_jbei-ice_submission.csv", "w", newline="") as submission_file:
        csv_reader = csv.reader(template_file)
        csv_writer = csv.writer(submission_file)
        for row in csv_reader:
            csv_writer.writerow(row)
        for seqrec in seqrecs:
            csv_writer.writerow(jbei_ice_record(seqrec))

FileNotFoundError: [Errno 2] No such file or directory: '/home/hainesm6/github_repos/LondonBiofoundry/basicsynbio/jbei-ice_submissions/2020-07-13_jbei-ice_submission/2020-07-13_jbei-ice_submission.csv'

## Objectives for cell below

Generate a dictionary linking Part ID numbers to SEVA nomenclature.

In [None]:

seva_ind = 12
ice_py_loc = Path.cwd().parent / "basicsynbio" / "parts_linkers" / "ice.py"
with open(ice_py_loc, "w") as ice_py_file:
    ice_py_file.write("BSEVA_ICE_DICT = {\n")
    for ind in range(42):
        ice_num = 17297 + ind
        while str(seva_ind)[-1] in [str(num) for num in [0, 1, 6]]:
            seva_ind += 1
        ice_py_file.write(f"    \"{seva_ind}\": \"{ice_num}\",\n")
        seva_ind += 1
    ice_py_file.write("}")
