# NEB gibson assembly protocol

This notebook performs calculations for Gibson assembly based on recommendations from the [NEB Gibson assembly protocol](https://www.neb.com/protocols/2012/12/11/gibson-assembly-protocol-e5510).

## Restriction fragment assembly functions

New England Biolabs recommends setting up restriction enzyme digests in a 50 µl reaction volume.

In [76]:
# TODO

## Gibson assembly functions

NEB recommends `0.02` - `0.5` pmols of fragments when assembling 2-3 fragments. Here, we will be using the same backbone and many "equivalent" inserts. Additionally the total volume of unperufied PCR fragments should not exceed 20%. Here I am interpreting inserts as PCR fragments. NEB also says Optimized cloning efficiency is `50`-`100` ng of vector with a 2-fold molar excess of each insert. Use 5x more if inserts size less than 200. 

In [71]:
INSERT_CON_STOCK = 50  # stock insert concentation ng / ul
VECTOR_MASS = 75 # Mass of vector per insert per NEB rec of 50-100 ng
MASTER_MIX_CON = 2  # X concentration
INSERT_LIBRARY_PROP = 0.10  # proportion of volume of total reaction composed of inserts
INCUBATION_TIME = 15  # minutes NEB rec for 2 or 3 fragments
INCUBATION_TEMP = 50 # C per NEB rec
BACKBONE_CON = 200  # ng / ul

In [46]:
from pydna.dseqrecord import Dseqrecord
import pandas as pd


def ng_dna_to_pmol(insert_record, ng):
    # calculate the number of pmols of DNA from ng DNA
    insert_mw = insert_record.seq.mw()  # g / mol
    insert_mw_ng = insert_mw * 1e+9  # ng / mol
    insert_mols = ng / insert_mw_ng  # mols
    insert_pmols = insert_mols * 1e+12
    return insert_pmols

def target_fragment_pmols(vector_record, vector_mass_ng, insert_length):
    # get number of pmols of vector
    # NEB: "Optimized cloning efficiency is 50-100 ng of vector with a 2-fold 
    # molar excess of each insert". Use 5x more if inserts size less than 200.
    vector_pmol = ng_dna_to_pmol(vector_record, vector_mass_ng)
    if insert_length >= 200:
        return vector_pmol * 2 * 5
    else:
        return vector_pmol * 2

    
def ng_required(insert_record, insert_ng_ul, target_pmols):
    # calculate the number of ul to have target_pmols
    # of insert_record DNA
    pmol_per_ul = ng_dna_to_pmol(insert_record, insert_ng_ul)  # pmol / ul
    return (1 / pmol_per_ul) * target_pmols  # ul


## T7 initiation series

The initiation series relies on homology of the 5' and 3' arms of inserts to anneal to pFC9 backbone via Gibson asembly.

In [41]:
from pydna.genbankrecord import GenbankRecord
from pydna.readers import read

backbone = GenbankRecord(read(str(snakemake.input['t7_init_backbone'])))
inserts = [GenbankRecord(read(str(insert))) for insert in snakemake.input['inserts']]
placeholder = False
if snakemake.params['insert_concentration']:
    INSERT_CON_STOCK = float(snakemake.params['insert_concentration'])
else:
    placeholder = True
if snakemake.params['backbone_concentration']:
    BACKBONE_CON = float(snakemake.params['backbone_concentration'])
else:
    placeholder = True

In [68]:
def single_insert_assembly(insert, insert_ng_per_ul, backbone):
    # calculate insert volume and mass required for Gibson assembly into a
    # given backbone
    target_pmols = target_fragment_pmols(backbone, VECTOR_MASS, len(insert.seq))
    insert_ul = ng_required(insert, insert_ng_per_ul, target_pmols)
    return {
        'insert_name': insert.name,
        'insert_mass_ng': target_pmols,
        'insert_mw': insert.seq.mw(),
        'insert_ul': insert_ul,
        'backbone_mass_ng': VECTOR_MASS
    }

In [67]:
def _insert_table(inserts, insert_ng_per_ul, backbone):
    # helper function to create a table of all inidividual insert assemblies
    # should only be called from complete_library function
    records = []
    for each_insert in inserts:
        records.append(single_insert_assembly(
                each_insert, insert_ng_per_ul, backbone
            )
        )
    return pd.DataFrame(records)

In [77]:
def complete_library(inserts, insert_ng_per_ul, backbone, backbone_ng_per_ul):
    insert_table = _insert_table(inserts, insert_ng_per_ul, backbone)
    
    total_mass_backbone = insert_table['backbone_mass_ng'].sum()
    total_insert_volume_ul = insert_table['insert_ul'].sum()
    
    total_backbone_volume_ul = round(total_mass_backbone / backbone_ng_per_ul, 3)
    
    library_volume_ul = round(total_insert_volume_ul / INSERT_LIBRARY_PROP, 3)
    master_mix_volume_ul = round(library_volume_ul / MASTER_MIX_CON, 3)
    h20_volume = library_volume_ul - master_mix_volume_ul - total_insert_volume_ul - total_backbone_volume_ul
    
    assert sum(
        [master_mix_volume_ul, total_insert_volume_ul,
         total_backbone_volume_ul, h20_volume
        ]) == library_volume_ul
    
    library_record = [
        {
            'backbone_name': backbone.name,
            'library_volume_ul': library_volume_ul,
            'master_mix_volume_ul': master_mix_volume_ul,
            'h20_volume_ul': h20_volume,
            'total_backbone_volume_ul': total_backbone_volume_ul,
            'total_insert_volume_ul': total_insert_volume_ul,
            'incubation_time_mins': INCUBATION_TIME,
            'incubation_temp_C': INCUBATION_TEMP
        }
    ]
    
    library_table = pd.DataFrame(library_record)
    
    return insert_table, library_table

insert_table, library_table = complete_library(inserts, INSERT_CON_STOCK, backbone, BACKBONE_CON)

NameError: name 'backbone' is not defined

In [74]:
insert_table

Unnamed: 0,insert_name,insert_mass_ng,insert_mw,insert_ul,backbone_mass_ng
0,name,0.404907,123559.0,1.000597,75
1,name,0.404907,123559.0,1.000597,75
2,name,0.404907,123559.0,1.000597,75
3,name,0.404907,123559.0,1.000597,75
4,name,0.404907,123559.0,1.000597,75
5,name,0.404907,123559.0,1.000597,75
6,name,0.404907,123559.0,1.000597,75
7,name,0.404907,123559.0,1.000597,75
8,name,0.404907,123559.0,1.000597,75
9,name,0.404907,123559.0,1.000597,75


In [75]:
library_table

Unnamed: 0,backbone_name,library_volume_ul,master_mix_volume_ul,h20_volume_ul,total_backbone_volume_ul,total_insert_volume_ul,incubation_time_mins,incubation_temp_C
0,name,100.06,50.03,37.024029,3.0,10.005971,15,50


Write tables as tsv files.

In [1]:
output_library = str(snakemake.output['library'])
output_insert = str(snakemake.output['inserts'])

def write_table_as_tsv(table, output_path):
    table.to_csv(output_path, sep='\t', index=False)

write_table_as_tsv(library_table, output_library)
write_table_as_tsv(insert_table, output_insert)

NameError: name 'snakemake' is not defined