<img src="golden-gator.png" width="400">

# Golden Gator notebook

## 1. Setup

### 1.1 Imports

In [14]:
import os

import networkx as nx
import pandas as pd
from Bio import Alphabet, SeqFeature, SeqIO

from dawdlib.create_embl.embl_maker import create_dc_features, create_path_features
from dawdlib.degenerate_dna.deg_table import TableColNames, generate_deg_csv
from dawdlib.degenerate_dna.utils import parse_degenerate_codon_csv
from dawdlib.dijkstra import colorful
from dawdlib.dijkstra.len_limit import all_shortest_paths
from dawdlib.gg_dc_combine.gg_dc_combine import dc_df_codon_list, gate_cdn_oligos
from dawdlib.golden_gate.find_gg import deg_table_to_dict
from dawdlib.golden_gate.gate_data import GGData
from dawdlib.golden_gate.graph_maker import (
    GraphMaker,
    build_custom_graph,
    create_default_valid_node_function,
    create_default_weight_func,
    make_default_graph,
)
from dawdlib.golden_gate.reaction_sim import ReactionSim
from dawdlib.golden_gate.utils import Requirements, expand_dna_var_poss, parse_dna

### 1.2 Constants and Paths

In [15]:
# Oligo length limits handed to the graph builders below.
# NOTE(review): units are presumably nucleotides — confirm against dawdlib.
MIN_OLIGO_LENGTH: int = 20
MAX_OLIGO_LENGTH: int = 79
MIN_CONST_OLIGO_LENGTH: int = 20
# NOTE(review): not referenced by any cell visible in this notebook.
MAX_NUM_GATES = 12

# Constant sequences passed as oligo prefix/suffix when building the graph
# and the oligo table.
prefix = "GACATTGGTCTCA"
suffix = "TGAGACCAACGACGCCGTACTCTTTGTCAAC"

gg_data = GGData()

# Working directory holding the input files and receiving every output file.
# Set the DAWDLIB_W_PATH environment variable to run the notebook on another
# machine; the original hard-coded location remains the default.
W_PATH = os.environ.get("DAWDLIB_W_PATH", "/Users/sh/Code/dawdlib/example")
resfile_path = os.path.join(W_PATH, "chosen_18Dec.resfile")
dna_path = os.path.join(W_PATH, "wt_dna.fasta")
embl_path = os.path.join(W_PATH, "wt_features2.embl")
deg_table_path = os.path.join(W_PATH, "deg_table2.csv")

## 2. Degenerate codons

### 2.1 Generate degenerate codon table

In [3]:
# Write the degenerate codon table for the resfile to `deg_table_path`, then
# load that CSV back as a DataFrame.
# keep_default_na=False with no na_values given: pandas parses no strings as
# NaN, so empty codon cells are read as empty strings.
generate_deg_csv(resfile_path, csv_filename=deg_table_path)
deg_table = pd.read_csv(deg_table_path, na_filter=True, keep_default_na=False,)

### 2.2 View degenerate codon table

In [4]:
# Display the degenerate codon table (one row per variable position).
deg_table

Unnamed: 0,AA_POS,DNA_POS,ENCODED_AAS,ENCODED_COUNT,AMBIGUOUS_CODONS1,AMBIGUOUS_CODONS2,AMBIGUOUS_CODONS3
0,16,46,"['I', 'V']","[1, 1]",RTT,,
1,42,124,"['L', 'V']","[1, 1]",STG,,
2,61,181,"['A', 'L', 'V']","[1, 1, 1]",STG,GCA,
3,65,193,"['S', 'T']","[1, 1]",ASC,,
4,68,202,"['A', 'M', 'V']","[1, 1, 1]",RTG,GCA,
5,69,205,"['A', 'L', 'P', 'Q']","[1, 1, 1, 1]",CHG,GCA,
6,72,214,"['A', 'C', 'S', 'T', 'V']","[1, 1, 1, 1, 1]",WSC,GYG,
7,108,322,"['E', 'I', 'L', 'T', 'V']","[1, 1, 1, 1, 1]",AYT,STG,GAA
8,112,334,"['I', 'V']","[1, 1]",RTT,,
9,145,433,"['A', 'F', 'I', 'M', 'S', 'T', 'V', 'Y']","[1, 1, 1, 1, 1, 1, 1, 1]",RYK,THT,


## 3. Find golden gates

### 3.1 Create a graph
**Either use the default, or custom blocks.**

In [5]:
# False: build the default graph (3.1.1). True: build the custom graph (3.1.2).
use_custom = False

#### 3.1.1 Default graph

In [6]:
# Shared inputs are built unconditionally: later cells (the oligo table needs
# `dna`, the reaction simulator needs `reqs`) must work on a fresh kernel
# whichever graph flavour is selected.
gm = GraphMaker(gg_data)
var_poss = expand_dna_var_poss(deg_table[TableColNames.DNA_POS.value].tolist())
dna = parse_dna(dna_path).upper()
reqs = Requirements(
    MIN_OLIGO_LENGTH,
    MAX_OLIGO_LENGTH,
    MIN_CONST_OLIGO_LENGTH,
    oligo_prefix=prefix,
    oligo_suffix=suffix,
)

if not use_custom:
    # Reuse the GraphMaker built above instead of constructing a second one.
    graph, src, target = make_default_graph(
        gm, dna, var_poss, deg_table_to_dict(deg_table), reqs
    )

#### 3.1.2 Custom graph

In [7]:
if use_custom:
    gm = GraphMaker(gg_data)
    var_poss = expand_dna_var_poss(deg_table[TableColNames.DNA_POS.value].tolist())
    dna = parse_dna(dna_path).upper()

    # The prefix and suffix are added to every oligo, so they are subtracted
    # from the maximum length available to the variable part.
    # NOTE(review): the meaning of the trailing 1000 is defined by dawdlib — confirm.
    is_valid_edge = gm.create_default_valid_edge_func(
        var_poss,
        MIN_OLIGO_LENGTH,
        MAX_OLIGO_LENGTH - len(prefix) - len(suffix),
        MIN_CONST_OLIGO_LENGTH,
        1000,
    )

    # Built once here rather than on every cost_func call: neither the default
    # weight function nor the flank length depends on the nodes being scored.
    default_weight = create_default_weight_func(deg_table_to_dict(deg_table))
    flank_length = len(suffix) + len(prefix)

    def cost_func(nd1, nd2):
        """Return the default edge weight between two nodes plus the prefix and suffix lengths."""
        return default_weight(nd1, nd2) + flank_length

    # NOTE(review): 2000 is the threshold given to filter_self_binding_gates;
    # its units are defined by dawdlib — confirm.
    acceptable_fcws = gm.gg_data.filter_self_binding_gates(2000)
    is_valid_node = create_default_valid_node_function(acceptable_fcws, var_poss)

    graph, src, target = build_custom_graph(
        dna, var_poss, is_valid_node, is_valid_edge, cost_func
    )

### 3.2 Find gates (shortest paths)

#### 3.2.1 Find shortest paths

In [32]:
shortest_paths = all_shortest_paths(
    graph, src, target, weight="weight", len_cutoff=20
)
# While iterating, both dicts are keyed by the number of nodes in the path.
nbest_paths = {}  # lowest-crosstalk path without off-targets, per path length
bad_paths = {}  # every path whose gate set has an off-target, per path length
try:
    # The loop sits inside the try block, so NetworkXNoPath is caught whether
    # it is raised by the call above or while the paths are being consumed.
    for i, (pth, cost) in enumerate(shortest_paths):
        n_nodes = len(pth)
        # The first and last nodes of a path are skipped when collecting gates.
        gates = [node.bps for node in pth[1:-1]]
        gates_crosstalk = gg_data.gateset_crosstalk(gates)
        if gg_data.gate_set_has_off_target(gates):
            bad_paths.setdefault(n_nodes, []).append(pth)
            continue
        incumbent = nbest_paths.get(n_nodes)
        if incumbent is None or incumbent[2] > gates_crosstalk:
            nbest_paths[n_nodes] = (pth, cost, gates_crosstalk, i)
except nx.NetworkXNoPath:
    print(f"No path was found between {src} and {target}")

# Re-key the winners by their enumeration index, which serves as the path ID.
nbest_paths = {i: (p, c, g) for p, c, g, i in nbest_paths.values()}

##### 3.2.1.2 View found gates (paths)

In [36]:
# Each value is a (path, cost, crosstalk) tuple keyed by its path ID; the two
# end nodes of a path are not gates, hence the "- 2".
for k, v in nbest_paths.items():
    print(f"Path ID: {k}. Number of gates: {len(v[0]) - 2}. Cost: {v[1]}.")

Path ID: 8434. Number of gates: 18. Cost: 1610.


#### 3.2.2 Find __*colorful*__ gates (shortest paths)

In [10]:
best_paths = {}
check_paths_limit = 1000
# Running ID over every examined path. The per-iteration enumerate index
# restarts for each `max_gates`, so using it as the final key could make
# entries from different iterations overwrite each other.
next_path_id = 0

for max_gates in range(16, 17):
    try:
        colorful_shortest_paths = colorful.shortest_path(
            graph, src, target, no_colors=21, len_cutoff=max_gates, weight="weight"
        )
    except nx.NetworkXNoPath:
        print(f"No path was found between {src} and {target} with at most {max_gates} gates.")
        continue

    for i, cpth in enumerate(colorful_shortest_paths):
        # `>=` so that exactly `check_paths_limit` paths are examined per
        # `max_gates` value (`>` let one extra path through).
        if i >= check_paths_limit:
            break
        path_id = next_path_id
        next_path_id += 1
        # The first and last nodes of a path are skipped when collecting gates.
        gates = [a.bps for a in cpth[1:-1]]
        if gg_data.gate_set_has_off_target(gates):
            continue
        pth_len = len(cpth)
        cost = sum(
            graph.edges[n1, n2]["weight"] for n1, n2 in zip(cpth[:-1], cpth[1:])
        )
        gates_crosstalk = gg_data.gateset_crosstalk(gates)
        incumbent = best_paths.get(pth_len)
        # Keep the cheapest path per length and break cost ties by lower
        # crosstalk. Comparing crosstalk without requiring equal cost let a
        # more expensive path replace a cheaper one, which made the result
        # depend on the order in which paths were produced.
        if incumbent is None or (cost, gates_crosstalk) < (incumbent[1], incumbent[2]):
            best_paths[pth_len] = (cpth, cost, gates_crosstalk, path_id)

# Re-key the winners by path ID: {path_id: (path, cost, crosstalk)}.
best_paths = {pid: (p, c, g) for p, c, g, pid in best_paths.values()}




##### 3.2.2.2 View found *colorful* gates (paths)

In [37]:
# Each value is a (path, cost, crosstalk) tuple keyed by its path ID; the two
# end nodes of a path are not gates, hence the "- 2".
for k, v in best_paths.items():
    print(f"Path ID: {k}. Number of gates: {len(v[0]) - 2}. Cost: {v[1]}.")

Path ID: 18. Number of gates: 16. Cost: 1769.


### 3.3 Choose whichever path you want

In [14]:
# Default pick: the colorful path with the smallest path ID. Replace the key
# with any other path ID printed above to choose a different path.
chosen_entry = best_paths[min(best_paths)]
chosen_path, *_ = chosen_entry

#### 3.3.1 Save chosen path to CSV

In [None]:
# Column names are taken from the annotated fields of the first path node.
gate_columns = chosen_path[0].__annotations__.keys()
path_df = pd.DataFrame.from_records(chosen_path, columns=gate_columns)
chosen_path_csv = os.path.join(W_PATH, 'chosen_path.csv')
path_df.to_csv(chosen_path_csv)

## 4. Create embl feature view

### 4.1 Prepare degenerate codon and gates features

In [19]:
# Parse the degenerate codon CSV written in section 2.1, then build sequence
# features for the degenerate codons and for the gates of the chosen path.
# NOTE(review): the feature types and qualifiers are decided inside dawdlib.
deg_parsed_df = parse_degenerate_codon_csv(deg_table_path)
seq_features = create_dc_features(deg_parsed_df)
pth_features = create_path_features(chosen_path)

### 4.2 Save embl file with all features

In [20]:
# Read the WT sequence, attach the degenerate codon and gate features, and
# write the annotated record to `embl_path` in EMBL format.
# NOTE(review): Bio.Alphabet was removed in Biopython 1.78, so this cell (and
# the `Alphabet` import) only runs on older Biopython — confirm the pinned version.
seq_rec = SeqIO.read(dna_path, format="fasta", alphabet=Alphabet.DNAAlphabet())
seq_rec.features.extend(seq_features)
seq_rec.features.extend(pth_features)
# SeqIO.write returns the number of records written, shown as the cell output.
SeqIO.write(seq_rec, embl_path, "embl")

1

## 5. Create oligos

### 5.1 Create oligo table

In [38]:
# Combine the chosen gates with the per-position codon lists into the oligo
# table; every oligo is flanked by `prefix` and `suffix`.
# NOTE(review): "6vsb-5.5nbd" is presumably a project/naming label — confirm.
codon_list = dc_df_codon_list(deg_table)
oligo_df = gate_cdn_oligos(
    chosen_path,
    codon_list,
    dna,
    prefix,
    suffix,
    "6vsb-5.5nbd",
)

### 5.2 Save oligo table

In [39]:
# Persist the oligo table; the reaction simulator in section 6.1 loads this file.
oligo_df.to_csv(os.path.join(W_PATH, "oligo_df.csv"))

## 6. Verify golden gate reaction

### 6.1 Create golden gate simulator and load oligo table

In [40]:
# Build the simulator from the gate data and oligo requirements, then load
# the oligo table CSV saved in section 5.2 into its reaction graph.
# NOTE(review): ["BsaI"] is presumably the list of restriction enzymes — confirm.
rs = ReactionSim(gg_data, reqs, ["BsaI"])
rs.create_reaction_graph(os.path.join(W_PATH, "oligo_df.csv"))

### 6.2 Check for WT sequence

In [44]:
# Exactly one WT product is expected. The candidates get their own name so
# `reaction_wt` always denotes the single WT entry, and the message reports
# the actual count (the old text claimed "more than one" even when none was found).
wt_candidates = list(rs.get_wt_dna())
assert len(wt_candidates) == 1, (
    f"Error: expected exactly one WT DNA but found {len(wt_candidates)}!!!"
)
reaction_wt = wt_candidates[0]
# The first element of the WT entry is the assembled DNA sequence.
assert reaction_wt[0] == dna, "Error: reaction DNA doesn't match input DNA!!!"

### 6.3 Verify all golden gate products
**Checks that all products are constructed correctly and have the same length and gates as WT**

* Note: This might take a while!

In [45]:
# Verify every product against the WT entry: the difference of its last two
# fields and its second field are the reference values passed to the simulator.
# NOTE(review): presumably the expected product length and gate set — confirm.
wt_reference_span = reaction_wt[-1] - reaction_wt[-2]
wt_reference_gates = reaction_wt[1]
reaction = rs.verify_reaction(wt_reference_span, wt_reference_gates)
# reaction[0] is the pass/fail flag; reaction[1] is the product count on
# success and the failing product otherwise.
if not reaction[0]:
    print("Verifying golden gate reaction failed!!!")
    print("The following product failed verification:\n")
    print(reaction[1])
else:
    print(f"Golden gate is good the number of different products is {reaction[1]}")

(True, 18247680)

## 7. Write order table

### 7.1 Settings

#### Write constant segments?

In [5]:
# True: keep every row regardless of its `const` flag in the order table.
# False: keep only rows whose `const` flag is False (assuming a boolean column).
output_const = False

#### Write WT segments?

In [6]:
# True: keep every row regardless of its `wt` flag in the order table.
# False: keep only rows whose `wt` flag is False (assuming a boolean column).
output_wt = False

### 7.2 Write order table to CSV file

In [16]:
# `flag <= switch` keeps every row when the switch is True and only the rows
# whose flag is False when the switch is False.
wt_mask = oligo_df.wt <= output_wt
const_mask = oligo_df.const <= output_const
order_table = oligo_df[wt_mask & const_mask][['name', 'full_oligo_dna']]
order_table.to_csv(os.path.join(W_PATH, "order_table.csv"))