In [1]:
from synbiolib import codon
import pandas as pd
import itertools
import os
from opentrons import robot

In [2]:
original_df = pd.read_excel("./../../data/orders/8491455_SapI-Synthesis.xls")

In [79]:
forward_attachment = "AGATGGCTCTTCT"
reverse_attachment = "TGAAGAGCCACGG"

def block_to_oligo(block_seq):
    for_primer = forward_attachment + block_seq + reverse_attachment
    rev_primer = codon.reverse_complement(for_primer)
    return (for_primer, rev_primer)

def oligo_to_block(oligo_seq):
    if forward_attachment in oligo_seq:
        block = oligo_seq[len(forward_attachment):len(forward_attachment)+5]
    elif codon.reverse_complement(forward_attachment) in oligo_seq:
        block = codon.reverse_complement(oligo_seq[len(reverse_attachment):len(reverse_attachment)+5])
    else:
        block = "NOT_BLOCK"
    return block      
def synseq_to_blocks(oligo): # synseq_to_blocks("GCCATGGCTAGCAAAGCAG")
    blocks = []
    for i,c in enumerate(oligo):
        start = i * 2
        block = oligo[start:start+5]
        if len(block) < 5:
            break
        blocks.append(block)
    return blocks
def find_wells(term,query): # 'Block','GCAAC'
    return list(df.loc[df[term] == query]['Well'])

def blocks_in_dataframe(df):
    return list(set(df[df['Block'] != 'NOT_BLOCK']['Block']))

def change_block(df,seq,block_name):
    df.loc[df['Seq'] == seq, 'Block'] = block_name
    return df

In [81]:
df = original_df.iloc[3:51][["Order Number:",8491455,"Unnamed: 3"]].reset_index(drop=True)
df.columns = ["Well", "Primer", "Seq"]
df["Block"] = df["Seq"].apply(oligo_to_block)

for seq in ["CGCAATTCCACCACAGAAGACTAGGAGGCCCAGCGCTATGTCTTCGCCTTGTCTCGCCAG", "CTGGCGAGACAAGGCGAAGACATAGCGCTGGGCCTCCTAGTCTTCTGTGGTGGAATTGCG"]:
    change_block(df,seq,"POS_CTRL")

for seq in ["CGCAATTCCACCACAGAAGACTAGGAGGCCCAG", "GGCCTCCTAGTCTTCTGTGGTGGAATTGCG"]:
    change_block(df,seq,"CAG_CTRL")
    
for seq in ["CTGGCGAGACAAGGCGAAGACATAGCGCTGGGC", "CAGCGCTATGTCTTCGCCTTGTCTCGCCAG"]:
    change_block(df,seq,"GCC_CTRL")
    
for seq in ["CGCAATTCCACCACAGAAGACTAGGAGGCC", "CTCCTAGTCTTCTGTGGTGGAATTGCG"]:
    change_block(df,seq,"prefix")
    
for seq in ["CTGGCGAGACAAGGCGAAGACATAGCGCTG", "CGCTATGTCTTCGCCTTGTCTCGCCAG"]:
    change_block(df,seq,"suffix")

blocks = blocks_in_dataframe(df)
df

Unnamed: 0,Well,Primer,Seq,Block
0,A1,KG_1,CGCAATTCCACCACAGAAGACTAGGAGGCCCAGCGCTATGTCTTCG...,POS_CTRL
1,A2,KG_2,CTGGCGAGACAAGGCGAAGACATAGCGCTGGGCCTCCTAGTCTTCT...,POS_CTRL
2,A3,KG_3,CGCAATTCCACCACAGAAGACTAGGAGGCCCAG,CAG_CTRL
3,A4,KG_4,CTGGCGAGACAAGGCGAAGACATAGCGCTGGGC,GCC_CTRL
4,A5,KG_5,CAGCGCTATGTCTTCGCCTTGTCTCGCCAG,GCC_CTRL
5,A6,KG_6,GGCCTCCTAGTCTTCTGTGGTGGAATTGCG,CAG_CTRL
6,A7,KG_7,CGCAATTCCACCACAGAAGACTAGGAGGCC,prefix
7,A8,KG_8,CTGGCGAGACAAGGCGAAGACATAGCGCTG,suffix
8,A9,KG_9,CGCTATGTCTTCGCCTTGTCTCGCCAG,suffix
9,A10,KG_10,CTCCTAGTCTTCTGTGGTGGAATTGCG,prefix


In [9]:

new_plate = []
for block in blocks:
    new_plate += [[block, find_wells('Block',block)]]
destination = pd.DataFrame(new_plate, columns = ['Block', 'From'])
destination = destination.reindex(destination['Block'].str.len().sort_values().index).reset_index(drop=True) # reorganize
destination.index += 1
destination['Well'] = destination.index

In [10]:
destination

Unnamed: 0,Block,From,Well
1,GCCAG,"[A12, C12]",1
2,CACAG,"[B5, D2]",2
3,TACAG,"[B12, D1]",3
4,GCAAC,"[B9, D10]",4
5,AGCAG,"[B4, C11]",5
6,CATGC,"[B6, D4]",6
7,TGGCC,"[C4, D6]",7
8,GCAAA,"[B8, D12]",8
9,GCTAG,"[B11, C9]",9
10,TGCAG,"[C3, C10]",10


In [75]:
stuff = destination.loc[destination['Block'] == 'TGGCT'].index.format()[0]
stuff

['12']

In [93]:
#Build main dataframe for reactions
rows_list = [{'Build_seq': 'POS_CTRL','Fragments': ['POS_CTRL'], 'Parent_wells': [24]},{'Build_seq': 'NEG_CTRL','Fragments': [''], 'Parent_wells': []},{'Build_seq': 'GCC_CTRL','Fragments': ['GCC_CTRL','prefix'], 'Parent_wells': [22,21]},{'Build_seq': 'CAG_CTRL','Fragments': ['CAG_CTRL','suffix'], 'Parent_wells': [23,20]}]

builds = ["GCCATGGCTAGCAAAGCAG", "GCCATGGCTAGCAACAG", "GCCATGGCTAGCCAG", "GCCATGGCTACAG", "GCCATGGCCAG", "GCCATGCAG", "GCCACAG", "GCCAG"]
for index,build in enumerate(builds):
    reaction = synseq_to_blocks(build) + ['suffix','prefix']
    parent_wells = []
    for block in reaction:
        parent = destination.loc[destination['Block'] == block].index.format()[0]
        parent_wells.append(parent)
    dict1 = {'Build_seq': build,
             'Fragments': reaction,
             'Parent_wells': parent_wells}
    rows_list.append(dict1)
dfr = pd.DataFrame(rows_list) 

# Add column for quantity of water to add to reaction
dfr['H2Oul'] = 5 - (dfr['Parent_wells'].apply(len) * .5)

# Add wells for later reference
dfr.index += 1
dfr['Well'] = dfr.index

dfr

Unnamed: 0,Build_seq,Fragments,Parent_wells,H2Oul,Well
1,POS_CTRL,[POS_CTRL],[24],4.5,1
2,NEG_CTRL,[],[],5.0,2
3,GCC_CTRL,"[GCC_CTRL, prefix]","[22, 21]",4.0,3
4,CAG_CTRL,"[CAG_CTRL, suffix]","[23, 20]",4.0,4
5,GCCATGGCTAGCAAAGCAG,"[GCCAT, CATGG, TGGCT, GCTAG, TAGCA, GCAAA, AAA...","[14, 15, 12, 9, 16, 8, 13, 5, 20, 21]",0.0,5
6,GCCATGGCTAGCAACAG,"[GCCAT, CATGG, TGGCT, GCTAG, TAGCA, GCAAC, AAC...","[14, 15, 12, 9, 16, 4, 19, 20, 21]",0.5,6
7,GCCATGGCTAGCCAG,"[GCCAT, CATGG, TGGCT, GCTAG, TAGCC, GCCAG, suf...","[14, 15, 12, 9, 11, 1, 20, 21]",1.0,7
8,GCCATGGCTACAG,"[GCCAT, CATGG, TGGCT, GCTAC, TACAG, suffix, pr...","[14, 15, 12, 17, 3, 20, 21]",1.5,8
9,GCCATGGCCAG,"[GCCAT, CATGG, TGGCC, GCCAG, suffix, prefix]","[14, 15, 7, 1, 20, 21]",2.0,9
10,GCCATGCAG,"[GCCAT, CATGC, TGCAG, suffix, prefix]","[14, 6, 10, 20, 21]",2.5,10
