In [2]:
import pandas as pd
import json
import glob
from config import *
import interact_db

In [216]:
def query_db(query="*"):
    '''
    Pulls all of the current entries in the database and compiles them
    into a DataFrame. Creates compounding column names all starting with "data" to allow for
    nested keys to be reindexed later into their json files. Individual fragments are set as
    the index, i.e. a gene with two fragments would result in two different rows in the df.
    '''
    def crawl_dict(dictionary,name):
        '''
        Scans the dictionary to find the lowest level keys and their values, adds to a list of
        tuples and then returns the full list
        '''
        new_row = []
        for key in dictionary.keys():
            # The fragment sequences and locations columns are dictionaries
            # that have keys unique to the fragment name and so they need to
            # remain nested
            if key == "fragment_sequences" or key == "fragments":
                new_row = new_row + [(name+"."+key,dict(dictionary[key]))]
            elif type(dictionary[key]) == type(dict()):
                new_row += crawl_dict(dictionary[key],str(name+"."+key))
            elif type(dictionary[key]) == type(str()):
                new_row = new_row + [(name+"."+key,dictionary[key])]
            elif type(dictionary[key]) == type(list()) and dictionary[key] != [""]:
                new_row += crawl_dict(dictionary[key][-1],str(name+"."+key))
            elif type(dictionary[key]) == type(bool()):
                if dictionary[key]:
                    new_row = new_row + [(name+"."+key,True)]
                else:
                    new_row = new_row + [(name+"."+key,False)]
        return new_row
    # Iterates through all of the json files in the database and concatenates
    # all of the rows together to create the final dataframe
    full_db = pd.DataFrame()
    for file in glob.glob(BASE_PATH + "/data/*/BBF10K_{}.json".format(query)):
        with open(file,"r") as json_file:
            data = json.load(json_file)
        new_row = dict(crawl_dict(data,"data"))
        new_df = pd.DataFrame(new_row)
        full_db = pd.concat([full_db,new_df])
    return full_db

db = query_db()
print(db.columns.unique())

Index(['data.author.affiliation', 'data.author.email', 'data.author.name',
       'data.author.orcid', 'data.author.project', 'data.cloned.organism',
       'data.cloned.vector', 'data.dates.build_ready', 'data.dates.complete',
       'data.dates.ordered', 'data.dates.submitted', 'data.description',
       'data.gene_id', 'data.gene_name', 'data.info.IP.results',
       'data.info.IP.submission_number', 'data.info.IP.submitted',
       'data.info.collection', 'data.info.database_links',
       'data.info.order_week', 'data.info.other_tags', 'data.info.safety',
       'data.info.type.build_type', 'data.info.type.cloning_method',
       'data.info.type.part_type', 'data.location.fragments',
       'data.project_description', 'data.sequence.fragment_sequences',
       'data.sequence.optimized_sequence', 'data.sequence.original_sequence',
       'data.status.abandoned', 'data.status.build_attempts.build_date',
       'data.status.build_attempts.build_number',
       'data.status.build_atte

In [4]:
all_maps = pd.DataFrame()
for file in sorted(glob.glob("{}/plate_maps/*.csv".format(BASE_PATH))):
    plate_map = pd.read_csv(file)
    all_maps = pd.concat([all_maps,plate_map])
all_maps = list(all_maps["Plate"].unique())
# all_maps
numbers = range(0,len(all_maps))
priority = dict(zip(all_maps, numbers))
print(priority)

{'pSHPs0807B412037MU': 0, 'pSHPs0807B412038MU': 1, 'pSHPs0807B412039MU': 2, 'pSHPs0807B412040MU': 3, 'pSHPs0826B426849MU': 4, 'pSHPs0826B426850MU': 5, 'pSHPs1025B525648MU': 6, 'pSHPs1106B253446MU': 7, 'pSHPs1121B618499MU': 8, 'pSHPs1121B618500MU': 9, 'pSHPs1207B618708MU': 10, 'pSHPs1212B325156MU': 11, 'pSHPs1212B325157MU': 12, 'pSHPs1212B325158MU': 13, 'pSHPs0220B502370MU': 14, 'pSHPs0220B292296MU': 15}


In [92]:
def well_addresses():
    letter = ["A","B","C","D","E","F","G","H"]
    number = ["1","2","3","4","5","6","7","8","9","10","11","12"]
    target_well = []
    temp_well = 0
    for n in number:
        for l in letter:
            temp_well = l + n
            target_well.append(temp_well)
    return target_well

target_well = well_addresses()
print(target_well[49])
print(len(target_well))

B7
96


In [96]:
from opentrons import robot, containers, instruments

In [145]:
# Pull out the desirable genes
build_db = db[['data.gene_id','data.status.build_ready','data.status.abandoned','data.status.build_complete','data.location.fragments']]
build_db = build_db[build_db['data.status.build_ready'] == True]
build_db = build_db[build_db['data.status.abandoned'] == False]
build_db = build_db[build_db['data.status.build_complete'] != "Good_sequence"]

# Remove the extra fragments
build_db = build_db.reset_index()
build_db = build_db.set_index(["data.gene_id"])
build_db = build_db[build_db['index'].str[-1] == "1"]

# Pull the subset of genes that will actually be built in this run
num_genes = 90
building = build_db.iloc[:num_genes]
building = building.reset_index()
gene_list = [g for g in building['data.gene_id']]

# Create the dataframe containing information for the build and 
# for aliquoting master mix
gene_ids = []
frag_names = []
locations = []
dest_wells = []
master_wells = []
frag_nums = []
well_counter = 0
for gene in gene_list:
    with open("{}/data/{}/{}.json".format(BASE_PATH,gene,gene),"r") as json_file:
        data = json.load(json_file)
    dest_well = target_well[well_counter]
    frag_count = 0
    for fragment in data["location"]["fragments"]:
        gene_ids.append(data["gene_id"])
        frag_names.append(fragment)
        locations.append(data["location"]["fragments"][fragment])
        dest_wells.append(dest_well)
        frag_count += 1
    master_wells.append(dest_well)
    frag_nums.append(frag_count)
    well_counter += 1

plates = []
wells = []
for loc in locations:
    plate,well = loc.split("_")
    plates.append(plate)
    wells.append(well)
    
plan = pd.DataFrame({
    "Gene" : gene_ids,
    "Fragment" : frag_names,
    "Plate" : plates,
    "Well" : wells,
    "Destination" : dest_wells
},columns=["Gene","Fragment","Plate","Well","Destination"])
master = pd.DataFrame({
    "Well" : master_wells,
    "Fragments" : frag_nums
},columns=["Well","Fragments"])
plan = plan.sort_values("Plate")

num_rxns = len(plan)
print(num_rxns)

SOURCE_SLOTS = ['D2','D3','B2']

unique_plates = list(pd.unique(plan["Plate"]))
group_plates = [unique_plates[n:n+3] for n in range(0, len(unique_plates), len(SOURCE_SLOTS))]
print(group_plates)

plate_counter = 0
current_group = group_plates[plate_counter]
print(current_group)

def change_plates(current_plates):
    source_plates = {}
    plate_locations = list(zip(pd.unique(current_plates),SOURCE_SLOTS[:len(current_plates)]))
    print("plate_locations","\n", plate_locations)
    input("Switch out plates for those listed. Press enter when ready.")
    for plate, slot in plate_locations:
        source_plates[plate] = containers.load('96-flat', slot)
    return source_plates

# Check if any plates have low volume and need to be diluted
plate_dict = dict(enumerate(unique_plates))

for num,plate in enumerate(unique_plates):
    print("{}: {}".format(num,plate))
print("{}: No plates".format(num+1))
more = True

dil_plates = []
plate_num = int(input("Choose the number of any plate that is low: "))
print(num)
if plate_num == int(num)+1:
    print("No plates were added")
    more = False
else:
    dil_plates.append(plate_dict[int(plate_num)])
    while more:
        print("Plates to be diluted: {}".format(dil_plates))
        plate_num = int(input("Another plate? "))
        if plate_num == (int(num)+1):
            more = False
        else:
            dil_plates.append(plate_dict[int(plate_num)])
            print("No more plates were added")
    
input("Low volume in any plates")

plate_map = plan[["Gene","Destination"]]
plate_map = plate_map.drop_duplicates(subset=['Gene'])
plate_map = plate_map.sort_index()

last_well = plate_map.iloc[-1]["Destination"]
remaining_wells = target_well[(target_well.index(last_well)+1):]
empty = ["Empty" for num in range(len(remaining_wells))]
empty = pd.DataFrame({
    "Gene" : empty,
    "Destination" : remaining_wells
})

plate_map = pd.concat([plate_map,empty])
plate_map

112
[['pSHPs0807B412037MU', 'pSHPs0807B412038MU', 'pSHPs0807B412039MU'], ['pSHPs0807B412040MU', 'pSHPs0826B426849MU', 'pSHPs0826B426850MU']]
['pSHPs0807B412037MU', 'pSHPs0807B412038MU', 'pSHPs0807B412039MU']
0: pSHPs0807B412037MU
1: pSHPs0807B412038MU
2: pSHPs0807B412039MU
3: pSHPs0807B412040MU
4: pSHPs0826B426849MU
5: pSHPs0826B426850MU
6: No plates
['C12', 'D12', 'E12', 'F12', 'G12', 'H12']


Unnamed: 0,Destination,Gene
0,A1,BBF10K_000008
1,B1,BBF10K_000013
2,C1,BBF10K_000014
3,D1,BBF10K_000016
4,E1,BBF10K_000017
9,F1,BBF10K_000018
10,G1,BBF10K_000022
11,H1,BBF10K_000025
12,A2,BBF10K_000026
14,B2,BBF10K_000030


In [170]:
import numpy as np
import math

In [197]:
plate_8 = pd.read_csv("../builds/build008/build008_2018-02-27 11:10:36.csv")
plate_8 = plate_8[["Gene","Destination"]]
last_well = plate_8.iloc[-1]["Destination"]
remaining_wells = target_well[(target_well.index(last_well)+1):]
plate_8["Result"] = ""
empty = ["Empty" for num in range(len(remaining_wells))]
pick = ["Pick" for num in range(len(remaining_wells))]
print(empty)
empty = pd.DataFrame({
    "Gene" : empty,
    "Destination" : remaining_wells,
    "Result" : pick
})

plate_8 = pd.concat([plate_8,empty])
plate_8 = plate_8[["Gene","Destination","Result"]]

plate_8.to_csv('../builds/build008/build008_2018-02-27 11:10:36.csv',index=False)


['Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty', 'Empty']


In [237]:
point_db = db[['data.gene_id','data.status.build_ready','data.status.abandoned','data.status.build_complete','data.location.fragments']]

exclude = ["Good_sequence","","Original_vector_sequence","Original Vector Sequence","Unknown_sequence","In_process","Unknown Sequence"]
for result in exclude:
    point_db = point_db[point_db['data.status.build_complete'] != result]

point_db = point_db.reset_index()
point_list = point_db["data.gene_id"].tolist()
print(point_list)

point_db["data.status.build_complete"].unique()

['BBF10K_000008', 'BBF10K_000016', 'BBF10K_000026', 'BBF10K_000026', 'BBF10K_000052', 'BBF10K_000053', 'BBF10K_000054', 'BBF10K_000057', 'BBF10K_000057', 'BBF10K_000059', 'BBF10K_000072', 'BBF10K_000072', 'BBF10K_000075', 'BBF10K_000076', 'BBF10K_000102', 'BBF10K_000102', 'BBF10K_000120', 'BBF10K_000133', 'BBF10K_000171', 'BBF10K_000191', 'BBF10K_000195', 'BBF10K_000195', 'BBF10K_000199', 'BBF10K_000212', 'BBF10K_000222', 'BBF10K_000224', 'BBF10K_000225', 'BBF10K_000233', 'BBF10K_000250', 'BBF10K_000250', 'BBF10K_000252', 'BBF10K_000279', 'BBF10K_000279', 'BBF10K_000313', 'BBF10K_000315', 'BBF10K_000338', 'BBF10K_000338', 'BBF10K_000342', 'BBF10K_000342', 'BBF10K_000353', 'BBF10K_000364', 'BBF10K_000373', 'BBF10K_000387', 'BBF10K_000395', 'BBF10K_000395', 'BBF10K_000402', 'BBF10K_000403', 'BBF10K_000407', 'BBF10K_000413', 'BBF10K_000417', 'BBF10K_000419', 'BBF10K_000421', 'BBF10K_000424', 'BBF10K_000425', 'BBF10K_000433', 'BBF10K_000439', 'BBF10K_000446', 'BBF10K_000453', 'BBF10K_00045

array(['Point_mutation', 'Bad_reads', 'Split Reads', 'Partial',
       'Point Mutation', 'Incomplete'], dtype=object)

In [253]:
build_map = pd.read_csv(BASE_PATH + "/builds/build008/build008_2018-02-27 11:10:36.csv")
build_map["Result"] = build_map["Result"].str.replace("e","Replace")
build_map["Result"] = build_map["Result"].str.replace("Pick","Replace")
build_map["Result"] = build_map["Result"].fillna("Good")
# build_map = build_map.reset_index()
# build_map["Picked"] = ''
# build_map["Plate"] = ''
# build_map["Well"] = ''

picked = []
plates = []
wells = []
mut_counter = 0

for index,row in build_map.iterrows():
    if mut_counter >= len(point_list):
        print("start over")
        mut_counter = 0
    if row["Result"] == "Good":
        picked.append(row["Gene"])
    elif row["Result"] == "Replace":
#         print("counter ", mut_counter)
#         print("gene ", point_list[mut_counter])
        picked.append(point_list[mut_counter])
        mut_counter += 1

build_map["Picked"] = picked

gene_list = build_map["Picked"].tolist()

build_nums = []
wells = []

for gene in gene_list:
    with open("{}/data/{}/{}.json".format(BASE_PATH,gene,gene),"r") as json_file:
        data = json.load(json_file)
    build_nums.append(data["status"]["build_attempts"][-1]["build_number"])
    wells.append(data["status"]["build_attempts"][-1]["build_well"])

build_map["Build"] = build_nums
build_map["Well"] = wells

build_map = build_map.sort_values(["Build"],ascending=False)

build_map
# print(results)

Unnamed: 0,Gene,Destination,Result,Picked,Build,Well
0,BBF10K_000632,A1,Good,BBF10K_000632,build008,A1
11,BBF10K_000354,D2,Good,BBF10K_000354,build008,D2
1,BBF10K_000460,B1,Good,BBF10K_000460,build008,B1
22,BBF10K_000319,G3,Good,BBF10K_000319,build008,G3
21,BBF10K_000022,F3,Good,BBF10K_000022,build008,F3
20,BBF10K_000334,E3,Good,BBF10K_000334,build008,E3
19,BBF10K_000276,D3,Good,BBF10K_000276,build008,D3
16,BBF10K_000283,A3,Good,BBF10K_000283,build008,A3
14,BBF10K_000018,G2,Good,BBF10K_000018,build008,G2
13,BBF10K_000051,F2,Good,BBF10K_000051,build008,F2
