In [7]:
import glob, os
import string
import math 
import pandas as pd
import numpy as np


## Input to modify
- parameter_path: path to the parameter file in TSV format. Make sure the format and column names are exactly the same as in the example 'tests\data\sampler\input\parameters.tsv'. Column names are: Component, maxValue, stockConcentration, deadVolume, Ratios
- out_path: folder for ALL files (from sampler to instructor). Please create a new folder for each set of samples; otherwise the modification steps will also be applied to unrelated files. It is possible to use different folders for the converter, plates generator, etc.; however, you then have to change the file names manually at each step
- nb_sample: number of samples generated from the parameter file using Latin hypercube sampling
- volumn_file: name of the .tsv file of experiments expressed in volumes (it is appended to out_path)
- nb_replicate: number of replicates
- volume_path: path and file name used to save the replicated samples
- min_volume/max_volume: machine transfer volume limits; outside this range the transferred volume cannot be guaranteed to be correct. Usually within [20; 1000] nL; Mahnaz has already tested up to 2000 nL
- source_size: size of source plate
- inexpensive_component: list of components that can be split across multiple wells of the source plate to speed up transfer
- max_volume_source: the maximum volume that can be filled into a source plate well
- optimal_volume_source: a volume smaller than the maximum above; this is the level up to which wells of inexpensive components are filled
- dead_volumn: dead volume of the source wells
- extra: number < 1 representing the extra percentage of source volume in the source plate; used to calculate the final filling volumes
- to_split_component: list of component names that need to be dispensed before or after the other components; their instructions are therefore separated into their own files so they can be fed to the ECHO first/later
- viscous_component: list of components whose Source Plate Type is changed to '384PP_AQ_CP' instead of the default '384PP_AQ_GP3'

NOTICE: all path or string should be within "" instead of ''

In [8]:
# Parameter file handed to icfree.sampler, and the folder that receives the
# sampler output and is later searched for plate and instruction files.
parameter_path = "al_test\\240202_param.txt"
out_path = "round3"

# Number of samples requested from the sampler (--nb-samples).
nb_sample = 100

# Volume table: `out_path + volumn_file` is read, replicated `nb_replicate`
# times (rows shuffled when `shuffle` is True) and written to `volume_path`.
# NOTE(review): with the values below both paths point to the same file, so
# the replicated table overwrites the table it was read from.
volumn_file = "/sampling_volumes.tsv"
volume_path = out_path + "/sampling_volumes.tsv"
nb_replicate = 6
shuffle = False

# Machine transfer limits (nL according to the notes above); destination
# columns whose maximum exceeds max_volume are split into several columns.
min_volume = 20
max_volume = 1000

# Source plate.
# source_size is [number of row letters, number of column numbers].
# Components whose name contains an entry of inexpensive_component are filled
# up to optimal_volume_source per well, all others up to max_volume_source;
# dead_volumn is added on top of the needed volume in every well.
# Volumes are presumably in nL, like the transfer limits -- TODO confirm.
# extra is the factor used to scale the summed destination volumes when the
# source filling volumes are computed.
source_size = [16,24]
inexpensive_component = ["Water"]
max_volume_source = 60000
optimal_volume_source = 30000
dead_volumn = 15000
extra = 0.5


# Instructor post-processing.
# Rows whose 'Sample ID' starts with a name in to_split_component are moved to
# their own instruction file; rows whose 'Sample ID' starts with a name in
# viscous_component get the Source Plate Type "384PP_AQ_CP".
to_split_component = ["HEPES","Malachite green"]
viscous_component = ['PEG-8000']

Helper functions needed by the cells below; do not modify

In [9]:
def split_values(value, lower_limit, upper_limit, nb_column):
    """Spread `value` over `nb_column` slots of at most `upper_limit` each.

    As many slots as possible receive exactly `upper_limit`; the next slot
    receives what is left and the remaining slots are 0. When the left-over
    would be smaller than `lower_limit`, one full slot is given up so that
    the left-over is merged into it (that slot then exceeds `upper_limit`
    rather than falling below `lower_limit`).

    Parameters
    ----------
    value : number
        Quantity to split.
    lower_limit : number
        Smallest left-over that is allowed to stand in a slot of its own.
    upper_limit : number
        Size of a full slot.
    nb_column : int
        Number of slots expected in the result; unused slots are 0.

    Returns
    -------
    list
        Full slots, then the left-over, then zero padding. The list is longer
        than `nb_column` if `nb_column` is too small for `value`.
    """
    quotient, remainder = divmod(value, upper_limit)
    full_column = int(quotient)

    # A too-small remainder is folded into the last full slot.
    if full_column > 0 and remainder < lower_limit:
        full_column -= 1

    last_slot = value - full_column*upper_limit
    padding = [0]*(nb_column - full_column - 1)
    return [upper_limit]*full_column + [last_slot] + padding

def find_well(source_size, first_well, size):
    """Return `size` consecutive well names starting at `first_well`.

    Wells are enumerated column by column: every row letter of column 1
    (A1, B1, ...), then every row letter of column 2, and so on up to and
    including the last column of the plate.

    Parameters
    ----------
    source_size : sequence of two ints
        Plate dimensions as (number of row letters, number of columns).
        Row letters are taken from A-Z, so at most 26 rows are produced.
    first_well : str
        Name of the first well to use, e.g. "B1".
    size : int
        Number of wells requested.

    Returns
    -------
    list of str
        The requested well names in enumeration order.

    Raises
    ------
    ValueError
        If `first_well` is not a well of the plate, or if fewer than `size`
        wells are available from `first_well` to the end of the plate.
    """
    nb_row, nb_col = source_size[0], source_size[1]

    # Get the first letters of the alphabet
    alphabet_letters = string.ascii_uppercase[:nb_row]

    # Generate combinations; the upper bound is nb_col + 1 so that the last
    # column of the plate is part of the enumeration.
    names_list = [
        letter + str(number)
        for number in range(1, nb_col + 1)
        for letter in alphabet_letters
    ]

    # Find where the first well sits in the enumeration
    start_idx = names_list.index(first_well)
    well_list = names_list[start_idx:(start_idx + size)]

    # Fail loudly instead of silently handing back too few wells.
    if len(well_list) < size:
        raise ValueError(
            f"Only {len(well_list)} wells available from {first_well} "
            f"on a {nb_row}x{nb_col} plate, {size} requested"
        )

    return well_list

def divide_volumn(dead_volumn, limit_volume, vol):
    """Compute the filling volume of each well needed to hold `vol`.

    Every well can deliver `limit_volume - dead_volumn`. The filling volume
    of a well is the volume still to be delivered plus `dead_volumn`,
    rounded to the nearest hundred and capped at `limit_volume`.

    Parameters
    ----------
    dead_volumn : number
        Volume of a well that is not available for delivery.
    limit_volume : number
        Largest filling volume allowed in one well.
    vol : number
        Total volume that has to be delivered.

    Returns
    -------
    list
        One filling volume per well; empty when `vol` is 0.
    """
    usable = limit_volume - dead_volumn
    fillings = []
    for well_idx in range(math.ceil(vol/usable)):
        wanted = round((vol - well_idx*usable + dead_volumn), -2)
        fillings.append(min(limit_volume, wanted))
    return fillings

### Sample new experiments from latin hypercube
Skip this step if you are using results from active learning
- Input: path to a .tsv file containing CFPS parameters and features; optional: number of samples and output folder
- Output: sampling.tsv
For more information, run this command in a separate cell:


%%cmd 
python -m icfree.sampler --help


In [10]:
%run -m icfree.sampler {parameter_path} --nb-samples $nb_sample -of $out_path

[32mList of parameters[0m
[32m   Mg-glutamate	(6 possible values)[0m
[32m   K-glutamate	(6 possible values)[0m
[32m   Amino acid	(6 possible values)[0m
[32m   Spermidine	(6 possible values)[0m
[32m   3-PGA	(6 possible values)[0m
[32m   NTPs	(6 possible values)[0m
[32m   PEG-8000	(6 possible values)[0m
[32m   DNA	(6 possible values)[0m
[32m   HEPES	(1 possible values)[0m
[32m   Malachite green	(1 possible values)[0m
[32m[0m
[32mMaximum number of unique samples: 1679616[0m
[32m[0m


icfree.sampler 2.1.0



### Convert from concentration into volume of source mediums

- Input: path to a .tsv parameter file and a .tsv sampling file (or active learning file); optional: -v (maximum destination volume, here 10.5 µL) and -of (output folder)
- Output: sampling_volumes.tsv
For more information, run this command in a separate cell:


%%cmd 
python -m icfree.converter --help

In [11]:
%%cmd
python -m icfree.converter "al_test\240202_param.txt" "round3\round4_ei.csv" -v 10500 -of round3

Microsoft Windows [version 10.0.19045.3930]
(c) Microsoft Corporation. Tous droits r�serv�s.

(cellfree) c:\Users\tnhoang\Documents\icfree\icfree-ml>python -m icfree.converter "al_test\240202_param.txt" "round3\round4_ei.csv" -v 10500 -of round3


[32mConverting concentrations to volumes...[0m



(cellfree) c:\Users\tnhoang\Documents\icfree\icfree-ml>

### Some modifications:
- If controls are needed, add them to the volume file before this step
- Calculate the balance volume for the next step, i.e. the common level of all wells, which is reached by adding a different amount of water to each well. This result (here 5970 nL) is the input for the plates_generator command
- Replicate the volume file nb_replicate times, for repetitions

In [12]:
# Read the tab-separated volume table found at out_path + volumn_file.
volumn = pd.read_csv(out_path + volumn_file, sep='\t')

# Total of every row, then the largest of those totals.
accumulate_volumn = volumn.sum(axis=1)
max_plate = accumulate_volumn.max()

# Reported value: the largest row total plus min_volume, rounded up.
print(f'Well_volume {math.ceil(max_plate + min_volume)} nL')

# Stack nb_replicate copies of the table, optionally in random row order.
volumn = pd.concat([volumn for _ in range(nb_replicate)], ignore_index=True)
if shuffle:
    volumn = volumn.sample(frac=1)

# NOTE(review): the row index is written as the first column of the file.
# NOTE(review): when volume_path equals out_path + volumn_file, the input
# table is overwritten, so running this cell again replicates the already
# replicated table.
volumn.to_csv(volume_path, sep='\t')

Well_volume 5970 nL


### Generated sample position on destination plates and the water amount to add

- Input: path to a .tsv parameter file and a .tsv sampling file IN VOLUME
- Optional: -v (number calculated above, here 5970 nL), -of (output folder), -dsw (first well position for the DESTINATION plate, here A1), -ssw (first well position for the SOURCE plate, here B1)
- Output: destination_plate (.csv and .json) and source_plate (.csv and .json), also volumn_summary
For more information, run this command in a separate cell:


%%cmd 
python -m icfree.plates_generator --help

In [13]:
%%cmd
python -m icfree.plates_generator "al_test\240202_param.txt" "round3\sampling_volumes.tsv" -of round3 -v 5970 -dsw A1 -ssw B1 

Microsoft Windows [version 10.0.19045.3930]
(c) Microsoft Corporation. Tous droits r�serv�s.

(cellfree) c:\Users\tnhoang\Documents\icfree\icfree-ml>python -m icfree.plates_generator "al_test\240202_param.txt" "round3\sampling_volumes.tsv" -of round3 -v 5970 -dsw A1 -ssw B1 



(cellfree) c:\Users\tnhoang\Documents\icfree\icfree-ml>

### Some modifications
- Modify the destination columns to fit within the transfer volume limits of the ECHO: if a volume is too big, split column_A into column_A_1, column_A_2, ... each within the limit. If the residual (the last column) would be smaller than the lower limit, it is combined with the previous volume instead (we prefer a value above the upper limit to a value below the lower limit)
- At the same time, rewrite the source .csv file with the new column names. Also re-calculate the volumes of the source plate as the total volume needed multiplied by some extra percentage. Expensive components are kept in as few wells as possible (to avoid waste through dead volume); inexpensive components are filled only up to an optimal volume per well, so the machine has multiple source wells to draw from and can skip some waiting time

In [14]:
plate_list = glob.glob(os.path.join(out_path, 'destination_plate_*.csv'))

# For every destination plate: split component columns that exceed the
# transfer limit, then rebuild the matching source plate file.
# NOTE: both CSV files are overwritten in place, so running this cell a second
# time operates on the already modified files.
for plate in plate_list:
    plate_data = pd.read_csv(plate)

    # Locate the source file that belongs to this destination file. Only the
    # file name is rewritten, so a folder whose name contains "destination"
    # is left untouched. It is loaded before anything is written so that a
    # missing source file does not leave a half-processed plate behind.
    plate_dir, plate_file = os.path.split(plate)
    source = os.path.join(plate_dir, plate_file.replace("destination", "source"))
    source_data = pd.read_csv(source)

    # Find which component volume is too big. The first column is skipped
    # (presumably the well name -- verify against the plates_generator output).
    component_max_vol = plate_data.iloc[:, 1:].max(axis=0)
    component_list = component_max_vol[component_max_vol > max_volume].index.tolist()

    for component in component_list:
        # Number of columns needed for the largest value of this component.
        # split_values gives the remainder a column of its own as soon as it
        # is >= min_volume, so the same boundary must be used here; with a
        # strict '>' a remainder equal to min_volume yields one value more
        # than there are column names.
        highest = component_max_vol[component]
        res = highest % max_volume
        col = int(highest // max_volume)
        if (res >= min_volume) or (col == 0):
            col += 1

        # Make the new column names: <component>_1, <component>_2, ...
        name_list = [component + '_' + str(order + 1) for order in range(col)]

        # Split every value of the component into `col` transfer volumes.
        split_rows = [
            split_values(value, min_volume, max_volume, col)
            for value in plate_data[component]
        ]
        plate_data[name_list] = pd.DataFrame(split_rows, index=plate_data.index)

        # The original column is replaced by the split columns.
        plate_data = plate_data.drop(columns=[component])

    # Save the modified destination file
    plate_data.to_csv(plate, index=False)

    # Calculate the source volume needed per (possibly split) component.
    # Only the component columns are summed; the first column is left out.
    # `extra` is described as an extra percentage (< 1), so the source must
    # hold the needed volume PLUS that fraction; multiplying by `extra` alone
    # would provide less than what the destination plate consumes.
    component_name = plate_data.columns[1:]
    volumn_need = plate_data.iloc[:, 1:].sum(axis=0).to_numpy() * (1 + extra)

    # One block of wells per component: inexpensive components are spread
    # over wells filled up to optimal_volume_source, the others use as few
    # wells as possible by filling up to max_volume_source.
    per_component = []
    for name, vol in zip(component_name, volumn_need):
        if any(item in name for item in inexpensive_component):
            limit = optimal_volume_source
        else:
            limit = max_volume_source
        result = divide_volumn(dead_volumn, limit, vol)
        per_component.append(pd.DataFrame(result, columns=[name]))

    # Stack the blocks once; each row (well) holds a single component and is
    # empty (NaN) in every other column.
    if per_component:
        source_frame = pd.concat(per_component, axis=0, join='outer')
    else:
        source_frame = pd.DataFrame([])

    # Assign the well column, starting from the first well of the old file
    first_source_well = source_data['Well'].iloc[0]
    nb__well_source = source_frame.shape[0]
    source_well_destination = find_well(source_size, first_source_well, nb__well_source)
    source_frame.insert(0, 'Well', source_well_destination)

    # Save the source file
    source_frame.to_csv(source, index=False)

### Translate from plate composition and destinations into machine instructors

- Input: path to the .json source file (or files) and the .json destination file (or files)
- Optional: -of (output folder), -spt (to change Source_Plate_Type, here use CP instead of GP3)
- Output: instructions.csv and volumes_warning.txt (captures all warnings)
For more information, run this command in a separate cell:


%%cmd 
python -m icfree.instructor --help

In [15]:
%%cmd
python -m icfree.instructor --source_plates "round3\source_plate_1.json" --dest_plates "round3\destination_plate_1.json" -of round3 

Microsoft Windows [version 10.0.19045.3930]
(c) Microsoft Corporation. Tous droits r�serv�s.

(cellfree) c:\Users\tnhoang\Documents\icfree\icfree-ml>python -m icfree.instructor --source_plates "round3\source_plate_1.json" --dest_plates "round3\destination_plate_1.json" -of round3 

(cellfree) c:\Users\tnhoang\Documents\icfree\icfree-ml>

In [16]:
%%cmd 
python -m icfree.instructor --help

Microsoft Windows [version 10.0.19045.3930]
(c) Microsoft Corporation. Tous droits r�serv�s.

(cellfree) c:\Users\tnhoang\Documents\icfree\icfree-ml>python -m icfree.instructor --help


usage: Generates instructions for robots

options:
  -h, --help            show this help message and exit
  --source_plates SOURCE_PLATES [SOURCE_PLATES ...]
                        Path to .json files containing source plates
                        information
  --source_wells SOURCE_WELLS [SOURCE_WELLS ...]
                        Path to .csv/tsv files containing source wells
                        content. If set, overwrite "Wells" entry in .json
                        file.
  --dest_plates DEST_PLATES [DEST_PLATES ...]
                        Path to .json files containing destination plates
                        information
  --dest_wells DEST_WELLS [DEST_WELLS ...]
                        Path to .csv/tsv files containing dest wells content.
                        If set, overwrite "Wells" entry in .json file.
  --robot ROBOT         Name of the robot to use (default: echo)
  -of OUTPUT_FOLDER, --output-folder OUTPUT_FOLDER
                        Output folder to write o

### Delete zero-volume instructions and split the instructions if some components need to be run first/later

In [20]:
instruction_list = glob.glob(os.path.join(out_path, 'instructions*.csv'))

# Reported once, before the loop: inside a loop over to_split_component this
# message could never be reached when the list is empty.
if not to_split_component:
    print('No component to split')

# File-name endings produced by the split step below. Files left over from a
# previous run of this cell also match the glob pattern and must not be
# processed again.
split_suffixes = tuple('_' + name + '.csv' for name in to_split_component)

for file in instruction_list:
    if file.endswith(split_suffixes):
        continue

    instruction_data = pd.read_csv(file)

    # Delete transfers that have 0 volume. The copy makes the frame
    # independent of the one read from disk so the assignment below is safe.
    instruction_data = instruction_data.loc[instruction_data['Transfer Volume'] != 0].copy()

    # Change the Source Plate Type of viscous components into 384PP_AQ_CP
    for name in viscous_component:
        is_viscous = instruction_data['Sample ID'].astype(str).str.startswith(name)
        instruction_data.loc[is_viscous, "Source Plate Type"] = "384PP_AQ_CP"

    # Split into multiple instruction files depending on the component
    base_name, extension = os.path.splitext(file)
    for name in to_split_component:
        is_component = instruction_data['Sample ID'].astype(str).str.startswith(name)
        subset_instructor = instruction_data.loc[is_component]
        instruction_data = instruction_data.loc[~is_component]

        # Nothing to move: do not create an empty file, and above all do not
        # overwrite a split file written by an earlier run.
        if subset_instructor.empty:
            continue

        sub_name = base_name + "_" + name + extension
        subset_instructor.to_csv(sub_name, index=False)

    # What remains after the split goes back into the original file
    instruction_data.to_csv(file, index=False)
