# Assembly ECHO dispense
### Takes the postional gRNA information from L+D and dispenses 

<b>Input Files </b>
- The notebook takes positional gRNA units from the destination plate of the L+D 
- Those positional gRNA units are then combined into arrays based on the input file

<b>Output Files </b>
- .csv ECHO dispense sheet for plasmid assemblies of 1-60 
- Always dispenses 1 uL of each linker and vector, 0.5 uL of cutsmart

<b>Notes for new users </b>
- If using more or less than 4 gRNAs, some changes are necessary
- Changes also necessary if the number of gRNA specific locations is greater than 40 (e.g., impinges upon the deadvolume of PP384 ECHO plate)
- Changes necessary if number of arrays of a specific vector are greater than 40 for the same reason aboe
    - May be adjusted by adding another vector step in the ligation + digestion and allocation at here with a "_2" suffix

#### Import libraries and relevant files

In [1]:
import matplotlib.pylab as plt
import chart_studio.plotly as py
import numpy as np
import pandas as pd
import array
import random
import string
import os
from datetime import datetime
from collections import defaultdict

In [2]:
cycle = 6

dispense_path= f"dbtl{cycle}"
timestamp = datetime.now().strftime("%Y%m%d")

In [3]:
filename = f'{timestamp}_ECHO_assemblies_DBTL{cycle}.csv'
data=pd.read_csv(os.path.join(dispense_path,f'{timestamp}_Targets_DBTL{cycle}.csv'))
df=pd.DataFrame(data)
data.head(10)

Unnamed: 0,number_of_grna,gRNA_1,gRNA_2,gRNA_3
0,3,PP_0226,PP_0437,PP_0751
1,3,PP_0813,PP_1769,PP_4191
2,3,PP_0226,PP_0597,PP_4678
3,3,PP_0999,PP_3578,PP_5186
4,3,PP_0999,PP_4120,PP_4191
5,3,PP_1319,PP_1457,PP_4192
6,3,PP_1319,PP_4121,PP_4667
7,3,PP_1444,PP_4189,PP_4191
8,3,PP_1506,PP_4651,PP_4862
9,3,PP_2112,PP_2136,PP_4678


* Include the vector, then relabel the gRNAs based on their position by fitting the string "_{i}" according to column label

In [4]:
temp_df = df.copy() #useful to avoid iterating when rerunning
temp_df['number_of_grna'] = 'Vector_' + temp_df['number_of_grna'].astype(str)

# Adding suffixes to the gRNA columns
for i in range(1, 4):
    temp_df[f'gRNA_{i}'] = temp_df[f'gRNA_{i}'] + f'_{i}'

temp_df

Unnamed: 0,number_of_grna,gRNA_1,gRNA_2,gRNA_3
0,Vector_3,PP_0226_1,PP_0437_2,PP_0751_3
1,Vector_3,PP_0813_1,PP_1769_2,PP_4191_3
2,Vector_3,PP_0226_1,PP_0597_2,PP_4678_3
3,Vector_3,PP_0999_1,PP_3578_2,PP_5186_3
4,Vector_3,PP_0999_1,PP_4120_2,PP_4191_3
5,Vector_3,PP_1319_1,PP_1457_2,PP_4192_3
6,Vector_3,PP_1319_1,PP_4121_2,PP_4667_3
7,Vector_3,PP_1444_1,PP_4189_2,PP_4191_3
8,Vector_3,PP_1506_1,PP_4651_2,PP_4862_3
9,Vector_3,PP_2112_1,PP_2136_2,PP_4678_3


In [5]:
# Assign the result to all_dispense
df.loc[:, ['number_of_grna', 'gRNA_1', 'gRNA_2', 'gRNA_3', 'gRNA_4']] = temp_df[['number_of_grna', 'gRNA_1', 'gRNA_2', 'gRNA_3', 'gRNA_4']]
df.head(10)

SyntaxError: invalid syntax (1409883177.py, line 2)

Reindex the DataFrame and then use a dictionary to map to a 384 well, assuming those are the wells we want to use (A1..)

In [None]:
#384-well plate Dictionary
well_count = {'Value': range(1, 385)}
df384 = pd.DataFrame(well_count)

# Dictionary for a 384-well plate
rows = list(string.ascii_uppercase[:16])  # 'A' to 'P'
wells = [f"{row}{col}" for row in rows for col in range(1, 25)]
df384_translate = {i: wells[i-1] for i in range(1, 385)}

In [None]:
# Remapping source plate for ECHO
df.index = range(1, len(df) + 1)
df['Well'] = df.index.map(df384_translate)
new_order=['Well', 'number_of_grna','gRNA_1','gRNA_2','gRNA_3','gRNA_4']
df=df[new_order]
df.head()

* Assuming that we are using a maximum of 4 gRNAs + 1 vector, we will have a reaction size of 6.0 uL
* The reaction table will be as follows:
* Assume the following reaction:

| Reagent | Volume (uL) |
|:---|:---|
| gRNAs | 1.0 |
| Vector | 1.0 |
| Cutsmart buffer | 0.5 |
| Water | 4.5 - count(gRNAs) |

Add columns for water_values and buffer_values

In [None]:
def calculate_water_val(grna):
    # Extract the numeric part from the string
    value = int(grna.split('_')[-1])
    # Perform the calculation
    return 4.5 - value

# Apply the function to the 'number_of_grna' column and create a new column 'Water_val'
df['water_volume'] = df['number_of_grna'].apply(calculate_water_val)
df['buffer_volume'] = 0.5
df

In [None]:
# Function to extract the gRNA type and return it
def extract_grna_type(grna):
    # Extract the numeric part from the string
    return int(grna.split('_')[-1])

df['gRNA_type'] = df['number_of_grna'].apply(extract_grna_type)

# Group by the gRNA type and count the occurrences
gRNA_counts = df['gRNA_type'].value_counts().sort_index()

In [None]:
# Ensure the 'Count', 'plasmid', and 'water' columns are numeric, coerce errors to NaN
df_temp=df
df_temp.loc[:,'water_volume'] = pd.to_numeric(df['water_volume'], errors='coerce')
df_temp.loc[:,'buffer_volume'] = pd.to_numeric(df['buffer_volume'], errors='coerce')
sum_buffer = (df_temp['buffer_volume']).sum()
sum_water = (df_temp['water_volume']).sum()
maximum_plasmid=gRNA_counts.max()

# Print the results
print(f"Maximum plasmid volume is {maximum_plasmid}")
print(f"Total water dispense is: {sum_water}")
print(f"Total buffer dispense is: {sum_buffer}")

### Import the assembly dispense list
* The destination plate from the ligation and digestion is purified and then it becomes the dispense plate for the electroporations

In [None]:
ECHO_Source=pd.read_csv(os.path.join(dispense_path,f'{timestamp}_ECHO_Assembly_Source_Map_DBTL{cycle}.csv'))

ECHO_Source=ECHO_Source[['gRNA_Target']]
ECHO_Source.head()

In [None]:
sum_water=sum_water.round(0)
sum_buffer=sum_buffer.round(0)

water_aliquot=65
working_vol=water_aliquot-20

water_wells = np.ceil(sum_water/working_vol)
buffer_wells=np.ceil(sum_buffer/working_vol)

buffer_rows_list=[]
water_rows_list=[]

for i in range(int(water_wells)):
    water_row = {f"Water_{i+1}"}
    water_rows_list.append(water_row)
    
for i in range(int(buffer_wells)):
    buffer_row = {f"Buffer_{i+1}"}
    buffer_rows_list.append(buffer_row)
    
water_df=pd.DataFrame(water_rows_list,columns=ECHO_Source.columns)
buffer_df=pd.DataFrame(buffer_rows_list,columns=ECHO_Source.columns)

temp_df=ECHO_Source.copy()
temp_df=pd.concat([ECHO_Source, water_df,buffer_df], ignore_index=True)

ECHO_Source=temp_df[['gRNA_Target']]
ECHO_Source.tail()

In [None]:
ECHO_Source.index = range(1, len(ECHO_Source) + 1)
ECHO_Source['Well'] = ECHO_Source.index.map(df384_translate)
new_order=['Well', 'gRNA_Target']
ECHO_Source=ECHO_Source[new_order]
ECHO_Source

In [None]:
# Initialize variables
running_sum = 0

water_counter = 1
buffer_counter = 1

water_list = []
buffer_list =[] 

# Loop through the DataFrame and assign wells
for value in df['water_volume']:
    running_sum += value
    if running_sum > 40:
        water_counter += 1
        running_sum = value  # start new well with current value
    water_list.append(f'Water_{water_counter}')

df['water_source'] = water_list
running_sum = 0

# Loop through the DataFrame and assign wells
for value2 in df['buffer_volume']:
    running_sum += value2
    if running_sum > 40:
        buffer_counter += 1
        running_sum = value2  # start new well with current value
    buffer_list.append(f'Buffer_{buffer_counter}')

df['buffer_source'] = buffer_list
df

In [None]:
dispense_dict = ECHO_Source.set_index('gRNA_Target')['Well'].to_dict()

In [None]:
# Replace the prefix, suffix, and gRNA_Target in the source DataFrame using the dictionary
for column in df.columns:
    df[column] = df[column].map(dispense_dict).fillna(df[column])

df.head()

In [None]:
df = df.rename(columns={'Well': 'Destination Well'})
df.head()

#### Consolidate individual dispenses
* We want a three column output with Source Well, volume, and destination well

| Sample Group | Source Plate Type | Source Well | Source Plate Name | Destination Plate Name | Destination Well | Transfer Volume |
|--------------|-------------------|-------------|-------------------|------------------------|------------------|-----------------|
| AQ           | 384PP_AQ_SP2      | A1          | Parts_Library_1   | Digest_and_Ligate      | A1               | 2000            |
| AQ           | 384PP_AQ_SP2      | A2          | Parts_Library_1   | Digest_and_Ligate      | A2               | 2000            |

In [None]:
gRNA_suffixes = ['1','2','3','4']
gRNA_dfs=[]
for suffix in gRNA_suffixes:
    #Rename the gRNA_suffix column to the Source Well
    gRNA_dfs.append(df[['Destination Well', f'gRNA_{suffix}']].rename(columns={f'gRNA_{suffix}': 'Source Well'}))

vectors = ['Destination Well','number_of_grna']
vectors_df=df[vectors].rename(columns={'number_of_grna':'Source Well'})

#appending the vectors to the gRNAs
gRNA_dfs.append(vectors_df)

In [None]:
# Concatenate all DataFrames in the list
gRNAs_df = pd.concat(gRNA_dfs, axis=0, ignore_index=True)
gRNAs_df_filtered = gRNAs_df[gRNAs_df['Source Well'].notna()]

#Setting all volumes to be 1 for each positional guide arrangement
gRNAs_slice_df=gRNAs_df_filtered.copy()
gRNAs_slice_df.loc[:, 'Transfer Volume']=1
gRNAs_slice_df

In [None]:
other=['buffer_source','buffer_volume','Destination Well']
water=['water_source','water_volume','Destination Well']

# Extracting columns and creating DataFrames
other_df = pd.DataFrame(df[other])
other_df = other_df.rename(columns={'buffer_source': 'Source Well'})
other_df = other_df.rename(columns={'buffer_volume':'Transfer Volume'})

water_df = pd.DataFrame(df[water])
water_df = water_df.rename(columns={'water_source': 'Source Well'})
water_df = water_df.rename(columns={'water_volume':'Transfer Volume'})         

# Concatenate the data frames so that everything is on the same list
CRISPRi_Assemblies = []
CRISPRi_Assemblies = pd.concat([gRNAs_slice_df, water_df, other_df], axis=0)
CRISPRi_Assemblies

In [None]:
ECHO_columns = {
    "Sample Group": "AQ",
    "Source Plate Name": f"Parts_Library_DBTL{cycle}",
    "Destination Plate Name": f"CRISPRi_Assemblies_DBTL{cycle}",
    "Source Plate Type": "384PP_AQ_SP2"
}

# Add new columns to DataFrame
CRISPRi_Assemblies = CRISPRi_Assemblies.assign(**ECHO_columns)
CRISPRi_Assemblies.head(1)

In [None]:
#Reorganize the DataFrame headers
columns_order = [3, 6, 1, 4, 5, 0, 2]  # Ordering according to the ECHO
CRISPRi_Assemblies = CRISPRi_Assemblies[CRISPRi_Assemblies.columns[columns_order]]
CRISPRi_Assemblies['Transfer Volume'] = CRISPRi_Assemblies['Transfer Volume'].multiply(1000).round()
CRISPRi_Assemblies

### Save the file under Dispense_Files

In [None]:
CRISPRi_Assemblies = CRISPRi_Assemblies.sort_values('Destination Well',ascending=(True))

CRISPRi_Assemblies.to_csv(os.path.join(dispense_path, f'{timestamp}_ECHO_Assembly_Dispense_DBTL{cycle}.csv'), index=False)