# Create EDD Study Files From Data

This notebook creates the files needed for importing a study into Experiment Data Depot (EDD).

## Inputs and outputs

### Required file to run this notebook:
   - `../data/DBTL7/media_descriptions.csv` - media designs for each of the wells
   
   - `../data/DBTL7/OD.xlsx` - production data from the plate reader


### File generated by running this notebook:
   - `edd_experiment_description.csv`
   
   - `edd_protocol.csv`
 
    
The files are stored in the user defined directory.

## Setup

Importing needed libraries:

In [2]:
import sys
sys.path.append('../../media_compiler')

import pandas as pd
import openpyxl

from core import create_media_description

### User parameters

In [3]:
CYCLE = 7

user_params = {
    'media_file': f'../data/DBTL{CYCLE}/media_descriptions.csv',  
    'measurement_file': f'../data/DBTL{CYCLE}/OD.xlsx',
    'output_file_path': f'../data/DBTL{CYCLE}', # Folder for output files,
    'num_replicates': 3,
    'num_designs': 16,
    'protocol_name': ['OD600', 'OD340'],
    'time_point': 48,
    'part_id': 'JBx_193086',
    'media': 'MOPS',
    'culture_volume': 15,
    'well_volume': 1500,
    'shaking_speed': 800,
    'temperature': 30,
    } 


In [4]:
df = pd.read_csv(user_params['media_file'], index_col=0)
df.head()

Unnamed: 0_level_0,MOPS[mM],Tricine[mM],H3BO3[mM],Glucose[mM],K2SO4[mM],K2HPO4[mM],FeSO4[mM],NH4Cl[mM],MgCl2[mM],NaCl[mM],(NH4)6Mo7O24[mM],CoCl2[mM],CuSO4[mM],MnSO4[mM],ZnSO4[mM],CaCl2[mM]
Well,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
A1,40.0,4.0,0.033485,157.760522,0.964123,6.992958,0.067892,23.233056,1.93201,1313.377579,0.000108,0.002725,0.004785,0.007395,0.001752,0.035968
B1,40.0,4.0,0.033485,157.760522,0.964123,6.992958,0.067892,23.233056,1.93201,1313.377579,0.000108,0.002725,0.004785,0.007395,0.001752,0.035968
C1,40.0,4.0,0.033485,157.760522,0.964123,6.992958,0.067892,23.233056,1.93201,1313.377579,0.000108,0.002725,0.004785,0.007395,0.001752,0.035968
D1,40.0,4.0,0.073964,197.001031,0.805718,8.730541,0.024996,34.4809,1.287242,959.379367,0.000295,0.001569,0.005873,0.013567,0.000728,0.01677
E1,40.0,4.0,0.073964,197.001031,0.805718,8.730541,0.024996,34.4809,1.287242,959.379367,0.000295,0.001569,0.005873,0.013567,0.000728,0.01677


## Create Line Description

In [5]:
df['Line Description'] = df.apply(create_media_description, axis=1)


## Create Line Names

Add metadata for media and replicates to craft Line Names as **C[.]\_W[.]1\_[.]3-R[.]** denoting cycle number, wells occupying the same design and replicate number:

Check if it's row or column order of lines:

In [6]:
column_order = True if df.index[1][0] == 'B' else False

In [7]:
reps = user_params['num_replicates']
num_media_designs = user_params['num_designs']

def linefunction_colum_order(row):
    well = row.name
    if well[0] in 'ABC':
        return f'C{CYCLE}_WA{well[1]}_C{well[1]}-R{row["Replicate"]}'
    else:
        return f'C{CYCLE}_WD{well[1]}_F{well[1]}-R{row["Replicate"]}'                                                                                              

def linefunction_row_order(row):
    well = row.name
    if int(well[1]) < 5:
        return f'C{CYCLE}_W{well[0]}1_{well[0]}4-R{row["Replicate"]}'
    else:
        return f'C{CYCLE}_W{well[0]}5_{well[0]}8-R{row["Replicate"]}'                                                                                              

    
df['Replicate'] = [i+1 for _ in range(num_media_designs) for i in range(reps)]

if column_order:
    df['Line Name'] = df.apply(linefunction_colum_order, axis=1)
else:
    df['Line Name'] = df.apply(linefunction_row_order, axis=1)

In [8]:
df.head(2)

Unnamed: 0_level_0,MOPS[mM],Tricine[mM],H3BO3[mM],Glucose[mM],K2SO4[mM],K2HPO4[mM],FeSO4[mM],NH4Cl[mM],MgCl2[mM],NaCl[mM],(NH4)6Mo7O24[mM],CoCl2[mM],CuSO4[mM],MnSO4[mM],ZnSO4[mM],CaCl2[mM],Line Description,Replicate,Line Name
Well,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
A1,40.0,4.0,0.033485,157.760522,0.964123,6.992958,0.067892,23.233056,1.93201,1313.377579,0.000108,0.002725,0.004785,0.007395,0.001752,0.035968,"MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...",1,C7_WA1_C1-R1
B1,40.0,4.0,0.033485,157.760522,0.964123,6.992958,0.067892,23.233056,1.93201,1313.377579,0.000108,0.002725,0.004785,0.007395,0.001752,0.035968,"MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...",2,C7_WA1_C1-R2


## Process measurement files

Read measurements file:

In [10]:
# df_600 = pd.read_excel(user_params['measurement_file'], sheet_name='600', index_col=0)
df_340 = pd.read_excel(user_params['measurement_file'], sheet_name='340', index_col=0)

# df_600

In [11]:
df_340

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12
A,0.1148,0.4512,0.2727,0.1077,0.0973,0.8191,0.1212,0.1203,0.0839,0.0936,0.0925,0.0902
B,0.1061,0.4977,0.295,0.1071,0.0996,0.8159,0.1131,0.1169,0.0926,0.0909,0.0894,0.0858
C,0.0993,0.5047,0.2772,0.1011,0.0891,0.9093,0.1016,0.1072,0.0854,0.0868,0.0827,0.0805
D,0.0874,0.1399,0.0859,0.1019,0.0947,0.0792,0.117,0.3477,0.0794,0.0797,0.0762,0.0739
E,0.0833,0.1353,0.0829,0.0984,0.0917,0.0749,0.1138,0.3604,0.0752,0.0751,0.0721,0.07
F,0.0796,0.1312,0.0817,0.0977,0.0906,0.0733,0.1129,0.3847,0.0731,0.0717,0.0694,0.0665
G,0.0755,0.0687,0.0713,0.0716,0.0723,0.0701,0.0704,0.0721,0.0719,0.0705,0.0682,0.0653
H,0.0678,0.069,0.0704,0.07,0.0692,0.0688,0.0689,0.0693,0.07,0.0693,0.069,0.0671


### Process OD600

Normalize the data to the control well A9 value (water content):

In [10]:
zero_value_600 = df_600.at['A', 9]
df_600.loc[:, df_600.columns] -= zero_value_600
df_600

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12
A,0.1337,0.0715,0.1027,0.1133,0.0991,0.0987,0.08,0.1299,0.0,0.0106,0.0106,0.0102
B,0.0751,0.0639,0.0947,0.0895,0.0915,0.0693,0.0713,0.0799,0.0111,0.011,0.0107,0.0101
C,0.1153,0.1017,0.1275,0.1426,0.0885,0.1283,0.114,0.1693,0.0109,0.0111,0.0108,0.01
D,0.123,0.0854,0.0719,0.0578,0.0604,0.0753,0.0673,0.1064,0.0108,0.0108,0.0105,0.0102
E,0.1124,0.0726,0.0612,0.0534,0.0594,0.0732,0.0621,0.098,0.0108,0.0106,0.0105,0.0099
F,0.1376,0.0938,0.0672,0.0514,0.0693,0.0765,0.0424,0.1275,0.011,0.0105,0.0105,0.0101
G,0.0104,0.0107,0.0115,0.0111,0.0108,0.0105,0.0104,0.0111,0.0111,0.0107,0.0104,0.0099
H,0.0105,0.0106,0.0103,0.0103,0.0102,0.0101,0.0101,0.0102,0.0105,0.0106,0.01,0.0097


Set negative values to zero, multiply the values by 10 to account for 10x dilution and keep only 8 columns and 6 rows:

In [11]:
df_600[df_600 < 0] = 0
df_600.loc[:, df_600.columns] *= 10
df_600 = df_600.iloc[:6,:8]
df_600

Unnamed: 0,1,2,3,4,5,6,7,8
A,1.337,0.715,1.027,1.133,0.991,0.987,0.8,1.299
B,0.751,0.639,0.947,0.895,0.915,0.693,0.713,0.799
C,1.153,1.017,1.275,1.426,0.885,1.283,1.14,1.693
D,1.23,0.854,0.719,0.578,0.604,0.753,0.673,1.064
E,1.124,0.726,0.612,0.534,0.594,0.732,0.621,0.98
F,1.376,0.938,0.672,0.514,0.693,0.765,0.424,1.275


### Process OD340

Do the same for OD340, except of multiplication by 10, as the OD340 measurements were taken from non-diluted samples:

In [12]:
zero_value_340 = df_340.at['A', 9]
df_340.loc[:, df_340.columns] -= zero_value_340
df_340[df_340 < 0] = 0
df_340 = df_340.iloc[:6,:8]
df_340

Unnamed: 0,1,2,3,4,5,6,7,8
A,0.0309,0.3673,0.1888,0.0238,0.0134,0.7352,0.0373,0.0364
B,0.0222,0.4138,0.2111,0.0232,0.0157,0.732,0.0292,0.033
C,0.0154,0.4208,0.1933,0.0172,0.0052,0.8254,0.0177,0.0233
D,0.0035,0.056,0.002,0.018,0.0108,0.0,0.0331,0.2638
E,0.0,0.0514,0.0,0.0145,0.0078,0.0,0.0299,0.2765
F,0.0,0.0473,0.0,0.0138,0.0067,0.0,0.029,0.3008


Transform wide to long format, matching the order of wells in index to the one from `df`:

In [13]:
if column_order:
    indfcn = lambda x: f'{x.name}{int(x["variable"])}'
    # df_600 = df_600.melt(ignore_index=False, value_name='OD600')
    df_340 = df_340.melt(ignore_index=False, value_name='OD340')
else:
    indfcn = lambda x: f'{x["variable"]}{(x.name)}'
    # df_600 = df_600.T.melt(ignore_index=False, value_name='OD600')
    df_340 = df_340.T.melt(ignore_index=False, value_name='OD340')

# df_600['Well'] = df_600.apply(indfcn, axis=1)
df_340['Well'] = df_340.apply(indfcn, axis=1)

# df_600.index = df_600['Well']
df_340.index = df_340['Well']

# df_600.drop(columns=['variable', 'Well'], inplace=True)
df_340.drop(columns=['variable', 'Well'], inplace=True)

# df_600.head()

Add measurements to the main dataframe:

In [14]:
# measurOD600 = user_params['protocol_name'][0]
measurOD340 = user_params['protocol_name'][1]
# df[measurOD600] = df_600
df[measurOD340] = df_340
df.head(2)


Unnamed: 0_level_0,MOPS[mM],Tricine[mM],H3BO3[mM],Glucose[mM],K2SO4[mM],K2HPO4[mM],FeSO4[mM],NH4Cl[mM],MgCl2[mM],NaCl[mM],(NH4)6Mo7O24[mM],CoCl2[mM],CuSO4[mM],MnSO4[mM],ZnSO4[mM],CaCl2[mM],Line Description,Replicate,Line Name,OD340
Well,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
A1,40.0,4.0,0.033485,157.760522,0.964123,6.992958,0.067892,23.233056,1.93201,1313.377579,0.000108,0.002725,0.004785,0.007395,0.001752,0.035968,"MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...",1,C7_WA1_C1-R1,0.0309
B1,40.0,4.0,0.033485,157.760522,0.964123,6.992958,0.067892,23.233056,1.93201,1313.377579,0.000108,0.002725,0.004785,0.007395,0.001752,0.035968,"MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...",2,C7_WA1_C1-R2,0.0222


## Create EDD Experiment Description File

In [15]:
df['Media'] = user_params['media']
df['Part ID'] = user_params['part_id']
df['Culture Volume'] = user_params['culture_volume']
df['Flask Volume'] = user_params['well_volume']
df['Growth Temperature'] = user_params['temperature']
df['Shaking speed'] = user_params['shaking_speed']
# df['Starting OD'] =
# df['Replicate Count'] = 24


In [16]:
# Invalid columns for now in EDD
# df['Humidity[%]'] = user_params['humidity']
# df['Plate'] = user_params['plate']

In [17]:
exp_descr_file = f'{user_params["output_file_path"]}/edd_experiment_description.xlsx'
df[['Line Name',
    'Line Description',
    'Part ID',
    'Media',
    'Culture Volume',
    'Flask Volume',
    'Growth Temperature',
    'Shaking speed',
]].to_excel(exp_descr_file, index=False)

## Create EDD Measurement File

OD600

In [19]:
# measurement_file = f'{user_params["output_file_path"]}/edd_{measurOD600}.xlsx'
# df['Measurement Type'] = 'Optical Density'

# df['Time'] = user_params['time_point']
# df['Value'] = df[measurOD600]
# df['Units'] = 'n/a'
# df[['Line Name', 'Measurement Type', 'Time', 'Value', 'Units']].to_excel(measurement_file, index=False)

OD340

In [20]:
# measurement_file = f'{user_params["output_file_path"]}/edd_{measurOD340}.xlsx'

# df['Time'] = user_params['time_point']
# df['Value'] = df[measurOD340]
# df['Units'] = 'n/a'
# df[['Line Name', 'Measurement Type', 'Time', 'Value', 'Units']].to_excel(measurement_file, index=False)

OD

In [21]:
measurement_file = f'{user_params["output_file_path"]}/edd_OD.xlsx'
df['Measurement Type'] = measurOD340

df['Time'] = user_params['time_point']
df['Value'] = df[measurOD340]
# df['Units'] = 'n/a'
df[['Line Name', 'Measurement Type', 'Time', 'Value', 'Units']].to_excel(measurement_file, index=False)