# Create EDD Study Files From Data

This notebook creates the files needed for importing a study into Experiment Data Depot (EDD).

## Inputs and outputs

### Required files to run this notebook:
   - `../data/flaviolin/DBTL1/media_descriptions.csv` - media designs for each of the wells
   
   - `../data/flaviolin/DBTL1/OD.xlsx` - production data from the plate reader


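### Files generated by running this notebook:
   - `edd_experiment_description.xlsx` - line names, line descriptions and metadata for the EDD experiment description
   
   - `edd_OD600.xlsx` and `edd_OD340.xlsx` - EDD measurement files, one per protocol listed in `protocol_name`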
 
    
The files are stored in the directory given by `output_file_path` in the user parameters.

## Setup

Importing needed libraries:

In [1]:
import sys
sys.path.append('../')

import pandas as pd
import openpyxl

from core import create_media_description

### User parameters

In [2]:
CYCLE = 1

# Inputs and outputs of one DBTL cycle all live in the same folder.
cycle_dir = f'../data/flaviolin/DBTL{CYCLE}'

user_params = dict(
    media_file=f'{cycle_dir}/media_descriptions.csv',
    measurement_file=f'{cycle_dir}/OD.xlsx',
    output_file_path=cycle_dir,  # Folder for output files
    num_replicates=4,            # wells per media design
    num_designs=12,              # number of media designs
    protocol_name=['OD600', 'OD340'],
    time_point=48,               # written to the 'Time' column of the measurement files
    part_id='JBx_193086',
    media='MOPS',
    culture_volume=15,
    well_volume=1500,
    shaking_speed=800,
    temperature=30,
)


In [3]:
# Load the media designs; the first CSV column (the well label) becomes the index.
media_path = user_params['media_file']
df = pd.read_csv(media_path, index_col=0)
df.head()

Unnamed: 0_level_0,MOPS,Tricine,H3BO3,Glucose,K2SO4,K2HPO4,FeSO4,NH4Cl,MgCl2,NaCl,(NH4)6Mo7O24,CoCl2,CuSO4,MnSO4,ZnSO4
Well,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
A1,40.0,4.0,0.003744,20.0,0.029492,3.995413,0.042847,6.490478,0.214493,197.916509,1.6e-05,0.000254,0.000309,0.006371,0.000866
A2,40.0,4.0,0.003744,20.0,0.029492,3.995413,0.042847,6.490478,0.214493,197.916509,1.6e-05,0.000254,0.000309,0.006371,0.000866
A3,40.0,4.0,0.003744,20.0,0.029492,3.995413,0.042847,6.490478,0.214493,197.916509,1.6e-05,0.000254,0.000309,0.006371,0.000866
A4,40.0,4.0,0.003744,20.0,0.029492,3.995413,0.042847,6.490478,0.214493,197.916509,1.6e-05,0.000254,0.000309,0.006371,0.000866
A5,40.0,4.0,0.009185,20.0,0.380499,1.156774,0.007971,11.618192,5.030266,50.60791,0.000155,0.000152,2.9e-05,0.00041,0.000277


## Create Line Description

In [4]:
# Build a text description of each well's media by passing every row of `df`
# to `create_media_description` (imported from `core`).
# NOTE(review): each row holds all current columns of `df`, so re-running this
# cell after later cells have added columns may change the result — confirm.
df['Line Description'] = df.apply(create_media_description, axis=1)


## Create Line Names

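Add the replicate number to each well and craft Line Names of the form **C[cycle]\_W[row]1\_[row]4-R[replicate]** (or **C[cycle]\_W[row]5\_[row]8-R[replicate]** for wells in columns 5 and above), denoting the cycle number, the range of wells that share the same media design, and the replicate number: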

In [5]:
reps = user_params['num_replicates']
num_media_designs = user_params['num_designs']


def lnfcn(x):
    """Build the Line Name for one well (a row of `df` indexed by its well label).

    The name has the form C<cycle>_W<row><first>_<row><last>-R<replicate>, where
    <first>..<last> is the block of `reps` consecutive plate columns containing
    the well. The whole column number is parsed (not only its first digit), so
    two-digit columns such as A10 are grouped correctly, and the block bounds
    follow `num_replicates` instead of being fixed to 1-4 / 5-8.
    """
    row_letter = x.name[0]
    column = int(x.name[1:])
    first_column = ((column - 1) // reps) * reps + 1
    last_column = first_column + reps - 1
    return f'C{CYCLE}_W{row_letter}{first_column}_{row_letter}{last_column}-R{x["Replicate"]}'


# Replicates are numbered 1..reps within each design; this relies on the rows of
# `df` being ordered design by design.
df['Replicate'] = list(range(1, reps + 1)) * num_media_designs
df['Line Name'] = df.apply(lnfcn, axis=1)

In [6]:
# Preview the first two rows to check the newly added columns.
df.head(2)

Unnamed: 0_level_0,MOPS,Tricine,H3BO3,Glucose,K2SO4,K2HPO4,FeSO4,NH4Cl,MgCl2,NaCl,(NH4)6Mo7O24,CoCl2,CuSO4,MnSO4,ZnSO4,Line Description,Replicate,Line Name
Well,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
A1,40.0,4.0,0.003744,20.0,0.029492,3.995413,0.042847,6.490478,0.214493,197.916509,1.6e-05,0.000254,0.000309,0.006371,0.000866,"MOPS: 40.0000, Tricine: 4.0000, H3BO3: 0.0037,...",1,C1_WA1_A4-R1
A2,40.0,4.0,0.003744,20.0,0.029492,3.995413,0.042847,6.490478,0.214493,197.916509,1.6e-05,0.000254,0.000309,0.006371,0.000866,"MOPS: 40.0000, Tricine: 4.0000, H3BO3: 0.0037,...",2,C1_WA1_A4-R2


## Process measurement files

Read measurements file:

In [7]:
# Read both plate-reader sheets in one call; a list of sheet names returns a
# dict of DataFrames keyed by sheet name. The first column becomes the index.
plate_sheets = pd.read_excel(
    user_params['measurement_file'],
    sheet_name=['600', '340'],
    index_col=0,
)
df_600 = plate_sheets['600']
df_340 = plate_sheets['340']

df_600

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12
A,0.2379,0.0876,0.0796,0.0983,0.1059,0.1283,0.1367,0.1385,0.036,0.0462,0.0463,0.0459
B,0.218,0.0812,0.0865,0.0593,0.0861,0.105,0.0904,0.0896,0.0467,0.0467,0.0465,0.0457
C,0.0874,0.0854,0.0857,0.0877,0.1179,0.113,0.0439,0.1156,0.0465,0.0466,0.0463,0.0456
D,0.1334,0.1157,0.1406,0.1295,0.0711,0.076,0.0849,0.0438,0.0466,0.0466,0.0462,0.0458
E,0.1138,0.1007,0.1057,0.1029,0.0507,0.0517,0.0488,0.048,0.0466,0.0464,0.0461,0.0456
F,0.045,0.0419,0.0412,0.0414,0.1091,0.1123,0.1024,0.1155,0.0467,0.0463,0.046,0.0456
G,0.0461,0.0462,0.0469,0.0468,0.0465,0.0461,0.046,0.0468,0.0467,0.0464,0.0461,0.0456
H,0.0461,0.0463,0.0459,0.0459,0.0456,0.0455,0.0456,0.0457,0.0461,0.0462,0.0457,0.0455


In [8]:
# Show the OD340 plate readings as read from the '340' sheet.
df_340

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12
A,0.5654,0.4637,0.4629,0.4834,0.3931,0.3987,0.4053,0.4063,0.1232,0.1343,0.1326,0.131
B,0.4516,0.3893,0.381,0.3928,0.7352,0.7064,0.743,0.7527,0.1389,0.1389,0.1346,0.1312
C,0.3893,0.3849,0.3853,0.3983,0.3552,0.3517,0.34,0.3639,0.1383,0.1382,0.1359,0.1311
D,0.4688,0.4683,0.4929,0.4812,0.319,0.3223,0.321,0.3011,0.1389,0.138,0.1357,0.1333
E,0.7368,0.7045,0.7216,0.7292,0.6231,0.6179,0.6272,0.6212,0.1398,0.1377,0.1366,0.133
F,0.4379,0.4379,0.4439,0.451,0.4088,0.4168,0.3947,0.4123,0.1397,0.1378,0.1363,0.1312
G,0.1316,0.1335,0.1379,0.1389,0.1385,0.138,0.1378,0.1389,0.1389,0.1368,0.1343,0.1309
H,0.1313,0.1331,0.1348,0.1348,0.1346,0.134,0.1341,0.1354,0.1342,0.1337,0.1329,0.1308


### Process OD600

Subtract the value of the control well A9 (water content) from all wells to remove the background:

In [9]:
# Reading of well A9 (row 'A', column 9), used as the blank for the whole plate.
zero_value_600 = df_600.loc['A', 9]
df_600 = df_600 - zero_value_600
df_600

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12
A,0.2019,0.0516,0.0436,0.0623,0.0699,0.0923,0.1007,0.1025,0.0,0.0102,0.0103,0.0099
B,0.182,0.0452,0.0505,0.0233,0.0501,0.069,0.0544,0.0536,0.0107,0.0107,0.0105,0.0097
C,0.0514,0.0494,0.0497,0.0517,0.0819,0.077,0.0079,0.0796,0.0105,0.0106,0.0103,0.0096
D,0.0974,0.0797,0.1046,0.0935,0.0351,0.04,0.0489,0.0078,0.0106,0.0106,0.0102,0.0098
E,0.0778,0.0647,0.0697,0.0669,0.0147,0.0157,0.0128,0.012,0.0106,0.0104,0.0101,0.0096
F,0.009,0.0059,0.0052,0.0054,0.0731,0.0763,0.0664,0.0795,0.0107,0.0103,0.01,0.0096
G,0.0101,0.0102,0.0109,0.0108,0.0105,0.0101,0.01,0.0108,0.0107,0.0104,0.0101,0.0096
H,0.0101,0.0103,0.0099,0.0099,0.0096,0.0095,0.0096,0.0097,0.0101,0.0102,0.0097,0.0095


Set negative values to zero, multiply the values by 10 to account for 10x dilution and keep only 8 columns and 6 rows:

In [10]:
df_600 = (
    df_600
    .clip(lower=0)   # negative values become zero
    .mul(10)         # undo the 10x dilution
    .iloc[:6, :8]    # keep the first 6 rows and the first 8 columns
)
df_600

Unnamed: 0,1,2,3,4,5,6,7,8
A,2.019,0.516,0.436,0.623,0.699,0.923,1.007,1.025
B,1.82,0.452,0.505,0.233,0.501,0.69,0.544,0.536
C,0.514,0.494,0.497,0.517,0.819,0.77,0.079,0.796
D,0.974,0.797,1.046,0.935,0.351,0.4,0.489,0.078
E,0.778,0.647,0.697,0.669,0.147,0.157,0.128,0.12
F,0.09,0.059,0.052,0.054,0.731,0.763,0.664,0.795


### Process OD340

Do the same for OD340, except for the multiplication by 10, as the OD340 measurements were taken from non-diluted samples:

In [11]:
# Reading of well A9 (row 'A', column 9), used as the blank for the whole plate.
zero_value_340 = df_340.loc['A', 9]
df_340 = (
    (df_340 - zero_value_340)
    .clip(lower=0)   # negative values become zero
    .iloc[:6, :8]    # keep the first 6 rows and the first 8 columns
)
df_340

Unnamed: 0,1,2,3,4,5,6,7,8
A,0.4422,0.3405,0.3397,0.3602,0.2699,0.2755,0.2821,0.2831
B,0.3284,0.2661,0.2578,0.2696,0.612,0.5832,0.6198,0.6295
C,0.2661,0.2617,0.2621,0.2751,0.232,0.2285,0.2168,0.2407
D,0.3456,0.3451,0.3697,0.358,0.1958,0.1991,0.1978,0.1779
E,0.6136,0.5813,0.5984,0.606,0.4999,0.4947,0.504,0.498
F,0.3147,0.3147,0.3207,0.3278,0.2856,0.2936,0.2715,0.2891


Transform wide to long format, matching the order of wells in index to the one from `df`:

In [12]:
def indfcn(x):
    """Return the well label for one melted row: plate-row letter followed by column number."""
    return f'{x["variable"]}{(x.name)}'


def plate_to_long(plate, value_name):
    """Reshape a plate-shaped frame into one row per well, indexed by well label.

    The frame is transposed before melting so that wells come out row by row
    (A1, A2, ..., B1, ...). The result has a single column named `value_name`
    and an index named 'Well'.
    """
    long_df = plate.T.melt(ignore_index=False, value_name=value_name)
    long_df.index = long_df.apply(indfcn, axis=1).rename('Well')
    return long_df.drop(columns=['variable'])


df_600 = plate_to_long(df_600, 'OD600')
df_340 = plate_to_long(df_340, 'OD340')

df_600.head()

Unnamed: 0_level_0,OD600
Well,Unnamed: 1_level_1
A1,2.019
A2,0.516
A3,0.436
A4,0.623
A5,0.699


Add measurements to the main dataframe:

In [13]:
# Column names for the two measurements come from the configured protocol names.
protocol_names = user_params['protocol_name']
measurOD600 = protocol_names[0]
measurOD340 = protocol_names[1]

# Values are aligned to `df` through the shared well-label index.
for od_column, od_values in ((measurOD600, df_600), (measurOD340, df_340)):
    df[od_column] = od_values

df.head(2)


Unnamed: 0_level_0,MOPS,Tricine,H3BO3,Glucose,K2SO4,K2HPO4,FeSO4,NH4Cl,MgCl2,NaCl,(NH4)6Mo7O24,CoCl2,CuSO4,MnSO4,ZnSO4,Line Description,Replicate,Line Name,OD600,OD340
Well,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
A1,40.0,4.0,0.003744,20.0,0.029492,3.995413,0.042847,6.490478,0.214493,197.916509,1.6e-05,0.000254,0.000309,0.006371,0.000866,"MOPS: 40.0000, Tricine: 4.0000, H3BO3: 0.0037,...",1,C1_WA1_A4-R1,2.019,0.4422
A2,40.0,4.0,0.003744,20.0,0.029492,3.995413,0.042847,6.490478,0.214493,197.916509,1.6e-05,0.000254,0.000309,0.006371,0.000866,"MOPS: 40.0000, Tricine: 4.0000, H3BO3: 0.0037,...",2,C1_WA1_A4-R2,0.516,0.3405


## Create EDD Experiment Description File

In [14]:
# Metadata columns whose value is the same for every line, mapped to the
# `user_params` key that supplies the value.
constant_metadata = {
    'Media': 'media',
    'Part ID': 'part_id',
    'Culture Volume': 'culture_volume',
    'Flask Volume': 'well_volume',
    'Growth Temperature': 'temperature',
    'Shaking speed': 'shaking_speed',
}
for column_name, param_key in constant_metadata.items():
    df[column_name] = user_params[param_key]

# The 'Starting OD' and 'Replicate Count' columns are not set here
# (their assignments were previously commented out).


In [15]:
# These columns are currently not valid in EDD, so they are left disabled.
# (`user_params` defines neither 'humidity' nor 'plate'.)
# df['Humidity[%]'] = user_params['humidity']
# df['Plate'] = user_params['plate']

In [16]:
# Columns written to the EDD experiment description, in output order.
exp_descr_columns = [
    'Line Name',
    'Line Description',
    'Part ID',
    'Media',
    'Culture Volume',
    'Flask Volume',
    'Growth Temperature',
    'Shaking speed',
]
exp_descr_file = f'{user_params["output_file_path"]}/edd_experiment_description.xlsx'
df.loc[:, exp_descr_columns].to_excel(exp_descr_file, index=False)

## Create EDD Measurement File

OD600

In [17]:
measurement_file = f'{user_params["output_file_path"]}/edd_{measurOD600}.xlsx'

# Add the measurement columns to `df`; 'Measurement Type' is reused by the OD340 cell.
df = df.assign(**{
    'Measurement Type': 'Optical Density',
    'Time': user_params['time_point'],
    'Value': df[measurOD600],
    'Units': 'n/a',
})

measurement_columns = ['Line Name', 'Measurement Type', 'Time', 'Value', 'Units']
df[measurement_columns].to_excel(measurement_file, index=False)

OD340

In [18]:
measurement_file = f'{user_params["output_file_path"]}/edd_{measurOD340}.xlsx'

# Overwrite the per-measurement columns with the OD340 values; the
# 'Measurement Type' column must already exist in `df`.
df = df.assign(
    Time=user_params['time_point'],
    Value=df[measurOD340],
    Units='n/a',
)

measurement_columns = ['Line Name', 'Measurement Type', 'Time', 'Value', 'Units']
df[measurement_columns].to_excel(measurement_file, index=False)