# Create EDD Study Files From Data

This notebook creates the files needed for importing a study into the Experiment Data Depot (EDD).

## Inputs and outputs

#### Required files to run this notebook:
   - `../data/flaviolin/DBTL0.3/media_descriptions.csv` - media designs
   
   - `../data/flaviolin/DBTL0.3/OD.xlsx` - production data


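#### Files generated by running this notebook:
   - `edd_experiment_description.xlsx` - experiment description (line names, line descriptions and culture metadata)
   
   - `edd_OD600.xlsx`, `edd_OD340.xlsx` - measurement files, named `edd_<protocol name>.xlsx` after the entries of `protocol_name`
 
    
The files are stored in the user-defined directory (`output_file_path` in the user parameters).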

## Setup

Importing needed libraries:

In [1]:
import sys
sys.path.append('../')

import pandas as pd
import openpyxl

from core import create_media_description

## User parameters

In [2]:
# DBTL cycle identifier: selects the data folder below and is embedded in every line name.
CYCLE = '0.3'

# Input and output files all live in the folder of the current cycle.
cycle_dir = f'../data/flaviolin/DBTL{CYCLE}'

user_params = dict(
    media_file=f'{cycle_dir}/media_descriptions.csv',  # media designs, one row per well
    measurement_file=f'{cycle_dir}/OD.xlsx',           # plate reader data (sheets '600' and '340')
    output_file_path=cycle_dir,                        # folder for output files
    num_replicates=48,                # replicates per media design
    num_designs=1,                    # number of distinct media designs
    protocol_name=['OD600', 'OD340'],  # order matters: [OD600 protocol, OD340 protocol]
    time_point=48,                    # written to the 'Time' column (presumably hours -- TODO confirm)
    part_id='JBx_193086',
    media='MOPS',
    culture_volume=15,                # units not stated here -- TODO confirm
    well_volume=1500,                 # exported as EDD 'Flask Volume'; units not stated -- TODO confirm
    shaking_speed=600,
    temperature=30,
)


In [3]:
# Load the media designs; the first CSV column (the well IDs) becomes the index.
media_path = user_params['media_file']
df = pd.read_csv(media_path, index_col=0)
df.head()

Unnamed: 0,MOPS,Tricine,H3BO3,Glucose,K2SO4,K2HPO4,FeSO4,NH4Cl,MgCl2,NaCl,(NH4)6Mo7O24,CoCl2,CuSO4,MnSO4,ZnSO4
A1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,3e-05,0.0003,0.0001,0.0008,0.0001
B1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,3e-05,0.0003,0.0001,0.0008,0.0001
C1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,3e-05,0.0003,0.0001,0.0008,0.0001
D1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,3e-05,0.0003,0.0001,0.0008,0.0001
E1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,3e-05,0.0003,0.0001,0.0008,0.0001


Create Line Description

In [4]:
# Describe each row's media composition as text (e.g. "MOPS: 40.0000, Tricine: 4.0000, ...");
# the helper is called once per row.
df['Line Description'] = df.apply(create_media_description, axis='columns')

Add metadata for media and replicates to craft Line Names

In [5]:
reps = user_params['num_replicates']
num_media_designs = user_params['num_designs']

# Every row of the media table must receive exactly one replicate number.
# Fail early with an actionable message instead of an opaque pandas length error
# (or, for an empty table, silently creating rows).
expected_lines = reps * num_media_designs
if len(df) != expected_lines:
    raise ValueError(
        f'media file has {len(df)} rows, but num_designs * num_replicates = '
        f'{expected_lines}; check user_params'
    )


def lnfcn(x):
    """Build the line name 'C<cycle>_W<index label>-R<replicate>' for one row.

    `x` is a row of `df`; `x.name` is its index label (the well ID in the
    media file) and `x["Replicate"]` its replicate number.
    """
    return f'C{CYCLE}_W{x.name}-R{x["Replicate"]}'


# Replicates are numbered 1..reps, restarting for each media design.
df['Replicate'] = list(range(1, reps + 1)) * num_media_designs
df['Line Name'] = df.apply(lnfcn, axis=1)

In [6]:
# Preview the first two rows to check the added Line Description, Replicate and Line Name columns.
df.head(2)

Unnamed: 0,MOPS,Tricine,H3BO3,Glucose,K2SO4,K2HPO4,FeSO4,NH4Cl,MgCl2,NaCl,(NH4)6Mo7O24,CoCl2,CuSO4,MnSO4,ZnSO4,Line Description,Replicate,Line Name
A1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,3e-05,0.0003,0.0001,0.0008,0.0001,"MOPS: 40.0000, Tricine: 4.0000, H3BO3: 0.0040,...",1,C0.3_WA1-R1
B1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,3e-05,0.0003,0.0001,0.0008,0.0001,"MOPS: 40.0000, Tricine: 4.0000, H3BO3: 0.0040,...",2,C0.3_WB1-R2


Read measurements file

In [7]:
# Each wavelength is stored on its own sheet of the workbook; the first sheet
# column (plate row letters) becomes the index and the plate columns stay as columns.
od_workbook = user_params['measurement_file']
df_600, df_340 = (
    pd.read_excel(od_workbook, sheet_name=sheet, index_col=0)
    for sheet in ('600', '340')
)

df_600

Unnamed: 0,1,2,3,4,5,6,7,8,9
A,0.0674,0.069,0.0677,0.0534,0.0447,0.0629,0.0738,0.0602,0.0358
B,0.0618,0.0641,0.0622,0.0549,0.062,0.0603,0.0695,0.0626,0.0467
C,0.073,0.0677,0.0722,0.0714,0.0754,0.0779,0.0669,0.0589,0.0463
D,0.0794,0.0765,0.0916,0.0745,0.0792,0.0684,0.0695,0.0717,0.0464
E,0.0713,0.0628,0.0707,0.0757,0.0743,0.0729,0.0723,0.0766,0.0464
F,0.0759,0.0701,0.0685,0.0724,0.0646,0.0732,0.0621,0.0659,0.0466


In [8]:
# Display the raw OD340 plate readings (loaded from sheet '340') for inspection.
df_340

Unnamed: 0,1,2,3,4,5,6,7,8,9
A,0.3901,0.4056,0.4055,0.3481,0.3291,0.3509,0.3794,0.376,0.1179
B,0.3952,0.3795,0.3808,0.3514,0.3978,0.3702,0.3733,0.3792,0.1353
C,0.3826,0.3652,0.3786,0.3798,0.3868,0.3447,0.3871,0.3924,0.1341
D,0.4078,0.3851,0.3912,0.3452,0.3984,0.3991,0.393,0.4125,0.1343
E,0.3978,0.3834,0.3928,0.4021,0.397,0.4025,0.3943,0.4087,0.1346
F,0.4104,0.4028,0.3975,0.4001,0.4093,0.4118,0.402,0.423,0.1356


Normalize the data by subtracting the value of the control well A9 (water content) from every well:

In [9]:
# Reading of the control well A9, used as the zero level of the plate.
zero_value_600 = df_600.at['A', 9]

# Shift every well by the control reading (A9 itself becomes 0).
df_600 = df_600.sub(zero_value_600)
df_600

Unnamed: 0,1,2,3,4,5,6,7,8,9
A,0.0316,0.0332,0.0319,0.0176,0.0089,0.0271,0.038,0.0244,0.0
B,0.026,0.0283,0.0264,0.0191,0.0262,0.0245,0.0337,0.0268,0.0109
C,0.0372,0.0319,0.0364,0.0356,0.0396,0.0421,0.0311,0.0231,0.0105
D,0.0436,0.0407,0.0558,0.0387,0.0434,0.0326,0.0337,0.0359,0.0106
E,0.0355,0.027,0.0349,0.0399,0.0385,0.0371,0.0365,0.0408,0.0106
F,0.0401,0.0343,0.0327,0.0366,0.0288,0.0374,0.0263,0.0301,0.0108


Set negative values to zero, multiply the values by 10 to account for 10x dilution and eliminate the 9th column:

In [10]:
# Build the cleaned table in a single expression and rebind the name only once
# the whole chain has succeeded. With step-by-step in-place edits, re-running
# this cell multiplied the values by 10 a second time before failing on the
# already-removed column 9, silently corrupting df_600.
df_600 = (
    df_600
    .clip(lower=0)     # negative values (below the control reading) are set to zero
    .mul(10)           # samples were diluted 10x before the OD600 reading
    .drop(columns=9)   # column 9 is not needed any more; raises KeyError if already dropped
)
df_600

Unnamed: 0,1,2,3,4,5,6,7,8
A,0.316,0.332,0.319,0.176,0.089,0.271,0.38,0.244
B,0.26,0.283,0.264,0.191,0.262,0.245,0.337,0.268
C,0.372,0.319,0.364,0.356,0.396,0.421,0.311,0.231
D,0.436,0.407,0.558,0.387,0.434,0.326,0.337,0.359
E,0.355,0.27,0.349,0.399,0.385,0.371,0.365,0.408
F,0.401,0.343,0.327,0.366,0.288,0.374,0.263,0.301


Do the same for OD340, except for the multiplication by 10, because the OD340 measurements were taken from non-diluted samples:

In [11]:
# Reading of the control well A9, used as the zero level of the plate.
zero_value_340 = df_340.at['A', 9]

# Same clean-up as for OD600, but without any rescaling of the values.
df_340 = (
    df_340
    .sub(zero_value_340)   # shift every well by the control reading
    .clip(lower=0)         # negative values are set to zero
    .drop(columns=9)       # remove column 9
)
df_340

Unnamed: 0,1,2,3,4,5,6,7,8
A,0.2722,0.2877,0.2876,0.2302,0.2112,0.233,0.2615,0.2581
B,0.2773,0.2616,0.2629,0.2335,0.2799,0.2523,0.2554,0.2613
C,0.2647,0.2473,0.2607,0.2619,0.2689,0.2268,0.2692,0.2745
D,0.2899,0.2672,0.2733,0.2273,0.2805,0.2812,0.2751,0.2946
E,0.2799,0.2655,0.2749,0.2842,0.2791,0.2846,0.2764,0.2908
F,0.2925,0.2849,0.2796,0.2822,0.2914,0.2939,0.2841,0.3051


Transform wide to long format

In [12]:
def indfcn(x):
    """Return the well ID of one melted row: plate row label + plate column number (e.g. 'A1')."""
    # int() strips any float formatting the column number may have picked up in the row.
    return f'{x.name}{int(x["variable"])}'


def _plate_to_long(plate, value_name):
    """Reshape a plate-shaped table into one row per well, indexed by 'Well'.

    The result has a single column named `value_name`.
    """
    long_table = plate.melt(ignore_index=False, value_name=value_name)
    long_table.index = long_table.apply(indfcn, axis=1).rename('Well')
    return long_table.drop(columns='variable')


df_600 = _plate_to_long(df_600, 'OD600')
df_340 = _plate_to_long(df_340, 'OD340')

df_600.head()

Unnamed: 0_level_0,OD600
Well,Unnamed: 1_level_1
A1,0.316
B1,0.26
C1,0.372
D1,0.436
E1,0.355


In [13]:
# Protocol names double as the measurement column names in `df`.
protocol_names = user_params['protocol_name']
measurOD600 = protocol_names[0]
measurOD340 = protocol_names[1]

# Each single-column OD table is matched to `df` by well ID (the index of both tables).
for od_column, od_table in ((measurOD600, df_600), (measurOD340, df_340)):
    df[od_column] = od_table

df.head(2)


Unnamed: 0,MOPS,Tricine,H3BO3,Glucose,K2SO4,K2HPO4,FeSO4,NH4Cl,MgCl2,NaCl,(NH4)6Mo7O24,CoCl2,CuSO4,MnSO4,ZnSO4,Line Description,Replicate,Line Name,OD600,OD340
A1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,3e-05,0.0003,0.0001,0.0008,0.0001,"MOPS: 40.0000, Tricine: 4.0000, H3BO3: 0.0040,...",1,C0.3_WA1-R1,0.316,0.2722
B1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,3e-05,0.0003,0.0001,0.0008,0.0001,"MOPS: 40.0000, Tricine: 4.0000, H3BO3: 0.0040,...",2,C0.3_WB1-R2,0.26,0.2773


## Create EDD Experiment Description File

In [14]:
# EDD metadata column -> key in user_params that holds its value (same value for every line).
# Note that EDD's 'Flask Volume' is filled from the 'well_volume' parameter.
edd_metadata_columns = {
    'Media': 'media',
    'Part ID': 'part_id',
    'Culture Volume': 'culture_volume',
    'Flask Volume': 'well_volume',
    'Growth Temperature': 'temperature',
    'Shaking speed': 'shaking_speed',
}
for edd_column, param_key in edd_metadata_columns.items():
    df[edd_column] = user_params[param_key]

# TODO: columns not populated yet:
# df['Starting OD'] =
# df['Replicate Count'] = 24


In [15]:
# Columns that EDD does not accept for now (kept for future use)
# df['Humidity[%]'] = user_params['humidity']
# df['Plate'] = user_params['plate']

In [16]:
# The experiment description holds one row per line: its name, description and culture metadata.
output_dir = user_params["output_file_path"]
exp_descr_file = f'{output_dir}/edd_experiment_description.xlsx'

experiment_description_columns = [
    'Line Name',
    'Line Description',
    'Part ID',
    'Media',
    'Culture Volume',
    'Flask Volume',
    'Growth Temperature',
    'Shaking speed',
]

# index=False: the index is left out of the sheet.
experiment_description = df[experiment_description_columns]
experiment_description.to_excel(exp_descr_file, index=False)

## Create EDD Measurement File

OD600

In [17]:
# Output file is named after the OD600 protocol.
output_dir = user_params["output_file_path"]
measurement_file = f'{output_dir}/edd_{measurOD600}.xlsx'

# Columns written to the measurement file, in this order.
edd_measurement_columns = ['Line Name', 'Measurement Type', 'Time', 'Value', 'Units']

# The measurement columns are (re)written on `df` itself; 'Measurement Type'
# stays in place and is used again for the OD340 export.
df['Measurement Type'] = 'Optical Density'
df['Time'] = user_params['time_point']
df['Value'] = df[measurOD600]
df['Units'] = 'n/a'

od600_export = df[edd_measurement_columns]
od600_export.to_excel(measurement_file, index=False)

OD340

In [18]:
# Output file is named after the OD340 protocol.
measurement_file = f'{user_params["output_file_path"]}/edd_{measurOD340}.xlsx'

# Set 'Measurement Type' here as well, so this export does not depend on the
# OD600 export cell having been run first (it would otherwise fail with a KeyError).
df['Measurement Type'] = 'Optical Density'

# 'Time', 'Value' and 'Units' are overwritten with the OD340 data.
df['Time'] = user_params['time_point']
df['Value'] = df[measurOD340]
df['Units'] = 'n/a'
df[['Line Name', 'Measurement Type', 'Time', 'Value', 'Units']].to_excel(measurement_file, index=False)