# Create EDD Study Files From Data

This notebook creates the files needed for importing a study into the Experiment Data Depot (EDD).

## Inputs and outputs

#### Required files to run this notebook:
   - `../data/flaviolin/DBTL0.2/media_descriptions.csv` - media designs
   
   - `../data/flaviolin/DBTL0.2/OD.xlsx` - production data


#### Files generated by running this notebook:
   - `edd_experiment_description.xlsx`
   
   - `edd_OD600.xlsx`
   
   - `edd_OD340.xlsx`
 
    
The files are stored in the user-defined directory (`output_file_path` in the user parameters below).

## Setup

Importing needed libraries:

In [1]:
import sys
sys.path.append('../')

import pandas as pd
import openpyxl

from core import create_media_description

## User parameters

In [2]:
# DBTL cycle identifier; it appears in the data paths below and in every line name.
CYCLE = '0.2'

# All user-tunable settings for this notebook, gathered in one place.
# NOTE(review): units of the numeric settings are not stated in this notebook - confirm before reuse.
user_params = dict(
    # Input files and output folder
    media_file=f'../data/flaviolin/DBTL{CYCLE}/media_descriptions.csv',
    measurement_file=f'../data/flaviolin/DBTL{CYCLE}/OD.xlsx',
    output_file_path=f'../data/flaviolin/DBTL{CYCLE}',  # Folder for output files
    # Experimental layout: replicates per design and number of distinct designs
    num_replicates=48,
    num_designs=1,
    # Measurement protocols (also used as column names and in output file names)
    protocol_name=['OD600', 'OD340'],
    time_point=48,
    # Constant metadata written to the experiment description file
    part_id='JBx_193086',
    media='MOPS',
    culture_volume=15,
    well_volume=1500,
    shaking_speed=200,
    temperature=30,
    # Disabled for now: plate='m2p_flower_black_bottom'
)


In [3]:
# Load the media designs; the first CSV column (the well IDs) becomes the index.
media_path = user_params['media_file']
df = pd.read_csv(media_path, index_col=0)
df.head()

Unnamed: 0,MOPS,Tricine,H3BO3,Glucose,K2SO4,K2HPO4,FeSO4,NH4Cl,MgCl2,NaCl,(NH4)6Mo7O24,CoCl2,CuSO4,MnSO4,ZnSO4
A1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,3e-05,0.0003,0.0001,0.0008,0.0001
B1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,3e-05,0.0003,0.0001,0.0008,0.0001
C1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,3e-05,0.0003,0.0001,0.0008,0.0001
D1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,3e-05,0.0003,0.0001,0.0008,0.0001
E1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,3e-05,0.0003,0.0001,0.0008,0.0001


Create Line Description

In [4]:
# Build one textual description per row (well) with the project helper.
line_descriptions = df.apply(create_media_description, axis='columns')
df['Line Description'] = line_descriptions

Add metadata for media and replicates to craft Line Names

In [5]:
reps = user_params['num_replicates']
num_media_designs = user_params['num_designs']


def lnfcn(row):
    """Return the line name 'C<cycle>_W<well>-R<replicate>' for one row of df."""
    return f'C{CYCLE}_W{row.name}-R{row["Replicate"]}'


# Replicates are numbered 1..reps and the sequence is repeated once per design,
# so the rows are expected to be grouped by design.
df['Replicate'] = list(range(1, reps + 1)) * num_media_designs
df['Line Name'] = df.apply(lnfcn, axis=1)

In [6]:
df.head(2)

Unnamed: 0,MOPS,Tricine,H3BO3,Glucose,K2SO4,K2HPO4,FeSO4,NH4Cl,MgCl2,NaCl,(NH4)6Mo7O24,CoCl2,CuSO4,MnSO4,ZnSO4,Line Description,Replicate,Line Name
A1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,3e-05,0.0003,0.0001,0.0008,0.0001,"MOPS: 40.0000, Tricine: 4.0000, H3BO3: 0.0040,...",1,C0.2_WA1-R1
B1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,3e-05,0.0003,0.0001,0.0008,0.0001,"MOPS: 40.0000, Tricine: 4.0000, H3BO3: 0.0040,...",2,C0.2_WB1-R2


Read measurements file

In [7]:
# Both readings live in the same workbook, one sheet each ('600' and '340').
# The first column of each sheet (plate row letters) becomes the index.
plates = pd.read_excel(
    user_params['measurement_file'],
    sheet_name=['600', '340'],
    index_col=0,
)
df_600 = plates['600']
df_340 = plates['340']

df_600

Unnamed: 0,1,2,3,4,5,6,7,8,9
A,0.0637,0.0592,0.059,0.0837,0.0703,0.0735,0.0582,0.0801,0.0365
B,0.0713,0.0555,0.0652,0.0739,0.0553,0.0826,0.085,0.0807,0.0469
C,0.0646,0.066,0.0559,0.0529,0.0655,0.0579,0.0506,0.067,0.0474
D,0.0684,0.0728,0.089,0.0796,0.0701,0.0604,0.0355,0.1384,0.0466
E,0.0727,0.0602,0.0669,0.0517,0.0516,0.0786,0.0856,0.036,0.0465
F,0.0604,0.0612,0.0716,0.0881,0.0632,0.0364,0.0368,0.0366,0.0475


In [8]:
# Display the raw readings loaded from sheet '340' of the measurement file.
df_340

Unnamed: 0,1,2,3,4,5,6,7,8,9
A,0.1881,0.1844,0.187,0.2359,0.2106,0.2164,0.1804,0.225,0.1189
B,0.2318,0.1913,0.2044,0.213,0.1781,0.2291,0.2413,0.2331,0.1353
C,0.1995,0.1976,0.1809,0.1741,0.1999,0.1879,0.173,0.1972,0.1357
D,0.1988,0.2164,0.244,0.2314,0.2047,0.1802,0.1188,0.3143,0.1355
E,0.2153,0.1934,0.2052,0.1751,0.1724,0.238,0.2454,0.1214,0.136
F,0.1907,0.1907,0.2133,0.2536,0.1987,0.1236,0.124,0.1236,0.1354


Normalize the data by subtracting the value of the control well A9 (water content) from every well:

In [9]:
# Reading of the control well A9, used as the zero level for the whole plate.
zero_value_600 = df_600.loc['A', 9]
df_600 -= zero_value_600
df_600

Unnamed: 0,1,2,3,4,5,6,7,8,9
A,0.0272,0.0227,0.0225,0.0472,0.0338,0.037,0.0217,0.0436,0.0
B,0.0348,0.019,0.0287,0.0374,0.0188,0.0461,0.0485,0.0442,0.0104
C,0.0281,0.0295,0.0194,0.0164,0.029,0.0214,0.0141,0.0305,0.0109
D,0.0319,0.0363,0.0525,0.0431,0.0336,0.0239,-0.001,0.1019,0.0101
E,0.0362,0.0237,0.0304,0.0152,0.0151,0.0421,0.0491,-0.0005,0.01
F,0.0239,0.0247,0.0351,0.0516,0.0267,-0.0001,0.0003,0.0001,0.011


Set negative values to zero, multiply the values by 10 to account for 10x dilution and eliminate the 9th column:

In [10]:
# Clamp negative readings to zero, scale by the 10x dilution factor,
# and discard column 9 (it holds the control well).
df_600 = df_600.mask(df_600 < 0, 0).mul(10).drop(columns=9)
df_600

Unnamed: 0,1,2,3,4,5,6,7,8
A,0.272,0.227,0.225,0.472,0.338,0.37,0.217,0.436
B,0.348,0.19,0.287,0.374,0.188,0.461,0.485,0.442
C,0.281,0.295,0.194,0.164,0.29,0.214,0.141,0.305
D,0.319,0.363,0.525,0.431,0.336,0.239,0.0,1.019
E,0.362,0.237,0.304,0.152,0.151,0.421,0.491,0.0
F,0.239,0.247,0.351,0.516,0.267,0.0,0.003,0.001


Do the same for OD340:

In [11]:
# Same pipeline as for OD600: subtract the A9 control, clamp negatives to zero,
# scale by the 10x dilution factor and drop the control column.
zero_value_340 = df_340.loc['A', 9]
df_340 = (
    df_340
    .sub(zero_value_340)
    .mask(lambda plate: plate < 0, 0)
    .mul(10)
    .drop(columns=9)
)
df_340

Unnamed: 0,1,2,3,4,5,6,7,8
A,0.692,0.655,0.681,1.17,0.917,0.975,0.615,1.061
B,1.129,0.724,0.855,0.941,0.592,1.102,1.224,1.142
C,0.806,0.787,0.62,0.552,0.81,0.69,0.541,0.783
D,0.799,0.975,1.251,1.125,0.858,0.613,0.0,1.954
E,0.964,0.745,0.863,0.562,0.535,1.191,1.265,0.025
F,0.718,0.718,0.944,1.347,0.798,0.047,0.051,0.047


Transform from wide to long format:

In [12]:
def indfcn(row):
    """Build a well label such as 'A1' from the row letter (index) and the column number."""
    return f'{row.name}{int(row["variable"])}'


def _melt_plate(plate, value_name):
    """Reshape a plate-layout frame into one row per well, indexed by well label."""
    long_form = plate.melt(ignore_index=False, value_name=value_name)
    # Replace the row-letter index with the full well label and name the index 'Well'.
    long_form.index = long_form.apply(indfcn, axis=1).rename('Well')
    return long_form.drop(columns='variable')


df_600 = _melt_plate(df_600, 'OD600')
df_340 = _melt_plate(df_340, 'OD340')

df_600.head()

Unnamed: 0_level_0,OD600
Well,Unnamed: 1_level_1
A1,0.272
B1,0.348
C1,0.281
D1,0.319
E1,0.362


Add measurements to the main dataframe:

In [14]:
protocols = user_params['protocol_name']
measurOD600 = protocols[0]
measurOD340 = protocols[1]

# Each long-format frame holds a single value column; assigning it as a Series
# aligns the readings with the media designs on the well label.
df[measurOD600] = df_600.iloc[:, 0]
df[measurOD340] = df_340.iloc[:, 0]
df.head(2)


Unnamed: 0,MOPS,Tricine,H3BO3,Glucose,K2SO4,K2HPO4,FeSO4,NH4Cl,MgCl2,NaCl,(NH4)6Mo7O24,CoCl2,CuSO4,MnSO4,ZnSO4,Line Description,Replicate,Line Name,OD600,OD340
A1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,3e-05,0.0003,0.0001,0.0008,0.0001,"MOPS: 40.0000, Tricine: 4.0000, H3BO3: 0.0040,...",1,C0.2_WA1-R1,0.272,0.692
B1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,3e-05,0.0003,0.0001,0.0008,0.0001,"MOPS: 40.0000, Tricine: 4.0000, H3BO3: 0.0040,...",2,C0.2_WB1-R2,0.348,1.129


## Create EDD Experiment Description File

In [15]:
# Constant metadata columns: every line gets the same value, taken from the user parameters.
df = df.assign(**{
    'Media': user_params['media'],
    'Part ID': user_params['part_id'],
    'Culture Volume': user_params['culture_volume'],
    'Flask Volume': user_params['well_volume'],
    'Growth Temperature': user_params['temperature'],
    'Shaking speed': user_params['shaking_speed'],
})
# Not populated yet:
# df['Starting OD'] =
# df['Replicate Count'] = 24


In [16]:
# Columns that EDD does not accept for now; enabling them also requires 'humidity' and 'plate' entries in user_params
# df['Humidity[%]'] = user_params['humidity']
# df['Plate'] = user_params['plate']

In [17]:
# Columns exported to the EDD experiment description file, in output order.
exp_descr_columns = [
    'Line Name',
    'Line Description',
    'Part ID',
    'Media',
    'Culture Volume',
    'Flask Volume',
    'Growth Temperature',
    'Shaking speed',
]

output_dir = user_params["output_file_path"]
exp_descr_file = f'{output_dir}/edd_experiment_description.xlsx'

# The well-label index is not written; lines are identified by 'Line Name'.
df.loc[:, exp_descr_columns].to_excel(exp_descr_file, index=False)

## Create EDD Measurement File

OD600

In [18]:
measurement_file = f'{user_params["output_file_path"]}/edd_{measurOD600}.xlsx'

# Add the measurement columns in one step; 'Value' carries the OD600 readings.
df = df.assign(**{
    'Measurement Type': 'Optical Density',
    'Time': user_params['time_point'],
    'Value': df[measurOD600],
    'Units': 'n/a',
})

measurement_columns = ['Line Name', 'Measurement Type', 'Time', 'Value', 'Units']
df.loc[:, measurement_columns].to_excel(measurement_file, index=False)

OD340

In [19]:
measurement_file = f'{user_params["output_file_path"]}/edd_{measurOD340}.xlsx'

# 'Value' now carries the OD340 readings. 'Measurement Type' is reused from the
# OD600 export cell, so that cell must have been run first.
df = df.assign(**{
    'Time': user_params['time_point'],
    'Value': df[measurOD340],
    'Units': 'n/a',
})

od340_columns = ['Line Name', 'Measurement Type', 'Time', 'Value', 'Units']
df.loc[:, od340_columns].to_excel(measurement_file, index=False)