# Create EDD Study Files From Data

This notebook creates the files needed for importing a study into the Experiment Data Depot (EDD).

## Inputs and outputs

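#### Required files to run this notebook:
   - `../data/flaviolin/DBTL<CYCLE>/media_descriptions.csv` - media designs
   
   - `../data/flaviolin/DBTL<CYCLE>/OD.xlsx` - production data (sheets `600` and `340`); `<CYCLE>` is the value of `CYCLE` set in the user parameters below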


#### Files generated by running this notebook:
   - `edd_experiment_description.xlsx`
   
   - `edd_<protocol>.xlsx` - one measurement file per protocol (`edd_OD600.xlsx` and `edd_OD340.xlsx`)
 
    
The files are stored in the user-defined directory (`output_file_path` in the user parameters).

## Setup

Importing needed libraries:

In [19]:
import sys
sys.path.append('../')

import pandas as pd
import openpyxl

from core import create_media_description

### User parameters

In [20]:
# DBTL cycle identifier: selects the data folder and prefixes every line name.
CYCLE = '0.5'

user_params = dict(
    # --- Input / output locations ---
    media_file=f'../data/flaviolin/DBTL{CYCLE}/media_descriptions.csv',
    measurement_file=f'../data/flaviolin/DBTL{CYCLE}/OD.xlsx',
    output_file_path=f'../data/flaviolin/DBTL{CYCLE}',  # Folder for output files
    # --- Plate layout ---
    num_replicates=24,   # replicates per media design
    num_designs=2,       # number of media designs on the plate
    # --- Measurements ---
    protocol_name=['OD600', 'OD340'],  # used as column names and in output file names
    time_point=48,       # written to the 'Time' column (units not stated here - TODO confirm)
    # --- Constant experiment metadata written to the experiment description ---
    part_id='JBx_193086',
    media='MOPS',
    culture_volume=15,
    well_volume=1500,    # exported under the 'Flask Volume' column
    shaking_speed=800,
    temperature=30,
)


In [21]:
# Load the media designs; the first CSV column (well IDs such as A1) becomes the index,
# and every remaining column holds the amount of one media component.
df = pd.read_csv(user_params['media_file'], index_col=0)
df.head()

Unnamed: 0,MOPS,Tricine,H3BO3,Glucose,K2SO4,K2HPO4,FeSO4,NH4Cl,MgCl2,NaCl,(NH4)6Mo7O24,CoCl2,CuSO4,MnSO4,ZnSO4
A1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,3e-05,0.0003,0.0001,0.0008,0.0001
B1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,3e-05,0.0003,0.0001,0.0008,0.0001
C1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,3e-05,0.0003,0.0001,0.0008,0.0001
D1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,3e-05,0.0003,0.0001,0.0008,0.0001
E1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,3e-05,0.0003,0.0001,0.0008,0.0001


## Create Line Description

In [22]:
# Build one text description per well from its row of component values.
# `apply` hands the whole row to `create_media_description`, so this cell must
# run while `df` still contains only the media component columns.
df['Line Description'] = df.apply(create_media_description, axis='columns')

Add metadata for media and replicates to craft Line Names

In [23]:
reps = user_params['num_replicates']
num_media_designs = user_params['num_designs']

# One label per media design, in plate order.
# NOTE(review): presumably these name how each design was prepared - confirm.
media_labels = ['biomek', 'manual']

# Fail early with a clear message: the label list and the replicate counter
# below must both expand to exactly one entry per row of `df`.
if len(media_labels) != num_media_designs:
    raise ValueError(
        f'Expected {num_media_designs} media labels but {len(media_labels)} '
        f'are defined: {media_labels}'
    )
if len(df) != num_media_designs * reps:
    raise ValueError(
        f'Media file has {len(df)} rows, expected {num_media_designs} designs '
        f'x {reps} replicates = {num_media_designs * reps}'
    )


def lnfcn(x):
    """Return the line name `C<cycle>_W<well>_<media>-R<replicate>` for one row.

    `x` is a row of `df`; its name is the well ID and it must already carry
    the 'Media' and 'Replicate' columns.
    """
    return f'C{CYCLE}_W{x.name}_{x["Media"]}-R{x["Replicate"]}'


# Each media label is repeated `reps` times; replicates count 1..reps within each design.
df['Media'] = [media for media in media_labels for _ in range(reps)]
df['Replicate'] = [i + 1 for _ in range(num_media_designs) for i in range(reps)]
df['Line Name'] = df.apply(lnfcn, axis=1)

In [24]:
# Preview the newly added 'Line Description', 'Media', 'Replicate' and 'Line Name' columns.
df.head(2)

Unnamed: 0,MOPS,Tricine,H3BO3,Glucose,K2SO4,K2HPO4,FeSO4,NH4Cl,MgCl2,NaCl,(NH4)6Mo7O24,CoCl2,CuSO4,MnSO4,ZnSO4,Line Description,Media,Replicate,Line Name
A1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,3e-05,0.0003,0.0001,0.0008,0.0001,"MOPS: 40.0000, Tricine: 4.0000, H3BO3: 0.0040,...",biomek,1,C0.5_WA1_biomek-R1
B1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,3e-05,0.0003,0.0001,0.0008,0.0001,"MOPS: 40.0000, Tricine: 4.0000, H3BO3: 0.0040,...",biomek,2,C0.5_WB1_biomek-R2


## Process measurement files

Read measurements file

In [25]:
# Read both absorbance sheets of the measurement workbook; in each sheet the
# first column (plate row letters) becomes the index and the remaining column
# labels are the plate column numbers.
df_600, df_340 = (
    pd.read_excel(user_params['measurement_file'], sheet_name=sheet, index_col=0)
    for sheet in ('600', '340')
)

df_600

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12
A,0.0657,0.0707,0.0623,0.105,0.0554,0.0572,0.0552,0.0501,0.0358,0.0463,0.046,0.0458
B,0.0593,0.0611,0.066,0.1013,0.0538,0.0545,0.0522,0.0511,0.0467,0.0467,0.0463,0.0458
C,0.0583,0.0567,0.0573,0.0525,0.0577,0.0548,0.0519,0.0515,0.0464,0.0464,0.0464,0.0455
D,0.0575,0.0547,0.0657,0.0468,0.0496,0.0535,0.0545,0.048,0.0465,0.0464,0.0463,0.0457
E,0.0599,0.0886,0.0735,0.0364,0.0499,0.053,0.0504,0.0505,0.0465,0.0463,0.0461,0.0456
F,0.0617,0.0853,0.0474,0.0361,0.0536,0.0505,0.0507,0.0499,0.0465,0.0459,0.0459,0.0457
G,0.046,0.0462,0.0469,0.0465,0.0464,0.046,0.046,0.0464,0.0465,0.0463,0.0461,0.0557
H,0.046,0.0462,0.0458,0.0458,0.0455,0.0472,0.0456,0.0455,0.046,0.0461,0.0456,0.0452


In [26]:
# Show the raw OD340 plate readings before any processing.
df_340

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12
A,0.3323,0.3433,0.3387,0.3707,0.3337,0.3306,0.3287,0.3313,0.1255,0.136,0.1326,0.1312
B,0.3354,0.3338,0.3427,0.4422,0.3399,0.3362,0.3371,0.3399,0.1392,0.1386,0.1345,0.1318
C,0.3365,0.3143,0.3308,0.1652,0.3318,0.3335,0.3396,0.3415,0.1393,0.1381,0.1363,0.132
D,0.3369,0.3379,0.3472,0.1636,0.3332,0.3284,0.3311,0.3378,0.1387,0.1388,0.1365,0.1315
E,0.3381,0.3664,0.3386,0.1432,0.3423,0.3346,0.3341,0.3348,0.1389,0.139,0.1367,0.1318
F,0.3419,0.3791,0.1615,0.1745,0.3456,0.3395,0.3387,0.3365,0.1392,0.1388,0.136,0.132
G,0.1324,0.1368,0.1385,0.1399,0.1391,0.1379,0.1382,0.1397,0.1387,0.1374,0.1349,0.1314
H,0.1308,0.1336,0.1352,0.1347,0.1351,0.1339,0.1342,0.1348,0.1346,0.1352,0.1333,0.1307


### Process OD600

Subtract the reading of the control well A9 (which contains water) from all wells, so that values are expressed relative to the blank:

In [27]:
# Reading of the water-only control well (row A, column 9), used as the blank.
zero_value_600 = df_600.loc['A', 9]

# Subtract the blank from every well of the plate.
df_600 = df_600.sub(zero_value_600)
df_600

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12
A,0.0299,0.0349,0.0265,0.0692,0.0196,0.0214,0.0194,0.0143,0.0,0.0105,0.0102,0.01
B,0.0235,0.0253,0.0302,0.0655,0.018,0.0187,0.0164,0.0153,0.0109,0.0109,0.0105,0.01
C,0.0225,0.0209,0.0215,0.0167,0.0219,0.019,0.0161,0.0157,0.0106,0.0106,0.0106,0.0097
D,0.0217,0.0189,0.0299,0.011,0.0138,0.0177,0.0187,0.0122,0.0107,0.0106,0.0105,0.0099
E,0.0241,0.0528,0.0377,0.0006,0.0141,0.0172,0.0146,0.0147,0.0107,0.0105,0.0103,0.0098
F,0.0259,0.0495,0.0116,0.0003,0.0178,0.0147,0.0149,0.0141,0.0107,0.0101,0.0101,0.0099
G,0.0102,0.0104,0.0111,0.0107,0.0106,0.0102,0.0102,0.0106,0.0107,0.0105,0.0103,0.0199
H,0.0102,0.0104,0.01,0.01,0.0097,0.0114,0.0098,0.0097,0.0102,0.0103,0.0098,0.0094


Set negative values to zero, multiply the values by 10 to account for the 10x dilution, and keep only the first 6 rows (A-F) and the first 8 columns (1-8):

In [28]:
df_600 = (
    df_600
    .mask(df_600 < 0, 0)   # blank-corrected readings below zero are set to 0
    .mul(10)               # undo the 10x dilution of the OD600 samples
    .iloc[:6, :8]          # keep rows A-F and columns 1-8 (6 x 8 = 48 wells)
)
df_600

Unnamed: 0,1,2,3,4,5,6,7,8
A,0.299,0.349,0.265,0.692,0.196,0.214,0.194,0.143
B,0.235,0.253,0.302,0.655,0.18,0.187,0.164,0.153
C,0.225,0.209,0.215,0.167,0.219,0.19,0.161,0.157
D,0.217,0.189,0.299,0.11,0.138,0.177,0.187,0.122
E,0.241,0.528,0.377,0.006,0.141,0.172,0.146,0.147
F,0.259,0.495,0.116,0.003,0.178,0.147,0.149,0.141


### Process OD340

Do the same for OD340, except for the multiplication by 10, as the OD340 measurements were taken from non-diluted samples:

In [29]:
# Reading of the water-only control well (row A, column 9), used as the blank.
zero_value_340 = df_340.loc['A', 9]

df_340 = (
    df_340
    .sub(zero_value_340)                # subtract the blank from every well
    .mask(lambda plate: plate < 0, 0)   # negative corrected readings are set to 0
    .iloc[:6, :8]                       # keep rows A-F and columns 1-8
)
df_340

Unnamed: 0,1,2,3,4,5,6,7,8
A,0.2068,0.2178,0.2132,0.2452,0.2082,0.2051,0.2032,0.2058
B,0.2099,0.2083,0.2172,0.3167,0.2144,0.2107,0.2116,0.2144
C,0.211,0.1888,0.2053,0.0397,0.2063,0.208,0.2141,0.216
D,0.2114,0.2124,0.2217,0.0381,0.2077,0.2029,0.2056,0.2123
E,0.2126,0.2409,0.2131,0.0177,0.2168,0.2091,0.2086,0.2093
F,0.2164,0.2536,0.036,0.049,0.2201,0.214,0.2132,0.211


Transform the plates from wide to long format and index them by well name (e.g. `A1`), so that the rows can be matched to the index of `df`:

In [30]:
def wide_to_long(plate, value_name):
    """Convert a plate-shaped frame into one row per well.

    Parameters
    ----------
    plate : pandas.DataFrame
        Wide frame whose index holds the plate row labels and whose column
        labels are the plate column numbers.
    value_name : str
        Name of the single value column in the result.

    Returns
    -------
    pandas.DataFrame
        One column called `value_name`, indexed by well name (row label
        followed by the integer column number, e.g. 'A1'); the index is
        named 'Well'. Wells are ordered column by column, as produced by
        `melt`.
    """
    long = plate.melt(ignore_index=False, value_name=value_name)
    # `melt` keeps the row label in the index and puts the column label in 'variable'.
    wells = [f'{row}{int(col)}' for row, col in zip(long.index, long['variable'])]
    long.index = pd.Index(wells, name='Well')
    return long[[value_name]]


df_600 = wide_to_long(df_600, 'OD600')
df_340 = wide_to_long(df_340, 'OD340')

df_600.head()

Unnamed: 0_level_0,OD600
Well,Unnamed: 1_level_1
A1,0.299
B1,0.235
C1,0.225
D1,0.217
E1,0.241


Add measurements to the main dataframe:

In [31]:
# The measurement column names come from the configured protocol names.
protocol_names = user_params['protocol_name']
measurOD600, measurOD340 = protocol_names[0], protocol_names[1]

# Each long-format frame holds a single value column indexed by well name,
# which is the same kind of key as the index of `df`.
df[measurOD600] = df_600
df[measurOD340] = df_340
df.head(2)

Unnamed: 0,MOPS,Tricine,H3BO3,Glucose,K2SO4,K2HPO4,FeSO4,NH4Cl,MgCl2,NaCl,...,CoCl2,CuSO4,MnSO4,ZnSO4,Line Description,Media,Replicate,Line Name,OD600,OD340
A1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,...,0.0003,0.0001,0.0008,0.0001,"MOPS: 40.0000, Tricine: 4.0000, H3BO3: 0.0040,...",biomek,1,C0.5_WA1_biomek-R1,0.299,0.2068
B1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,...,0.0003,0.0001,0.0008,0.0001,"MOPS: 40.0000, Tricine: 4.0000, H3BO3: 0.0040,...",biomek,2,C0.5_WB1_biomek-R2,0.235,0.2099


## Create EDD Experiment Description File

In [32]:
# Constant experiment metadata: output column name -> key in `user_params`.
# 'Media' held the per-row 'biomek'/'manual' label while the line names were
# built; from here on it is overwritten with the configured medium name.
for edd_column, param_key in {
    'Media': 'media',
    'Part ID': 'part_id',
    'Culture Volume': 'culture_volume',
    'Flask Volume': 'well_volume',
    'Growth Temperature': 'temperature',
    'Shaking speed': 'shaking_speed',
}.items():
    df[edd_column] = user_params[param_key]

# Not populated yet:
# df['Starting OD'] =
# df['Replicate Count'] = 24


In [33]:
# These columns are currently invalid in EDD, so they are left disabled.
# NOTE(review): `user_params` has no 'humidity' or 'plate' entries; add them
# there before re-enabling the lines below.
# df['Humidity[%]'] = user_params['humidity']
# df['Plate'] = user_params['plate']

In [34]:
# Write the experiment description: one row per line, restricted to the
# columns listed below (in this order) and without the well index.
exp_descr_file = f'{user_params["output_file_path"]}/edd_experiment_description.xlsx'
df.to_excel(
    exp_descr_file,
    columns=[
        'Line Name',
        'Line Description',
        'Part ID',
        'Media',
        'Culture Volume',
        'Flask Volume',
        'Growth Temperature',
        'Shaking speed',
    ],
    index=False,
)

## Create EDD Measurement File

OD600

In [35]:
# Write the OD600 measurement file, named after the protocol.
measurement_file = f'{user_params["output_file_path"]}/edd_{measurOD600}.xlsx'

# Every row shares the same measurement type, time point and units;
# only 'Value' differs per line.
df['Measurement Type'] = 'Optical Density'
df['Time'] = user_params['time_point']
df['Value'] = df[measurOD600]
df['Units'] = 'n/a'

df.to_excel(
    measurement_file,
    columns=['Line Name', 'Measurement Type', 'Time', 'Value', 'Units'],
    index=False,
)

OD340

In [36]:
# Write the OD340 measurement file, named after the protocol.
measurement_file = f'{user_params["output_file_path"]}/edd_{measurOD340}.xlsx'

# 'Measurement Type' is not set here: the column must already exist in `df`
# (it is assigned when the OD600 file is written).
df['Time'] = user_params['time_point']
df['Value'] = df[measurOD340]   # replaces the OD600 values stored in 'Value'
df['Units'] = 'n/a'

df.to_excel(
    measurement_file,
    columns=['Line Name', 'Measurement Type', 'Time', 'Value', 'Units'],
    index=False,
)