# Create EDD Study Files From Data

This notebook creates the files needed for importing a study into Experiment Data Depot (EDD).

## Inputs and outputs

### Required files to run this notebook:
   - `../data/flaviolin/DBTL[.]/media_descriptions.csv` - media designs for each of the wells
   
   - `../data/flaviolin/DBTL[.]/OD.xlsx` - production data from the plate reader


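### Files generated by running this notebook:
   - `edd_experiment_description.xlsx` - line names, line descriptions and metadata
   
   - `edd_OD600.xlsx` and `edd_OD340.xlsx` - measurement files, one per protocol name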
 
    
The files are stored in the user-defined directory (`output_file_path` in the user parameters).

## Setup

Importing needed libraries:

In [1]:
import sys
sys.path.append('../')

import pandas as pd
import openpyxl

from core import create_media_description

### User parameters

In [2]:
# DBTL cycle number: selects the data folder and is used as prefix of the line names.
CYCLE = 4

# Input and output files of a cycle all live in the same folder.
DATA_DIR = f'../data/flaviolin/DBTL{CYCLE}'

user_params = {
    # Input files
    'media_file': f'{DATA_DIR}/media_descriptions.csv',
    'measurement_file': f'{DATA_DIR}/OD.xlsx',
    # Folder for output files
    'output_file_path': DATA_DIR,
    # Plate layout: replicates per media design and number of designs
    'num_replicates': 3,
    'num_designs': 16,
    # One measurement file is written per protocol name
    'protocol_name': ['OD600', 'OD340'],
    # Written to the 'Time' column of the measurement files
    'time_point': 48,
    # Metadata written to the experiment description file
    'part_id': 'JBx_193086',
    'media': 'MOPS',
    'culture_volume': 15,
    'well_volume': 1500,
    'shaking_speed': 800,
    'temperature': 30,
}


In [3]:
# Load the media designs; the first CSV column (the well labels) becomes the index.
media_path = user_params['media_file']
df = pd.read_csv(media_path, index_col=0)
df.tail()

Unnamed: 0_level_0,MOPS,Tricine,H3BO3,Glucose,K2SO4,K2HPO4,FeSO4,NH4Cl,MgCl2,NaCl,(NH4)6Mo7O24,CoCl2,CuSO4,MnSO4,ZnSO4
Well,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
B8,40.0,4.0,0.010556,20.0,2.601029,5.705683,0.082804,12.939917,3.939057,475.957694,3.3e-05,0.001986,0.000992,0.007573,0.000835
C8,40.0,4.0,0.010556,20.0,2.601029,5.705683,0.082804,12.939917,3.939057,475.957694,3.3e-05,0.001986,0.000992,0.007573,0.000835
D8,40.0,4.0,0.003932,20.0,0.309966,1.206566,0.01076,9.146237,0.502864,52.253707,3e-05,0.000304,0.0001,0.000726,9.8e-05
E8,40.0,4.0,0.003932,20.0,0.309966,1.206566,0.01076,9.146237,0.502864,52.253707,3e-05,0.000304,0.0001,0.000726,9.8e-05
F8,40.0,4.0,0.003932,20.0,0.309966,1.206566,0.01076,9.146237,0.502864,52.253707,3e-05,0.000304,0.0001,0.000726,9.8e-05


## Create Line Description

In [4]:
# Build one description per well (row) from its media composition;
# the preview further below shows strings like 'MOPS: 40.000000, Tricine: 4.000000, ...'.
line_descriptions = df.apply(create_media_description, axis=1)
df['Line Description'] = line_descriptions


## Create Line Names

Add the replicate number to each well and build Line Names of the form **C[cycle]_W[first well]_[last well]-R[replicate]** (e.g. `C4_WA1_C1-R1`), denoting the cycle number, the range of wells that share the same media design, and the replicate number:

In [5]:
# Shorthands for the plate-layout parameters used to number the replicates.
reps, num_media_designs = (
    user_params[key] for key in ('num_replicates', 'num_designs')
)

In [6]:
# The plate is treated as filled column by column when the second well listed
# in `df` sits in plate row B (A1, B1, ...); otherwise as filled row by row.
second_well = df.index[1]
column_order = second_well[0] == 'B'

In [7]:
# Show the detected layout flag (True when the second well in `df` is in plate row B).
column_order

True

In [8]:
def linefunction_colum_order(row):
    """Build the Line Name of a well for a plate filled column by column.

    Wells in rows A-C of a plate column share one line-name prefix and all
    other rows of that column share another (labelled D-F).

    Parameters
    ----------
    row : pandas.Series
        One row of the main dataframe. `row.name` is the well label
        (row letter followed by the column number, e.g. 'A1') and
        `row["Replicate"]` is the replicate number.

    Returns
    -------
    str
        Line name 'C<cycle>_W<first well>_<last well>-R<replicate>',
        e.g. 'C4_WA1_C1-R1' for CYCLE = 4.
    """
    well = row.name
    # Use everything after the row letter as the plate column, so that
    # two-digit columns (e.g. 'A10') are not cut down to their first digit
    # and given the same name as column 1.
    plate_column = well[1:]
    first_row, last_row = ('A', 'C') if well[0] in 'ABC' else ('D', 'F')
    return (
        f'C{CYCLE}_W{first_row}{plate_column}_{last_row}{plate_column}'
        f'-R{row["Replicate"]}'
    )

def linefunction_row_order(row):
    """Build the Line Name of a well for a plate filled row by row.

    Within a plate row, wells whose column digit is below 5 get the
    '<row>1_<row>4' label and all others the '<row>5_<row>8' label.

    Parameters
    ----------
    row : pandas.Series
        One row of the main dataframe. `row.name` is the well label
        (e.g. 'B3') and `row["Replicate"]` is the replicate number.

    Returns
    -------
    str
        Line name 'C<cycle>_W<first well>_<last well>-R<replicate>',
        e.g. 'C4_WB1_B4-R1' for CYCLE = 4.
    """
    well = row.name
    plate_row = well[0]
    # Only the first character after the row letter is read as the column.
    in_first_group = int(well[1]) < 5
    if in_first_group:
        well_span = f'{plate_row}1_{plate_row}4'
    else:
        well_span = f'{plate_row}5_{plate_row}8'
    return f'C{CYCLE}_W{well_span}-R{row["Replicate"]}'

    
# Number the replicates 1..reps, repeating that sequence once per media design
# (this relies on the rows of `df` being grouped by design -- TODO confirm).
df['Replicate'] = list(range(1, reps + 1)) * num_media_designs

# Name the lines with the function matching the detected plate layout.
name_line = linefunction_colum_order if column_order else linefunction_row_order
df['Line Name'] = df.apply(name_line, axis=1)

In [9]:
# Preview the first two wells with their new 'Replicate' and 'Line Name' columns.
df.head(2)

Unnamed: 0_level_0,MOPS,Tricine,H3BO3,Glucose,K2SO4,K2HPO4,FeSO4,NH4Cl,MgCl2,NaCl,(NH4)6Mo7O24,CoCl2,CuSO4,MnSO4,ZnSO4,Line Description,Replicate,Line Name
Well,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
A1,40.0,4.0,0.024773,20.0,0.700826,4.06276,0.047732,12.929126,2.133702,414.274311,0.00025,0.001827,0.000484,0.004128,1.4e-05,"MOPS: 40.000000, Tricine: 4.000000, H3BO3: 0.0...",1,C4_WA1_C1-R1
B1,40.0,4.0,0.024773,20.0,0.700826,4.06276,0.047732,12.929126,2.133702,414.274311,0.00025,0.001827,0.000484,0.004128,1.4e-05,"MOPS: 40.000000, Tricine: 4.000000, H3BO3: 0.0...",2,C4_WA1_C1-R2


## Process measurement files

Read measurements file:

In [10]:
# Each wavelength is stored in its own sheet ('600' and '340') of the
# plate-reader workbook; the first sheet column (plate row letters) is the index.
measurement_path = user_params['measurement_file']
df_600, df_340 = (
    pd.read_excel(measurement_path, sheet_name=sheet, index_col=0)
    for sheet in ('600', '340')
)

df_600

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12
A,0.1115,0.114,0.117,0.1128,0.0773,0.138,0.077,0.1143,0.0359,0.0463,0.0461,0.0459
B,0.1263,0.1297,0.1227,0.1321,0.1037,0.1392,0.0855,0.1318,0.0467,0.0467,0.0464,0.0459
C,0.2005,0.1479,0.149,0.1447,0.2711,0.1633,0.1134,0.1274,0.0466,0.0466,0.0464,0.0457
D,0.1368,0.0888,0.1257,0.1204,0.1095,0.1317,0.1262,0.1218,0.0463,0.0466,0.0462,0.0457
E,0.1614,0.0805,0.1479,0.1365,0.1129,0.133,0.1457,0.1361,0.0465,0.0464,0.0462,0.0457
F,0.1607,1.7715,0.1406,0.1414,0.135,0.1357,0.1597,0.1507,0.0467,0.0465,0.0461,0.0457
G,0.0461,0.0465,0.0469,0.0467,0.0464,0.0462,0.0461,0.0466,0.0468,0.0465,0.0463,0.0458
H,0.0461,0.0461,0.046,0.046,0.0456,0.0457,0.0458,0.0456,0.0462,0.0461,0.0459,0.0455


In [11]:
# Display the raw OD340 plate readings.
df_340

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12
A,0.531,0.5431,0.4541,0.5717,0.5538,0.5698,0.5456,0.5301,0.1217,0.135,0.1329,0.1314
B,0.4933,0.5797,0.488,0.5661,0.6076,0.5819,0.608,0.5927,0.1388,0.1367,0.1348,0.132
C,0.5804,0.6215,0.5011,0.5927,0.6326,0.5964,0.6061,0.6047,0.1382,0.1374,0.1358,0.1321
D,0.5133,0.686,0.6674,0.5979,0.6053,0.6622,0.7012,0.3962,0.1387,0.1383,0.1361,0.1316
E,0.6158,0.6254,0.6938,0.5864,0.6472,0.6926,0.7061,0.4046,0.1387,0.1383,0.1361,0.1314
F,0.6035,0.6616,0.7063,0.6146,0.6443,0.6907,0.6863,0.4083,0.1394,0.138,0.1362,0.1323
G,0.1318,0.1345,0.137,0.1382,0.1391,0.1368,0.137,0.1397,0.1383,0.1367,0.1357,0.1321
H,0.1312,0.1333,0.1349,0.1349,0.1344,0.1338,0.1341,0.1341,0.1343,0.1354,0.1329,0.1297


### Process OD600

Subtract the reading of the control well A9 (water) from all wells:

In [12]:
# Reading of the control well A9, used as the OD600 baseline.
zero_value_600 = df_600.at['A', 9]
# Subtract the baseline from every well of the plate.
df_600 = df_600.sub(zero_value_600)
df_600

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12
A,0.0756,0.0781,0.0811,0.0769,0.0414,0.1021,0.0411,0.0784,0.0,0.0104,0.0102,0.01
B,0.0904,0.0938,0.0868,0.0962,0.0678,0.1033,0.0496,0.0959,0.0108,0.0108,0.0105,0.01
C,0.1646,0.112,0.1131,0.1088,0.2352,0.1274,0.0775,0.0915,0.0107,0.0107,0.0105,0.0098
D,0.1009,0.0529,0.0898,0.0845,0.0736,0.0958,0.0903,0.0859,0.0104,0.0107,0.0103,0.0098
E,0.1255,0.0446,0.112,0.1006,0.077,0.0971,0.1098,0.1002,0.0106,0.0105,0.0103,0.0098
F,0.1248,1.7356,0.1047,0.1055,0.0991,0.0998,0.1238,0.1148,0.0108,0.0106,0.0102,0.0098
G,0.0102,0.0106,0.011,0.0108,0.0105,0.0103,0.0102,0.0107,0.0109,0.0106,0.0104,0.0099
H,0.0102,0.0102,0.0101,0.0101,0.0097,0.0098,0.0099,0.0097,0.0103,0.0102,0.01,0.0096


Set negative values to zero, multiply the values by 10 to account for 10x dilution and keep only 8 columns and 6 rows:

In [13]:
# Clamp negative readings to zero, scale by 10 and keep the first 6 rows and
# 8 columns. Note: re-running this cell multiplies the values by 10 again.
df_600 = (
    df_600
    .mask(df_600 < 0, 0)
    .mul(10)
    .iloc[:6, :8]
)
df_600

Unnamed: 0,1,2,3,4,5,6,7,8
A,0.756,0.781,0.811,0.769,0.414,1.021,0.411,0.784
B,0.904,0.938,0.868,0.962,0.678,1.033,0.496,0.959
C,1.646,1.12,1.131,1.088,2.352,1.274,0.775,0.915
D,1.009,0.529,0.898,0.845,0.736,0.958,0.903,0.859
E,1.255,0.446,1.12,1.006,0.77,0.971,1.098,1.002
F,1.248,17.356,1.047,1.055,0.991,0.998,1.238,1.148


### Process OD340

Do the same for OD340, except for the multiplication by 10, as the OD340 measurements were taken from non-diluted samples:

In [14]:
# Reading of the control well A9, used as the OD340 baseline.
zero_value_340 = df_340.at['A', 9]
# Subtract the baseline, clamp negative readings to zero and keep the first
# 6 rows and 8 columns (no scaling here).
df_340 = (
    df_340
    .sub(zero_value_340)
    .pipe(lambda plate: plate.mask(plate < 0, 0))
    .iloc[:6, :8]
)
df_340

Unnamed: 0,1,2,3,4,5,6,7,8
A,0.4093,0.4214,0.3324,0.45,0.4321,0.4481,0.4239,0.4084
B,0.3716,0.458,0.3663,0.4444,0.4859,0.4602,0.4863,0.471
C,0.4587,0.4998,0.3794,0.471,0.5109,0.4747,0.4844,0.483
D,0.3916,0.5643,0.5457,0.4762,0.4836,0.5405,0.5795,0.2745
E,0.4941,0.5037,0.5721,0.4647,0.5255,0.5709,0.5844,0.2829
F,0.4818,0.5399,0.5846,0.4929,0.5226,0.569,0.5646,0.2866


Transform the data from wide to long format, ordering the wells in the index the same way as in `df`:

In [15]:
def _melt_plate(plate, value_name):
    """Turn a plate-shaped frame (row letters x column numbers) into one row per well.

    The result is indexed by well label ('A1', 'B1', ...) under the index
    name 'Well' and has a single column called `value_name`. Wells are
    listed column by column when `column_order` is true, else row by row.
    """
    if column_order:
        # Melting walks through the plate columns one after another; the row
        # letter stays in the index and the column number goes to 'variable'.
        long_form = plate.melt(ignore_index=False, value_name=value_name)
        wells = [
            f'{letter}{int(number)}'
            for letter, number in zip(long_form.index, long_form['variable'])
        ]
    else:
        # Transposing first makes the melt walk through the plate rows; the
        # column number stays in the index and the row letter goes to 'variable'.
        long_form = plate.T.melt(ignore_index=False, value_name=value_name)
        wells = [
            f'{letter}{number}'
            for letter, number in zip(long_form['variable'], long_form.index)
        ]
    long_form.index = pd.Index(wells, name='Well')
    return long_form.drop(columns=['variable'])


df_600 = _melt_plate(df_600, 'OD600')
df_340 = _melt_plate(df_340, 'OD340')

df_600.head()

Unnamed: 0_level_0,OD600
Well,Unnamed: 1_level_1
A1,0.756
B1,0.904
C1,1.646
D1,1.009
E1,1.255


Add measurements to the main dataframe:

In [17]:
# Column names of the two measurements, taken from the protocol names.
measurOD600 = user_params['protocol_name'][0]
measurOD340 = user_params['protocol_name'][1]

# Assign the measurement columns explicitly as Series so that the values are
# matched to `df` by well label.
df[measurOD600] = df_600['OD600']
df[measurOD340] = df_340['OD340']

# A well of `df` without a matching measurement ends up as NaN; stop here
# instead of writing empty values into the EDD files.
missing_wells = df.index[df[[measurOD600, measurOD340]].isna().any(axis=1)]
if len(missing_wells) > 0:
    raise ValueError(f'No measurement found for wells: {list(missing_wells)}')

df.head(2)


Unnamed: 0_level_0,MOPS,Tricine,H3BO3,Glucose,K2SO4,K2HPO4,FeSO4,NH4Cl,MgCl2,NaCl,(NH4)6Mo7O24,CoCl2,CuSO4,MnSO4,ZnSO4,Line Description,Replicate,Line Name,OD600,OD340
Well,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
A1,40.0,4.0,0.024773,20.0,0.700826,4.06276,0.047732,12.929126,2.133702,414.274311,0.00025,0.001827,0.000484,0.004128,1.4e-05,"MOPS: 40.000000, Tricine: 4.000000, H3BO3: 0.0...",1,C4_WA1_C1-R1,0.756,0.4093
B1,40.0,4.0,0.024773,20.0,0.700826,4.06276,0.047732,12.929126,2.133702,414.274311,0.00025,0.001827,0.000484,0.004128,1.4e-05,"MOPS: 40.000000, Tricine: 4.000000, H3BO3: 0.0...",2,C4_WA1_C1-R2,0.904,0.3716


## Create EDD Experiment Description File

In [18]:
# Constant metadata columns, identical for every line, in the order they are added.
line_metadata = {
    'Media': user_params['media'],
    'Part ID': user_params['part_id'],
    'Culture Volume': user_params['culture_volume'],
    'Flask Volume': user_params['well_volume'],  # the well volume is reported as flask volume
    'Growth Temperature': user_params['temperature'],
    'Shaking speed': user_params['shaking_speed'],
}
for column, value in line_metadata.items():
    df[column] = value
# Not set for now: 'Starting OD' and 'Replicate Count'.


In [19]:
# Invalid columns for now in EDD
# df['Humidity[%]'] = user_params['humidity']
# df['Plate'] = user_params['plate']

In [20]:
# Write the experiment description: one row per line, without the well index.
exp_descr_columns = [
    'Line Name', 'Line Description', 'Part ID', 'Media',
    'Culture Volume', 'Flask Volume', 'Growth Temperature', 'Shaking speed',
]
exp_descr_file = f'{user_params["output_file_path"]}/edd_experiment_description.xlsx'
df[exp_descr_columns].to_excel(exp_descr_file, index=False)

## Create EDD Measurement File

OD600

In [21]:
# Write the OD600 readings as a measurement file, one row per line.
measurement_file = f'{user_params["output_file_path"]}/edd_{measurOD600}.xlsx'
edd_columns = ['Line Name', 'Measurement Type', 'Time', 'Value', 'Units']

# Fill the measurement columns on `df` itself ('Measurement Type' is reused
# by the OD340 cell below).
for column, value in {
    'Measurement Type': 'Optical Density',
    'Time': user_params['time_point'],
    'Value': df[measurOD600],
    'Units': 'n/a',
}.items():
    df[column] = value

df.loc[:, edd_columns].to_excel(measurement_file, index=False)

OD340

In [22]:
# Write the OD340 readings as a measurement file, one row per line.
# 'Measurement Type' is already on `df` from the OD600 cell above.
measurement_file = f'{user_params["output_file_path"]}/edd_{measurOD340}.xlsx'
edd_columns = ['Line Name', 'Measurement Type', 'Time', 'Value', 'Units']

for column, value in {
    'Time': user_params['time_point'],
    'Value': df[measurOD340],
    'Units': 'n/a',
}.items():
    df[column] = value

df.loc[:, edd_columns].to_excel(measurement_file, index=False)