# Create EDD Study Files From Data

This notebook creates the files needed for importing a study into Experiment Data Depot (EDD).

## Inputs and outputs

### Required file to run this notebook:
   - `../data/DBTL7/media_descriptions.csv` - media designs for each of the wells
   
   - `../data/DBTL7/OD.xlsx` - production data from the plate reader


### File generated by running this notebook:
   - `edd_experiment_description.csv`
   
   - `edd_protocol.csv`
 
    
The files are stored in the user defined directory.

## Setup

Importing needed libraries:

In [1]:
import sys
sys.path.append('../media_compiler')

import pandas as pd
import openpyxl

from core import create_media_description

### User parameters

In [2]:
CYCLE = 5

user_params = {
    'media_file': f'../flaviolin yield data/DBTL{CYCLE}/media_descriptions.csv',  
    'measurement_file': f'../flaviolin yield data/DBTL{CYCLE}/OD.xlsx',
    'output_file_path': f'../flaviolin yield data/DBTL{CYCLE}', # Folder for output files,
    'num_replicates': 3,
    'num_designs': 16,
    'protocol_name': ['OD600', 'OD340'],
    'time_point': 48,
    'part_id': 'JBx_193086',
    'media': 'MOPS',
    'culture_volume': 15,
    'well_volume': 1500,
    'shaking_speed': 800,
    'temperature': 30,
    } 


In [3]:
df = pd.read_csv(user_params['media_file'], index_col=0)
df.head()

Unnamed: 0_level_0,MOPS[mM],Tricine[mM],H3BO3[mM],Glucose[mM],K2SO4[mM],K2HPO4[mM],FeSO4[mM],NH4Cl[mM],MgCl2[mM],NaCl[mM],(NH4)6Mo7O24[mM],CoCl2[mM],CuSO4[mM],MnSO4[mM],ZnSO4[mM],Kan[g/l]
Well,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
A1,40.0,4.0,0.013782,42.795319,0.689979,5.890308,0.080204,8.53423,3.036435,490.645332,0.000141,0.002972,7.9e-05,0.003381,0.000836,0.05
B1,40.0,4.0,0.013782,42.795319,0.689979,5.890308,0.080204,8.53423,3.036435,490.645332,0.000141,0.002972,7.9e-05,0.003381,0.000836,0.05
C1,40.0,4.0,0.013782,42.795319,0.689979,5.890308,0.080204,8.53423,3.036435,490.645332,0.000141,0.002972,7.9e-05,0.003381,0.000836,0.05
D1,40.0,4.0,0.002196,46.303341,1.627894,0.419898,0.072132,8.598819,3.170558,428.803022,0.000243,0.00248,0.000943,0.005789,0.000796,0.05
E1,40.0,4.0,0.002196,46.303341,1.627894,0.419898,0.072132,8.598819,3.170558,428.803022,0.000243,0.00248,0.000943,0.005789,0.000796,0.05


## Create Line Description

In [4]:
df['Line Description'] = df.apply(create_media_description, axis=1)


In [5]:
df['Line Description']

Well
A1    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
B1    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
C1    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
D1    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
E1    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
F1    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
A2    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
B2    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
C2    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
D2    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
E2    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
F2    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
A3    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
B3    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
C3    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
D3    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
E3    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
F3    MOPS[mM]: 40.000000, Tricine[mM]: 4.0

## Create Line Names

Add metadata for media and replicates to craft Line Names as **C[.]\_W[.]1\_[.]3-R[.]** denoting cycle number, wells occupying the same design and replicate number:

Check if it's row or column order of lines:

In [6]:
column_order = True if df.index[1][0] == 'B' else False

In [7]:
reps = user_params['num_replicates']
num_media_designs = user_params['num_designs']

def linefunction_colum_order(row):
    well = row.name
    if well[0] in 'ABC':
        return f'C{CYCLE}_WA{well[1]}_C{well[1]}-R{row["Replicate"]}'
    else:
        return f'C{CYCLE}_WD{well[1]}_F{well[1]}-R{row["Replicate"]}'                                                                                              

def linefunction_row_order(row):
    well = row.name
    if int(well[1]) < 5:
        return f'C{CYCLE}_W{well[0]}1_{well[0]}4-R{row["Replicate"]}'
    else:
        return f'C{CYCLE}_W{well[0]}5_{well[0]}8-R{row["Replicate"]}'      
    
def linefunction_row_order_triplicates(row):
    well = row.name

    well_names = list(df.index)
    index = well_names.index(well)
    rem = index%3
    name = f'C{CYCLE}_W{well_names[index-rem]}_{well_names[index-rem+2]}-R{row["Replicate"]}'
    print(name)
    return name
    
    

    
df['Replicate'] = [i+1 for _ in range(num_media_designs) for i in range(reps)]
df['Line Name'] = df.apply(linefunction_row_order_triplicates, axis=1)

# if column_order:
#     df['Line Name'] = df.apply(linefunction_colum_order, axis=1)
# else:
#     df['Line Name'] = df.apply(linefunction_row_order, axis=1)

C5_WA1_C1-R1
C5_WA1_C1-R2
C5_WA1_C1-R3
C5_WD1_F1-R1
C5_WD1_F1-R2
C5_WD1_F1-R3
C5_WA2_C2-R1
C5_WA2_C2-R2
C5_WA2_C2-R3
C5_WD2_F2-R1
C5_WD2_F2-R2
C5_WD2_F2-R3
C5_WA3_C3-R1
C5_WA3_C3-R2
C5_WA3_C3-R3
C5_WD3_F3-R1
C5_WD3_F3-R2
C5_WD3_F3-R3
C5_WA4_C4-R1
C5_WA4_C4-R2
C5_WA4_C4-R3
C5_WD4_F4-R1
C5_WD4_F4-R2
C5_WD4_F4-R3
C5_WA5_C5-R1
C5_WA5_C5-R2
C5_WA5_C5-R3
C5_WD5_F5-R1
C5_WD5_F5-R2
C5_WD5_F5-R3
C5_WA6_C6-R1
C5_WA6_C6-R2
C5_WA6_C6-R3
C5_WD6_F6-R1
C5_WD6_F6-R2
C5_WD6_F6-R3
C5_WA7_C7-R1
C5_WA7_C7-R2
C5_WA7_C7-R3
C5_WD7_F7-R1
C5_WD7_F7-R2
C5_WD7_F7-R3
C5_WA8_C8-R1
C5_WA8_C8-R2
C5_WA8_C8-R3
C5_WD8_F8-R1
C5_WD8_F8-R2
C5_WD8_F8-R3


In [8]:
df.head(5)

Unnamed: 0_level_0,MOPS[mM],Tricine[mM],H3BO3[mM],Glucose[mM],K2SO4[mM],K2HPO4[mM],FeSO4[mM],NH4Cl[mM],MgCl2[mM],NaCl[mM],(NH4)6Mo7O24[mM],CoCl2[mM],CuSO4[mM],MnSO4[mM],ZnSO4[mM],Kan[g/l],Line Description,Replicate,Line Name
Well,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
A1,40.0,4.0,0.013782,42.795319,0.689979,5.890308,0.080204,8.53423,3.036435,490.645332,0.000141,0.002972,7.9e-05,0.003381,0.000836,0.05,"MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...",1,C5_WA1_C1-R1
B1,40.0,4.0,0.013782,42.795319,0.689979,5.890308,0.080204,8.53423,3.036435,490.645332,0.000141,0.002972,7.9e-05,0.003381,0.000836,0.05,"MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...",2,C5_WA1_C1-R2
C1,40.0,4.0,0.013782,42.795319,0.689979,5.890308,0.080204,8.53423,3.036435,490.645332,0.000141,0.002972,7.9e-05,0.003381,0.000836,0.05,"MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...",3,C5_WA1_C1-R3
D1,40.0,4.0,0.002196,46.303341,1.627894,0.419898,0.072132,8.598819,3.170558,428.803022,0.000243,0.00248,0.000943,0.005789,0.000796,0.05,"MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...",1,C5_WD1_F1-R1
E1,40.0,4.0,0.002196,46.303341,1.627894,0.419898,0.072132,8.598819,3.170558,428.803022,0.000243,0.00248,0.000943,0.005789,0.000796,0.05,"MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...",2,C5_WD1_F1-R2


## Process measurement files

Read measurements file:

In [9]:
df_600 = pd.read_excel(user_params['measurement_file'], sheet_name='600', index_col=0)
df_340 = pd.read_excel(user_params['measurement_file'], sheet_name='340', index_col=0)

# df_600

In [10]:
df_340

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12
A,0.7464,0.5028,0.2401,0.4018,0.4977,0.4995,0.7258,0.2303,0.1209,0.1205,0.1192,0.1162
B,0.7423,0.5072,0.241,0.4097,0.486,0.521,0.74,0.2404,0.121,0.1184,0.1176,0.1131
C,0.7303,0.4999,0.2383,0.3702,0.4203,0.5138,0.7742,0.2377,0.1145,0.1137,0.1099,0.1082
D,0.441,0.2533,0.4289,0.474,0.4102,0.6172,0.5149,0.3563,0.1065,0.1065,0.1041,0.1007
E,0.4392,0.2514,0.4242,0.4749,0.4155,0.6516,0.5215,0.3558,0.1011,0.1007,0.0989,0.0954
F,0.4698,0.2376,0.4309,0.4707,0.4075,0.6543,0.5281,0.3391,0.0981,0.097,0.0947,0.092
G,0.0908,0.0935,0.0958,0.0967,0.0969,0.0952,0.0953,0.0972,0.0966,0.096,0.0925,0.0898
H,0.0921,0.0931,0.0944,0.0943,0.0931,0.0928,0.0932,0.0928,0.0941,0.0932,0.0911,0.0912


### Process OD600

Normalize the data to the control well A9 value (water content):

In [11]:
zero_value_600 = df_600.at['A', 9]
df_600.loc[:, df_600.columns] -= zero_value_600
df_600

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12
A,1.0599,1.2882,0.1284,0.3276,0.3759,1.2634,1.3097,0.1003,0.0,-0.0003,0.0,-0.0009
B,1.0871,1.28,0.1249,0.3004,0.3716,1.2696,1.2869,0.1,-0.0029,-0.0038,-0.0034,-0.0046
C,1.0713,1.2861,0.1149,0.3098,0.5129,1.0798,1.11,0.0958,-0.0092,-0.0083,-0.0094,-0.0092
D,0.9879,0.1291,0.2742,0.2995,0.5661,1.1039,0.9281,0.0809,-0.0145,-0.0142,-0.015,-0.0143
E,0.9737,0.14,0.2528,0.3194,0.5788,1.032,0.9241,0.0476,-0.0194,-0.0194,-0.0196,-0.0202
F,0.971,0.1336,0.3029,0.2681,0.588,1.0752,0.9437,0.0152,-0.0229,-0.0223,-0.0233,-0.0228
G,-0.0234,-0.0234,-0.0231,-0.0232,-0.0236,-0.0235,-0.0238,-0.0234,-0.0233,-0.0231,-0.0239,-0.0241
H,-0.0032,-0.0224,-0.0226,-0.0228,-0.023,-0.0233,-0.0232,-0.0232,-0.023,-0.023,-0.0229,-0.0232


Set negative values to zero, multiply the values by 10 to account for 10x dilution and keep only 8 columns and 6 rows:

In [12]:
df_600[df_600 < 0] = 0
df_600 = df_600.iloc[:6,:8]
df_600

Unnamed: 0,1,2,3,4,5,6,7,8
A,1.0599,1.2882,0.1284,0.3276,0.3759,1.2634,1.3097,0.1003
B,1.0871,1.28,0.1249,0.3004,0.3716,1.2696,1.2869,0.1
C,1.0713,1.2861,0.1149,0.3098,0.5129,1.0798,1.11,0.0958
D,0.9879,0.1291,0.2742,0.2995,0.5661,1.1039,0.9281,0.0809
E,0.9737,0.14,0.2528,0.3194,0.5788,1.032,0.9241,0.0476
F,0.971,0.1336,0.3029,0.2681,0.588,1.0752,0.9437,0.0152


### Process OD340

Do the same for OD340, except of multiplication by 10, as the OD340 measurements were taken from non-diluted samples:

In [13]:
zero_value_340 = df_340.at['A', 9]
df_340.loc[:, df_340.columns] -= zero_value_340
df_340[df_340 < 0] = 0
df_340 = df_340.iloc[:6,:8]
df_340

Unnamed: 0,1,2,3,4,5,6,7,8
A,0.6255,0.3819,0.1192,0.2809,0.3768,0.3786,0.6049,0.1094
B,0.6214,0.3863,0.1201,0.2888,0.3651,0.4001,0.6191,0.1195
C,0.6094,0.379,0.1174,0.2493,0.2994,0.3929,0.6533,0.1168
D,0.3201,0.1324,0.308,0.3531,0.2893,0.4963,0.394,0.2354
E,0.3183,0.1305,0.3033,0.354,0.2946,0.5307,0.4006,0.2349
F,0.3489,0.1167,0.31,0.3498,0.2866,0.5334,0.4072,0.2182


Transform wide to long format, matching the order of wells in index to the one from `df`:

In [14]:
if column_order:
    indfcn = lambda x: f'{x.name}{int(x["variable"])}'
    df_600 = df_600.melt(ignore_index=False, value_name='OD600')
    df_340 = df_340.melt(ignore_index=False, value_name='OD340')
else:
    indfcn = lambda x: f'{x["variable"]}{(x.name)}'
    df_600 = df_600.T.melt(ignore_index=False, value_name='OD600')
    df_340 = df_340.T.melt(ignore_index=False, value_name='OD340')

df_600['Well'] = df_600.apply(indfcn, axis=1)
df_340['Well'] = df_340.apply(indfcn, axis=1)

df_600.index = df_600['Well']
df_340.index = df_340['Well']

df_600.drop(columns=['variable', 'Well'], inplace=True)
df_340.drop(columns=['variable', 'Well'], inplace=True)

df_600.head()

Unnamed: 0_level_0,OD600
Well,Unnamed: 1_level_1
A1,1.0599
B1,1.0871
C1,1.0713
D1,0.9879
E1,0.9737


Add measurements to the main dataframe:

In [15]:
measurOD600 = user_params['protocol_name'][0]
measurOD340 = user_params['protocol_name'][1]
df[measurOD600] = df_600
df[measurOD340] = df_340
df.head(2)


Unnamed: 0_level_0,MOPS[mM],Tricine[mM],H3BO3[mM],Glucose[mM],K2SO4[mM],K2HPO4[mM],FeSO4[mM],NH4Cl[mM],MgCl2[mM],NaCl[mM],...,CoCl2[mM],CuSO4[mM],MnSO4[mM],ZnSO4[mM],Kan[g/l],Line Description,Replicate,Line Name,OD600,OD340
Well,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A1,40.0,4.0,0.013782,42.795319,0.689979,5.890308,0.080204,8.53423,3.036435,490.645332,...,0.002972,7.9e-05,0.003381,0.000836,0.05,"MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...",1,C5_WA1_C1-R1,1.0599,0.6255
B1,40.0,4.0,0.013782,42.795319,0.689979,5.890308,0.080204,8.53423,3.036435,490.645332,...,0.002972,7.9e-05,0.003381,0.000836,0.05,"MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...",2,C5_WA1_C1-R2,1.0871,0.6214


## Create EDD Experiment Description File

In [16]:
df['Media'] = user_params['media']
df['Part ID'] = user_params['part_id']
df['Culture Volume'] = user_params['culture_volume']
df['Flask Volume'] = user_params['well_volume']
df['Growth Temperature'] = user_params['temperature']
df['Shaking speed'] = user_params['shaking_speed']
# df['Starting OD'] =
# df['Replicate Count'] = 24


In [17]:
# Invalid columns for now in EDD
# df['Humidity[%]'] = user_params['humidity']
# df['Plate'] = user_params['plate']

In [18]:
exp_descr_file = f'{user_params["output_file_path"]}/edd_experiment_description.xlsx'
df[['Line Name',
    'Line Description',
    'Part ID',
    'Media',
    'Culture Volume',
    'Flask Volume',
    'Growth Temperature',
    'Shaking speed',
]].to_excel(exp_descr_file, index=False)

In [19]:
df.head()

Unnamed: 0_level_0,MOPS[mM],Tricine[mM],H3BO3[mM],Glucose[mM],K2SO4[mM],K2HPO4[mM],FeSO4[mM],NH4Cl[mM],MgCl2[mM],NaCl[mM],...,Replicate,Line Name,OD600,OD340,Media,Part ID,Culture Volume,Flask Volume,Growth Temperature,Shaking speed
Well,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A1,40.0,4.0,0.013782,42.795319,0.689979,5.890308,0.080204,8.53423,3.036435,490.645332,...,1,C5_WA1_C1-R1,1.0599,0.6255,MOPS,JBx_193086,15,1500,30,800
B1,40.0,4.0,0.013782,42.795319,0.689979,5.890308,0.080204,8.53423,3.036435,490.645332,...,2,C5_WA1_C1-R2,1.0871,0.6214,MOPS,JBx_193086,15,1500,30,800
C1,40.0,4.0,0.013782,42.795319,0.689979,5.890308,0.080204,8.53423,3.036435,490.645332,...,3,C5_WA1_C1-R3,1.0713,0.6094,MOPS,JBx_193086,15,1500,30,800
D1,40.0,4.0,0.002196,46.303341,1.627894,0.419898,0.072132,8.598819,3.170558,428.803022,...,1,C5_WD1_F1-R1,0.9879,0.3201,MOPS,JBx_193086,15,1500,30,800
E1,40.0,4.0,0.002196,46.303341,1.627894,0.419898,0.072132,8.598819,3.170558,428.803022,...,2,C5_WD1_F1-R2,0.9737,0.3183,MOPS,JBx_193086,15,1500,30,800


## Create EDD Measurement File

OD600

In [20]:
measurement_file = f'{user_params["output_file_path"]}/edd_{measurOD600}.xlsx'
df['Measurement Type'] = measurOD600


df['Time'] = user_params['time_point']
df['Value'] = df[measurOD600]
df['Units'] = 'n/a'
df[['Line Name', 'Measurement Type', 'Time', 'Value', 'Units']].to_excel(measurement_file, index=False)

OD340

In [21]:
# measurement_file = f'{user_params["output_file_path"]}/edd_{measurOD340}.xlsx'

# df['Time'] = user_params['time_point']
# df['Value'] = df[measurOD340]
# df['Units'] = 'n/a'
# df[['Line Name', 'Measurement Type', 'Time', 'Value', 'Units']].to_excel(measurement_file, index=False)

OD

In [22]:
measurement_file = f'{user_params["output_file_path"]}/edd_OD.xlsx'
df['Measurement Type'] = measurOD340

df['Time'] = user_params['time_point']
df['Value'] = df[measurOD340]
df['Units'] = 'n/a'
df[['Line Name', 'Measurement Type', 'Time', 'Value', 'Units']].to_excel(measurement_file, index=False)