# Create EDD Study Files From Data

This notebook creates the files needed for importing a study into Experiment Data Depot (EDD).

## Inputs and outputs

### Required file to run this notebook:
   - `../data/DBTL7/media_descriptions.csv` - media designs for each of the wells
   
   - `../data/DBTL7/OD.xlsx` - production data from the plate reader


### File generated by running this notebook:
   - `edd_experiment_description.csv`
   
   - `edd_protocol.csv`
 
    
The files are stored in the user defined directory.

## Setup

Importing needed libraries:

In [1]:
import sys
sys.path.append('../media_compiler')

import pandas as pd
import openpyxl

from core import create_media_description

### User parameters

In [2]:
CYCLE = 4.2

user_params = {
    'media_file': f'../flaviolin data/DBTL{CYCLE}/media_descriptions.csv',  
    'measurement_file': f'../flaviolin data/DBTL{CYCLE}/OD.xlsx',
    'output_file_path': f'../flaviolin data/DBTL{CYCLE}', # Folder for output files,
    'num_replicates': 3,
    'num_designs': 16,
    'protocol_name': ['OD600', 'OD340'],
    'time_point': 48,
    'part_id': 'JBx_193086',
    'media': 'MOPS',
    'culture_volume': 15,
    'well_volume': 1500,
    'shaking_speed': 800,
    'temperature': 30,
    } 


In [3]:
df = pd.read_csv(user_params['media_file'], index_col=0)
df.head()

Unnamed: 0_level_0,MOPS[mM],Tricine[mM],H3BO3[mM],Glucose[mM],K2SO4[mM],K2HPO4[mM],FeSO4[mM],NH4Cl[mM],MgCl2[mM],NaCl[mM],(NH4)6Mo7O24[mM],CoCl2[mM],CuSO4[mM],MnSO4[mM],ZnSO4[mM],Kan[g/l]
Well,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
A1,40.0,4.0,0.045186,20.0,0.590637,4.413947,0.023892,47.480272,0.033098,651.233703,0.000541,0.00301,0.000536,0.007166,0.001252,0.05
B1,40.0,4.0,0.045186,20.0,0.590637,4.413947,0.023892,47.480272,0.033098,651.233703,0.000541,0.00301,0.000536,0.007166,0.001252,0.05
C1,40.0,4.0,0.045186,20.0,0.590637,4.413947,0.023892,47.480272,0.033098,651.233703,0.000541,0.00301,0.000536,0.007166,0.001252,0.05
D1,40.0,4.0,0.034717,20.0,0.641054,4.219546,0.022366,40.901081,0.156846,719.688474,0.000335,0.003982,0.000275,0.008757,0.00069,0.05
E1,40.0,4.0,0.034717,20.0,0.641054,4.219546,0.022366,40.901081,0.156846,719.688474,0.000335,0.003982,0.000275,0.008757,0.00069,0.05


## Create Line Description

In [4]:
df['Line Description'] = df.apply(create_media_description, axis=1)


In [5]:
df['Line Description']

Well
A1    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
B1    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
C1    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
D1    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
E1    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
F1    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
A2    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
B2    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
C2    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
D2    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
E2    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
F2    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
A3    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
B3    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
C3    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
D3    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
E3    MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...
F3    MOPS[mM]: 40.000000, Tricine[mM]: 4.0

## Create Line Names

Add metadata for media and replicates to craft Line Names as **C[.]\_W[.]1\_[.]3-R[.]** denoting cycle number, wells occupying the same design and replicate number:

Check if it's row or column order of lines:

In [6]:
column_order = True if df.index[1][0] == 'B' else False

In [7]:
reps = user_params['num_replicates']
num_media_designs = user_params['num_designs']

def linefunction_colum_order(row):
    well = row.name
    if well[0] in 'ABC':
        return f'C{CYCLE}_WA{well[1]}_C{well[1]}-R{row["Replicate"]}'
    else:
        return f'C{CYCLE}_WD{well[1]}_F{well[1]}-R{row["Replicate"]}'                                                                                              

def linefunction_row_order(row):
    well = row.name
    if int(well[1]) < 5:
        return f'C{CYCLE}_W{well[0]}1_{well[0]}4-R{row["Replicate"]}'
    else:
        return f'C{CYCLE}_W{well[0]}5_{well[0]}8-R{row["Replicate"]}'      
    
def linefunction_row_order_triplicates(row):
    well = row.name

    well_names = list(df.index)
    index = well_names.index(well)
    rem = index%3
    name = f'C{CYCLE}_W{well_names[index-rem]}_{well_names[index-rem+2]}-R{row["Replicate"]}'
    print(name)
    return name
    
    

    
df['Replicate'] = [i+1 for _ in range(num_media_designs) for i in range(reps)]
df['Line Name'] = df.apply(linefunction_row_order_triplicates, axis=1)

# if column_order:
#     df['Line Name'] = df.apply(linefunction_colum_order, axis=1)
# else:
#     df['Line Name'] = df.apply(linefunction_row_order, axis=1)

C4.2_WA1_C1-R1
C4.2_WA1_C1-R2
C4.2_WA1_C1-R3
C4.2_WD1_F1-R1
C4.2_WD1_F1-R2
C4.2_WD1_F1-R3
C4.2_WA2_C2-R1
C4.2_WA2_C2-R2
C4.2_WA2_C2-R3
C4.2_WD2_F2-R1
C4.2_WD2_F2-R2
C4.2_WD2_F2-R3
C4.2_WA3_C3-R1
C4.2_WA3_C3-R2
C4.2_WA3_C3-R3
C4.2_WD3_F3-R1
C4.2_WD3_F3-R2
C4.2_WD3_F3-R3
C4.2_WA4_C4-R1
C4.2_WA4_C4-R2
C4.2_WA4_C4-R3
C4.2_WD4_F4-R1
C4.2_WD4_F4-R2
C4.2_WD4_F4-R3
C4.2_WA5_C5-R1
C4.2_WA5_C5-R2
C4.2_WA5_C5-R3
C4.2_WD5_F5-R1
C4.2_WD5_F5-R2
C4.2_WD5_F5-R3
C4.2_WA6_C6-R1
C4.2_WA6_C6-R2
C4.2_WA6_C6-R3
C4.2_WD6_F6-R1
C4.2_WD6_F6-R2
C4.2_WD6_F6-R3
C4.2_WA7_C7-R1
C4.2_WA7_C7-R2
C4.2_WA7_C7-R3
C4.2_WD7_F7-R1
C4.2_WD7_F7-R2
C4.2_WD7_F7-R3
C4.2_WA8_C8-R1
C4.2_WA8_C8-R2
C4.2_WA8_C8-R3
C4.2_WD8_F8-R1
C4.2_WD8_F8-R2
C4.2_WD8_F8-R3


In [8]:
df.head(5)

Unnamed: 0_level_0,MOPS[mM],Tricine[mM],H3BO3[mM],Glucose[mM],K2SO4[mM],K2HPO4[mM],FeSO4[mM],NH4Cl[mM],MgCl2[mM],NaCl[mM],(NH4)6Mo7O24[mM],CoCl2[mM],CuSO4[mM],MnSO4[mM],ZnSO4[mM],Kan[g/l],Line Description,Replicate,Line Name
Well,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
A1,40.0,4.0,0.045186,20.0,0.590637,4.413947,0.023892,47.480272,0.033098,651.233703,0.000541,0.00301,0.000536,0.007166,0.001252,0.05,"MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...",1,C4.2_WA1_C1-R1
B1,40.0,4.0,0.045186,20.0,0.590637,4.413947,0.023892,47.480272,0.033098,651.233703,0.000541,0.00301,0.000536,0.007166,0.001252,0.05,"MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...",2,C4.2_WA1_C1-R2
C1,40.0,4.0,0.045186,20.0,0.590637,4.413947,0.023892,47.480272,0.033098,651.233703,0.000541,0.00301,0.000536,0.007166,0.001252,0.05,"MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...",3,C4.2_WA1_C1-R3
D1,40.0,4.0,0.034717,20.0,0.641054,4.219546,0.022366,40.901081,0.156846,719.688474,0.000335,0.003982,0.000275,0.008757,0.00069,0.05,"MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...",1,C4.2_WD1_F1-R1
E1,40.0,4.0,0.034717,20.0,0.641054,4.219546,0.022366,40.901081,0.156846,719.688474,0.000335,0.003982,0.000275,0.008757,0.00069,0.05,"MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...",2,C4.2_WD1_F1-R2


## Process measurement files

Read measurements file:

In [9]:
df_600 = pd.read_excel(user_params['measurement_file'], sheet_name='600', index_col=0)
df_340 = pd.read_excel(user_params['measurement_file'], sheet_name='340', index_col=0)

# df_600

In [10]:
df_340

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12
A,0.112,0.1805,0.5506,0.5644,0.5434,0.5387,0.6933,0.5459,0.1046,0.1001,0.0991,0.0969
B,0.1078,0.1441,0.5305,0.5299,0.533,0.5236,0.6787,0.5396,0.1006,0.098,0.0962,0.092
C,0.1015,0.1398,0.5318,0.5394,0.5172,0.5207,0.6717,0.5478,0.0925,0.0917,0.0895,0.0869
D,0.1027,0.5253,0.5656,0.5687,0.5357,0.5078,0.6986,0.3473,0.0853,0.0845,0.0831,0.0788
E,0.1055,0.5336,0.5929,0.5988,0.5711,0.5328,0.705,0.3423,0.0803,0.0788,0.0783,0.073
F,0.1051,0.5574,0.5906,0.5998,0.5595,0.5511,0.7114,0.3497,0.0767,0.0752,0.0732,0.0707
G,0.0713,0.0731,0.0773,0.077,0.0801,0.0774,0.0771,0.0785,0.0758,0.0738,0.0711,0.0692
H,0.0709,0.0718,0.0741,0.0749,0.0745,0.0761,0.0735,0.0736,0.0748,0.0733,0.0718,0.0697


### Process OD600

Normalize the data to the control well A9 value (water content):

In [11]:
zero_value_600 = df_600.at['A', 9]
df_600.loc[:, df_600.columns] -= zero_value_600
df_600

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12
A,0.0231,0.2432,1.0067,0.9636,0.9857,0.7687,0.7358,0.9468,0.0,0.0087,0.0091,0.0084
B,0.0175,0.1276,0.9933,0.9424,0.9896,0.7122,0.6917,0.9731,0.006,0.0047,0.0053,0.0041
C,0.0137,0.1279,1.0078,0.9598,0.983,0.6971,0.645,0.9643,-0.0008,0.0003,-0.0009,-0.0005
D,0.0427,1.0016,0.9152,0.9149,0.9507,0.4766,0.7193,0.0136,-0.0065,-0.0065,-0.0065,-0.0072
E,0.0594,1.0144,0.9274,0.9189,0.9532,0.5768,0.6967,0.0084,-0.0108,-0.0113,-0.011,-0.0119
F,0.0654,1.0369,0.9398,0.9232,0.9691,0.8153,0.7061,0.006,-0.0139,-0.0141,-0.0145,-0.0146
G,-0.015,-0.0146,-0.0143,-0.0142,-0.0143,-0.0145,-0.0144,-0.0146,-0.0147,-0.0149,-0.0152,-0.0156
H,-0.0137,-0.0136,-0.0138,-0.0141,-0.0134,-0.0136,-0.0142,-0.0144,-0.0141,-0.0139,-0.0143,-0.0147


Set negative values to zero, multiply the values by 10 to account for 10x dilution and keep only 8 columns and 6 rows:

In [12]:
df_600[df_600 < 0] = 0
df_600.loc[:, df_600.columns] *= 10
df_600 = df_600.iloc[:6,:8]
df_600

Unnamed: 0,1,2,3,4,5,6,7,8
A,0.231,2.432,10.067,9.636,9.857,7.687,7.358,9.468
B,0.175,1.276,9.933,9.424,9.896,7.122,6.917,9.731
C,0.137,1.279,10.078,9.598,9.83,6.971,6.45,9.643
D,0.427,10.016,9.152,9.149,9.507,4.766,7.193,0.136
E,0.594,10.144,9.274,9.189,9.532,5.768,6.967,0.084
F,0.654,10.369,9.398,9.232,9.691,8.153,7.061,0.06


### Process OD340

Do the same for OD340, except of multiplication by 10, as the OD340 measurements were taken from non-diluted samples:

In [13]:
zero_value_340 = df_340.at['A', 9]
df_340.loc[:, df_340.columns] -= zero_value_340
df_340[df_340 < 0] = 0
df_340 = df_340.iloc[:6,:8]
df_340

Unnamed: 0,1,2,3,4,5,6,7,8
A,0.0074,0.0759,0.446,0.4598,0.4388,0.4341,0.5887,0.4413
B,0.0032,0.0395,0.4259,0.4253,0.4284,0.419,0.5741,0.435
C,0.0,0.0352,0.4272,0.4348,0.4126,0.4161,0.5671,0.4432
D,0.0,0.4207,0.461,0.4641,0.4311,0.4032,0.594,0.2427
E,0.0009,0.429,0.4883,0.4942,0.4665,0.4282,0.6004,0.2377
F,0.0005,0.4528,0.486,0.4952,0.4549,0.4465,0.6068,0.2451


Transform wide to long format, matching the order of wells in index to the one from `df`:

In [14]:
if column_order:
    indfcn = lambda x: f'{x.name}{int(x["variable"])}'
    # df_600 = df_600.melt(ignore_index=False, value_name='OD600')
    df_340 = df_340.melt(ignore_index=False, value_name='OD340')
else:
    indfcn = lambda x: f'{x["variable"]}{(x.name)}'
    # df_600 = df_600.T.melt(ignore_index=False, value_name='OD600')
    df_340 = df_340.T.melt(ignore_index=False, value_name='OD340')

# df_600['Well'] = df_600.apply(indfcn, axis=1)
df_340['Well'] = df_340.apply(indfcn, axis=1)

# df_600.index = df_600['Well']
df_340.index = df_340['Well']

# df_600.drop(columns=['variable', 'Well'], inplace=True)
df_340.drop(columns=['variable', 'Well'], inplace=True)

# df_600.head()

Add measurements to the main dataframe:

In [15]:
# measurOD600 = user_params['protocol_name'][0]
measurOD340 = user_params['protocol_name'][1]
# df[measurOD600] = df_600
df[measurOD340] = df_340
df.head(2)


Unnamed: 0_level_0,MOPS[mM],Tricine[mM],H3BO3[mM],Glucose[mM],K2SO4[mM],K2HPO4[mM],FeSO4[mM],NH4Cl[mM],MgCl2[mM],NaCl[mM],(NH4)6Mo7O24[mM],CoCl2[mM],CuSO4[mM],MnSO4[mM],ZnSO4[mM],Kan[g/l],Line Description,Replicate,Line Name,OD340
Well,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
A1,40.0,4.0,0.045186,20.0,0.590637,4.413947,0.023892,47.480272,0.033098,651.233703,0.000541,0.00301,0.000536,0.007166,0.001252,0.05,"MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...",1,C4.2_WA1_C1-R1,0.0074
B1,40.0,4.0,0.045186,20.0,0.590637,4.413947,0.023892,47.480272,0.033098,651.233703,0.000541,0.00301,0.000536,0.007166,0.001252,0.05,"MOPS[mM]: 40.000000, Tricine[mM]: 4.000000, H3...",2,C4.2_WA1_C1-R2,0.0032


## Create EDD Experiment Description File

In [16]:
df['Media'] = user_params['media']
df['Part ID'] = user_params['part_id']
df['Culture Volume'] = user_params['culture_volume']
df['Flask Volume'] = user_params['well_volume']
df['Growth Temperature'] = user_params['temperature']
df['Shaking speed'] = user_params['shaking_speed']
# df['Starting OD'] =
# df['Replicate Count'] = 24


In [17]:
# Invalid columns for now in EDD
# df['Humidity[%]'] = user_params['humidity']
# df['Plate'] = user_params['plate']

In [18]:
exp_descr_file = f'{user_params["output_file_path"]}/edd_experiment_description.xlsx'
df[['Line Name',
    'Line Description',
    'Part ID',
    'Media',
    'Culture Volume',
    'Flask Volume',
    'Growth Temperature',
    'Shaking speed',
]].to_excel(exp_descr_file, index=False)

## Create EDD Measurement File

OD600

In [19]:
# measurement_file = f'{user_params["output_file_path"]}/edd_{measurOD600}.xlsx'
# df['Measurement Type'] = 'Optical Density'

# df['Time'] = user_params['time_point']
# df['Value'] = df[measurOD600]
# df['Units'] = 'n/a'
# df[['Line Name', 'Measurement Type', 'Time', 'Value', 'Units']].to_excel(measurement_file, index=False)

OD340

In [20]:
# measurement_file = f'{user_params["output_file_path"]}/edd_{measurOD340}.xlsx'

# df['Time'] = user_params['time_point']
# df['Value'] = df[measurOD340]
# df['Units'] = 'n/a'
# df[['Line Name', 'Measurement Type', 'Time', 'Value', 'Units']].to_excel(measurement_file, index=False)

OD

In [21]:
measurement_file = f'{user_params["output_file_path"]}/edd_OD.xlsx'
df['Measurement Type'] = measurOD340

df['Time'] = user_params['time_point']
df['Value'] = df[measurOD340]
df['Units'] = 'n/a'
df[['Line Name', 'Measurement Type', 'Time', 'Value', 'Units']].to_excel(measurement_file, index=False)

In [22]:
df['Kan[g/l]']

Well
A1    0.05
B1    0.05
C1    0.05
D1    0.05
E1    0.05
F1    0.05
A2    0.05
B2    0.05
C2    0.05
D2    0.05
E2    0.05
F2    0.05
A3    0.05
B3    0.05
C3    0.05
D3    0.05
E3    0.05
F3    0.05
A4    0.05
B4    0.05
C4    0.05
D4    0.05
E4    0.05
F4    0.05
A5    0.05
B5    0.05
C5    0.05
D5    0.05
E5    0.05
F5    0.05
A6    0.05
B6    0.05
C6    0.05
D6    0.05
E6    0.05
F6    0.05
A7    0.05
B7    0.05
C7    0.05
D7    0.05
E7    0.05
F7    0.05
A8    0.05
B8    0.05
C8    0.05
D8    0.05
E8    0.05
F8    0.05
Name: Kan[g/l], dtype: float64