# Create EDD Study Files From Data

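This notebook builds the two files needed to create an EDD study — an experiment description file and a measurement file — from a table of media designs and the corresponding production data.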

## Inputs and outputs

#### Required files to run this notebook:
   - `../data/flaviolin/DBTL{CYCLE}/target_concentrations_all.csv` - media designs (`CYCLE` is set in the user parameters)
   
   - `../data/indigoidine/cycle1_batch1_data.csv` - production data


#### Files generated by running this notebook:
   - `experiment_description.xlsx`
   
   - `edd_data_<protocol_name>.xlsx`

Both files are written to the directory set in `user_params['output_file_path']`.

## Setup

Importing needed libraries:

In [1]:
import pandas as pd
# import openpyxl

## User parameters

In [2]:
CYCLE = 0  # DBTL cycle number; interpolated into the media and output paths below

# Settings read by the rest of the notebook.
user_params = {
    # CSV of media designs
    'media_file': f'../data/flaviolin/DBTL{CYCLE}/target_concentrations_all.csv',
    # CSV of production measurements
    'product_file': '../data/indigoidine/cycle1_batch1_data.csv',
    # Folder for output files
    'output_file_path': f'../data/flaviolin/DBTL{CYCLE}',
    # Measurement type identifier; also used as the name of the production column
    'productID': 'CID:160478:Flaviolin',
    # Protocol name; embedded in the measurement file name
    'protocol_name': 'OD350',
    # Value written to the 'Time' column of every measurement
    'time_point': 48,
}

In [3]:
# Load the media designs; the first CSV column becomes the row index.
media_path = user_params['media_file']
df = pd.read_csv(media_path, index_col=0)
df.head()

Unnamed: 0,MOPS,Tricine,H3BO3,Glucose,K2SO4,K2HPO4,FeSO4,NH4Cl,MgCl2,NaCl,(NH4)6Mo7O24,CoCl2,CuSO4,MnSO4,ZnSO4
A1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,3e-05,0.0003,0.0001,0.0008,0.0001
B1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,3e-05,0.0003,0.0001,0.0008,0.0001
C1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,3e-05,0.0003,0.0001,0.0008,0.0001
D1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,3e-05,0.0003,0.0001,0.0008,0.0001
E1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,3e-05,0.0003,0.0001,0.0008,0.0001


In [4]:
# Create Line Description
def create_media_description(series):
    """Format one media design as a single comma-separated string.

    Parameters
    ----------
    series : pandas.Series
        One row of the media design table: labels are component names and
        values are numbers.

    Returns
    -------
    str
        Text of the form ``'<name>: <value>, <name>: <value>, ...'`` with each
        value formatted to four decimal places. Empty input gives ``''``.
    """
    # Series.iteritems() was removed in pandas 2.0; items() yields the same
    # (label, value) pairs and exists in older pandas versions too.
    return ', '.join(f'{item}: {value:0.4f}' for item, value in series.items())
    

In [5]:
# Summarize every row (one media design) as a text description.
df['Line Description'] = df.apply(create_media_description, axis='columns')

In [6]:
# Add media and replicate metadata used to build the line names
reps = 24
num_media_designs = 2


def lnfcn(x):
    """Return the line name for row `x`: C<CYCLE>_W<row label>_<Media>-R<Replicate>."""
    return f'C{CYCLE}_W{x.name}_{x["Media"]}-R{x["Replicate"]}'


# The first `reps` rows are labelled 'biomek', the next `reps` rows 'manual'.
media_labels = ['biomek', 'manual']
df['Media'] = [label for label in media_labels for _ in range(reps)]

# The replicate number restarts at 1 for each media design.
df['Replicate'] = list(range(1, reps + 1)) * num_media_designs

df['Line Name'] = df.apply(lnfcn, axis=1)

In [7]:
# Load the production measurements that will be appended to the data frame
production_path = user_params['product_file']
production_df = pd.read_csv(production_path)
production_df.head()

Unnamed: 0,Indigoidine [g/L]
0,0.039563
1,0.073779
2,0.082351
3,0.149748
4,0.204138


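NOTE: The production values are attached to the media designs by row position, not by well name, so make sure the rows of the production file are in the same well order as the media file.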

In [8]:
product = user_params['productID']

# Replace the per-row 'biomek'/'manual' labels with a single media name.
# The line names computed earlier still carry the original labels.
df['Media'] = 'MOPS'

# Copy the production values in by position: row i of production_df goes to row i of df.
df[product] = production_df['Indigoidine [g/L]'].to_numpy()


In [9]:
# Preview the first two rows with the added metadata and production columns.
df.head(n=2)

Unnamed: 0,MOPS,Tricine,H3BO3,Glucose,K2SO4,K2HPO4,FeSO4,NH4Cl,MgCl2,NaCl,(NH4)6Mo7O24,CoCl2,CuSO4,MnSO4,ZnSO4,Line Description,Media,Replicate,Line Name,CID:160478:Flaviolin
A1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,3e-05,0.0003,0.0001,0.0008,0.0001,"MOPS: 40.0000, Tricine: 4.0000, H3BO3: 0.0040,...",MOPS,1,C0_WA1_biomek-R1,0.039563
B1,40,4,0.004,20,0.29,1.32,0.01,9.52,0.52,50,3e-05,0.0003,0.0001,0.0008,0.0001,"MOPS: 40.0000, Tricine: 4.0000, H3BO3: 0.0040,...",MOPS,2,C0_WB1_biomek-R2,0.073779


Create EDD Line Description File

In [10]:
# Constant line metadata, written to every row in this order.
line_metadata = {
    'Culture Volume': 15,
    'Well Volume': 1500,
    'Growth Temperature': 30,
    'Plate': 'm2p_flower',
}
for column_name, constant in line_metadata.items():
    df[column_name] = constant

# Not filled in yet: 'Shaking speed', 'Starting OD', 'Replicate Count'

In [13]:
exp_descr_file = f'{user_params["output_file_path"]}/experiment_description.xlsx'

# Columns exported to the experiment description workbook, in output order
experiment_columns = [
    'Line Name',
    'Line Description',
    'Media',
    'Culture Volume',
    'Well Volume',
    'Growth Temperature',
    'Plate',
]

# index=False leaves the row index out of the exported sheet.
df[experiment_columns].to_excel(exp_descr_file, index=False)

Create EDD Measurement File

In [12]:
measurement_file = f'{user_params["output_file_path"]}/edd_data_{user_params["protocol_name"]}.xlsx'

# One measurement per line: the product value at a single time point
df['Measurement Type'] = product
df['Time'] = user_params['time_point']
df['Value'] = df[product]
df['Units'] = 'n/a'

# Columns exported to the measurement workbook, in output order
measurement_columns = ['Line Name', 'Measurement Type', 'Time', 'Value', 'Units']
df[measurement_columns].to_excel(measurement_file, index=False)