# Load the data

In [8]:
from pandas import read_excel
experiments = read_excel("../experiments.xlsx")
# growth_profiles = read_excel("growth_profiles.xlsx")

In [9]:
experimental_columns = ["experiment_id", "base_media", "additional_compounds", "strains", "data"]
experiments.insert(1, "short_ID", [f"Z{x}" for x in experiments.index])
experiments.head(5)

Unnamed: 0,experiment_id,short_ID,base_media,additional_compounds,strains,data
0,ecoli_5-malt_A,Z0,93465/base_media,cpd000179:5:0.5,ecoli,2022-05-10
1,50-ecoli_1-pf_5-malt_A,Z1,93465/base_media,cpd000179:5:0.5,ecoli:R50;pf:R1,2022-05-10
2,20-ecoli_1-pf_5-malt_A,Z2,93465/base_media,cpd000179:5:0.5,ecoli:R20;pf:R1,2022-05-10
3,10-ecoli_1-pf_5-malt_A,Z3,93465/base_media,cpd000179:5:0.5,ecoli:R10;pf:R1,2022-05-10
4,3-ecoli_1-pf_5-malt_A,Z4,93465/base_media,cpd000179:5:0.5,ecoli:R3;pf:R1,2022-05-10


# Recreate data

In [5]:
# define the dimensions of difference
base_media = "93465/1/1"  # !!! provide the appropriate permanent KBase ID
columns = 11; rows = 6
date = "10-05-22"
members = ["ecoli", "pf"]
species_abundances = {1:{"ecoli":0, "pf":1},
          2:{"ecoli":1, "pf":50},
          3:{"ecoli":1, "pf":20},
          4:{"ecoli":1, "pf":10},
          5:{"ecoli":1, "pf":3},
          6:{"ecoli":1, "pf":1},
          7:{"ecoli":3, "pf":1},
          8:{"ecoli":10, "pf":1},
          9:{"ecoli":20, "pf":1},
          10:{"ecoli":1, "pf":0},
          11:{"ecoli":0, "pf":0}
          }

carbon_sources = {'cpd00136':"4HB", 'cpd00179':"malt"}

species_identities_rows = {1:{"ecoli":"mRuby"},
                          2:{"ecoli":"ACS"},
                          3:{"ecoli":"mRuby"},
                          4:{"ecoli":"ACS"},
                          5:{"ecoli":"mRuby"},
                          6:{"ecoli":"ACS"}
                          }

row_concentrations = {'*':{'cpd00179':[5, 0.5]},
                      1:{"cpd00136":[0, 0]},
                      2:{"cpd00136":[0, 0]},
                      3:{"cpd00136":[1, 0.1]},
                      4:{"cpd00136":[1, 0.1]},
                      5:{"cpd00136":[4, 0.4]},
                      6:{"cpd00136":[4, 0.4]}
                     }

# construct the DataFrame
from pandas import DataFrame, set_option
constructed_experiments = DataFrame()
experiment_prefix = "Z"
constructed_experiments["short_code"] = [f"{experiment_prefix}{x+1}" for x in experiments.index]
constructed_experiments["base_media"] = [base_media]*(columns*rows)

## define the strains column
strains, additional_compounds, experiment_ids = [], [], []
trial_name_conversion = {}
count = 1
for row in range(1, rows+1):
    trial_letter = chr(ord("A")+row)
    composition = {}
    row_conc = []
    if '*' in row_concentrations:
        for met in row_concentrations['*']:
            row_conc.append(':'.join([met, str(row_concentrations['*'][met][0]), str(row_concentrations['*'][met][1])]))
    if row in row_concentrations:
        for met in row_concentrations[row]:
            if row_concentrations[row][met][0] > 0:
                row_conc.append(':'.join([met, str(row_concentrations[row][met][0]), str(row_concentrations[row][met][1])]))
    row_concentration = ';'.join(row_conc)
    for col in range(1, columns+1):
        additional_compounds.append(row_concentration)
        experiment_id = []
        for member in members:
            composition[member] = [member, f"r{species_abundances[col][member]}"]
            if member in species_identities_rows[row]:
                composition[member][0] += f"_{species_identities_rows[row][member]}"
            if int(composition[member][1][1:]) != 0:
                experiment_id.append(f"{composition[member][1]}_{composition[member][0]}")
            composition[member] = ':'.join(composition[member])
        strains.append(';'.join(composition[member] for member in members))
        for r in row_conc:
            met, init, end = r.split(':')
            experiment_id.append(f"{init}_{carbon_sources[met]}")
        experiment_id = '-'.join(experiment_id)
        experiment_ids.append(experiment_id)
        trial_name_conversion[trial_letter+str(col+1)] = [experiment_prefix+str(count), experiment_id]
        count +=1

constructed_experiments["additional_compounds"] = additional_compounds
constructed_experiments["strains"] = strains
constructed_experiments.insert(0,"experiment_ids", experiment_ids) 
constructed_experiments["date"] = [date]*(columns*rows)

ignore_experiments = ["Z12", "Z22", "Z56"]  # problem trials
constructed_experiments.drop(
    constructed_experiments.index[
        constructed_experiments["short_code"].isin(ignore_experiments)], inplace = True)
constructed_experiments.index = list(range(len(constructed_experiments["short_code"])))

set_option('display.max_rows', None)
display(constructed_experiments)
constructed_experiments.to_csv("experimental_metadata.csv")

Unnamed: 0,experiment_ids,short_code,base_media,additional_compounds,strains,date
0,r1_pf-5_malt,Z1,93465/1/1,cpd00179:5:0.5,ecoli_mRuby:r0;pf:r1,10-05-22
1,r1_ecoli_mRuby-r50_pf-5_malt,Z2,93465/1/1,cpd00179:5:0.5,ecoli_mRuby:r1;pf:r50,10-05-22
2,r1_ecoli_mRuby-r20_pf-5_malt,Z3,93465/1/1,cpd00179:5:0.5,ecoli_mRuby:r1;pf:r20,10-05-22
3,r1_ecoli_mRuby-r10_pf-5_malt,Z4,93465/1/1,cpd00179:5:0.5,ecoli_mRuby:r1;pf:r10,10-05-22
4,r1_ecoli_mRuby-r3_pf-5_malt,Z5,93465/1/1,cpd00179:5:0.5,ecoli_mRuby:r1;pf:r3,10-05-22
5,r1_ecoli_mRuby-r1_pf-5_malt,Z6,93465/1/1,cpd00179:5:0.5,ecoli_mRuby:r1;pf:r1,10-05-22
6,r3_ecoli_mRuby-r1_pf-5_malt,Z7,93465/1/1,cpd00179:5:0.5,ecoli_mRuby:r3;pf:r1,10-05-22
7,r10_ecoli_mRuby-r1_pf-5_malt,Z8,93465/1/1,cpd00179:5:0.5,ecoli_mRuby:r10;pf:r1,10-05-22
8,r20_ecoli_mRuby-r1_pf-5_malt,Z9,93465/1/1,cpd00179:5:0.5,ecoli_mRuby:r20;pf:r1,10-05-22
9,r1_ecoli_mRuby-5_malt,Z10,93465/1/1,cpd00179:5:0.5,ecoli_mRuby:r1;pf:r0,10-05-22


In [6]:
import json
with open("trial_conversions.json", 'w') as out:
    json.dump(trial_name_conversion, out, indent=3)

# growth data 

In [16]:
from pandas import ExcelFile

def isnumber(string):
    try:
        float(string)
    except:
        return False
    return True

dataframes = {}
raw_data = ExcelFile("../PF-EC-ACS 4-HB 5-10-22.xlsx")
worksheets = {"Raw OD(590)": "OD", "mNeongreen (PF)": "pf", "mRuby (mRuby EC)": "ecoli"}
for org_sheet, name in worksheets.items():
    sheet = org_sheet.replace(' ', '_')
    dataframes[sheet] = raw_data.parse(org_sheet)
    dataframes[sheet].columns = dataframes[sheet].iloc[6]
    dataframes[sheet] = dataframes[sheet].drop(dataframes[sheet].index[:7])

numerical_columns = []
dfs = {}
for sheet in dataframes:
    times, values, experiments, short_codes = [], [], [], [] 
    df = dataframes[sheet]
    for trial in set(df["Well"]):
        for index, row in df[df["Well"] == trial].iterrows():
            if not numerical_columns:
                numerical_columns = [x for x in row.index if isnumber(x)]
            if row["Cycle #"] == "Time (s)":
                times.extend([row[x] for x in numerical_columns])
                experiments.extend([trial_name_conversion[trial][1]]*len(numerical_columns))  # arbitrarily placed in one conditional block to prevent doubling the contents
                short_codes.extend([trial_name_conversion[trial][0]]*len(numerical_columns))  # arbitrarily placed in one conditional block to prevent doubling the contents
            if row["Cycle #"] == "Result":
                values.extend([row[x] for x in numerical_columns])
                
    dfs[sheet] = DataFrame({"Time (s)":times, "values":values, "experiment_IDs":experiments, "short_codes": short_codes})
    dfs[sheet].index = dfs[sheet]["short_codes"]
    del dfs[sheet]["short_codes"]
    dfs[sheet].to_csv(sheet+".csv")

In [25]:
for sheet, df in dfs.items():
    display(df.iloc[[-2,-1]])

Unnamed: 0_level_0,Time (s),values,experiment_IDs
short_codes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Z56,97220.589,0.348245,r1_pf-5_malt-4_4HB
Z56,97813.16,0.349189,r1_pf-5_malt-4_4HB


Unnamed: 0_level_0,Time (s),values,experiment_IDs
short_codes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Z56,97189.946,167037.0,r1_pf-5_malt-4_4HB
Z56,97782.508,162960.0,r1_pf-5_malt-4_4HB


Unnamed: 0_level_0,Time (s),values,experiment_IDs
short_codes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Z56,97199.544,147.0,r1_pf-5_malt-4_4HB
Z56,97783.5,140.0,r1_pf-5_malt-4_4HB
