In [None]:
import numpy as np
import glob 
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
from functools import reduce


In [None]:
def SingleClassAnalysis(chip,class_name):
    """Summarise one binary label column as negative/positive probabilities.

    Parameters
    ----------
    chip : pandas.DataFrame
        Table containing a column named ``class_name`` with 0/1 (or
        False/True) labels.
    class_name : str
        Name of the label column to analyse; also used as the suffix of the
        output column names.

    Returns
    -------
    pandas.DataFrame
        A single row (index 0) with the columns
        ``prob_neg_label_<class_name>``, ``prob_pos_label_<class_name>`` and
        ``total_label_<class_name>`` (number of rows in ``chip``).
    """
    labels = chip[class_name]
    if labels.dtype == bool:
        # replace({1: 0, 0: 1}) leaves a boolean column untouched, which would
        # silently report P(positive) as P(negative); work on integers instead.
        labels = labels.astype(int)

    # Swap 0 and 1 so that the plain mean is the fraction of negative labels.
    prob_neg = labels.replace({1: 0, 0: 1}).mean()

    out = pd.DataFrame(
        {
            'prob_neg_label': prob_neg,
            'prob_pos_label': 1 - prob_neg,
            'total_label': len(labels),
        },
        index=[0],
    )
    return out.add_suffix(f'_{class_name}')

def AnalyseChipClasses(chip,classes=['positive','rods','filaments','planktonic','clumped']):
    """Collect the label statistics of every requested class for one chip.

    ``SingleClassAnalysis`` is run on each column listed in ``classes`` and the
    resulting single-row tables are joined side by side, in the order given.
    The first entry of the chip's ``concentration`` column is attached as an
    extra column; this function is applied per chip group, where only one
    concentration is expected.
    """
    per_class = [SingleClassAnalysis(chip[[name]], name) for name in classes]

    # Left-to-right index join of the one-row tables.
    summary = reduce(pd.DataFrame.join, per_class).reset_index(drop=True)
    summary['concentration'] = chip['concentration'].values[0]
    return summary

# Extract class probabilities from labels

In [None]:
# Folder holding the predicted-label tables (files ending in "_trained.csv").
Folder='PredictedLabelsTables/' 

# glob returns matches in arbitrary order; sort them so every run processes
# (and logs) the tables in the same sequence.
paths=sorted(glob.glob(f'{Folder}*_trained.csv'))

## unconditioned

In [None]:
# Unconditioned label probabilities: writes one chip_info_<experiment>_prob.csv per input table.
for p in paths:

    data = pd.read_csv(p,index_col=0)
    data['n_cells_1']=data.n_cells_1.astype(int)
    data['n_cells_2']=data.n_cells_2.astype(int)
    classes=['rods','filaments','planktonic','clumped']
    for c in classes:
        data[c]=data[c].astype(int)

    # One summary row per chip; the grouping column itself is not passed to the function.
    chip_info=data.groupby(['chip']).apply(AnalyseChipClasses,include_groups=False)
    chip_info=chip_info.droplevel(level=1)  # drop the inner index of the one-row summaries
    chip_info=chip_info.reset_index()

    # Take the experiment name from the file name itself: splitting the whole path on '/'
    # breaks with a different path separator or when Folder is nested.
    Filename=os.path.basename(p).split('_trained')[0]

    chip_info.to_csv(f'{Folder}chip_info_{Filename}_prob.csv')
    print(f'save done {p}',flush=True)

## Positive, i.e. observing successful growth and morphology: in the unification step we compute the actual conditional probability

In [None]:
# Joint "positive and morphology" label probabilities: writes one
# chip_info_<experiment>_prob_and_pos.csv per input table.
for p in paths:

    data = pd.read_csv(p,index_col=0)
    data['n_cells_1']=data.n_cells_1.astype(int)
    data['n_cells_2']=data.n_cells_2.astype(int)
    classes=['rods','filaments','planktonic','clumped']
    for c in classes:
        # positive and morphology: 1 only when both the morphology label and 'positive' are set
        data[c]=data[[c,'positive']].all(axis=1).astype(int)

    # One summary row per chip; the grouping column itself is not passed to the function.
    chip_info=data.groupby(['chip']).apply(AnalyseChipClasses,include_groups=False)
    chip_info=chip_info.droplevel(level=1)  # drop the inner index of the one-row summaries
    chip_info=chip_info.reset_index()

    # Take the experiment name from the file name itself: splitting the whole path on '/'
    # breaks with a different path separator or when Folder is nested.
    Filename=os.path.basename(p).split('_trained')[0]

    chip_info.to_csv(f'{Folder}chip_info_{Filename}_prob_and_pos.csv')
    print(f'save done {p}',flush=True)

# Unify tables: warning, overwrites provided data
This creates the tables present in the GitHub repository.


In [None]:
def unionise_datasets(data, dates, date_index):
    """Stack several tables into one, tagging every row with its origin.

    Parameters
    ----------
    data : iterable of pandas.DataFrame
        Tables to combine. The combined table is sorted by its
        ``concentration`` column.
    dates : sequence
        One entry per table, stored in the ``date`` column.
    date_index : sequence
        One entry per table, stored in the ``date_index`` column (the notebook
        uses it to number experiments that share a date).

    Returns
    -------
    pandas.DataFrame
        Concatenation of copies of all tables with the extra columns
        ``dataset`` (position of the source table in ``data``), ``date`` and
        ``date_index``, sorted by ``concentration`` with a fresh 0..n-1 index.
        The input tables are not modified.

    Raises
    ------
    ValueError
        If ``data``, ``dates`` and ``date_index`` differ in length.
    """
    data = list(data)
    # The three lists are maintained by hand; a mismatch means a table or its
    # metadata was forgotten, so fail loudly instead of silently ignoring extras.
    if not (len(data) == len(dates) == len(date_index)):
        raise ValueError(
            'data, dates and date_index must have the same length, got '
            f'{len(data)}, {len(dates)} and {len(date_index)}'
        )

    tagged = [
        df.assign(dataset=i, date=date, date_index=idx)
        for i, (df, date, idx) in enumerate(zip(data, dates, date_index))
    ]

    out = pd.concat(tagged, ignore_index=True)
    return out.sort_values('concentration').reset_index(drop=True)

In [None]:
# Folder the chip_info_*_prob*.csv tables are read from in the cells below.
# Same value as assigned earlier in the notebook; presumably repeated so the
# unification cells can be run without the extraction cells.
Folder='PredictedLabelsTables/' 

In [None]:
#-------------------------------------Genta ---------------------------------------------------------------------
# Metadata kept to track the experiments; entries are in the same order as genta_runs.
dates=['20221101','20221101','20230110','20230110']
date_index=[1,2,1,2]

genta_runs=[
    '20221101-ecoli-genta1',
    '20221101-ecoli-genta2',
    '20230110-e.coli-genta',
    '20230110-e.coli-genta-2',
]

# Unconditioned probabilities
data=[pd.read_csv(f'{Folder}chip_info_{run}_prob.csv',index_col=0) for run in genta_runs]

df=unionise_datasets(data,dates,date_index)
df.to_csv('tables/probability_tables/Gentamicin.csv')  #warning overwrites provided data

# Joint (morphology and positive) probabilities
data_and_pos=[pd.read_csv(f'{Folder}chip_info_{run}_prob_and_pos.csv',index_col=0) for run in genta_runs]

df=unionise_datasets(data_and_pos,dates,date_index)

# Calculate conditionals: joint probability divided by the probability of a positive label
prob_positive=df['prob_pos_label_positive']
for label in ['rods','planktonic','filaments','clumped']:
    column=f'prob_pos_label_{label}'
    df[column]=df[column].div(prob_positive)

df.to_csv('tables/probability_tables/Gentamicin_cond_pos.csv')  #warning overwrites provided data

In [None]:
#-------------------------------------Tetra---------------------------------------------------------------------
# Experiment metadata; entries are in the same order as tetra_runs.
dates=['20230315','20230315','20230404','20230404']
date_index=[1,2,1,2]

tetra_runs=[
    '20230315-ecoli_set-1',
    '20230315-ecoli_set-2',
    '20230404-ecoli-Tetracycline_set1',
    '20230404-ecoli-Tetracycline_set2',
]

# Unconditioned probabilities
data=[pd.read_csv(f'{Folder}chip_info_{run}_prob.csv',index_col=0) for run in tetra_runs]

df=unionise_datasets(data,dates,date_index)
df.to_csv('tables/probability_tables/Tetracycline.csv')  #warning overwrites provided data

# Joint (morphology and positive) probabilities
data_and_pos=[pd.read_csv(f'{Folder}chip_info_{run}_prob_and_pos.csv',index_col=0) for run in tetra_runs]

df=unionise_datasets(data_and_pos,dates,date_index)

# Calculate conditionals: joint probability divided by the probability of a positive label
prob_positive=df['prob_pos_label_positive']
for label in ['rods','planktonic','filaments','clumped']:
    column=f'prob_pos_label_{label}'
    df[column]=df[column].div(prob_positive)

df.to_csv('tables/probability_tables/Tetracycline_cond_pos.csv')  #warning overwrites provided data



In [None]:
#-------------------------------------Cipro---------------------------------------------------------------------
# Experiment metadata; entries are in the same order as cipro_runs.
dates=['20220531','20220531','20230131','20230131']
date_index=[1,2,1,2]

# Both table flavours are built from this single list so the "_prob" and
# "_prob_and_pos" inputs can never get out of step.
cipro_runs=[
    '20220531-MIC-e.coli-cipro_1stexp',
    '20220531-MIC-e.coli-cipro_2ndexp',
    '20230131-ecoli-cipro-1',
    '20230131-ecoli-cipro-2',
]

# Unconditioned probabilities
data=[pd.read_csv(f'{Folder}chip_info_{run}_prob.csv',index_col=0) for run in cipro_runs]

df=unionise_datasets(data,dates,date_index)
df.to_csv('tables/probability_tables/Ciprofloxacin.csv')  #warning overwrites provided data

# Joint (morphology and positive) probabilities.
# Fix: the last run previously loaded its "_prob.csv" table here, so its
# conditionals were computed from unconditioned probabilities.
data_and_pos=[pd.read_csv(f'{Folder}chip_info_{run}_prob_and_pos.csv',index_col=0) for run in cipro_runs]

df=unionise_datasets(data_and_pos,dates,date_index)

# Calculate conditionals: joint probability divided by the probability of a positive label
prob_positive=df['prob_pos_label_positive']
for label in ['rods','planktonic','filaments','clumped']:
    column=f'prob_pos_label_{label}'
    df[column]=df[column].div(prob_positive)

df.to_csv('tables/probability_tables/Ciprofloxacin_cond_pos.csv')  #warning overwrites provided data

In [None]:
#-------------------------------------AMP---------------------------------------------------------------------
# Experiment metadata; entries are in the same order as amp_runs.
dates=['20220614','20220614']
date_index=[1,2]

amp_runs=[
    '20220614-MIC-e.coli-amp-LB-1',
    '20220614-MIC-e.coli-amp-LB-2',
]

# Unconditioned probabilities
data=[pd.read_csv(f'{Folder}chip_info_{run}_prob.csv',index_col=0) for run in amp_runs]

df=unionise_datasets(data,dates,date_index)
# NOTE(review): the output name is spelled 'Ampicilin'; kept as-is because other code may load it by this name.
df.to_csv('tables/probability_tables/Ampicilin.csv')  #warning overwrites provided data

# Joint (morphology and positive) probabilities
data_and_pos=[pd.read_csv(f'{Folder}chip_info_{run}_prob_and_pos.csv',index_col=0) for run in amp_runs]

df=unionise_datasets(data_and_pos,dates,date_index)

# Calculate conditionals: joint probability divided by the probability of a positive label
prob_positive=df['prob_pos_label_positive']
for label in ['rods','planktonic','filaments','clumped']:
    column=f'prob_pos_label_{label}'
    df[column]=df[column].div(prob_positive)

df.to_csv('tables/probability_tables/Ampicilin_cond_pos.csv')  #warning overwrites provided data





In [None]:
#-------------------------------------CHP---------------------------------------------------------------------
# Experiment metadata; entries are in the same order as chp_runs.
dates=['20220524','20220602','20220628','20220628','20221012','20221013','20221031','20221031','20221122','20230111','20230111','20230221','20230313','20230313']
date_index=[1,1,1,2,1,1,1,2,1,1,2,1,1,2]

chp_runs=[
    '20220524-MIC-e.coli-chp-LB',
    '20220602-MIC-e.coli-chp-LB',
    '20220628-MIC-e.coli-chp-LB-1',
    '20220628-MIC-e.coli-chp-LB-2',
    '20221012-ecoli-chp',
    '20221013-ecoli-chp',
    '20221031-ecoli-chp1',
    '20221031-ecoli-chp2', # A lot of empty wells which becomes positive (more than 50%) ==> Data not trustworthy for 0ug and 2ug, the rest all dead so we can't say much
    '20221122-ecoli-chp',
    '20230111-ecoli-chp',
    '20230111-ecoli-chp-2',
    '20230221-ecoli-chp-1',
    '20230313-ecoli-chp-1',
    '20230313-ecoli-chp-2',
]

# Unconditioned probabilities
data=[pd.read_csv(f'{Folder}chip_info_{run}_prob.csv',index_col=0) for run in chp_runs]

df=unionise_datasets(data,dates,date_index)
df.to_csv('tables/probability_tables/Chloramphenicol.csv')  #warning overwrites provided data

# Joint (morphology and positive) probabilities
data_and_pos=[pd.read_csv(f'{Folder}chip_info_{run}_prob_and_pos.csv',index_col=0) for run in chp_runs]

df=unionise_datasets(data_and_pos,dates,date_index)

# Calculate conditionals: joint probability divided by the probability of a positive label
prob_positive=df['prob_pos_label_positive']
for label in ['rods','planktonic','filaments','clumped']:
    column=f'prob_pos_label_{label}'
    df[column]=df[column].div(prob_positive)

df.to_csv('tables/probability_tables/Chloramphenicol_cond_pos.csv')  #warning overwrites provided data