In [1]:
import numpy as np
import pandas as pd
import os

# Loop over all parameter combinations and process raw simulation data

In [2]:
# folder of raw simulation data
folders = ["example_data_from_hABM/"] # the location of the real simulation data can be found in the "available data" part of the corresponding paper 

# parameter names that were screened over
parameter_names = ["rupAM", "t0sAEC", "rupAEC1", "icNum"]

# agent states that are counted together for each quantity
ON_AEC1_STATES = ("FungalOnAEC1", "OnAEC1Swelling")
ON_AEC2_STATES = ("FungalOnAEC2", "OnAEC2Swelling")
TAKEN_BY_AEC1_STATES = ("KilledByAEC1", "UptakenByAEC1")
TAKEN_BY_AEC2_STATES = ("KilledByAEC2", "UptakenByAEC2")

# names of the per-time-step values produced by summarize_run (same order)
RATIO_COLUMNS = ["Time", "Mean Radius", "On AEC1", "On AEC2", "On AEC1 rel", "On AEC2 rel", "Taken up by AEC1 rel", "Taken up by AEC2 rel", "Taken up by AMs", "Total taken up by AECs", "Total taken up"]


def safe_ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero.

    Runs in which no conidium starts on one of the AEC types would otherwise
    abort the whole processing loop with a ZeroDivisionError.
    """
    return numerator / denominator if denominator != 0 else float("nan")


def count_states(frame, states):
    """Return the number of rows of `frame` whose "state" is one of `states`."""
    return int(frame["state"].isin(states).sum())


def parse_parameters(run_name, names):
    """Extract the screened parameter values from a simulation folder name.

    For every name in `names`, the text following that name up to the next
    underscore is converted to float. Raises IndexError if a name does not
    occur in `run_name` and ValueError if the text is not a number.
    """
    return [float(run_name.split(name)[1].split("_")[0]) for name in names]


def summarize_run(df):
    """Compute the per-time-step summary rows of one simulation.

    Parameters
    ----------
    df : DataFrame with the columns "time", "state" and "radius", already
        restricted to the fungal cells of one simulation.

    Returns
    -------
    list of lists, one per unique time (in order of first appearance), with
    the values named in RATIO_COLUMNS.
    """
    # number of conidia that are on AEC1 or AEC2 at time 0
    at_start = df[df["time"] == 0]
    on1_t0 = count_states(at_start, ON_AEC1_STATES)
    on2_t0 = count_states(at_start, ON_AEC2_STATES)
    total_t0 = on1_t0 + on2_t0

    summary = []
    max_taken1, max_taken2 = 0, 0
    max_radius = 0

    # loop over all recorded time steps
    for t in df["time"].unique():
        at_t = df[df["time"] == t]

        # conidia that are not taken up (resting or swelling on the AEC)
        on1 = count_states(at_t, ON_AEC1_STATES)
        on2 = count_states(at_t, ON_AEC2_STATES)

        # conidia taken up by AEC1 / AEC2 (killed or not); the value never
        # decreases over time, so keep the running maximum
        max_taken1 = max(max_taken1, count_states(at_t, TAKEN_BY_AEC1_STATES))
        max_taken2 = max(max_taken2, count_states(at_t, TAKEN_BY_AEC2_STATES))
        taken_by_aec = max_taken1 + max_taken2

        # conidia taken up by AM: everything that started on an AEC and is
        # neither still on it nor attributed to an AEC
        taken_by_am = total_t0 - on1 - on2 - taken_by_aec

        # average radius of conidia, kept as a running maximum
        mean_radius = at_t["radius"].mean()
        if mean_radius > max_radius:
            max_radius = mean_radius

        summary.append([
            t,
            max_radius,
            safe_ratio(on1, total_t0),
            safe_ratio(on2, total_t0),
            safe_ratio(on1, on1_t0),
            safe_ratio(on2, on2_t0),
            safe_ratio(max_taken1, on1_t0),
            safe_ratio(max_taken2, on2_t0),
            safe_ratio(taken_by_am, total_t0),
            safe_ratio(taken_by_aec, total_t0),
            safe_ratio(taken_by_aec + taken_by_am, total_t0),
        ])
    return summary


# Collect the rows of all folders; run_id keeps counting across folders so
# that it stays unique when more than one folder is processed.
rows = []
run_id = 0
for folder in folders:
    if not os.path.isdir(folder):
        print("Skipping missing data folder:", folder)
        continue

    # one sub-directory per simulation; hidden entries (e.g. notebook
    # checkpoints) and plain files are ignored
    run_dirs = sorted(
        entry for entry in os.listdir(folder)
        if not entry.startswith(".") and os.path.isdir(os.path.join(folder, entry))
    )

    # Looping over all simulations
    for position, run_name in enumerate(run_dirs):
        print(position, "/", len(run_dirs), end=", ")

        # retrieve simulation parameters from folder name
        parameter_values = [run_id] + parse_parameters(run_name, parameter_names)

        # read the measurement file and look at fungal cells
        file_path = os.path.join(folder, run_name, "measurements", "agent-statistics.csv")
        fungal_df = pd.read_csv(file_path, sep=";").query('agent == "FungalCellAlveolus"')

        rows.extend(parameter_values + time_row for time_row in summarize_run(fungal_df))
        run_id += 1

# create dataframe
df_all = pd.DataFrame(rows, columns=["run_id", *parameter_names, *RATIO_COLUMNS])
df_all

0 / 3, 1 / 3, 2 / 3, 

Unnamed: 0,run_id,rupAM,t0sAEC,rupAEC1,icNum,Time,Mean Radius,On AEC1,On AEC2,On AEC1 rel,On AEC2 rel,Taken up by AEC1 rel,Taken up by AEC2 rel,Taken up by AMs,Total taken up by AECs,Total taken up
0,0,0.000685,0.0,0.000137,0.0,0,1.39,0.96,0.04,1.0,1.0,0.0,0.0,0.0,0.0,0.0
1,0,0.000685,0.0,0.000137,0.0,30,1.39,0.94,0.04,0.979167,1.0,0.020833,0.0,0.0,0.02,0.02
2,0,0.000685,0.0,0.000137,0.0,60,1.403326,0.94,0.04,0.979167,1.0,0.020833,0.0,0.0,0.02,0.02
3,0,0.000685,0.0,0.000137,0.0,90,1.448266,0.94,0.04,0.979167,1.0,0.020833,0.0,0.0,0.02,0.02
4,0,0.000685,0.0,0.000137,0.0,120,1.561005,0.94,0.04,0.979167,1.0,0.020833,0.0,0.0,0.02,0.02
5,0,0.000685,0.0,0.000137,0.0,150,1.771955,0.94,0.04,0.979167,1.0,0.020833,0.0,0.0,0.02,0.02
6,0,0.000685,0.0,0.000137,0.0,180,2.001385,0.92,0.04,0.958333,1.0,0.041667,0.0,0.0,0.04,0.04
7,0,0.000685,0.0,0.000137,0.0,210,2.15703,0.92,0.04,0.958333,1.0,0.041667,0.0,0.0,0.04,0.04
8,0,0.000685,0.0,0.000137,0.0,240,2.215379,0.9,0.04,0.9375,1.0,0.0625,0.0,0.0,0.06,0.06
9,0,0.000685,0.0,0.000137,0.0,270,2.24016,0.9,0.04,0.9375,1.0,0.0625,0.0,0.0,0.06,0.06


In [3]:
# make sure the output directory exists (to_csv does not create it), then
# save the processed data; the row index is written as the first column
os.makedirs("processed_data", exist_ok=True)
df_all.to_csv("processed_data/example_processed_simulation_data.csv")