In [1]:
# import plotting, io, stats, linalg libraries
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
from scipy.integrate import odeint, solve_ivp
from scipy.stats import linregress
import scipy.io as sio
import pandas as pd

# import DOE library
from doepy import build
import itertools
from scipy.stats.qmc import Sobol

# set plot parameters
params = {'legend.fontsize': 18,
          'figure.figsize': (16, 12),
          'axes.labelsize': 24,
          'axes.titlesize':24,
          'axes.linewidth':5,
          'xtick.labelsize':20,
          'ytick.labelsize':20}
plt.rcParams.update(params)
# matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8-*" and
# newer releases no longer accept the old names, so use whichever name this
# installation provides (the new name is tried first).
for style_name in ('seaborn-v0_8-colorblind', 'seaborn-colorblind'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break
# fonttype 42 embeds TrueType fonts so text in saved PDFs stays editable
plt.rcParams['pdf.fonttype'] = 42

# seed NumPy's global random generator (used for the model parameters and the noise)
np.random.seed(123)



# Define simulation parameters

In [2]:
# size of the simulated community (number of gLV state variables)
numspecies = 12

# column labels for the measured species: s1, s2, ..., s<numspecies>
species_names = [f"s{idx}" for idx in range(1, numspecies + 1)]

# standard deviation of the Gaussian measurement noise added to simulated data
noise = 0.01

# final simulation time (simulations run from 0 to TF)
TF = 16

# Generate random ground truth model parameters

In [3]:
# Draw a random ground-truth gLV parameter set: interaction matrix A and growth rates r.
# Interaction coefficients a_ij are normally distributed and the diagonal (self-interaction)
# is forced to be strictly negative.
# The means / standard deviations below were calculated from the parameter set in
# Venturelli et al, Molecular Systems Biology, 2018.
# Note: these statistics frequently produce unbounded growth for pairs of species
# (positive feedback loops). Making mu_aij more negative makes this less frequent.
mu_aii = -1.5
sigma_aii = 0.25
mu_aij = -0.22
sigma_aij = 0.33

# draw order matters for reproducibility: diagonal terms first, then off-diagonal terms
params_ii = np.random.normal(mu_aii, sigma_aii, numspecies)
params_ij = np.random.normal(mu_aij, sigma_aij, numspecies**2 - numspecies)

A = np.zeros((numspecies, numspecies))
# boolean-mask assignment fills the off-diagonal entries in row-major order
off_diagonal = ~np.eye(numspecies, dtype=bool)
A[off_diagonal] = params_ij
# self-interactions: flip any positive draw so the diagonal is never positive
np.fill_diagonal(A, -np.abs(params_ii))

# Normally distributed basal growth rates; negative draws are reflected to be positive
mu_r = .36
sigma_r = 0.16
r = np.random.normal(mu_r, sigma_r, numspecies)
negative_rates = r < 0
r[negative_rates] = np.abs(r[negative_rates])
        
# define gLV ODE model
def runGLV(x, r, A, t_eval):
    """Integrate the generalized Lotka-Volterra model dx/dt = x * (r + A @ x).

    Parameters
    ----------
    x : array-like, shape (n,)
        Initial species abundances.
    r : array-like, shape (n,) or (n, 1)
        Growth rates; flattened to one dimension before use.
    A : ndarray, shape (n, n)
        Interaction matrix.
    t_eval : array-like
        Time points at which the solution is reported. Integration runs
        from 0 to ``t_eval[-1]``.

    Returns
    -------
    t : ndarray, shape (len(t_eval),)
        The time points of the solution.
    y : ndarray, shape (len(t_eval), n)
        Abundances, one row per time point and one column per species.

    Raises
    ------
    RuntimeError
        If the solver stops before reaching ``t_eval[-1]`` (e.g. the
        trajectory blows up). Without this check a truncated solution would
        be returned and could be silently broadcast into the caller's
        fixed-size data matrix.
    """
    growth = np.ravel(np.asarray(r, dtype=float))

    # define system of equations
    def system(t, x, r, A):
        # derivative of x (gLV equation)
        return x * (r + A @ x)

    # solve system; RK45 is an explicit method, so the `vectorized` option
    # (which only helps implicit solvers approximate a Jacobian) is left off
    soln = solve_ivp(system, (0, t_eval[-1]), x, t_eval=t_eval,
                     args=(growth, A), method='RK45')
    if not soln.success:
        raise RuntimeError(f"gLV integration failed: {soln.message}")
    return soln.t, soln.y.T

# Initial experiment (mono culture)

In [4]:
# Monoculture design: every row of the identity matrix inoculates exactly one species
X = np.eye(numspecies)

# Number of time points
NT = 16

# allocate the data matrix: one row per (treatment, time point) pair;
# species occupy columns 1..numspecies (column 0 is never written in this cell)
N_samples = X.shape[0]
tspan = (0, TF)
teval = np.linspace(0, tspan[-1], NT)
n_times = len(teval)
D = np.zeros([N_samples*n_times, 1+numspecies])

for i, x in enumerate(X):
    # simulate from an inoculum of 0.1 for each species present
    IC = .1*x
    t, y = runGLV(IC, r, A, teval)

    # Gaussian measurement noise on every time point except the initial one
    y[1:] += noise*np.random.randn(y.shape[0]-1, y.shape[1])

    # species absent from the inoculum stay at zero regardless of noise,
    # and noisy abundances are floored at zero
    present = (IC > 0).astype(float)
    y = np.clip(y * present, 0, np.inf)

    # store this treatment's trajectory
    D[i*n_times:(i+1)*n_times, 1:] = y

# time column: the evaluation grid repeated once per treatment
time = list(teval)*N_samples

# treatment label per design row, repeated for each time point
treatment_labels = [
    f"mono_exp_{row+1}" if np.count_nonzero(design > 0) == 1 else f"exp_{row+1}"
    for row, design in enumerate(X)
]
all_treatments = [label for label in treatment_labels for _ in range(n_times)]

# determine names of unique treatments
unique_treatments = np.unique(all_treatments)

# assemble the dataframe: treatment, time, then one column per species
columns = {'Treatments': all_treatments, 'Time': time}
columns.update({name: D[:, col+1] for col, name in enumerate(species_names)})
df = pd.DataFrame(columns)

# save
df.to_csv("init_gLV_data.csv", index=False)
df

Unnamed: 0,Treatments,Time,s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,s12
0,mono_exp_1,0.000000,0.100000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
1,mono_exp_1,1.066667,0.135485,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
2,mono_exp_1,2.133333,0.160333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
3,mono_exp_1,3.200000,0.186676,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
4,mono_exp_1,4.266667,0.208772,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187,mono_exp_12,11.733333,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.239874
188,mono_exp_12,12.800000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.258659
189,mono_exp_12,13.866667,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.237723
190,mono_exp_12,14.933333,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.255115


# Design matrix with all possible community combinations

In [5]:
# enumerate every presence/absence combination of the species, one (1, numspecies) row each
Xlist = [np.array(combo).reshape(1, numspecies)
         for combo in itertools.product((0, 1), repeat=numspecies)]
# the first combination is the all-zeros (empty) community; drop it and stack the rest
X = np.concatenate(Xlist[1:], axis=0)
X.shape

(4095, 12)

# High-resolution (for sequencing)

In [6]:
# high fidelity data: only two time points per community (initial and final)
NT = 2

# allocate the data matrix: one row per (community, time point), one column per species
N_samples = X.shape[0]
tspan = (0, TF)
teval = np.linspace(0, tspan[-1], NT)
n_times = len(teval)
D = np.zeros([N_samples*n_times, numspecies])

for i, x in enumerate(X):
    # simulate from an inoculum of 0.1 for each species present
    IC = .1*x
    t, y = runGLV(IC, r, A, teval)

    # Gaussian measurement noise on every time point except the initial one
    y[1:] += noise*np.random.randn(y.shape[0]-1, y.shape[1])

    # species absent from the inoculum stay at zero regardless of noise,
    # and noisy abundances are floored at zero
    present = (IC > 0).astype(float)
    y = np.clip(y * present, 0, np.inf)

    # store this community's trajectory
    D[i*n_times:(i+1)*n_times] = y

# time column: the evaluation grid repeated once per community
time = list(teval)*N_samples

# treatment label per design row, repeated for each time point
treatment_labels = [
    f"mono_exp_{row+1}" if np.count_nonzero(design > 0) == 1 else f"exp_{row+1}"
    for row, design in enumerate(X)
]
all_treatments = [label for label in treatment_labels for _ in range(n_times)]

# determine names of unique treatments
unique_treatments = np.unique(all_treatments)

# assemble the dataframe: treatment, time, then one column per species
columns = {'Treatments': all_treatments, 'Time': time}
columns.update({name: D[:, col] for col, name in enumerate(species_names)})
df = pd.DataFrame(columns)

# save
df.to_csv("hr_gLV_data.csv", index=False)
df

Unnamed: 0,Treatments,Time,s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,s12
0,mono_exp_1,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.100000
1,mono_exp_1,16.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.252116
2,mono_exp_2,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.100000,0.000000
3,mono_exp_2,16.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.116708,0.000000
4,exp_3,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.100000,0.100000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8185,exp_4093,16.0,0.136650,0.006297,0.132832,0.287159,0.189537,0.084862,0.118691,0.112804,0.085288,0.002943,0.000000,0.138226
8186,exp_4094,0.0,0.100000,0.100000,0.100000,0.100000,0.100000,0.100000,0.100000,0.100000,0.100000,0.100000,0.100000,0.000000
8187,exp_4094,16.0,0.170411,0.000000,0.155946,0.263682,0.195645,0.080487,0.076490,0.130852,0.071838,0.018274,0.032583,0.000000
8188,exp_4095,0.0,0.100000,0.100000,0.100000,0.100000,0.100000,0.100000,0.100000,0.100000,0.100000,0.100000,0.100000,0.100000


# Low resolution (OD)

In [7]:
# high time resolution data: many time points per community
NT = 32

# allocate the data matrix: column 0 holds the summed abundance ("OD"),
# columns 1..numspecies hold the individual species
N_samples = X.shape[0]
tspan = (0, TF)
teval = np.linspace(0, tspan[-1], NT)
n_times = len(teval)
D = np.zeros([N_samples*n_times, 1 + numspecies])

for i, x in enumerate(X):
    # simulate from an inoculum of 0.1 for each species present
    IC = .1*x
    t, y = runGLV(IC, r, A, teval)

    # Gaussian measurement noise on every time point except the initial one
    y[1:] += noise*np.random.randn(y.shape[0]-1, y.shape[1])

    # species absent from the inoculum stay at zero regardless of noise,
    # and noisy abundances are floored at zero
    present = (IC > 0).astype(float)
    y = np.clip(y * present, 0, np.inf)

    # store the total (sum over species) and the per-species trajectories
    rows = slice(i*n_times, (i+1)*n_times)
    D[rows, 0] = y.sum(axis=1)
    D[rows, 1:] = y

# time column: the evaluation grid repeated once per community
time = list(teval)*N_samples

# treatment label per design row, repeated for each time point
treatment_labels = [
    f"mono_exp_{row+1}" if np.count_nonzero(design > 0) == 1 else f"exp_{row+1}"
    for row, design in enumerate(X)
]
all_treatments = [label for label in treatment_labels for _ in range(n_times)]

# determine names of unique treatments
unique_treatments = np.unique(all_treatments)

# assemble the dataframe: treatment, time, OD, then one column per species
columns = {'Treatments': all_treatments, 'Time': time, "OD": D[:, 0]}
columns.update({name: D[:, col+1] for col, name in enumerate(species_names)})
df = pd.DataFrame(columns)

# save
df.to_csv("lr_gLV_data.csv", index=False)
df

Unnamed: 0,Treatments,Time,OD,s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,s12
0,mono_exp_1,0.000000,0.100000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.100000
1,mono_exp_1,0.516129,0.101360,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.101360
2,mono_exp_1,1.032258,0.109095,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.109095
3,mono_exp_1,1.548387,0.133521,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.133521
4,mono_exp_1,2.064516,0.147917,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.147917
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
131035,exp_4095,13.935484,1.362068,0.129345,0.022291,0.143421,0.264637,0.180603,0.081591,0.134397,0.124026,0.054825,0.027690,0.034963,0.164280
131036,exp_4095,14.451613,1.352856,0.140614,0.020913,0.144710,0.272184,0.182537,0.082265,0.126536,0.105943,0.037675,0.049389,0.024687,0.165404
131037,exp_4095,14.967742,1.279778,0.156327,0.000000,0.144467,0.270236,0.182811,0.061033,0.132456,0.107319,0.039698,0.007128,0.020978,0.157325
131038,exp_4095,15.483871,1.318702,0.141426,0.025939,0.173320,0.268075,0.193635,0.078446,0.116501,0.109662,0.038606,0.008398,0.000701,0.163994


# Mid resolution test data

In [8]:
# use Sobol sequence to generate design
# The scrambled Sobol engine draws from its own generator and does not read NumPy's
# global seed, so it is seeded explicitly to make the test design reproducible.
sampler = Sobol(d=numspecies, seed=123)
# 32 points (a power of two, which keeps the Sobol balance properties), each in [0, 1)
X = sampler.random(32)

# three time points per community: initial, midpoint and final
NT = 3

# init data matrix: one row per (community, time point), one column per species
N_samples = X.shape[0]
tspan = (0, TF)
teval = np.linspace(0,tspan[-1], NT)
D = np.zeros([X.shape[0]*len(teval), numspecies])

for i, x in enumerate(X):
    # solve, scaling the design so every initial abundance lies in [0, 0.1)
    IC = x*.1
    t, y = runGLV(IC, r, A, teval)

    # add noise to y (every time point except the initial one)
    y[1:] = y[1:] + noise*np.random.randn(y.shape[0]-1, y.shape[1])
    
    # make sure initially zero species stay zero regardless of noise
    y = np.einsum("j,ij-> ij", np.array(IC > 0, float), y)
    # noisy abundances are floored at zero
    y = np.clip(y, 0, np.inf)

    # store data 
    D[i*len(teval):(i+1)*len(teval)] = y

# save data to dataframe
time = list(teval)*X.shape[0]

# label each design row, repeated once per time point
all_treatments = []
for i,x in enumerate(X):
    if sum(x>0) == 1:
        exp_name = f"mono_exp_{i+1}"
    else: 
        exp_name = f"exp_{i+1}"
    for _ in range(len(teval)):
        all_treatments.append(exp_name)

# determine names of unique treatments
unique_treatments = np.unique(all_treatments)        

df = pd.DataFrame()
df['Treatments'] = all_treatments
df['Time'] = time

# copy the (already noisy) simulated abundances into one column per species
for j,s in enumerate(species_names):
    df[s] = D[:, j]

# save
df.to_csv("test_gLV_data.csv", index=False)
df

Unnamed: 0,Treatments,Time,s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,s12
0,exp_1,0.0,0.057251,0.079641,0.024338,0.065582,0.058996,0.076186,0.004551,0.087550,0.016461,0.005027,0.069704,0.011079
1,exp_1,8.0,0.181500,0.041303,0.092532,0.293605,0.176399,0.095334,0.000000,0.155617,0.014650,0.012093,0.036481,0.071142
2,exp_1,16.0,0.145099,0.016104,0.138333,0.293985,0.215529,0.075262,0.057403,0.131409,0.022097,0.010484,0.033634,0.124055
3,exp_2,0.0,0.026115,0.039607,0.052088,0.003330,0.001450,0.021422,0.076977,0.008969,0.097686,0.075151,0.018731,0.051879
4,exp_2,8.0,0.163304,0.031575,0.165073,0.147545,0.019098,0.052535,0.116913,0.046885,0.050118,0.039477,0.028558,0.156299
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,exp_31,8.0,0.116721,0.028401,0.107603,0.287957,0.168427,0.083846,0.104311,0.148372,0.055124,0.007911,0.007855,0.135394
92,exp_31,16.0,0.156631,0.014949,0.137977,0.289056,0.179308,0.067739,0.110668,0.128010,0.040469,0.000000,0.013803,0.153207
93,exp_32,0.0,0.060125,0.043200,0.078057,0.034261,0.044555,0.026898,0.029374,0.012404,0.011834,0.085301,0.074028,0.035561
94,exp_32,8.0,0.130950,0.019647,0.173851,0.271436,0.190970,0.071430,0.098712,0.041982,0.025532,0.028358,0.054514,0.149999
