In [1]:
# import plotting, io, stats, linalg libraries
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
from scipy.integrate import odeint, solve_ivp
from scipy.stats import linregress
import scipy.io as sio
import pandas as pd

# import DOE library
from doepy import build
import itertools
from scipy.stats.qmc import Sobol

# set plot parameters (large fonts and thick axes for presentation-size figures)
params = {'legend.fontsize': 18,
          'figure.figsize': (16, 12),
          'axes.labelsize': 24,
          'axes.titlesize':24,
          'axes.linewidth':5,
          'xtick.labelsize':20,
          'ytick.labelsize':20}
plt.rcParams.update(params)

# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-<name>' and
# later releases dropped the old names. Use whichever spelling this installation
# provides; if neither is listed, fall back to the original name so the failure
# is reported by matplotlib exactly as before.
_colorblind_style = next(
    (name for name in ('seaborn-v0_8-colorblind', 'seaborn-colorblind')
     if name in plt.style.available),
    'seaborn-colorblind',
)
plt.style.use(_colorblind_style)

# write Type 42 (TrueType) fonts into PDF output
plt.rcParams['pdf.fonttype'] = 42

# seed the legacy global NumPy RNG so every np.random call below is reproducible
np.random.seed(123)



# Define simulation parameters

In [2]:
# how many species the gLV model contains
numspecies = 6

# column labels for the measured species: 's1', 's2', ..., one per species
species_names = [f"s{index}" for index in range(1, numspecies + 1)]

# standard deviation of the Gaussian measurement noise added to simulated data
noise = 0.01

# final time of each simulation
TF = 16

# Generate random ground truth model parameters

In [3]:
# Randomly draw the parameters that define the system.
# Interaction coefficients a_ij are normally distributed; the diagonal (self
# interaction) is forced to be strictly negative.
# The statistics below were calculated from the parameter set in
# Venturelli et al, Molecular Systems Biology, 2018.
# Note: these statistics frequently give unbounded growth for pairs of species
# (positive feedback loops). Making mu_aij more negative makes that less frequent.
mu_aii = -1.5
sigma_aii = 0.25
mu_aij = -0.22
sigma_aij = 0.33

# draw order matters for reproducibility: diagonal terms first, then off-diagonal
params_ii = np.random.normal(mu_aii, sigma_aii, numspecies)
params_ij = np.random.normal(mu_aij, sigma_aij, numspecies**2 - numspecies)

# Boolean-mask assignment fills the off-diagonal entries in row-major order,
# i.e. A[0,1], A[0,2], ..., A[1,0], A[1,2], ... receive params_ij[0], [1], ...
A = np.zeros((numspecies, numspecies))
off_diagonal = ~np.eye(numspecies, dtype=bool)
A[off_diagonal] = params_ij
# -|a_ii| guarantees a negative diagonal whatever the sign of the draw
np.fill_diagonal(A, -np.abs(params_ii))

# Normally distributed basal growth rates; negative draws are reflected to
# their absolute value so every growth rate is non-negative
mu_r = .36
sigma_r = 0.16
r = np.abs(np.random.normal(mu_r, sigma_r, numspecies))
        
# define gLV ODE model
def runGLV(x, r, A, t_eval):
    """Integrate the generalized Lotka-Volterra model dx/dt = x * (r + A @ x).

    Parameters
    ----------
    x : array-like, shape (n,)
        Initial abundances; integration always starts at t = 0.
    r : array-like, shape (n,)
        Growth rates.
    A : array-like, shape (n, n)
        Interaction matrix.
    t_eval : sequence of float
        Times at which the solution is reported. The integration interval is
        (0, t_eval[-1]).

    Returns
    -------
    t : ndarray, shape (len(t_eval),)
        The reported time points.
    y : ndarray, shape (len(t_eval), n)
        Abundances, one row per time point and one column per species.

    Raises
    ------
    RuntimeError
        If the solver reports failure. Without this check solve_ivp returns
        only the time points it reached before failing, so callers would
        silently receive a truncated trajectory.
    """
    # Column vector of growth rates: with vectorized=True the right-hand side
    # is called with a 2-D state of shape (n, k), so r must broadcast along columns.
    growth = np.vstack(r)

    # define system of equations
    def system(t, x, r, A):
        # derivative of x (gLV equation)
        return x * (r + A@x)

    # solve system
    soln = solve_ivp(system, (0, t_eval[-1]), x, t_eval=t_eval, args=(growth, A),
                     method='RK45', vectorized=True)
    if not soln.success:
        raise RuntimeError(
            f"gLV integration failed after {soln.t.size} of {len(t_eval)} "
            f"requested time points: {soln.message}"
        )
    return soln.t, soln.y.T

# Design data 

In [4]:
# Experimental design: each row of X is one community, each column one species,
# with entries sampled uniformly from [0, 1).
# Alternative (disabled): enumerate every non-empty presence/absence community
# with itertools.product([0, 1], repeat=numspecies) and drop the all-zeros row.
X = np.random.uniform(low=0, high=1, size=(64, numspecies))

# number of measurement time points per community
NT = 3

# measurement grid: NT evenly spaced times from 0 to TF
N_samples = len(X)
tspan = (0, TF)
teval = np.linspace(tspan[0], tspan[1], num=NT)

# data matrix with one row per (community, time point) and 1 + numspecies
# columns; species abundances are stored from column 1 onwards
D = np.zeros((N_samples * teval.size, numspecies + 1))

# simulate every community in the design and store its noisy trajectory in D
for i, x in enumerate(X):
    # solve, starting from the design row scaled down by a factor of ten
    IC = x*.1
    t, y = runGLV(IC, r, A, teval)

    # A failed integration yields fewer rows than requested. Storing such a
    # result would either raise an obscure shape error or, for a single
    # returned row, be broadcast silently over all time points of this sample.
    if y.shape != (len(teval), numspecies):
        raise RuntimeError(
            f"simulation of community {i} returned shape {y.shape}, "
            f"expected {(len(teval), numspecies)}; the ODE solver probably "
            f"failed (e.g. unbounded growth)"
        )

    # add noise to y at every time point except the initial condition
    y[1:] = y[1:] + noise*np.random.randn(y.shape[0]-1, y.shape[1])

    # make sure initially zero species stay zero regardless of noise
    # (the boolean mask broadcasts over the time axis)
    y = y * (IC > 0)
    # measured abundances cannot be negative
    y = np.clip(y, 0, np.inf)

    # store data: rows of this community, species columns only
    D[i*len(teval):(i+1)*len(teval), 1:] = y

# Build the long-format table: one row per (community, time point).

# time stamp of every row of D: the measurement grid repeated once per community
time = [t_k for _ in range(X.shape[0]) for t_k in teval]

# treatment label of every row; communities with exactly one species present
# get a "mono_" prefix so they can be recognised later
all_treatments = []
for idx, x0 in enumerate(X):
    n_present = np.count_nonzero(x0 > 0)
    label = f"mono_exp_{idx+1}" if n_present == 1 else f"exp_{idx+1}"
    all_treatments.extend([label] * len(teval))

# determine names of unique treatments
unique_treatments = np.unique(all_treatments)

# label columns first, then one abundance column per species
# (species data starts at column 1 of D)
df = pd.DataFrame({'Treatments': all_treatments, 'Time': time})
for col, s in enumerate(species_names, start=1):
    df[s] = D[:, col]

# remove monoculture experiments (treatment names containing "mono").
# A boolean mask also works when nothing is left to keep, where pd.concat over
# an empty list of groups would raise. The stable sort keeps rows ordered by
# treatment name (exp_1, exp_10, ...) with their original within-treatment
# order and index, matching the layout previously produced via groupby.
is_mono = df["Treatments"].str.contains("mono", regex=False)
df = df.loc[~is_mono].sort_values("Treatments", kind="stable")

# save
df.to_csv("gLV_data_for_CR.csv", index=False)
df

Unnamed: 0,Treatments,Time,s1,s2,s3,s4,s5,s6
0,exp_1,0.0,0.086631,0.025046,0.048303,0.098556,0.051949,0.061289
1,exp_1,8.0,0.090712,0.215443,0.190611,0.174476,0.032546,0.002532
2,exp_1,16.0,0.023573,0.353033,0.282975,0.173349,0.000000,0.000000
27,exp_10,0.0,0.024086,0.034346,0.051313,0.066662,0.010591,0.013089
28,exp_10,8.0,0.059290,0.277299,0.194568,0.172302,0.012084,0.018922
...,...,...,...,...,...,...,...,...
22,exp_8,8.0,0.105865,0.058858,0.132456,0.164099,0.075496,0.017758
23,exp_8,16.0,0.083224,0.293845,0.269835,0.161158,0.025799,0.004992
24,exp_9,0.0,0.035591,0.076255,0.059318,0.069170,0.015113,0.039888
25,exp_9,8.0,0.054867,0.315027,0.211837,0.146970,0.027926,0.007899
