In [1]:
# import plotting, io, stats, linalg libraries
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
from scipy.integrate import odeint, solve_ivp
from scipy.stats import linregress
import scipy.io as sio
import pandas as pd

# import DOE library
from doepy import build
import itertools
from scipy.stats.qmc import Sobol

# set plot parameters
params = {'legend.fontsize': 18,
          'figure.figsize': (16, 12),
          'axes.labelsize': 24,
          'axes.titlesize':24,
          'axes.linewidth':5,
          'xtick.labelsize':20,
          'ytick.labelsize':20}
plt.rcParams.update(params)

# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and later
# releases removed the old names, so use whichever spelling this install provides.
# Falling back to the legacy name keeps the behaviour on older Matplotlib unchanged.
_colorblind_style = ('seaborn-v0_8-colorblind'
                     if 'seaborn-v0_8-colorblind' in plt.style.available
                     else 'seaborn-colorblind')
plt.style.use(_colorblind_style)

# embed fonts as TrueType (type 42) in saved PDFs
plt.rcParams['pdf.fonttype'] = 42

# seed NumPy's global random generator (used for the parameter draws and the noise)
np.random.seed(123)



# Define simulation parameters

In [2]:
# number of species (state variables) in the simulated model
numspecies = 5


# compression functions applied to a simulated trajectory
def compressor0(x):
    """Sum over outputs (axis 1), leaving one value per row."""
    return np.sum(x, 1)


def compressor1(x):
    """Identity: return the trajectory unchanged."""
    return x


compressors = [compressor0, compressor1]

# number of time points to sample (one entry per dataset)
NTs = [16, 4]

# end of the time span over which samples are taken
TF = 8

# number of low fidelity samples
NS_LF = 64

# names of the measured values: 's1', 's2', ...
species_names = [f's{idx+1}' for idx in range(numspecies)]

# names of the datasets
data_names = ['sum', 'full']

# relative std. dev. of the multiplicative Gaussian noise added to the data
# (0.02 means 2 % of the signal)
noise = 0.02

In [3]:
# Randomly draw the parameters that define the system.
# The a_ij are normally distributed, and the diagonal is forced to be negative.
# The means and std. devs. were calculated from the parameter set in
# Venturelli et al, Molecular Systems Biology, 2018.
# Note: these statistics frequently give unbounded growth for pairs of species
# (positive feedback loops); making mu_aij more negative makes this less frequent.
mu_aii = -1.
sigma_aii = 0.25
mu_aij = -0.22
sigma_aij = 0.33
params_ii = np.random.normal(mu_aii, sigma_aii, numspecies)
params_ij = np.random.normal(mu_aij, sigma_aij, numspecies**2 - numspecies)

# Assemble the matrix: off-diagonal draws are placed in row-major order,
# and the diagonal receives -|a_ii|.
A = np.zeros((numspecies, numspecies))
off_diagonal = ~np.eye(numspecies, dtype=bool)
A[off_diagonal] = params_ij
np.fill_diagonal(A, -np.abs(params_ii))

# Normally distributed basal growth rates; negative draws are replaced by their absolute value
mu_r = .36
sigma_r = 0.16
r = np.abs(np.random.normal(mu_r, sigma_r, numspecies))
        
# define ODE model (note: the right-hand side below is the linear system dx/dt = A x)
def runGLV(x, r, A, t_eval):
    """Integrate dx/dt = A @ x from initial state `x` and sample it at `t_eval`.

    Parameters
    ----------
    x : array_like
        Initial state.
    r : array_like
        Growth rates. They are forwarded to the right-hand side as a column
        vector, but the right-hand side does not use them.
    A : ndarray
        Square matrix multiplying the state.
    t_eval : sequence of float
        Times at which the solution is stored; integration runs from 0 to
        ``t_eval[-1]``.

    Returns
    -------
    t : ndarray
        The evaluation times reported by the solver.
    y : ndarray
        Solution with one row per time point and one column per state variable.
    """
    def linear_rhs(t, state, growth, interactions):
        # time derivative of the state; `growth` is accepted but unused
        return interactions @ state

    growth_column = np.vstack(r)
    horizon = (0, t_eval[-1])

    # vectorized=True: the solver calls the right-hand side with 2-D state arrays
    result = solve_ivp(linear_rhs, horizon, x,
                       t_eval=t_eval,
                       args=(growth_column, A),
                       method='RK45',
                       vectorized=True)

    # transpose so that time runs along the first axis
    return result.t, result.y.T

# Low fidelity design matrix

In [4]:
# Alternative design kept for reference (disabled): Latin hypercube over [0, 2] via DOEpy
# # use DOEpy to define latin hyper-cube initial design
# design_dict = {}
# for i in range(numspecies):
#     design_dict[f's{i+1}'] = [0,2]

# exp_design = build.space_filling_lhs(
#                 design_dict,
#                 num_samples = NS_LF)
# X = exp_design.values

# use Sobol sequence to generate design: one row per experiment, one column per species
# NOTE(review): no seed is passed to Sobol, so whether this design is reproducible
# depends on how the installed SciPy seeds the engine by default — TODO confirm
sampler = Sobol(d=numspecies)
# sampler.random draws NS_LF points from the unit hypercube; rescale them to [-1, 1)
X = 2.*sampler.random(NS_LF) - 1.

# Optional extra rows, one per species (disabled)
# add monoculture
# Xmono = np.eye(numspecies)
# X = np.concatenate((X, Xmono))

In [5]:
# low fidelity data: first time grid and first compressor
NT = NTs[0]
compressor = compressors[0]

# sampling grid shared by every simulated experiment
N_samples = X.shape[0]
tspan = (0, TF)
teval = np.linspace(0, tspan[-1], NT)
n_times = len(teval)

# data matrix: one row per (experiment, time point);
# column 0 holds the compressed signal, the remaining columns hold the simulated states
D = np.zeros([N_samples*n_times, 1+numspecies])

all_treatments = []
for i, x in enumerate(X):
    rows = slice(i*n_times, (i+1)*n_times)

    # simulate from this initial condition
    t, y = runGLV(x, r, A, teval)

    # compress, then apply multiplicative Gaussian noise to everything after the initial point
    c_y = compressor(y)
    c_y[1:] *= 1. + noise*np.random.randn(len(c_y)-1)

    # the noisy compressed signal and the uncompressed states are stored side by side
    D[rows, 0] = c_y
    D[rows, 1:] = y

    # experiments with exactly one positive initial value get the "mono" prefix
    prefix = "mono_exp" if np.count_nonzero(x > 0) == 1 else "exp"
    all_treatments.extend([f"{prefix}_{i+1}"]*n_times)

# the time grid is repeated once per experiment
time = list(teval)*N_samples

# determine names of unique treatments
unique_treatments = np.unique(all_treatments)

# assemble the dataframe: treatment label, time, compressed signal, then one column per species
df = pd.DataFrame({'Treatments': all_treatments,
                   'Time': time,
                   'OD': D[:, 0]})
for j, s in enumerate(species_names):
    df[s] = D[:, j+1]

# save
df.to_csv(f"Simulated_Ax_data_sum.csv", index=False)

In [6]:
# display the low fidelity dataframe (one row per experiment and time point)
df

Unnamed: 0,Treatments,Time,OD,s1,s2,s3,s4,s5
0,mono_exp_1,0.000000,-1.417382,-0.409561,-0.345314,-0.368720,-0.299253,0.005467
1,mono_exp_1,0.533333,-0.448903,-0.115393,-0.108492,-0.107672,-0.185799,0.066145
2,mono_exp_1,1.066667,-0.082425,-0.023223,-0.027049,-0.010397,-0.099557,0.072914
3,mono_exp_1,1.600000,0.025528,-0.002618,-0.002338,0.021845,-0.049648,0.059225
4,mono_exp_1,2.133333,0.046335,-0.003279,0.003474,0.029404,-0.024697,0.042091
...,...,...,...,...,...,...,...,...
1019,mono_exp_64,5.866667,-0.011639,0.078015,-0.023346,-0.093770,0.044408,-0.017374
1020,mono_exp_64,6.400000,-0.009905,0.063082,-0.021012,-0.075619,0.036310,-0.012906
1021,mono_exp_64,6.933333,-0.008306,0.050674,-0.018371,-0.060543,0.029457,-0.009499
1022,mono_exp_64,7.466667,-0.006414,0.040445,-0.015707,-0.048149,0.023698,-0.006938


In [7]:
# show the matrix A as a flat array (rows concatenated one after another)
A.flatten()

array([-1.27140765,  0.32497406, -1.02080415, -0.36154117,  0.19775897,
       -0.50602433, -0.75066364, -0.44403243, -0.25125396,  0.27215858,
       -0.43083766, -0.36651405, -0.92925538, -0.36333592,  0.50795693,
        0.50163941,  0.11133779, -0.09255849, -1.37657368,  0.02333163,
        0.27194157, -0.52882518,  0.16802358, -0.63378062, -1.14465006])

In [8]:
# eigenvalues of A; since runGLV integrates dx/dt = A x, negative real parts
# for every eigenvalue mean the simulated trajectories decay towards zero
np.linalg.eigvals(A)

array([-0.5174403 +0.11563652j, -0.5174403 -0.11563652j,
       -1.80067894+0.j        , -1.31849543+0.49687268j,
       -1.31849543-0.49687268j])