In [1]:
# import plotting, io, stats, linalg libraries
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
from scipy.integrate import odeint, solve_ivp
from scipy.stats import linregress
import scipy.io as sio
import pandas as pd

# import DOE library
from doepy import build
import itertools

# set plot parameters (font sizes, figure size, axes line width)
params = {'legend.fontsize': 18,
          'figure.figsize': (16, 12),
          'axes.labelsize': 24,
          'axes.titlesize':24,
          'axes.linewidth':5,
          'xtick.labelsize':20,
          'ytick.labelsize':20}
plt.rcParams.update(params)

# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names, so use whichever name is installed.
for style_name in ('seaborn-v0_8-colorblind', 'seaborn-colorblind'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break
else:
    # neither name is listed: fall back to the original call so the failure is loud
    plt.style.use('seaborn-colorblind')

# Type 42 (TrueType) font embedding for PDF output
plt.rcParams['pdf.fonttype'] = 42

# seed the global NumPy generator so the random parameters and noise are reproducible
np.random.seed(12345)

In [2]:
# number of species in the gLV model
numspecies = 7


# compression functions applied to a simulated trajectory
def compressor0(x):
    """Sum along axis 1, giving one total per row of ``x``."""
    return np.sum(x, axis=1)


def compressor1(x):
    """Identity compression: hand back ``x`` unchanged."""
    return x


compressors = [compressor0, compressor1]

# number of time points to sample for each dataset
NTs = [49, 2]

# names of the measured values: s1 ... s<numspecies>
species_names = [f"s{idx}" for idx in range(1, numspecies + 1)]

# dataset labels
data_names = ['sum', 'full']

# relative std. dev. of the Gaussian noise (0.15 = 15 %); the data cells apply
# it multiplicatively as value * (1 + noise * standard normal draw)
noise = 0.15

In [3]:
# Randomly create the parameters that define the system.
# Interaction coefficients a_ij are normally distributed, with a strictly
# negative diagonal (self-interaction).
# These parameter statistics were calculated from the parameter set in
# Venturelli et al, Molecular Systems Biology, 2018.
# Note: these statistics frequently give unbounded growth for pairs of species
# (positive feedback loops). Making mu_aij more negative makes this less frequent.
mu_aii = -1.5
sigma_aii = 0.25
mu_aij = -0.22
sigma_aij = 0.33

# Draw order (diagonal, off-diagonal, then growth rates) is kept fixed so a
# seeded random stream yields the same parameter set.
params_ii = np.random.normal(mu_aii, sigma_aii, numspecies)
params_ij = np.random.normal(mu_aij, sigma_aij, numspecies**2 - numspecies)

A = np.zeros((numspecies, numspecies))
# boolean-mask assignment fills the off-diagonal entries in row-major order
off_diagonal = ~np.eye(numspecies, dtype=bool)
A[off_diagonal] = params_ij
# force the diagonal to be negative regardless of the sign of the draw
np.fill_diagonal(A, -np.abs(params_ii))

# Normally distributed basal growth rates; negative draws are reflected to be positive
mu_r = .36
sigma_r = 0.16
r = np.abs(np.random.normal(mu_r, sigma_r, numspecies))
        
# gLV ODE model
def runGLV(x, r, A, t_eval):
    """Integrate the generalized Lotka-Volterra system dx/dt = x * (r + A x).

    Parameters
    ----------
    x : array_like
        Initial abundances, one entry per species.
    r : array_like
        Growth rates, one entry per species.
    A : array_like
        Square interaction matrix.
    t_eval : array_like
        Times at which the solution is reported; integration runs from 0
        to ``t_eval[-1]``.

    Returns
    -------
    tuple
        ``(t, y)`` with the solver's time points and the trajectory
        transposed so that rows are time points and columns are species.
    """
    def glv_rhs(t, state, growth, interactions):
        # per-capita growth rate times current abundance
        per_capita = growth + interactions @ state
        return state * per_capita

    # growth rates as a column so they broadcast against the 2-D states the
    # solver passes when vectorized=True
    growth_column = np.vstack(r)
    horizon = (0, t_eval[-1])
    solution = solve_ivp(glv_rhs, horizon, x,
                         t_eval=t_eval,
                         args=(growth_column, A),
                         method='RK45',
                         vectorized=True)
    return solution.t, solution.y.T

In [4]:
# enumerate every presence/absence combination of the species,
# one (1, numspecies) row per community
Xlist = [np.array(combo).reshape(1, numspecies)
         for combo in itertools.product([0, 1], repeat=numspecies)]
# drop the all-zeros community (first row), reverse the row order, store as floats
X = np.array(np.vstack(Xlist)[1:][::-1], dtype=float)

In [5]:
# low fidelity data: first entry of NTs / compressors (summed species signal)
NT = NTs[0]
compressor = compressors[0]

# time grid and data matrix
# (column 0: compressed signal, remaining columns: individual species)
N_samples = X.shape[0]
tspan = (0, 48)
teval = np.linspace(0, tspan[-1], NT)
n_times = len(teval)
D = np.zeros([N_samples*n_times, 1+numspecies])

for i, x in enumerate(X):
    # simulate the community; present species start at 0.1
    IC = x*.1
    t, y = runGLV(IC, r, A, teval)

    # compress the trajectory
    c_y = compressor(y)

    # multiplicative Gaussian noise on everything but the initial time point
    c_y[1:] *= 1. + noise*np.random.randn(len(c_y)-1)

    # rows of D that belong to this community
    rows = slice(i*n_times, (i+1)*n_times)
    D[rows, 0] = c_y
    D[rows, 1:] = y

# time column: the evaluation grid repeated once per community
time = list(teval)*N_samples

# treatment label per row; communities with a single species get the mono prefix
all_treatments = []
for i, x in enumerate(X):
    exp_name = f"mono_exp_{i+1}" if sum(x>0) == 1 else f"exp_{i+1}"
    all_treatments.extend([exp_name]*n_times)

# determine names of unique treatments
unique_treatments = np.unique(all_treatments)

# assemble the dataframe: labels, time, compressed signal, then species columns
columns = {'Treatments': all_treatments, 'Time': time, 'OD': D[:, 0]}
columns.update({s: D[:, j+1] for j, s in enumerate(species_names)})
df = pd.DataFrame(columns)

# save
df.to_csv(f"Simulated_gLV_data_sum.csv", index=False)

In [6]:
# display the low fidelity dataframe assembled in the previous cell
df

Unnamed: 0,Treatments,Time,OD,s1,s2,s3,s4,s5,s6,s7
0,exp_1,0.0,0.700000,0.100000,0.100000,0.100000,0.100000,0.100000,0.100000,0.100000
1,exp_1,1.0,0.603124,0.139175,0.115084,0.112920,0.111055,0.104585,0.116198,0.086675
2,exp_1,2.0,0.863875,0.180531,0.128074,0.122424,0.121131,0.104713,0.130274,0.073863
3,exp_1,3.0,1.025633,0.218637,0.138798,0.128066,0.130070,0.101389,0.141819,0.062099
4,exp_1,4.0,0.868276,0.249608,0.147425,0.130253,0.137868,0.096018,0.150858,0.051734
...,...,...,...,...,...,...,...,...,...,...
6218,mono_exp_127,44.0,0.149115,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.144137
6219,mono_exp_127,45.0,0.157048,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.144124
6220,mono_exp_127,46.0,0.126141,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.144121
6221,mono_exp_127,47.0,0.133857,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.144130


In [7]:
# high fidelity data: second entry of NTs / compressors (every species kept)
NT = NTs[1]
compressor = compressors[1]

# time grid and data matrix (one column per species)
N_samples = X.shape[0]
tspan = (0, 48)
teval = np.linspace(0, tspan[-1], NT)
n_times = len(teval)
D = np.zeros([N_samples*n_times, numspecies])

for i, x in enumerate(X):
    # simulate the community; present species start at 0.1
    IC = x*.1
    t, y = runGLV(IC, r, A, teval)

    # multiplicative Gaussian noise on every species, initial time point excluded
    noise_shape = (y.shape[0]-1, y.shape[1])
    y[1:] *= 1. + noise*np.random.randn(*noise_shape)

    # store the rows that belong to this community
    D[i*n_times:(i+1)*n_times] = y

# time column: the evaluation grid repeated once per community
time = list(teval)*N_samples

# treatment label per row; communities with a single species get the mono prefix
all_treatments = []
for i, x in enumerate(X):
    exp_name = f"mono_exp_{i+1}" if sum(x>0) == 1 else f"exp_{i+1}"
    all_treatments.extend([exp_name]*n_times)

# determine names of unique treatments
unique_treatments = np.unique(all_treatments)

# assemble the dataframe: labels, time, then the (already noisy) species columns
columns = {'Treatments': all_treatments, 'Time': time}
columns.update({s: D[:, j] for j, s in enumerate(species_names)})
df = pd.DataFrame(columns)

# save
df.to_csv(f"Simulated_gLV_data_full.csv", index=False)

In [8]:
# display the high fidelity dataframe assembled in the previous cell
df

Unnamed: 0,Treatments,Time,s1,s2,s3,s4,s5,s6,s7
0,exp_1,0.0,0.100000,0.100000,0.100000,0.100000,0.100000,0.100000,0.100000
1,exp_1,48.0,0.298559,0.159095,0.081960,0.215479,0.016468,0.168590,0.000057
2,exp_2,0.0,0.100000,0.100000,0.100000,0.100000,0.100000,0.100000,0.000000
3,exp_2,48.0,0.286262,0.165522,0.065139,0.155305,0.025074,0.212555,0.000000
4,exp_3,0.0,0.100000,0.100000,0.100000,0.100000,0.100000,0.000000,0.100000
...,...,...,...,...,...,...,...,...,...
249,exp_125,48.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.337060,0.118508
250,mono_exp_126,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.100000,0.000000
251,mono_exp_126,48.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.548540,0.000000
252,mono_exp_127,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.100000
