# Libraries

In [81]:
import cobra as cb
from cobra.sampling import sample, OptGPSampler, ACHRSampler
import logging
logging.basicConfig(filename="log.txt"  , level=logging.INFO)
import os
import random
import pandas as pd
import numpy as np

# Set up

In [82]:
# Names of the models to process; loadModels matches each one against the
# file names (without extension) found in the models folder
modelNames = [
    "ENGRO 1",
    "ENGRO 2",
]

In [83]:
##############################################
# Create a folder if it doesn't already exist
# Missing parent folders are created as well; an
# already existing folder is left untouched
# Parameters
# - path --> new folder path
# Raises
# - FileExistsError --> path exists but is not a folder
##############################################
def createFolder(path):
    # makedirs(exist_ok=True) replaces the "exists() then mkdir()" pair:
    # there is no window between the check and the creation, and nested
    # target folders no longer fail because their parent is missing
    os.makedirs(path, exist_ok=True)

# Loading models

In [84]:
##############################################
# Load the models files (.xml)
# Parameters
# - modelNames --> list of models names that must
# match the file names (without extension)
# - modelFolder --> the folder containing the
# models files
# Returns
# - dictionary mapping each model name to the model
# returned by cb.io.read_sbml_model
# Raises
# - FileNotFoundError --> no file in modelFolder is
# named after the model
# - ImportError --> files named after the model exist,
# but none of them has the .xml extension
##############################################
def loadModels(modelNames, modelFolder):
    models = {}
    if not modelNames:
        # Nothing requested: do not even touch the folder
        return models

    # List the folder once and collect every extension found for each
    # file stem, so that a sibling file such as "<name>.json" cannot hide
    # "<name>.xml" depending on the order returned by os.listdir
    extensionsByStem = {}
    for file in os.listdir(modelFolder):
        stem, extension = os.path.splitext(file)
        extensionsByStem.setdefault(stem, set()).add(extension)

    for modelName in modelNames:
        extensions = extensionsByStem.get(modelName)
        if extensions is None:
            raise FileNotFoundError('File not found')
        if ".xml" not in extensions:
            raise ImportError('Model file extension not supported')
        # os.path.join works whether or not modelFolder ends with a separator
        modelPath = os.path.join(modelFolder, modelName + ".xml")
        models[modelName] = cb.io.read_sbml_model(modelPath)
    return models

In [85]:
# Load every model, then record the reaction ids of each one by model name
modelsDict = loadModels(modelNames, "../../models/")
modelReactionsDict = {
    name: [rxn.id for rxn in modelsDict[name].reactions]
    for name in modelNames
}

# Sampling ACHR and OPTGP

In [89]:
##############################################
# Samples the models by using sampling algorithms
# (ACHR and OPTGP) over multiple repetitions at fixed sample size.
# Every run is written to
# <samplesFolder><model>/<algorithm>Thinning<thinning>/<nsample>_<run><suffix>
# where <suffix> is "_achr.csv" or "_optgp.csv"
# Parameters
# - modelNames --> loaded models to sample
# - modelsDict --> dictionary containing the models
# - algorithms --> list of algorithms to use (ACHR and/or OPTGP)
# - samples --> list of fixed samples
# - executions --> number of repetition for each fixed sample size
# - thinnings --> list of thinnings to use
# - processes --> number of processes to use (see OPTGP documentation)
# - samplesFolder --> target folder for all the samples
# Raises
# - NotImplementedError --> an entry of algorithms is neither
# "ACHR" nor "OPTGP"
##############################################
def sampler(modelNames, modelsDict, algorithms, samples, executions,
            thinnings, processes, samplesFolder):
    csvSuffixes = {"ACHR": "_achr.csv", "OPTGP": "_optgp.csv"}

    # Reject unsupported algorithms before doing any work: otherwise the
    # error would only show up after folders were created and after all
    # the sampling of the algorithms listed before the invalid one
    algorithms = list(algorithms)
    for algorithm in algorithms:
        if algorithm not in csvSuffixes:
            raise NotImplementedError('Algorithm not supported')

    for modelName in modelNames:
        model = modelsDict[modelName]
        createFolder(samplesFolder + modelName)
        for algorithm in algorithms:
            extension = csvSuffixes[algorithm]
            for thinning in thinnings:
                folder = (samplesFolder + modelName + "/" + algorithm
                          + "Thinning" + str(thinning))
                createFolder(folder)
                path = folder + "/"
                # One sampler object per (model, algorithm, thinning); it is
                # reused for every sample size and repetition below
                if algorithm == "ACHR":
                    chain = ACHRSampler(model, thinning=thinning, seed=1)
                else:
                    # NOTE: no seed is passed here, unlike the ACHR branch
                    chain = OptGPSampler(model, thinning=thinning,
                                         processes=processes)
                for nsample in samples:
                    logging.info("Creating %s samples - %s - %s - Thinning %s",
                                 nsample, modelName, algorithm, thinning)
                    for run in range(executions):
                        chain.sample(nsample).to_csv(
                            path + str(nsample) + "_" + str(run) + extension)

In [None]:
# Full run: sample sizes from 1000 to 30000 in steps of 1000,
# 20 repetitions for each size
samplesFolder = "../../samples/"
algorithms = ["ACHR", "OPTGP"]
thinnings = [1, 10, 100]
nprocesses = 4
executionsPerSampleSize = 20
samplesNList = list(range(1000, 30001, 1000))

sampler(
    modelNames,
    modelsDict,
    algorithms,
    samplesNList,
    executionsPerSampleSize,
    thinnings,
    nprocesses,
    samplesFolder,
)

In [None]:
# Small run: sample sizes 1, 3, 5, 7 and 9, 3 repetitions for each size
samplesFolder = "../../samples/"
algorithms = ["ACHR", "OPTGP"]
thinnings = [1, 2, 3]
nprocesses = 4
executionsPerSampleSize = 3
samplesNList = list(range(1, 10, 2))

sampler(
    modelNames,
    modelsDict,
    algorithms,
    samplesNList,
    executionsPerSampleSize,
    thinnings,
    nprocesses,
    samplesFolder,
)

# Sampling CBS3

In [52]:
##############################################
# Samples the models by using the CBS3 algorithm
# based on FBA with random objective functions.
# For every model, "executions" files are written to
# <samplesFolder><model>/CBS3groupedBy<groupedBy>/<i>_0_cbs3.csv
# each one holding "groupedBy" flux distributions (one per row)
# Parameters
# - modelNames --> loaded models to sample
# - modelsDict --> dictionary containing the models
# - modelReactionsDict --> dictionary containing the models reactions
# - executions --> number of repetition for each group
# - groupedBy --> number of samples in each group
# - samplesFolder --> target folder for all the samples
##############################################
def randomObjectiveFunction(modelNames, modelsDict, modelReactionsDict, executions, groupedBy, samplesFolder):

    for modelName in modelNames:

        createFolder(samplesFolder + modelName)

        path = samplesFolder + modelName + "/CBS3groupedBy" + str(groupedBy) + "/"

        createFolder(path)

        model = modelsDict[modelName]

        df_fva = cb.flux_analysis.flux_variability_analysis(model, processes=1, fraction_of_optimum=0).round(10).abs()

        reactions = modelReactionsDict[modelName]

        # Loop-invariant work, done once per model instead of once per
        # sample: resolve each reaction object and pair it with the larger
        # of its two absolute FVA bounds
        scaledReactions = [
            (model.reactions.get_by_id(reaction),
             np.max([df_fva.loc[reaction, "minimum"], df_fva.loc[reaction, "maximum"]]))
            for reaction in reactions
        ]

        for i in range(executions):
            df = pd.DataFrame(columns=reactions)
            logging.info("Creating " + str(i) + "-th file - " + modelName)
            for j in range(groupedBy):

                threshold = random.random()  # value between 0 and 1

                coefficients = {}

                # The order of the random draws (selection, then coefficient)
                # is kept as is, so seeded runs give the same objectives
                for reaction, val_max in scaledReactions:
                    coefficient = 0
                    if random.random() > threshold:
                        c = 2 * random.random() - 1  # coefficient between -1 and 1
                        if val_max != 0:  # only when the FVA bound is non-zero
                            coefficient = c / val_max  # divide by the FVA bound
                    coefficients[reaction] = coefficient

                if random.random() < 0.5:
                    sense = "maximize"
                else:
                    sense = "minimize"

                # Inside "with model" cobra reverts the objective on exit,
                # so the shared model stored in modelsDict is not left with
                # a random objective once the sampling is over
                with model:
                    model.objective = coefficients
                    df.loc[j] = model.optimize(objective_sense=sense).fluxes
            df.to_csv(path + str(i) + "_0_cbs3.csv")

In [None]:
# CBS3 run: 20 files per model, 1000 samples in each file
samplesFolder = "../../samples/"
groupedBy = 1000
executions = 20

randomObjectiveFunction(
    modelNames,
    modelsDict,
    modelReactionsDict,
    executions,
    groupedBy,
    samplesFolder,
)