# Libraries

In [1]:
import cobra as cb
import logging
logging.basicConfig(filename="log.txt"  , level=logging.INFO)
import os
import pandas as pd
import numpy as np
import statistics

# Set up

In [2]:
# Names of the models to analyse; loadModels matches each one against the
# file names (extension excluded) found in the models folder.
modelNames = ["ENGRO 1", "ENGRO 2"]

In [3]:
##############################################
# Create a folder if it doesn't already exist
# Missing parent folders are created as well
# Parameters
# - path --> new folder path
##############################################
def createFolder(path):
    if not os.path.exists(path):
        # makedirs also builds the intermediate folders, and exist_ok
        # tolerates the folder appearing between the check and the creation
        os.makedirs(path, exist_ok=True)

# Loading models

In [4]:
##############################################
# Load the models files (.xml)
# Parameters
# - modelNames --> list of model names that must
# match the file names (extension excluded)
# - modelFolder --> the folder containing the
# models files
# Returns
# - a dictionary mapping every model name to the
# object returned by cb.io.read_sbml_model
# Raises
# - FileNotFoundError --> no file is named after
# the model
# - ImportError --> files named after the model
# exist, but none of them is a .xml file
##############################################
def loadModels(modelNames, modelFolder):
    # The folder content is the same for every model: list it only once
    files = os.listdir(modelFolder)
    models = {}
    for modelName in modelNames:
        # Collect the extensions of ALL the files named after the model, so
        # that a sibling file (e.g. "<model>.json") listed before the .xml
        # one cannot hide it
        extensions = []
        for file in files:
            filename, extension = os.path.splitext(file)
            if filename == modelName:
                extensions.append(extension)
        if not extensions:
            raise FileNotFoundError('File not found')
        if ".xml" not in extensions:
            raise ImportError('Model file extension not supported')
        # os.path.join works whether or not modelFolder ends with a separator
        modelPath = os.path.join(modelFolder, modelName + ".xml")
        models[modelName] = cb.io.read_sbml_model(modelPath)
    return models

In [5]:
# Read every model from the models folder, then collect, for each model,
# the ids of its reactions in the order the model exposes them.
modelsDict = loadModels(modelNames, "../../models/")
modelReactionsDict = {
    name: [rxn.id for rxn in modelsDict[name].reactions]
    for name in modelNames
}

# Mean differences

In [6]:
# File names of the 20 CHRR sample files: "1000_<run>_chrr.csv"
namesChrr = ["{}_{}_chrr.csv".format(1000, run) for run in range(20)]
namesChrr

['1000_0_chrr.csv',
 '1000_1_chrr.csv',
 '1000_2_chrr.csv',
 '1000_3_chrr.csv',
 '1000_4_chrr.csv',
 '1000_5_chrr.csv',
 '1000_6_chrr.csv',
 '1000_7_chrr.csv',
 '1000_8_chrr.csv',
 '1000_9_chrr.csv',
 '1000_10_chrr.csv',
 '1000_11_chrr.csv',
 '1000_12_chrr.csv',
 '1000_13_chrr.csv',
 '1000_14_chrr.csv',
 '1000_15_chrr.csv',
 '1000_16_chrr.csv',
 '1000_17_chrr.csv',
 '1000_18_chrr.csv',
 '1000_19_chrr.csv']

In [7]:
# File names of the 20 CBS3 sample files: "<run>_0.csv"
namesCbs3 = ["{}_0.csv".format(run) for run in range(20)]
namesCbs3

['0_0.csv',
 '1_0.csv',
 '2_0.csv',
 '3_0.csv',
 '4_0.csv',
 '5_0.csv',
 '6_0.csv',
 '7_0.csv',
 '8_0.csv',
 '9_0.csv',
 '10_0.csv',
 '11_0.csv',
 '12_0.csv',
 '13_0.csv',
 '14_0.csv',
 '15_0.csv',
 '16_0.csv',
 '17_0.csv',
 '18_0.csv',
 '19_0.csv']

In [10]:
##############################################
# For every model and every one of the 20 executions,
# compute for each reaction the relative difference
# between the CBS3 and the CHRR sample means:
#   |meanCbs3 - meanChrr| / max(|meanCbs3|, |meanChrr|)
# (0 when both means are 0), and save one
# "meanDiff.csv" per model.
# Parameters
# - modelNames --> list of model names
# - modelsDict --> not used, kept so that existing
# calls keep working
# - modelReactionsDict --> dictionary mapping each
# model name to the list of its reaction ids
# - elementPath --> folder containing the samples
# (<elementPath>/<model>/CHRRThinning100/ and
# <elementPath>/<model>/CBS3groupedBy1000/)
# - resultPath --> folder where the results are
# saved (<resultPath>/<model>/meanDiff.csv)
# The sample file names are read from the global
# lists namesChrr and namesCbs3.
##############################################
def meanDiff(modelNames, modelsDict, modelReactionsDict, elementPath, resultPath):

    createFolder(resultPath)
    for modelName in modelNames:
        # Build the model folder with os.path.join, exactly as done for the
        # csv path below, so both agree even when resultPath has no trailing
        # separator
        modelResultPath = os.path.join(resultPath, modelName)
        createFolder(modelResultPath)

        reactions = modelReactionsDict[modelName]
        resultDf = pd.DataFrame(columns=['testName'] + list(reactions))
        nReactions = len(reactions)

        for nexec in range(0, 20):
            df_chrr = pd.read_csv(os.path.join(elementPath, modelName,
                                               "CHRR" + "Thinning" + str(100),
                                               namesChrr[nexec]), index_col=0)

            df_cbs3 = pd.read_csv(os.path.join(elementPath, modelName,
                                               "CBS3" + "groupedBy" + str(1000),
                                               namesCbs3[nexec]), index_col=0)

            res = [nexec]
            # NOTE(review): columns are matched by position, which assumes both
            # csv files list the reactions in the same order as
            # modelReactionsDict[modelName] - TODO confirm
            for h in range(nReactions):
                # statistics.mean is pure Python: compute each mean only once
                meanCbs3 = statistics.mean(df_cbs3.iloc[:, h])
                meanChrr = statistics.mean(df_chrr.iloc[:, h])
                if meanCbs3 == 0 and meanChrr == 0:
                    # Avoid 0/0 when the reaction has a zero mean in both samples
                    delta = 0
                else:
                    delta = abs(meanCbs3 - meanChrr) / max(abs(meanCbs3), abs(meanChrr))
                res.append(delta)
            resultDf.loc[nexec] = res
        resultDf.set_index('testName').to_csv(os.path.join(modelResultPath, "meanDiff.csv"))



In [11]:
# Compare the sample means: samples are read from ../../samples/ and the
# per-model results are written under ../../results/CHRR_CBS3/
samplesFolder = "../../samples/"
resultPath = "../../results/CHRR_CBS3/"
meanDiff(
    modelNames=modelNames,
    modelsDict=modelsDict,
    modelReactionsDict=modelReactionsDict,
    elementPath=samplesFolder,
    resultPath=resultPath,
)

# Variance differences

In [12]:
# File names of the 20 CHRR sample files: "1000_<run>_chrr.csv"
namesChrr = ["{}_{}_chrr.csv".format(1000, run) for run in range(20)]
namesChrr

['1000_0_chrr.csv',
 '1000_1_chrr.csv',
 '1000_2_chrr.csv',
 '1000_3_chrr.csv',
 '1000_4_chrr.csv',
 '1000_5_chrr.csv',
 '1000_6_chrr.csv',
 '1000_7_chrr.csv',
 '1000_8_chrr.csv',
 '1000_9_chrr.csv',
 '1000_10_chrr.csv',
 '1000_11_chrr.csv',
 '1000_12_chrr.csv',
 '1000_13_chrr.csv',
 '1000_14_chrr.csv',
 '1000_15_chrr.csv',
 '1000_16_chrr.csv',
 '1000_17_chrr.csv',
 '1000_18_chrr.csv',
 '1000_19_chrr.csv']

In [13]:
# File names of the 20 CBS3 sample files: "<run>_0.csv"
namesCbs3 = ["{}_0.csv".format(run) for run in range(20)]
namesCbs3

['0_0.csv',
 '1_0.csv',
 '2_0.csv',
 '3_0.csv',
 '4_0.csv',
 '5_0.csv',
 '6_0.csv',
 '7_0.csv',
 '8_0.csv',
 '9_0.csv',
 '10_0.csv',
 '11_0.csv',
 '12_0.csv',
 '13_0.csv',
 '14_0.csv',
 '15_0.csv',
 '16_0.csv',
 '17_0.csv',
 '18_0.csv',
 '19_0.csv']

In [16]:
##############################################
# For every model and every one of the 20 executions,
# compute for each reaction the relative difference
# between the CBS3 and the CHRR sample variances:
#   |varCbs3 - varChrr| / max(|varCbs3|, |varChrr|)
# (0 when both variances are 0), and save one
# "varianceDiff.csv" per model.
# Parameters
# - modelNames --> list of model names
# - modelsDict --> not used, kept so that existing
# calls keep working
# - modelReactionsDict --> dictionary mapping each
# model name to the list of its reaction ids
# - elementPath --> folder containing the samples
# (<elementPath>/<model>/CHRRThinning100/ and
# <elementPath>/<model>/CBS3groupedBy1000/)
# - resultPath --> folder where the results are
# saved (<resultPath>/<model>/varianceDiff.csv)
# The sample file names are read from the global
# lists namesChrr and namesCbs3.
##############################################
def varianceDiff(modelNames, modelsDict, modelReactionsDict, elementPath, resultPath):

    createFolder(resultPath)
    for modelName in modelNames:
        # Build the model folder with os.path.join, exactly as done for the
        # csv path below, so both agree even when resultPath has no trailing
        # separator
        modelResultPath = os.path.join(resultPath, modelName)
        createFolder(modelResultPath)

        reactions = modelReactionsDict[modelName]
        resultDf = pd.DataFrame(columns=['testName'] + list(reactions))
        nReactions = len(reactions)

        for nexec in range(0, 20):
            df_chrr = pd.read_csv(os.path.join(elementPath, modelName,
                                               "CHRR" + "Thinning" + str(100),
                                               namesChrr[nexec]), index_col=0)

            df_cbs3 = pd.read_csv(os.path.join(elementPath, modelName,
                                               "CBS3" + "groupedBy" + str(1000),
                                               namesCbs3[nexec]), index_col=0)

            res = [nexec]
            # NOTE(review): columns are matched by position, which assumes both
            # csv files list the reactions in the same order as
            # modelReactionsDict[modelName] - TODO confirm
            for h in range(nReactions):
                # statistics.variance is pure Python: compute each value only once
                varCbs3 = statistics.variance(df_cbs3.iloc[:, h])
                varChrr = statistics.variance(df_chrr.iloc[:, h])
                if varCbs3 == 0 and varChrr == 0:
                    # Avoid 0/0 when the reaction is constant in both samples
                    delta = 0
                else:
                    delta = abs(varCbs3 - varChrr) / max(abs(varCbs3), abs(varChrr))
                res.append(delta)
            resultDf.loc[nexec] = res
        resultDf.set_index('testName').to_csv(os.path.join(modelResultPath, "varianceDiff.csv"))



In [17]:
# Compare the sample variances: samples are read from ../../samples/ and the
# per-model results are written under ../../results/CHRR_CBS3/
samplesFolder = "../../samples/"
resultPath = "../../results/CHRR_CBS3/"
varianceDiff(
    modelNames=modelNames,
    modelsDict=modelsDict,
    modelReactionsDict=modelReactionsDict,
    elementPath=samplesFolder,
    resultPath=resultPath,
)