In [None]:
import os
from ast import literal_eval

import numpy as np
import pandas as pd
from equilibrator_api import ComponentContribution, Q_
from equilibrator_assets.generate_compound import create_compound, get_or_create_compound

CC = ComponentContribution()

In [None]:
def DeltaG(reagents, products, mus, compounds):
    """Return the Gibbs energy change of a single reaction, rounded to 2 decimals.

    Parameters
    ----------
    reagents : sequence
        Compounds consumed by the reaction; each occurrence counts with a
        stoichiometric coefficient of -1.
    products : sequence
        Compounds formed by the reaction; each occurrence counts with a
        stoichiometric coefficient of +1.
    mus : sequence of float
        Per-compound energies, positionally aligned with ``compounds``.
        They are interpreted here as kJ/mol.
    compounds : list
        Compound identifiers; ``compounds[i]`` is the compound whose energy
        is ``mus[i]``.

    Returns
    -------
    The magnitude (unit stripped) of sum(coefficient * mu) over every
    reagent and product occurrence, rounded to two decimal places.

    Raises
    ------
    ValueError
        If a reagent or product is not present in ``compounds``.
    """
    # One entry per occurrence, so a compound listed twice contributes twice.
    species = list(reagents) + list(products)
    coefficients = np.array([-1.0] * len(reagents) + [1.0] * len(products))

    # list.index returns the first match; duplicates in `compounds` are assumed
    # to carry identical energies, so the first match is sufficient.
    species_mus = Q_([mus[compounds.index(s)] for s in species], "kJ/mol")

    # Dot product of the coefficient vector with the energy vector.
    reaction_dg = coefficients @ species_mus

    # `magnitude` is pint's public accessor for the unit-less value.
    return reaction_dg.magnitude.round(2)

In [None]:
def ThermoGen(filepath, name):
    """Compute the Gibbs energy change of every reaction in a processed rels file.

    Parameters
    ----------
    filepath : str
        Path to a tab-separated file with the columns ``Index``, ``Rule``,
        ``Reagents`` and ``Products``. The last two hold Python list literals
        of compound identifiers, which are handed to
        ``get_or_create_compound`` with ``mol_format="smiles"``.
    name : str
        Network name, used as the prefix of the output file name.

    Returns
    -------
    pandas.DataFrame
        Columns ``Index``, ``Reagents``, ``Products``, ``Rule`` and
        ``Energy Change`` (one row per reaction). The same table is appended
        to ``./G3RelsWithThermo/{name}RelsWithThermo.tsv``.

    Notes
    -----
    Reactions whose energy cannot be evaluated against the shared compound
    list are retried on their own, with a compound list built from just that
    reaction. An exception raised during such a retry is not caught.
    """
    df = pd.read_csv(filepath, sep='\t')
    indexes = list(df['Index'])
    rules = list(df['Rule'])

    # Parse each list literal exactly once; the parsed lists are reused for the
    # energy evaluation, the retries and the output table.
    Reagents = [literal_eval(cell) for cell in df['Reagents']]
    Products = [literal_eval(cell) for cell in df['Products']]

    # Unique compounds in first-seen order (reagents before products, row by
    # row). A dict keeps insertion order and gives O(1) membership checks.
    seen = {}
    for reagents, products in zip(Reagents, Products):
        seen.update(dict.fromkeys(reagents))
        seen.update(dict.fromkeys(products))
    compounds = list(seen)

    print('Checkpoint 1')

    # Look up each unique compound in the cache, creating it when missing.
    compound_list = get_or_create_compound(CC.ccache, compounds, mol_format="smiles", error_log=False)

    print('Checkpoint 2')

    # First element of each prediction is used as the compound's energy
    # (presumably the formation-energy estimate -- TODO confirm against the
    # equilibrator API).
    mus = [CC.predictor.preprocess.get_compound_prediction(c)[0] for c in compound_list]

    print('Checkpoint 3')

    EnergyChanges = []
    failed_rows = []
    for i, (reagents, products) in enumerate(zip(Reagents, Products)):
        print(i)
        try:
            EnergyChanges.append(DeltaG(reagents, products, mus, compounds))
        except Exception:
            # `Exception` (not a bare except) so that interrupting the kernel
            # still stops this long loop. The row is retried individually below.
            EnergyChanges.append('NaN')
            failed_rows.append(i)

    # Re-run compound creation and the energy evaluation for each failed
    # reaction in isolation.
    for i in failed_rows:
        error_reagents = Reagents[i]
        error_products = Products[i]
        error_compounds = list(error_reagents) + list(error_products)
        error_compound_list = get_or_create_compound(CC.ccache, error_compounds, mol_format="smiles", error_log=False)
        error_mus = [CC.predictor.preprocess.get_compound_prediction(c)[0] for c in error_compound_list]
        EnergyChanges[i] = DeltaG(error_reagents, error_products, error_mus, error_compounds)

    outputdata = {'Index': indexes, 'Reagents': Reagents, 'Products': Products, 'Rule': rules, 'Energy Change': EnergyChanges}
    outputdf = pd.DataFrame(outputdata)

    output_path = f'./G3RelsWithThermo/{name}RelsWithThermo.tsv'
    # Make sure the target directory exists so a long run is not lost at the
    # very last step.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    # NOTE(review): mode='a' appends, so calling this twice with the same name
    # writes a second header line and duplicate rows into the file -- confirm
    # whether overwriting (mode='w') is what is actually wanted.
    outputdf.to_csv(output_path, index=None, sep='\t', mode='a')
    return outputdf

In [1]:
# %%time
# a = ThermoGen('./G3ProcessedRels/GlucoseAmmG3ProcessedRels.tsv', 'GlucoseAmmG3')