In [None]:
# pandas reads/writes the tab-separated rels files; numpy builds the coefficient vector in DeltaG.
import pandas as pd
import numpy as np
# ComponentContribution exposes the compound cache and predictor; Q_ builds unit-carrying quantities (used with "kJ/mol" in DeltaG).
from equilibrator_api import ComponentContribution, Q_
# Single shared instance: ThermoGen reads CC.ccache and CC.predictor from it.
CC = ComponentContribution()
# get_or_create_compound adds SMILES to the compound cache; create_compound is not used in the cells shown here.
from equilibrator_assets.generate_compound import create_compound, get_or_create_compound
# literal_eval parses the stringified Python lists stored in the 'Reagents' / 'Products' columns.
from ast import literal_eval
# RDKit converts SMILES to InChI and InChIKey (see inchikey).
from rdkit import Chem

In [None]:
def inchikey(Smiles):
    """Return the first layer of the InChIKey for a molecule given as SMILES.

    Only the first 14 characters of the key are kept, so stereochemical
    information is excluded from the result.

    Parameters
    ----------
    Smiles : str
        SMILES string of the molecule.

    Returns
    -------
    str
        The first 14 characters of the molecule's InChIKey.
    """
    # The key is derived from the InChI string rather than directly from the
    # molecule because older versions of rdkit.Chem do not provide MolToInchiKey.
    inchi_string = Chem.inchi.MolToInchi(Chem.MolFromSmiles(Smiles))
    full_key = Chem.inchi.InchiToInchiKey(inchi_string)
    return full_key[:14]

In [None]:
def DeltaG(reagents, products, mus, inchikeys):
    """Compute the standard Gibbs energy change of a single reaction.

    Parameters
    ----------
    reagents : list of str
        SMILES of the species consumed. A species listed more than once is
        counted once per occurrence (coefficient -1 each time).
    products : list of str
        SMILES of the species formed (coefficient +1 per occurrence).
    mus : sequence
        Thermo values for every compound of the compound cache, positionally
        aligned with ``inchikeys``; they are interpreted as kJ/mol.
    inchikeys : list of str
        First-layer InChIKeys (14 characters) of the cached compounds.

    Returns
    -------
    numpy.float64 or str
        The reaction energy (magnitude in kJ/mol) rounded to two decimals, or
        the string ``'NaN'`` when any species cannot be converted to an
        InChIKey or is absent from ``inchikeys``.
    """
    species = list(reagents) + list(products)
    # -1 for every consumed species, +1 for every formed one, in the same order as `species`.
    coefficients = np.array([-1.0] * len(reagents) + [1.0] * len(products))

    species_mus = []
    for smiles in species:
        try:
            key = inchikey(smiles)  # SMILES -> first-layer InChIKey
            species_mus.append(mus[inchikeys.index(key)])  # first matching cache entry
        except Exception:
            # Conversion failed or the compound has no thermo data in the cache.
            # Only Exception is caught so KeyboardInterrupt / SystemExit still propagate
            # instead of being silently recorded as a missing value.
            return 'NaN'

    reaction_mus = Q_(species_mus, "kJ/mol")
    # Reaction energy = dot product of the stoichiometric coefficients with the compound values.
    standard_dg = coefficients @ reaction_mus
    return standard_dg.magnitude.round(2)

In [None]:
def ThermoGen(filepath, name):
    """Add standard Gibbs reaction energy data to every reaction of a processed rels file.

    Parameters
    ----------
    filepath : str
        Path of the tab-separated input rels file. Its 'Reagents' and
        'Products' columns must hold Python list literals of SMILES strings.
    name : str
        Name of the network; used to build the output file name
        ``./G3RelsWithThermo/{name}RelsWithThermo.tsv``.

    Returns
    -------
    pandas.DataFrame
        The input table with an added 'Energy Change' column. Reactions whose
        energy could not be evaluated hold the string 'NaN'.

    Side effects
    ------------
    Prints progress checkpoints and the index of each reaction, creates the
    output directory if it is missing, and appends the table to the output file.
    """
    import os  # local import: only needed to make sure the output directory exists

    df = pd.read_csv(filepath, sep='\t')

    # Parse the stringified lists once per row; both the unique-compound pass and
    # the per-reaction pass below reuse the parsed values.
    reactions = [
        (literal_eval(reagents), literal_eval(products))
        for reagents, products in zip(df['Reagents'], df['Products'])
    ]

    # Unique compounds in first-seen order (reagents before products within a row).
    compounds = list(dict.fromkeys(
        smiles
        for reagents, products in reactions
        for side in (reagents, products)
        for smiles in side
    ))

    print('Checkpoint 1')

    # Create the compound cache entries using equilibrator-assets.
    compound_list = get_or_create_compound(CC.ccache, compounds, mol_format="smiles", bypass_chemaxon=True)
    # First-layer InChIKeys of the compounds returned by the cache, aligned with `mus`.
    inchikeys = [compound.inchi_key[:14] for compound in compound_list]

    print('Checkpoint 2')

    # Thermo value for each cached compound (first element of the prediction).
    mus = [CC.predictor.preprocess.get_compound_prediction(compound)[0] for compound in compound_list]

    print('Checkpoint 3')

    EnergyChanges = []
    for i, (reagents, products) in enumerate(reactions):
        print(i)
        try:
            EnergyChanges.append(DeltaG(reagents, products, mus, inchikeys))
        except Exception:
            # A failing reaction is recorded as missing rather than aborting the run;
            # KeyboardInterrupt / SystemExit are deliberately not caught.
            EnergyChanges.append('NaN')
    df['Energy Change'] = EnergyChanges

    output_path = f'./G3RelsWithThermo/{name}RelsWithThermo.tsv'
    # Without this, a missing directory makes to_csv fail only after all the work above is done.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    # NOTE(review): mode='a' appends, so re-running for the same name adds a second
    # header line and duplicate rows to the existing file. Kept unchanged because
    # downstream use is not visible here -- confirm whether overwriting was intended.
    df.to_csv(output_path, index=False, sep='\t', mode='a')
    return df

In [1]:
# %%time
# d = ThermoGen('./G3ProcessedRels/MaillardG3ProcessedRels.tsv', 'MaillardG3')