In [None]:
import copy
%run geneSearchFunctions.ipynb
import pandas as pd

In [None]:
%%capture
def getGeneProductAddresses(products):
    """Collect the "@resource" address of every gene product.

    `products` is either a single product dictionary or an iterable of
    product dictionaries. A list of addresses is returned in both cases,
    in the same order as the input.
    """
    # A lone product arrives as a bare dict rather than a one-element list
    if type(products) is dict:
        return [products["@resource"]]

    return [entry["@resource"] for entry in products]

%store -r gene
geneID = gene

# Getting gene dict
linkGene = "https://websvc.biocyc.org/getxml?id=ECOLI:" + geneID + "&detail=low"
treeGene = createTree(linkGene)
treeDictGene = etree_to_dict(treeGene)

# Getting addresses of all gene products
try: 
    products = treeDictGene["ptools-xml"]["Gene"]["product"]["Protein"]
except:
    print("Invalid GeneID or Gene has no products")

productAddresses = getGeneProductAddresses(products)

# Convert Gene Product Addresses into Dictionaries
monomerDicts = []

for address in productAddresses:
    try: 
        linkGeneProduct = "https://websvc.biocyc.org/" + address + "&detail=low"
        treeGeneProduct = createTree(linkGeneProduct)
        treeDictGeneProduct = etree_to_dict(treeGeneProduct)
        monomerDicts.append(treeDictGeneProduct)
    except:
        print("Gene product does not have a valid API Link/Page")

In [None]:
# Getting "complex addresses" if applicable

def _parentComplexAddresses(componentOf):
    """Return every "@resource" address found under a "component-of" node.

    Accepted shapes (the exact output of etree_to_dict is not visible here,
    so all plausible ones are handled):
      * {"Protein": {...}}            - one parent complex
      * {"Protein": [{...}, {...}]}   - several parents in one node
      * [{"Protein": ...}, ...]       - several "component-of" nodes
      * [{"@resource": ...}, ...]     - flat list of protein entries
    """
    entries = componentOf if isinstance(componentOf, list) else [componentOf]

    addresses = []
    for entry in entries:
        # Unwrap the "Protein" level when present; otherwise the entry itself
        # is treated as the protein record.
        proteins = entry["Protein"] if "Protein" in entry else entry
        if isinstance(proteins, dict):
            proteins = [proteins]
        for protein in proteins:
            addresses.append(protein["@resource"])
    return addresses


def getComplexDictionaries(monomerDicts, complexDicts):
    """Follow each protein up its "component-of" chain and collect the tops.

    A dictionary whose Protein node has no "component-of" key is appended to
    `complexDicts` unchanged. Otherwise every parent complex is fetched from
    BioCyc (low detail) and processed recursively.

    Parameters:
        monomerDicts: iterable of protein dictionaries, each exposing
            ["ptools-xml"]["Protein"].
        complexDicts: list that is appended to in place.

    Returns:
        None; results are delivered through `complexDicts`.
    """
    for dictionary in monomerDicts:
        stem = dictionary["ptools-xml"]["Protein"]

        if "component-of" not in stem:
            complexDicts.append(dictionary)
            continue

        parentDicts = []
        for complexAddress in _parentComplexAddresses(stem["component-of"]):
            link = "https://websvc.biocyc.org/" + complexAddress + "&detail=low"
            treeComplex = createTree(link)
            parentDicts.append(etree_to_dict(treeComplex))

        # The parents may themselves be components of larger complexes
        getComplexDictionaries(parentDicts, complexDicts)

    return

# Accumulator that getComplexDictionaries appends to in place (it returns nothing)
complexDicts = []

# Process every gene product dictionary, collecting the results in complexDicts
getComplexDictionaries(monomerDicts, complexDicts)     

In [None]:
# Reaction addresses: one entry per complex, either a list of "@resource"
# addresses or the sentinel string "NONE" when the complex catalyzes nothing

reactionAddresses = []

for complexDict in complexDicts:
    proteinNode = complexDict["ptools-xml"]["Protein"]

    if "catalyzes" in proteinNode:
        enzymaticReactions = proteinNode["catalyzes"]["Enzymatic-Reaction"]
        # A single enzymatic reaction arrives as a bare dict; wrap it so both
        # cases share one code path
        if type(enzymaticReactions) is dict:
            enzymaticReactions = [enzymaticReactions]
        reactionAddresses.append(
            [entry["reaction"]["Reaction"]["@resource"] for entry in enzymaticReactions]
        )
    else:
        reactionAddresses.append("NONE")

# Reaction dictionaries: same layout as reactionAddresses, with every address
# replaced by the dictionary built from its BioCyc page

reactionDicts = []

for complexAddresses in reactionAddresses:
    if complexAddresses == "NONE":
        reactionDicts.append("NONE")
        continue

    reactionDicts.append(
        [
            etree_to_dict(createTree("https://websvc.biocyc.org/" + reactionAddress))
            for reactionAddress in complexAddresses
        ]
    )

In [None]:
%%capture
geneReactionInfo = []

for x in range(len(complexDicts)):
    
    dictionary1 = complexDicts[x]
    
    geneReactions = {
        "reactionID": "",
        "stoichiometry": [],
        "reversible": "",
        "protein_complexes": "",
        "protein_monomers": [],
        "monomer_counts": [],
        "genes": [],
    } 
    
    
    dictionary2 = reactionDicts[x]
    
    if dictionary2 == "NONE":
        geneReactions["reactionID"] = "No Reaction Found"
        geneReactions["protein_complexes"] = getProteinComplex(dictionary1)
        geneReactions["genes"].append(geneID)
        
        components = dictionary1["ptools-xml"]["Protein"]
        
        if "component" in dictionary1:
            components = components["component"]
            if type(components) is dict:
                monomers.append(components["Protein"]["@frameid"])
            else:
                for protein in components:
                    monomers.append(protein["Protein"]["@frameid"])
                    
        copy = geneReactions.copy()
        geneReactionInfo.append(copy)  
        
    else:
        for dictionary2nest in dictionary2:
            geneReactions["reactionID"] = getReactionID(dictionary2nest)
            rxnName = geneReactions["reactionID"]
            %store -r reactionStore
            if rxnName not in reactionStore:
                reactionStore.add(rxnName)
                %store reactionStore
                geneReactions["protein_complexes"] = getProteinComplexes(dictionary2nest)
                easy = geneReactions["protein_complexes"]
                data = getMonomersCountsGenes(easy)
                geneReactions["protein_monomers"] = data[0]
                geneReactions["monomer_counts"] = data[1]
                geneReactions["genes"] = data[2]
                stoichiometryDict = {}
                getStoichiometrySubstrate(dictionary2nest, geneReactions, stoichiometryDict)
                getStoichiometryProduct(dictionary2nest, geneReactions, stoichiometryDict)
                geneReactions["reversible"] = isReversible(dictionary2nest)
                copy = geneReactions.copy()
                geneReactionInfo.append(copy)
            %store reactionStore

In [None]:
%%capture
%store -r geneData
df = pd.DataFrame(geneReactionInfo)
geneData = geneData.append(df, ignore_index=True)
%store geneData