In [1]:
%reset -f

# Implementing KEGG API

1. REST: https://www.kegg.jp/kegg/rest/
2. API: https://www.kegg.jp/kegg/rest/keggapi.html
3. DB entry: https://www.kegg.jp/kegg/docs/dbentry.html
4. Weblinks: https://www.kegg.jp/kegg/docs/weblink.html


## Libraries Installed

1. conda install -c conda-forge notebook
2. conda install -c anaconda urllib3 
3. conda install pandas


### Info:

    KEGG:
    kegg             Kyoto Encyclopedia of Genes and Genomes
    kegg             Release 96.0+/12-14, Dec 20
                     Kanehisa Laboratories
                     pathway     758,081 entries
                     brite       264,435 entries
                     module          512 entries
                     orthology    23,916 entries
                     genome        7,256 entries
                     genes     33,082,417 entries
                     compound     18,749 entries
                     glycan       11,042 entries
                     reaction     11,467 entries
                     rclass        3,168 entries
                     enzyme        7,787 entries
                     network       1,442 entries
                     variant         441 entries
                     disease       2,456 entries
                     drug         11,370 entries
                     dgroup        2,298 entries
                     environ         864 entries
    -----------------------------------------------------------------------------

    pathway          KEGG Pathway Database
    path             Release 96.0+/12-15, Dec 20
                     Kanehisa Laboratories
                     758,081 entries

    linked db        module
                     ko
                     genome
                     <org>
                     compound
                     glycan
                     reaction
                     rclass
                     enzyme
                     network
                     disease
                     drug
                     pubmed
                     
    -----------------------------------------------------------------------------                     
                     
    compound         KEGG Compound Database
    cpd              Release 96.0+/12-15, Dec 20
                     Kanehisa Laboratories
                     18,749 entries

    linked db        pathway
                     brite
                     module
                     genome
                     glycan
                     reaction
                     enzyme
                     network
                     disease
                     drug
                     environ
                     pubchem
                     chebi
                     
    ----------------------------------------------------------------------------- 

    reaction         KEGG Reaction Database
    rn               Release 96.0+/12-15, Dec 20
                     Kanehisa Laboratories
                     11,467 entries

    linked db        pathway
                     brite
                     module
                     ko
                     compound
                     glycan
                     rclass
                     enzyme


In [3]:
import urllib3
import io
import pandas as pd

In [4]:
def parseResponse(cols, *args):
    keggUrl = 'http://rest.kegg.jp/'
    
    for arg in args:
        keggUrl += arg+'/'

    df = pd.DataFrame(columns=cols)
    http = urllib3.PoolManager()
    pathwayResponse = http.request('GET', keggUrl, preload_content=False)
    pathwayResponse.auto_close=False

    for line in io.TextIOWrapper(pathwayResponse):
        df = df.append(pd.Series(line.strip('\n').split('\t'),index = df.columns), ignore_index=True)
    return df


## LIST

In [3]:
# Pathway
cols = ['pathwayId','pathwayDesc']
pathwayDF = parseResponse(cols, 'list','pathway')
print('Pathways Count: '+str(len(pathwayDF)))
pathwayDF.to_csv('data/pathway.tsv', sep='\t', index=False)
pathwayDF.head()

Pathways Count: 541


Unnamed: 0,pathwayId,pathwayDesc
0,path:map00010,Glycolysis / Gluconeogenesis
1,path:map00020,Citrate cycle (TCA cycle)
2,path:map00030,Pentose phosphate pathway
3,path:map00040,Pentose and glucuronate interconversions
4,path:map00051,Fructose and mannose metabolism


In [7]:
# Compound
cols = ['compoundId','compoundDesc']
compoundDF = parseResponse(cols, 'list','cpd')
print('Compounds Count: '+str(len(compoundDF)))
compoundDF.to_csv('data/compound.tsv', sep='\t', index=False)
compoundDF.head()

Compounds Count: 18756


Unnamed: 0,compoundId,compoundDesc
0,cpd:C00001,H2O; Water
1,cpd:C00002,ATP; Adenosine 5'-triphosphate
2,cpd:C00003,NAD+; NAD; Nicotinamide adenine dinucleotide; ...
3,cpd:C00004,NADH; DPNH; Reduced nicotinamide adenine dinuc...
4,cpd:C00005,NADPH; TPNH; Reduced nicotinamide adenine dinu...


In [8]:
# Reaction
cols = ['reactionId','reactionDesc']
reactionDF = parseResponse(cols, 'list','rn')
print('Reactions Count:' + str(len(reactionDF)))
reactionDF.to_csv('data/reaction.tsv', sep='\t', index=False)
reactionDF.head()

Reactions Count:11474


Unnamed: 0,reactionId,reactionDesc
0,rn:R00001,polyphosphate polyphosphohydrolase; Polyphosph...
1,rn:R00002,Reduced ferredoxin:dinitrogen oxidoreductase (...
2,rn:R00004,diphosphate phosphohydrolase; pyrophosphate ph...
3,rn:R00005,urea-1-carboxylate amidohydrolase; Urea-1-carb...
4,rn:R00006,pyruvate:pyruvate acetaldehydetransferase (dec...


## LINK

In [9]:
# Reaction -> Pathway

cols = ['reactionId', 'pathwayId']
reactionPathwayLinkDF = parseResponse(cols, 'link','pathway','rn')
print('Reaction2Pathway Links:' + str(len(reactionPathwayLinkDF)))
reactionPathwayLinkDF.to_csv('data/reactionPathwayLink.tsv', sep='\t', index=False)
reactionPathwayLinkDF.head()

Reaction2Pathway Links:34908


Unnamed: 0,reactionId,pathwayId
0,rn:R00014,path:map00010
1,rn:R00014,path:rn00010
2,rn:R00199,path:map00010
3,rn:R00199,path:rn00010
4,rn:R00200,path:map00010


In [10]:
# Compound <-> Reaction

cols = ['compoundId', 'reactionId']
compoundReactionLinkDF = parseResponse(cols, 'link','rn', 'cpd')
print('Compound2Reaction Links:' + str(len(compoundReactionLinkDF)))
compoundReactionLinkDF.to_csv('data/compoundReactionLink.tsv', sep='\t', index=False)
compoundReactionLinkDF.head()

Compound2Reaction Links:48389


Unnamed: 0,compoundId,reactionId
0,cpd:C00001,rn:R00001
1,cpd:C00001,rn:R00002
2,cpd:C00001,rn:R00004
3,cpd:C00001,rn:R00005
4,cpd:C00001,rn:R00009


In [9]:
# Compound <-> Pathway

cols = ['compoundId', 'pathwayId']
compoundPathwayLinkDF = parseResponse(cols, 'link','pathway', 'cpd')
print('Compound2Pathway Links:' + str(len(compoundPathwayLinkDF)))
compoundPathwayLinkDF.to_csv('data/compoundPathwayLink.tsv', sep='\t', index=False)
compoundPathwayLinkDF.head()

Compound2Pathway Links:17355


Unnamed: 0,compoundId,pathwayId
0,cpd:C00022,path:map00010
1,cpd:C00024,path:map00010
2,cpd:C00031,path:map00010
3,cpd:C00033,path:map00010
4,cpd:C00036,path:map00010


In [6]:
# Reaction <-> Pathway
cols = ['reactionId', 'pathwayId']
reactionPathwayLinkDF = parseResponse(cols, 'link','pathway', 'rn')
print('Reaction2Pathway Links:' + str(len(reactionPathwayLinkDF)))
reactionPathwayLinkDF.to_csv('data/reactionPathwayLink.tsv', sep='\t', index=False)
reactionPathwayLinkDF.head()

Raction2Pathway Links:34908


Unnamed: 0,reactionId,pathwayId
0,rn:R00014,path:map00010
1,rn:R00014,path:rn00010
2,rn:R00199,path:map00010
3,rn:R00199,path:rn00010
4,rn:R00200,path:map00010


## Compound <=> Reaction <=> Pathway

In [12]:
cpdRnPath = compoundReactionLinkDF.merge(reactionPathwayLinkDF, on='reactionId', sort=True)
print('cpd <-> rn <-> path # of rows: '+ str(len(cpdRnPath)))
cpdRnPath.head()

cpd <-> rn <-> path # of rows: 150152


Unnamed: 0,compoundId,reactionId,pathwayId
0,cpd:C00001,rn:R00005,path:map00220
1,cpd:C00001,rn:R00005,path:rn00220
2,cpd:C00001,rn:R00005,path:map00791
3,cpd:C00001,rn:R00005,path:rn00791
4,cpd:C00001,rn:R00005,path:map01100


## GET

This gets back given DB entries. For e.g.:
http://rest.kegg.jp/get/rn:R00001+cpd:C00001+path:map00010


In [35]:
for row in cpdRnPath.iterrows():
    cpd = row[1]['compoundId']
    rn = row[1]['reactionId']
    path = row[1]['pathwayId']
    
    keggUrl = 'http://rest.kegg.jp/'+'get/'+cpd+'+'+rn+'+' + path
    
    http = urllib3.PoolManager()
    pathwayResponse = http.request('GET', keggUrl, preload_content=False)
    pathwayResponse.auto_close=False
    break;
count = 0
for line in io.TextIOWrapper(pathwayResponse):
    print(line)
    count += 1
    if count == 2:
        break;
        
print(keggUrl)

ENTRY       C00001                      Compound

NAME        H2O;

http://rest.kegg.jp/get/cpd:C00001+rn:R00005+path:map00220


In [22]:
path

'path:map00220'

## CONV

In [5]:
# Compound <-> CHEBI

cols = ['CHEBI', 'compoundId']
compoundChebiConvDF = parseResponse(cols, 'conv', 'chebi', 'cpd')
print('CompoundCHEBI Conversions:' + str(len(compoundChebiConvDF)))
compoundChebiConvDF.to_csv('data/compoundChebiConv.tsv', sep='\t', index=False)
compoundChebiConvDF.head()

CompoundCHEBI Conversions:16993


Unnamed: 0,CHEBI,compoundId
0,cpd:C00462,chebi:16042
1,cpd:C00742,chebi:17051
2,cpd:C08142,chebi:28741
3,cpd:C00698,chebi:17996
4,cpd:C07755,chebi:6636
