### KEGG

In [77]:
import os
# Make sure the on-disk cache directory for the selected database exists.
cache = '.'   # root directory that holds one sub-directory per database
db = 'META'   # database identifier, also used as the sub-directory name
# makedirs(exist_ok=True) is idempotent: it avoids the check-then-create race
# and also creates `cache` itself when it does not exist yet.
os.makedirs(os.path.join(cache, db), exist_ok=True)

### BioCyc

In [135]:
import requests
import os
from enum import Enum

class BiocycAPI:
    """Minimal client for the BioCyc web services (``getxml`` and ``xmlquery``).

    Defined before ``BiocycAPICached`` so that the subclass statement can
    resolve its base class when the cell is run on a fresh kernel.
    """

    def __init__(self, db='META', email=None, password=None):
        """
        @param db: database identifier prefixed to frame ids and used in queries
        @param email: account e-mail; a login is attempted only when both
                      ``email`` and ``password`` are non-empty
        @param password: account password
        """
        self.db = db
        self.end_point = 'https://websvc.biocyc.org'
        self.session = None
        if email and password:
            self.session = requests.Session()
            # NOTE(review): the login response is not inspected, so a failed
            # login goes unnoticed here.
            self.session.post(f'{self.end_point}/credentials/login/',
                              data={'email': email, 'password': password})

    def get_session(self):
        """Return the logged-in session, or the ``requests`` module itself
        when no login was performed (both expose ``.get``)."""
        if self.session is not None:
            return self.session
        return requests

    def fetch(self, frame_id, detail='high'):
        """Request the XML of one frame and return the HTTP response.

        @param frame_id: frame identifier within ``self.db``
        @param detail: value forwarded as the ``detail`` URL parameter
        """
        return self.get_session().get(
            f'{self.end_point}/getxml?id={self.db}:{frame_id}&detail={detail}')

    def query(self, biocyc_query: 'BiocycQuery', detail='none'):
        """Run a "all instances of a class" query and return their frame ids.

        @param biocyc_query: enum member; its ``name`` is placed in the query
                             expression and its ``value`` is the XML tag whose
                             elements are collected
        @param detail: [none|low|full]
        @return: set with the ``frameid`` attribute of every matching element
        """
        q = f'[x:x<-{self.db}^^{biocyc_query.name}]'
        resp = self.get_session().get(f'{self.end_point}/xmlquery?query={q}&detail={detail}')
        elements_by_tag = _to_list(resp)
        return {entry[0]['frameid'] for entry in elements_by_tag[biocyc_query.value]}


class _CachedContent(bytes):
    """Cached XML payload.

    Behaves exactly like ``bytes`` (so existing callers keep working) and also
    exposes ``.content`` so it can be used wherever an HTTP response's body is
    read through that attribute.
    """

    @property
    def content(self):
        return bytes(self)


class BiocycAPICached(BiocycAPI):
    """``BiocycAPI`` with a file-system cache.

    Layout below ``<cache>/<db>/``:
      * ``<object_type>/<frame_id>.xml`` - raw XML of fetched frames
      * ``query/<query name>.txt``       - one frame id per line
    """

    def __init__(self, cache, db='META', email=None, password=None):
        """
        @param cache: root directory of the cache (created on first write)
        @param db, email, password: forwarded to ``BiocycAPI.__init__``
        """
        self.cache = cache
        super().__init__(db, email, password)

    def _db_dir(self):
        # Directory holding everything cached for the current database.
        return f'{self.cache}/{self.db}'

    def _query_path(self, biocyc_query):
        # File holding the cached id list of one query.
        return f'{self._db_dir()}/query/{biocyc_query.name}.txt'

    def cache_fetch(self, frame_id):
        """Return the cached XML bytes of ``frame_id``, or None when absent.

        The object type is not known at lookup time, so every sub-directory
        of the database directory is probed for ``<frame_id>.xml``.
        """
        db_dir = self._db_dir()
        if not os.path.isdir(db_dir):
            return None

        frame_file = f'{frame_id}.xml'
        for entry in os.listdir(db_dir):
            candidate = f'{db_dir}/{entry}/{frame_file}'
            # Probe the path directly instead of listing every directory.
            if os.path.isfile(candidate):
                with open(candidate, 'rb') as fh:
                    return fh.read()
        return None

    def cache_store(self, frame_id, data, object_type):
        """Write ``data`` (bytes) to ``<cache>/<db>/<object_type>/<frame_id>.xml``."""
        type_dir = f'{self._db_dir()}/{object_type}'
        # Creates any missing parent (including the cache root) in one call.
        os.makedirs(type_dir, exist_ok=True)
        with open(f'{type_dir}/{frame_id}.xml', 'wb') as fh:
            fh.write(data)

    def cache_query(self, biocyc_query: 'BiocycQuery'):
        """Return the cached id list of a query, or None when not cached."""
        path = self._query_path(biocyc_query)
        if not os.path.isfile(path):
            return None
        with open(path, 'r') as fh:
            # Whitespace split: assumes frame ids contain no whitespace.
            return fh.read().split()

    def save_query(self, biocyc_query: 'BiocycQuery', items):
        """Persist the ids in ``items`` (strings), one per line."""
        path = self._query_path(biocyc_query)
        os.makedirs(os.path.dirname(path), exist_ok=True)
        with open(path, 'w') as fh:
            for item in items:
                fh.write(item + '\n')

    def fetch(self, frame_id, detail='high'):
        """Return the XML of ``frame_id``, using the cache when possible.

        On a cache hit the stored bytes are returned (as a ``bytes`` subclass
        that also has ``.content``); on a miss the HTTP response is returned
        after its body has been stored. Either way ``result.content`` is the
        raw XML. The cache is not keyed by ``detail``: a hit returns whatever
        was stored first.
        """
        data = self.cache_fetch(frame_id)
        if data:
            return _CachedContent(data)

        resp = super().fetch(frame_id, detail)
        object_type = _detect_class(resp)
        self.cache_store(frame_id, resp.content, object_type)
        return resp

    def query(self, biocyc_query: 'BiocycQuery', detail='none'):
        """Return the set of frame ids for ``biocyc_query``.

        The remote service is queried only when no cached list exists; the
        result is a set on both paths, matching ``BiocycAPI.query``.
        """
        items = self.cache_query(biocyc_query)
        if items is None:
            print('loading!!')
            items = super().query(biocyc_query, detail)
            self.save_query(biocyc_query, items)

        return set(items)

"""
06/20/2023
BiocycQuery.Pathways 4205
BiocycQuery.Proteins 24391
BiocycQuery.Reactions 20552
BiocycQuery.Compounds 24705
BiocycQuery.Genes 15514
BiocycQuery.Organisms 9165
"""
    
# Client with blank credentials (no login is attempted) caching under the
# current working directory.
api = BiocycAPICached(cache='.', email='', password='')

# Print the number of frame ids per query; the tuple order is the print order.
for q in (
    BiocycQuery.Pathways,
    BiocycQuery.Proteins,
    BiocycQuery.Reactions,
    BiocycQuery.Compounds,
    BiocycQuery.Genes,
    BiocycQuery.Organisms,
):
    res = api.query(q)
    print(q, len(res))

# Earlier experiments, kept for reference:
#api.fetch('PWY-7822')
#res = api.fetch('RXN-15479')

# Cell result: cached XML bytes of this frame, or None when it is not cached.
api.cache_fetch('RXN-15479')

BiocycQuery.Pathways 4205
BiocycQuery.Proteins 24391
BiocycQuery.Reactions 20552
BiocycQuery.Compounds 24705
BiocycQuery.Genes 15514
BiocycQuery.Organisms 9165


b'<?xml version="1.0" encoding="iso-8859-1"?>\n<ptools-xml ptools-version=\'27.0\' xml:base=\'http://BioCyc.org/getxml?META:RXN-15479\'><metadata><url>http://BioCyc.org/</url>\n<service_name>getxml</service_name>\n<query>META:RXN-15479</query>\n<num_results>1</num_results>\n<PGDB orgid=\'META\' version=\'27.0\'><species datatype=\'string\'>MetaCyc</species>\n</PGDB>\n</metadata>\n<Reaction ID=\'META:RXN-15479\' orgid=\'META\' frameid=\'RXN-15479\' detail=\'full\'><parent><Reaction resource=\'getxml?META:Electron-Transfer-Reactions\' orgid=\'META\' frameid=\'Electron-Transfer-Reactions\' class=\'true\'/></parent>\n<parent><Reaction resource=\'getxml?META:Protein-Modification-Reactions\' orgid=\'META\' frameid=\'Protein-Modification-Reactions\' class=\'true\'/></parent>\n<left><Protein resource=\'getxml?META:Oxidized-Plastocyanins\' orgid=\'META\' frameid=\'Oxidized-Plastocyanins\' class=\'true\'/><compartment><cco resource=\'getxml?META:CCO-IN\' orgid=\'META\' frameid=\'CCO-IN\'/></comp

In [113]:
# Frame ids of all pathways; `api` reads them from its query cache when present.
res = api.query(BiocycQuery.Pathways)

In [None]:
# Template for an authenticated request: log in once, then reuse the session.
# Replace the bracketed placeholders before running this cell.
s = requests.Session() # create session
# Post login credentials to session:
s.post('https://websvc.biocyc.org/credentials/login/', data={'email':'[email]', 'password':'[password]'})
# Issue web service request:
r = s.get('https://websvc.biocyc.org/[request]')

In [119]:
# Fetch one reaction frame; `api` consults its on-disk cache before the web service.
res = api.fetch('RXN-15479')

TypeError: cache_fetch() missing 1 required positional argument: 'object_type'

In [61]:
def _detect_class(resp):
    
    parser = ET.iterparse(BytesIO(resp.content), events=("end", "start"))
    for action, elem in parser:
        if action == "end" and elem.tag == "metadata":
            for action, elem in parser:
                if action == 'start':
                    return elem.tag

    raise Exception("unable to detect class")
# Quick check on the current `res`, which must expose the raw XML as `.content`.
_detect_class(res)

'Reaction'

In [136]:
#print(res.content.decode('utf-8'))

In [49]:
from enum import Enum
class BiocycQuery(Enum):
    """Object classes that can be listed through the BioCyc query API.

    The member *name* is inserted into the query expression and the member
    *value* is the XML tag under which the matching elements are collected.
    """
    Pathways = 'Pathway'
    Reactions = 'Reaction'
    Compounds = 'Compound'
    Proteins = 'Protein'
    Genes = 'Gene'
    Organisms = 'Organism'
# Show the XML tag associated with the Pathways query.
BiocycQuery.Pathways.value

'Pathway'

In [25]:
from io import BytesIO
import lxml.etree as ET

def _to_list(resp):
    res = {}
    
    parser = ET.iterparse(BytesIO(resp.content), events=("end", "start"))
    for action, elem in parser:
        if action == "end" and elem.tag == "metadata":
            for action, elem in parser:
                if action == 'start':
                    if elem.tag not in res:
                        res[elem.tag] = []
                    res[elem.tag].append([dict(elem.attrib), elem.text])
                elif action == "end" and elem.tag == "ptools-xml":
                    return res
    
    raise Exception("unexpected ending")

def _detect_class(xml_str):
    clazz = None
    return clazz

# NOTE(review): `_to_list` reads `res.content`, so `res` must be an HTTP
# response here, not the id collection returned by `api.query` - confirm
# which cell produced it.
l = _to_list(res)
# Number of <Pathway> elements recorded after the metadata block.
len(l['Pathway'])

4205

### NCBI Eutils

In [None]:
# BioCyc xmlquery URL template (reference only, not executable code):
# https://websvc.biocyc.org/xmlquery?query=[QUERY]&detail=[none|low|full]