In [2]:
import requests
import pprint
import sys
import string
import json
import io
import copy

import ase.io
import ase.calculators.singlepoint

# GraphQL endpoint of the Catalysis-Hub API; fetch() sends every query to this URL.
GRAPHQL = 'http://api.catalysis-hub.org/graphql'

def fetch(query, timeout=30):
    """Run a GraphQL query against the ``GRAPHQL`` endpoint and return its data.

    Args:
        query: GraphQL query text, sent as the ``query`` URL parameter of a
            GET request.
        timeout: seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on an unresponsive
            server.

    Returns:
        The value stored under the ``'data'`` key of the JSON response.

    Raises:
        requests.exceptions.RequestException: on connection problems,
            timeouts, or an HTTP error status.
        KeyError: if the JSON response has no ``'data'`` key.
    """
    response = requests.get(GRAPHQL, params={'query': query}, timeout=timeout)
    # Fail on 4xx/5xx here rather than with a confusing JSON/KeyError below.
    response.raise_for_status()
    return response.json()['data']


In [15]:
def reactions_from_dataset(pub_id, page_size=10, max_pages=1):
    """Collect the reactions of one publication from the GraphQL API.

    Results are requested page by page through ``fetch``, following the
    ``endCursor`` reported in ``pageInfo``.

    Args:
        pub_id: value used for the ``pubId`` filter of the ``reactions`` query.
        page_size: number of reactions requested per page (``first``).
        max_pages: upper bound on the number of pages requested. The default
            of 1 preserves the previous behaviour, which always stopped after
            the first page; pass ``None`` to keep paging until the server
            reports ``hasNextPage`` as false.

    Returns:
        A list with the ``node`` dict of every edge that was fetched.
    """
    import json  # local import keeps this cell runnable on its own

    query_template = """{{
      reactions(pubId: {pub_id}, first: {page_size}, after: {start_cursor}) {{
        totalCount
        pageInfo {{
          hasNextPage
          hasPreviousPage
          startCursor
          endCursor
        }}
        edges {{
          node {{
            Equation
            reactants
            products
            reactionEnergy
            reactionSystems {{
              name
              systems {{
                energy
                InputFile(format: "json")
              }}
            }}
          }}
        }}
      }}
    }}"""

    reactions = []
    has_next_page = True
    start_cursor = ''
    page = 0
    while has_next_page and (max_pages is None or page < max_pages):
        # json.dumps emits a double-quoted, escaped string literal, so quotes
        # or backslashes in the id/cursor cannot break out of the query.
        data = fetch(query_template.format(
            pub_id=json.dumps(str(pub_id)),
            page_size=page_size,
            start_cursor=json.dumps(str(start_cursor)),
        ))
        result = data['reactions']
        has_next_page = result['pageInfo']['hasNextPage']
        start_cursor = result['pageInfo']['endCursor']
        page += 1
        # Progress line: more pages?, next cursor, reactions requested so far, total.
        print(has_next_page, start_cursor, page_size * page, result['totalCount'])
        reactions.extend(edge['node'] for edge in result['edges'])

    return reactions

In [20]:
# Download the reactions of publication "MamunHighT2019", 100 per page.
# Needs network access to the API; without it the call raises ConnectionError.
raw_reactions = reactions_from_dataset("MamunHighT2019", page_size=100)

ConnectionError: HTTPConnectionPool(host='api.catalysis-hub.org', port=80): Max retries exceeded with url: /graphql?query=%7B%0A++++++reactions%28pubId%3A+%22MamunHighT2019%22%2C+first%3A+100%2C+after%3A+%22%22%29+%7B%0A++++++++totalCount%0A++++++++pageInfo+%7B%0A++++++++++hasNextPage%0A++++++++++hasPreviousPage%0A++++++++++startCursor%0A++++++++++endCursor+%0A++++++++%7D++%0A++++++++edges+%7B%0A++++++++++node+%7B%0A++++++++++++Equation%0A++++++++++++reactants%0A++++++++++++products%0A++++++++++++reactionEnergy%0A++++++++++++reactionSystems+%7B%0A++++++++++++++name%0A++++++++++++++systems+%7B%0A++++++++++++++++energy%0A++++++++++++++++InputFile%28format%3A+%22json%22%29%0A++++++++++++++%7D%0A++++++++++++%7D++%0A++++++++++%7D++%0A++++++++%7D++%0A++++++%7D++++%0A++++%7D (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x11c557ef0>: Failed to establish a new connection: [Errno 51] Network is unreachable'))

In [17]:
def aseify_reactions(reactions):
    """Turn each reaction's raw system records into ASE ``Atoms``, in place.

    On entry every reaction's ``'reactionSystems'`` is a list of dicts with a
    ``'name'`` and a ``'systems'`` dict holding ``'InputFile'`` (structure as
    JSON text) and ``'energy'``. On exit ``'reactionSystems'`` is a
    ``{name: atoms}`` dict, where each ``atoms`` was read with
    ``ase.io.read(..., format='json')`` and carries a
    ``SinglePointCalculator`` constructed with that energy.

    Reactions whose ``'reactionSystems'`` is already a dict are left
    untouched, so calling this twice on the same data is harmless.

    Args:
        reactions: list of reaction dicts; modified in place.

    Returns:
        None. Nothing is returned so that a notebook cell ending in this call
        does not print the whole data set.
    """
    for reaction in reactions:
        systems = reaction['reactionSystems']
        if isinstance(systems, dict):
            # Already in the {name: atoms} shape produced by an earlier call.
            continue

        atoms_by_name = {}
        for entry in systems:
            record = entry['systems']
            # ase.io.read takes a file-like object, so wrap the JSON text.
            with io.StringIO(record['InputFile']) as buffer:
                atoms = ase.io.read(buffer, format='json')
            # Assign via the `calc` attribute; Atoms.set_calculator is
            # deprecated in recent ASE releases.
            atoms.calc = ase.calculators.singlepoint.SinglePointCalculator(
                atoms,
                energy=record['energy'],
            )
            atoms_by_name[entry['name']] = atoms

        # Swap only after every system converted, so a failure part-way
        # leaves this reaction in its original form.
        reaction['reactionSystems'] = atoms_by_name
        
# aseify_reactions modifies its argument in place, so convert a deep copy and
# keep raw_reactions as the untouched API payload.
reactions = copy.deepcopy(raw_reactions)
aseify_reactions(reactions)

In [18]:
# Show the converted reactions (reactionSystems now maps name -> Atoms).
reactions

[{'Equation': '0.5N2(g) + * -> N*',
  'reactants': '{"star": 1, "N2gas": 0.5}',
  'products': '{"Nstar": 1}',
  'reactionEnergy': -2.810391181412342,
  'reactionSystems': {'bulk': Atoms(symbols='V3Sc', pbc=True, cell=[3.98821571776633, 3.98821571776633, 3.98821571776633], calculator=SinglePointCalculator(...)),
   'star': Atoms(symbols='V3ScV3ScV3Sc', pbc=True, cell=[[5.64018960086126, 0.0, 0.0], [-2.82009480043063, 4.88454665433577, 0.0], [0.0, 0.0, 24.6051932587667]], calculator=SinglePointCalculator(...)),
   'N2gas': Atoms(symbols='N2', pbc=True, cell=[19.9999985846621, 19.9999985846621, 21.1299785046969], calculator=SinglePointCalculator(...)),
   'Nstar': Atoms(symbols='VScV3ScV3ScV2N', pbc=True, cell=[[5.64018960086126, 0.0, 0.0], [2.82009480043063, 4.88454665433577, 0.0], [0.0, 0.0, 24.6051932587667]], calculator=SinglePointCalculator(...))}},
 {'Equation': 'H2O(g) - H2(g) + * -> O*',
  'reactants': '{"star": 1, "H2gas": -1.0, "H2Ogas": 1.0}',
  'products': '{"Ostar": 1}',
  'r

In [19]:
# Number of reactions that were fetched and converted.
len(reactions)


10