In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
import sys
# Put a local qcdb checkout on the module search path (at index 1) so that the
# `import qcdb` below can resolve from that directory.
# NOTE(review): machine-specific absolute path -- adjust for your environment.
sys.path.insert(1, '/Users/loriab/linux/qcdb')
import qcdb

In [None]:
# Change the kernel's working directory. Later cells open 'databases/...' and
# 'molecules/...' with relative paths, so those resolve against this directory.
# NOTE(review): machine-specific absolute path -- adjust for your environment.
%cd /Users/loriab/linux/mongo_qcdb/DF_S22_LAB/

In [None]:
# Load the S22 database through qcdb (with loadfrompickle=True) and list the
# projects it reports as available.
s22 = qcdb.Database('s22', loadfrompickle=True)
# Single-argument call form: prints identically under the Python 2 print
# statement and is also valid Python 3, matching the print(...) usage elsewhere
# in this notebook.
print(s22.available_projects())
# Optional loading of per-project QC data, currently disabled.
# NOTE(review): the loop below refers to `bfdb`, presumably left over from
# another notebook; substitute `s22` before re-enabling -- confirm.
#for pj in bfdb.available_projects():
#    bfdb.load_qcdata_byproject(pj)
#s22.load_qcdata_byproject('saptone')

In [None]:
import json
import collections

In [None]:
import hashlib

def mongoqcdb_hasher(obj):
    """Return a SHA-1 hex digest identifying QCDb object *obj*.

    The object is first converted with ``mongoqcdb_translator``. For a
    ``qcdb.dbwrap.Reagent`` the JSON serializations of a fixed, ordered list
    of translated fields are concatenated (no separator) and hashed. For any
    other object no fields are selected, so the digest is that of the empty
    string.

    Hash fields for databases (``name``) and pages (``molecule``, ``method``)
    are sketched in the original notes but not wired in yet.

    Returns:
        str: 40-character hexadecimal SHA-1 digest.
    """
    if isinstance(obj, qcdb.dbwrap.Reagent):
        fields_to_hash = [
            "symbols", "masses", "name", "charge", "multiplicity",
            "real", "geometry", "fragments", "fragment_charges",
            "fragment_multiplicities",
        ]
    else:
        fields_to_hash = []

    translated = mongoqcdb_translator(obj)

    # Field order is significant: it determines the byte string being hashed.
    serialized = ''.join([json.dumps(translated[key]) for key in fields_to_hash])
    return hashlib.sha1(serialized.encode('utf-8')).hexdigest()

In [None]:
def mongoqcdb_translator(obj):
    """Translate member data of QCDb object *obj* into fields for MongoQCDb.

    Three QCDb types are handled; any other object yields an empty dict.

    * ``qcdb.dbwrap.Reagent``: ``name``, ``comment`` (from ``tagl``), total
      ``charge`` and ``multiplicity``, per-atom lists ``symbols``, ``masses``,
      ``geometry`` and ``real``, and per-fragment lists ``fragments`` (explicit
      0-indexed atom indices), ``fragment_charges`` and
      ``fragment_multiplicities``. All are read from a ``qcdb.Molecule`` built
      from ``obj.mol``.
    * ``qcdb.dbwrap.Reaction``: ``name``, ``comment``, ``stoichiometry``
      (per mode, an ordered mapping of reagent hash -> coefficient),
      ``reaction_results`` (sorted keys of ``obj.data``) and ``benchmark``.
    * ``qcdb.dbwrap.WrappedDatabase``: ``name`` (from ``dbse``), ``comment``
      and ``reactions`` (reaction key -> JSON string of the reaction).

    Args:
        obj: QCDb object to translate.

    Returns:
        dict: Translated fields, or ``{}`` for an unsupported type.
    """
    dat = {}
    if isinstance(obj, qcdb.dbwrap.Reagent):
        dat['name'] = obj.name
        dat['comment'] = obj.tagl
        omol = qcdb.Molecule(obj.mol)
        omol.update_geometry()
        dat['charge'] = omol.molecular_charge()
        dat['multiplicity'] = omol.multiplicity()
        dat['symbols'] = []
        dat['masses'] = []
        dat['real'] = []
        dat['geometry'] = []
        dat['fragments'] = []
        dat['fragment_charges'] = []
        dat['fragment_multiplicities'] = []
        for iat in range(omol.natom()):
            dat['symbols'].append(omol.symbol(iat))
            dat['masses'].append(omol.mass(iat))
            dat['geometry'].append(omol.xyz(iat))
            # An atom counts as "real" when Z is truthy (non-zero);
            # presumably Z == 0 marks ghost/dummy atoms -- TODO confirm.
            dat['real'].append(bool(omol.Z(iat)))
        for ifr in range(omol.nfragments()):  # 0-indexed
            # omol.fragments holds inclusive [first, last] atom-index pairs;
            # expand each one to the explicit list of atom indices.
            first, last = omol.fragments[ifr][0], omol.fragments[ifr][1]
            dat['fragments'].append(list(range(first, last + 1)))
            dat['fragment_charges'].append(omol.fragment_charges[ifr])
            dat['fragment_multiplicities'].append(omol.fragment_multiplicities[ifr])

    elif isinstance(obj, qcdb.dbwrap.Reaction):
        dat['name'] = obj.name
        dat['comment'] = obj.tagl
        dat['stoichiometry'] = {}
        # .items() rather than .iteritems(): same pairs on Python 2 and also
        # available on Python 3, where .iteritems() no longer exists.
        for mode, rxnm in obj.rxnm.items():
            dat['stoichiometry'][mode] = collections.OrderedDict()
            for rgt, coeff in rxnm.items():
                # Reagents are referenced by their content hash, not by name.
                rgthash = mongoqcdb_hasher(rgt)
                dat['stoichiometry'][mode][rgthash] = coeff
        dat['reaction_results'] = sorted(obj.data.keys())  # really methods, not data or mc or something?
        #for label, datum in sorted(self.data.iteritems()):
        dat['benchmark'] = obj.benchmark

    elif isinstance(obj, qcdb.dbwrap.WrappedDatabase):
        dat['name'] = obj.dbse
        dat['comment'] = obj.tagl
        dat['reactions'] = {}
#             dat['reagents'] = {}
#             dat['subsets'] = {}
#             dat['orderedsubsets'] = {}
        for rxn in obj.hrxn:
            # NOTE(review): each reaction is stored as an already-encoded JSON
            # *string*, so dumping this dict again escapes it a second time in
            # the output file. Presumably a nested object was intended --
            # confirm before changing, since it alters the output format.
            dat['reactions'][rxn] = json.dumps(obj.hrxn[rxn], cls=MongoQCDbEncoder)

    return dat


class MongoQCDbEncoder(json.JSONEncoder):
    """JSON encoder that serializes QCDb objects via ``mongoqcdb_translator``.

    ``Reagent``, ``Reaction`` and ``WrappedDatabase`` instances from
    ``qcdb.dbwrap`` are replaced by their translated dicts; every other
    unserializable object is deferred to ``json.JSONEncoder.default``.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for *obj*.

        A one-line report (class name, object name, hash) is printed for each
        QCDb object encoded. Only reagents are hashed; other QCDb objects are
        reported as ``'unhashed'``.
        """
        qcdb_types = (qcdb.dbwrap.Reagent,
                      qcdb.dbwrap.Reaction,
                      qcdb.dbwrap.WrappedDatabase)
        if not isinstance(obj, qcdb_types):
            # Not one of ours: let the base encoder handle (or reject) it.
            return json.JSONEncoder.default(self, obj)

        translated = mongoqcdb_translator(obj)

        # Databases are labelled by `dbse`; reagents and reactions by `name`.
        label = obj.dbse if isinstance(obj, qcdb.dbwrap.WrappedDatabase) else obj.name
        digest = mongoqcdb_hasher(obj) if isinstance(obj, qcdb.dbwrap.Reagent) else 'unhashed'

        print("""\nJSONEncoded {} {} as {}""".format(obj.__class__.__name__, label, digest))
        return translated

In [None]:
# Serialize the 'S22' entry of s22.dbdict to JSON. MongoQCDbEncoder converts the
# QCDb objects it encounters through mongoqcdb_translator.
# NOTE(review): the relative 'databases/' directory presumably has to exist
# already -- opening a file for writing does not create parent directories.
with open('databases/S22_DB.json', 'w') as handle:
    json.dump(s22.dbdict['S22'], handle, indent=4, cls=MongoQCDbEncoder)

In [None]:
# print json.dumps(s22.dbdict['S22'], cls=MongoQCDbEncoder)
# print json.dumps(s22.hrgt['S22-16-monoB-CP'], cls=MongoQCDbEncoder)
# print json.dumps(s22.hrgt['S22-16-monoB-unCP'], cls=MongoQCDbEncoder)

In [None]:
# Write one JSON file per reagent, named after the reagent's content hash.
# NOTE(review): the relative 'molecules/' directory presumably has to exist
# already -- opening a file for writing does not create parent directories.
# .items() rather than .iteritems(): same pairs on Python 2, and also available
# on Python 3.
for rgt, orgt in s22.hrgt.items():
    hrgt = mongoqcdb_hasher(orgt)
    # Same "<name> <hash>" output as the Python 2 print statement, but in a
    # form that is also valid on Python 3.
    print('{} {}'.format(rgt, hrgt))
    with open('molecules/{}.json'.format(hrgt), 'w') as handle:
        # `orgt` is the value already bound for this key; no second lookup needed.
        json.dump(orgt, handle, indent=4, cls=MongoQCDbEncoder)