In [1]:
import qcportal as ptl
from openeye import oechem
from openforcefield.topology import Molecule, Topology
from openforcefield.typing.engines.smirnoff import ForceField
import cmiles
import time
import collections

client = ptl.FractalClient()
torsion_datasets = client.list_collections("TorsionDriveDataset")
datasets = []
for i in range(len(torsion_datasets)):
    datasets.append(torsion_datasets.index[i][1])
datasets



['Fragment Stability Benchmark',
 'Fragmenter paper',
 'OpenFF DANCE 1 eMolecules t142 v1.0',
 'OpenFF Fragmenter Validation 1.0',
 'OpenFF Full TorsionDrive Benchmark 1',
 'OpenFF Gen 2 Torsion Set 1 Roche',
 'OpenFF Gen 2 Torsion Set 1 Roche 2',
 'OpenFF Gen 2 Torsion Set 2 Coverage',
 'OpenFF Gen 2 Torsion Set 2 Coverage 2',
 'OpenFF Gen 2 Torsion Set 3 Pfizer Discrepancy',
 'OpenFF Gen 2 Torsion Set 3 Pfizer Discrepancy 2',
 'OpenFF Gen 2 Torsion Set 4 eMolecules Discrepancy',
 'OpenFF Gen 2 Torsion Set 4 eMolecules Discrepancy 2',
 'OpenFF Gen 2 Torsion Set 5 Bayer',
 'OpenFF Gen 2 Torsion Set 5 Bayer 2',
 'OpenFF Gen 2 Torsion Set 6 Supplemental',
 'OpenFF Gen 2 Torsion Set 6 Supplemental 2',
 'OpenFF Group1 Torsions',
 'OpenFF Group1 Torsions 2',
 'OpenFF Group1 Torsions 3',
 'OpenFF Primary Benchmark 1 Torsion Set',
 'OpenFF Primary Benchmark 2 Torsion Set',
 'OpenFF Primary TorsionDrive Benchmark 1',
 'OpenFF Protein Fragments TorsionDrives v1.0',
 'OpenFF Rowley Biaryl v1.0',

In [2]:
def get_coverage_report(datasets):
    for dataset_name in datasets:
        count = 0
        while True:
            try:
                ds = client.get_collection("TorsionDriveDataset", dataset_name)
                ds.status("default", status="COMPLETE")
                break
            except:
                time.sleep(20)
                count += 1
                if count < 2:
                    continue
                else:
                    break

        params = []
        for index in ds.df.index:
            # get the dihedral indices
            dihedral_indices = ds.df.loc[index].default.keywords.dihedrals[0]
            # build the molecule from the mapped smiles 
            molecule = Molecule.from_qcschema(ds.get_entry(index))
            topology = Topology.from_molecules([molecule])

            # Let's label using the Parsley force field
            forcefield = ForceField('openff_unconstrained-1.3.0.offxml')

            # Run the molecule labeling
            molecule_force_list = forcefield.label_molecules(topology)

            # Print out a formatted description of the torsion parameters applied to this molecule
            for mol_idx, mol_forces in enumerate(molecule_force_list):
                for force_tag, force_dict in mol_forces.items():
                    if force_tag == 'ProperTorsions':
                        for (atom_indices, parameter) in force_dict.items():
                            if(atom_indices == dihedral_indices):
                                count += 1
                                params.append(parameter.id)
                                
        counter = collections.Counter(params)
        print(dataset_name, counter)
        print(" ")
    return counter

In [3]:
torsion_coverage = get_coverage_report(datasets)

Fragment Stability Benchmark Counter({'t69': 86})
 
Fragmenter paper Counter({'t69a': 2, 't96': 1, 't70d': 1, 't69': 1, 't112': 1})
 
OpenFF DANCE 1 eMolecules t142 v1.0 Counter()
 
OpenFF Fragmenter Validation 1.0 Counter({'t70d': 5, 't96': 3, 't69a': 3, 't112': 3, 't69': 2, 't101': 2})
 
OpenFF Full TorsionDrive Benchmark 1 Counter({'t47': 21, 't69': 14, 't17': 13, 't1': 10, 't96': 7, 't51': 6, 't48': 3, 't43': 3, 't61': 2, 't20': 2, 't70b': 1, 't118': 1, 't128': 1})
 
OpenFF Gen 2 Torsion Set 1 Roche Counter({'t1': 1, 't2': 1, 't3': 1, 't4': 1, 't5': 1, 't9': 1, 't10': 1, 't13': 1, 't17': 1, 't20': 1, 't18': 1, 't19': 1, 't21': 1, 't22': 1, 't24': 1, 't27': 1, 't29': 1, 't33': 1, 't39': 1, 't41': 1, 't43': 1, 't44': 1, 't45': 1, 't46': 1, 't47': 1, 't48': 1, 't51': 1, 't52': 1, 't59': 1, 't61': 1, 't62': 1, 't64': 1, 't69a': 1, 't69': 1, 't70c': 1, 't71': 1, 't72': 1, 't75': 1, 't76': 1, 't85': 1, 't86': 1, 't87': 1, 't88': 1, 't91': 1, 't94': 1, 't96': 1, 't97': 1, 't98': 1, 't102'

OpenFF Gen 2 Torsion Set 5 Bayer 2 Counter({'t71': 7, 't19': 5, 't26': 5, 't84': 5, 't96': 5, 't97': 5, 't157': 5, 't4': 4, 't44': 4, 't134': 4, 't156': 4, 't1': 3, 't2': 3, 't9': 3, 't17': 3, 't20': 3, 't22': 3, 't27': 3, 't29': 3, 't52': 3, 't55': 3, 't59': 3, 't61': 3, 't69a': 3, 't70c': 3, 't86': 3, 't87': 3, 't98': 3, 't99': 3, 't108': 3, 't110': 3, 't112': 3, 't141': 3, 't142': 3, 't3': 2, 't5': 2, 't10': 2, 't13': 2, 't15': 2, 't23': 2, 't24': 2, 't25': 2, 't28': 2, 't32': 2, 't38': 2, 't43': 2, 't45': 2, 't46': 2, 't47': 2, 't51': 2, 't56': 2, 't64': 2, 't75': 2, 't76': 2, 't85': 2, 't88': 2, 't101': 2, 't102': 2, 't106': 2, 't109': 2, 't113': 2, 't118': 2, 't120': 2, 't125': 2, 't127': 2, 't130': 2, 't135': 2, 't136': 2, 't138': 2, 't139': 2, 't148': 2, 't14': 1, 't18': 1, 't21': 1, 't30': 1, 't31': 1, 't34': 1, 't36': 1, 't39': 1, 't40': 1, 't41': 1, 't48': 1, 't51c': 1, 't62': 1, 't63': 1, 't69': 1, 't73': 1, 't77': 1, 't90': 1, 't107': 1, 't126': 1, 't137': 1, 't140': 1, 't

KeyError: 'The record must contain the hydrogen mapped smiles to be safely made from the archive.'