In [1]:
import qcportal as ptl
from openeye import oechem
from openforcefield.topology import Molecule, Topology
from openforcefield.typing.engines.smirnoff import ForceField
import cmiles
import time

client = ptl.FractalClient()
torsion_datasets = client.list_collections("TorsionDriveDataset")
datasets = []
for i in range(len(torsion_datasets)):
    datasets.append(torsion_datasets.index[i][1])
    
for dataset_name in datasets:
    count = 0
    while True:
        try:
            ds = client.get_collection("TorsionDriveDataset", dataset_name)
            ds.status("default", status="COMPLETE")
            break
        except:
            time.sleep(20)
            count += 1
            if count < 2:
                continue
            else:
                break
    
    params = []
    for i in range(len(ds.df)):
        dihedral_indices = ds.df.iloc[i, 0].dict()['keywords']['dihedrals'][0]
        smiles = ds.df.index[i]
        mapped_smiles = ds.get_entry(smiles).attributes['canonical_isomeric_explicit_hydrogen_mapped_smiles']
        molecule = Molecule.from_mapped_smiles(mapped_smiles)
        topology = Topology.from_molecules([molecule])

        # Let's label using the Parsley force field
        forcefield = ForceField('openff_unconstrained-1.2.1.offxml')

        # Run the molecule labeling
        molecule_force_list = forcefield.label_molecules(topology)
        
        # Print out a formatted description of the torsion parameters applied to this molecule
        for mol_idx, mol_forces in enumerate(molecule_force_list):
            for force_tag, force_dict in mol_forces.items():
                if force_tag == 'ProperTorsions':
                    for (atom_indices, parameter) in force_dict.items():
                        if(atom_indices == dihedral_indices):
                            count += 1
                            params.append(parameter.id)

    param_set = set(params)
    params = list(param_set)
    print(dataset_name, params)
    print('    ')



Fragment Stability Benchmark ['t69']
    
Fragmenter paper ['t96', 't112', 't69']
    
OpenFF DANCE 1 eMolecules t142 v1.0 []
    
OpenFF Fragmenter Validation 1.0 ['t96', 't101', 't112', 't69']
    
OpenFF Full TorsionDrive Benchmark 1 ['t96', 't61', 't1', 't48', 't20', 't43', 't118', 't128', 't69', 't51', 't17', 't47']
    
OpenFF Gen 2 Torsion Set 1 Roche ['t96', 't108', 't112', 't140', 't2', 't22', 't48', 't97', 't64', 't110', 't10', 't88', 't75', 't91', 't87', 't17', 't120', 't94', 't157', 't70', 't135', 't41', 't137', 't44', 't68', 't27', 't39', 't138', 't51', 't33', 't52', 't61', 't4', 't71', 't21', 't20', 't43', 't76', 't46', 't109', 't72', 't62', 't85', 't102', 't45', 't69', 't128a', 't29', 't142', 't9', 't111', 't86', 't98', 't1', 't24', 't3', 't136', 't13', 't5', 't113', 't59', 't130', 't18', 't19', 't47']
    
OpenFF Gen 2 Torsion Set 1 Roche 2 ['t96', 't108', 't112', 't140', 't2', 't22', 't48', 't97', 't64', 't110', 't10', 't88', 't75', 't91', 't87', 't17', 't120', 't94', 

OpenFF Primary TorsionDrive Benchmark 1 ['t96', 't20', 't43', 't69', 't47']
    
OpenFF Protein Fragments TorsionDrives v1.0 ['t61', 't1', 't2', 't22', 't23', 't69', 't17']
    
OpenFF Rowley Biaryl v1.0 ['t43', 't47', 't69']
    
OpenFF Substituted Phenyl Set 1 ['t96', 't43', 't97', 't72', 't74', 't69', 't47']
    
OpenFF Theory Benchmarking Set B3LYP-D3BJ DZVP v1.0 ['t96', 't152', 't64', 't88', 't126', 't91', 't17', 't124', 't41', 't138', 't51', 't52', 't114', 't61', 't74', 't69', 't151', 't141', 't98', 't1', 't13', 't65', 't5', 't113', 't47']
    
OpenFF Theory Benchmarking Set B3LYP-D3BJ def2-TZVP v1.0 ['t96', 't152', 't64', 't88', 't126', 't91', 't17', 't124', 't41', 't138', 't51', 't52', 't114', 't61', 't74', 't69', 't151', 't141', 't98', 't1', 't13', 't65', 't5', 't113', 't47']
    
OpenFF Theory Benchmarking Set B3LYP-D3BJ def2-TZVPD v1.0 ['t96', 't152', 't64', 't88', 't126', 't91', 't17', 't124', 't41', 't138', 't51', 't52', 't114', 't61', 't74', 't69', 't151', 't141', 't98', 

KeyError: 'canonical_isomeric_explicit_hydrogen_mapped_smiles'

In [2]:
datasets

['Fragment Stability Benchmark',
 'Fragmenter paper',
 'OpenFF DANCE 1 eMolecules t142 v1.0',
 'OpenFF Fragmenter Validation 1.0',
 'OpenFF Full TorsionDrive Benchmark 1',
 'OpenFF Gen 2 Torsion Set 1 Roche',
 'OpenFF Gen 2 Torsion Set 1 Roche 2',
 'OpenFF Gen 2 Torsion Set 2 Coverage',
 'OpenFF Gen 2 Torsion Set 2 Coverage 2',
 'OpenFF Gen 2 Torsion Set 3 Pfizer Discrepancy',
 'OpenFF Gen 2 Torsion Set 3 Pfizer Discrepancy 2',
 'OpenFF Gen 2 Torsion Set 4 eMolecules Discrepancy',
 'OpenFF Gen 2 Torsion Set 4 eMolecules Discrepancy 2',
 'OpenFF Gen 2 Torsion Set 5 Bayer',
 'OpenFF Gen 2 Torsion Set 5 Bayer 2',
 'OpenFF Gen 2 Torsion Set 6 Supplemental',
 'OpenFF Gen 2 Torsion Set 6 Supplemental 2',
 'OpenFF Group1 Torsions',
 'OpenFF Group1 Torsions 2',
 'OpenFF Group1 Torsions 3',
 'OpenFF Primary Benchmark 1 Torsion Set',
 'OpenFF Primary Benchmark 2 Torsion Set',
 'OpenFF Primary TorsionDrive Benchmark 1',
 'OpenFF Protein Fragments TorsionDrives v1.0',
 'OpenFF Rowley Biaryl v1.0',