In [1]:
import qcportal as ptl
import json
from collections import defaultdict

# Address of the QCArchive server queried throughout this notebook.
QCARCHIVE_ADDRESS = "https://api.qcarchive.molssi.org/"

# Open the connection (no credentials are passed) and print the client as a quick sanity check.
client = ptl.PortalClient(QCARCHIVE_ADDRESS)
print(client)

PortalClient(server_name='The MolSSI OpenFF QCFractal Server', address='https://api.qcarchive.molssi.org/', username='None')


In [None]:
# Specification names to pull from the TorsionDrive dataset; only the first one is used below.
specifications = ["MP2/heavy-aug-cc-pVTZ"]

# Fetch the TorsionDrive dataset by its name on the server.
torsiondrive_dataset_name = "OpenFF Theory Benchmarking Set v1.0"
ds = client.get_dataset("TorsionDrive", torsiondrive_dataset_name)

In [None]:
# Read the entry names to process; the JSON file is expected to hold them under the
# 'dataset_indices' key.
indices_path = './data/dataset_indices.json'
with open(indices_path) as indices_file:
    dataset_indices = json.load(indices_file)

In [None]:
# Gather, for every listed entry, the metadata and the per-angle results of its
# torsion-drive record, keyed by the entry's position in the index list.
mp2_data = defaultdict(dict)
for i, index in enumerate(dataset_indices['dataset_indices']):
    entry = ds.get_entry(index)
    rec = ds.get_record(index, specification_name=specifications[0])

    # Metadata describing the molecule and the scanned dihedral.
    first_molecule = entry.initial_molecules[0]
    mapped_smiles = entry.attributes['canonical_isomeric_explicit_hydrogen_mapped_smiles']
    dihedral_scanned = rec.dict()['specification']['keywords']['dihedrals']
    mol_charge = first_molecule.molecular_charge
    mol_multiplicity = first_molecule.molecular_multiplicity

    # One (angle, energy, geometry, dipole) row per grid point of the scan.
    final_energies = rec.final_energies
    grid_rows = []
    for grid_key, grid_energy in final_energies.items():
        angle = grid_key[0]
        optimization = rec.minimum_optimizations[(angle,)]
        grid_rows.append((
            angle,
            float(grid_energy),
            optimization.final_molecule.geometry.tolist(),
            # dipole is read from the last step of the optimization trajectory
            optimization.trajectory[-1].properties['scf dipole'],
        ))

    # Order the rows by angle, then split them back into parallel tuples.
    grid_rows.sort()
    angles, energies, final_geometries, dipoles = zip(*grid_rows)

    metadata = {
        'mapped_smiles': mapped_smiles,
        'mol_charge': mol_charge,
        'mol_multiplicity': mol_multiplicity,
        'dihedral scanned': dihedral_scanned,
    }
    mp2_data[str(i)] = {
        'metadata': metadata,
        'angles': angles,
        'final_energies': energies,
        'final_geometries': final_geometries,
        'dipoles': dipoles,
    }

In [None]:
# Write the collected torsion-drive data to a JSON file.
torsiondrive_output_path = './data/MP2_heavy-aug-cc-pVTZ_torsiondrive_data.json'
with open(torsiondrive_output_path, 'w') as torsiondrive_file:
    json.dump(mp2_data, torsiondrive_file)

In [2]:
# Fetch the SinglePoint dataset by its name on the server.
single_point_dataset_name = "OpenFF Theory Benchmarking Single Point Energies v1.0"
sd = client.get_dataset("SinglePoint", single_point_dataset_name)

In [4]:
# Collect the 'calcinfo_nbasis' property from items iterated for specification 'spec_6'.
nbasis = []
for x in sd.iterate_records(specification_names=['spec_6']):
    # x[2] is the element carrying `.properties` (presumably the record object — TODO confirm).
    nbasis.append(x[2].properties['calcinfo_nbasis'])
    # Stop after the first item: `nbasis` ends up with a single value, and `x` stays
    # bound to that item after the loop.
    break
# With only one value collected, min and max print the same number.
print(min(nbasis), max(nbasis))    

566 566


In [6]:
# Print the `stdout` attribute of `x[2]`. `x` is not defined in this cell; it must already
# be bound by a previously executed cell.
print(x[2].stdout)


    -----------------------------------------------------------------------
          Psi4: An Open-Source Ab Initio Electronic Structure Package
                               Psi4 1.4 release

                         Git: Rev {HEAD} 9485035 


    D. G. A. Smith, L. A. Burns, A. C. Simmonett, R. M. Parrish,
    M. C. Schieber, R. Galvelis, P. Kraus, H. Kruse, R. Di Remigio,
    A. Alenaizan, A. M. James, S. Lehtola, J. P. Misiewicz, M. Scheurer,
    R. A. Shaw, J. B. Schriber, Y. Xie, Z. L. Glick, D. A. Sirianni,
    J. S. O'Brien, J. M. Waldrop, A. Kumar, E. G. Hohenstein,
    B. P. Pritchard, B. R. Brooks, H. F. Schaefer III, A. Yu. Sokolov,
    K. Patkowski, A. E. DePrince III, U. Bozkaya, R. A. King,
    F. A. Evangelista, J. M. Turney, T. D. Crawford, C. D. Sherrill,
    J. Chem. Phys. 152(18) 184108 (2020). https://doi.org/10.1063/5.0006002

                            Additional Code Authors
    E. T. Seidl, C. L. Janssen, E. F. Valeev, M. L. Leininger,
    J. F. Gonthier, R

In [None]:
# A QCFractal update re-arranged many single-point datasets and the old way of accessing
# them is no longer available, so this specification map is built by hand to navigate
# the dataset.
#
# QCFractal used to run the dispersion correction of most DFT-D methods as a separate
# calculation, so for these older single-point datasets the two energies must be added.
#
# Each spec_map entry has the keys:
#   'dft_energy_spec'            name of the psi4 specification
#   'dispersion_energy_spec'     name of the separate dispersion specification, or None
#   'functional', 'dispersion_correction_type', 'basis'
spec_map = defaultdict(dict)
specs = sd.specifications

# All b3lyp calculations that take the d3bj correction.
# NOTE(review): the dispersion specification is hard-coded to 'spec_7' — confirm it is the
# d3bj correction for b3lyp in this dataset.
for method in ['b3lyp']:
    for basis in ["dzvp", "def2-tzvp", "def2-tzvpd", "def2-tzvpp", "def2-tzvppd", "def2-qzvp", "6-31+g**", "6-311+g**"]:
        for spec in specs.values():
            qc_spec = spec.specification
            if qc_spec.program == 'psi4' and qc_spec.method == method and qc_spec.basis == basis:
                spec_map[method+'-d3bj/'+basis] = {
                    'dft_energy_spec': spec.name,
                    'functional': 'b3lyp',
                    'dispersion_correction_type': 'd3bj',
                    'basis': basis,
                    'dispersion_energy_spec': 'spec_7',
                }

# Other methods with a d3bj, d3 or d3mbj correction computed as a separate dftd3 calculation.
for method in ["m05-2x-d3", "m06-2x-d3", "m08-hx-d3", "pw6b95-d3bj", "pw6b95-d3", "b3lyp-d3mbj", "dsd-blyp-d3bj"]:
    method_split = method.split('-')
    dispersion_correction_type = method_split[-1]
    method_name = '-'.join(method_split[:-1])
    # Decide the basis BEFORE scanning the specifications. Switching it part-way through
    # the scan made the map key depend on the order of the specifications: a dftd3 entry
    # seen before the psi4 one was stored under '.../dzvp' while the DFT part went under
    # '.../heavy-aug-cc-pvtz', leaving two incomplete entries.
    basis = "heavy-aug-cc-pvtz" if method_name == "dsd-blyp" else "dzvp"
    map_key = method+'/'+basis
    for spec in specs.values():
        qc_spec = spec.specification
        if qc_spec.program == 'psi4' and qc_spec.method == method_name and qc_spec.basis == basis:
            spec_map[map_key].update({
                'dft_energy_spec': spec.name,
                'functional': method_name,
                'dispersion_correction_type': dispersion_correction_type,
                'basis': basis,
            })
        # The dftd3 specifications carry the full method name and no basis.
        if qc_spec.program == 'dftd3' and qc_spec.method == method and qc_spec.basis is None:
            spec_map[map_key].update({'dispersion_energy_spec': spec.name})

# Methods where the dispersion correction is not split into a separate calculation.
for method in ["wb97m-d3bj", "wb97m-v", "b3lyp-nl"]:
    for basis in ["dzvp"]:
        method_split = method.split('-')
        dispersion_correction_type = method_split[-1]
        method_name = '-'.join(method_split[:-1])
        for spec in specs.values():
            qc_spec = spec.specification
            if qc_spec.program == 'psi4' and qc_spec.method == method and qc_spec.basis == basis:
                spec_map[method+'/'+basis] = {
                    'dft_energy_spec': spec.name,
                    'functional': method_name,
                    'dispersion_correction_type': dispersion_correction_type,
                    'basis': basis,
                    'dispersion_energy_spec': None,
                }

# Reference method with no dispersion correction.
method = "mp2/heavy-aug-cc-pv[tq]z + d:ccsd(t)/heavy-aug-cc-pvdz"
for spec in specs.values():
    if spec.specification.program == 'psi4' and spec.specification.method == method:
        spec_map[method] = {
            'dft_energy_spec': spec.name,
            'functional': method,
            'dispersion_correction_type': None,
            'dispersion_energy_spec': None,
            'basis': 'CBS',
        }
# NOTE(review): 'spec_31' is hard-coded for mp2/aug-cc-pvtz — confirm against the dataset.
spec_map.update({'mp2/aug-cc-pvtz': {'dft_energy_spec':'spec_31', 'functional':'mp2', 'dispersion_correction_type': None, 'dispersion_energy_spec':None, 'basis':'aug-cc-pvtz'}})
# spec_map.update({'b97-d3bj/def2-tzvp': {'dft_energy_spec':'spec_2', 'functional':'b97', 'dispersion_correction_type': 'd3bj', 'dispersion_energy_spec':'spec_25', 'basis':'def2-tzvp'}})

# An entry built with .update() can end up with only one of the two specification keys when
# the dataset lacks the matching psi4 or dftd3 specification; report those entries here,
# because code that reads both keys would otherwise fail with a KeyError.
for map_key, map_entry in spec_map.items():
    missing_keys = sorted({'dft_energy_spec', 'dispersion_energy_spec'} - set(map_entry))
    if missing_keys:
        print('WARNING: spec_map entry', repr(map_key), 'is missing', missing_keys)

In [None]:
# Display the assembled specification map for inspection.
spec_map

In [None]:
# Write the specification map to a JSON file.
spec_map_path = './data/single-point-dataset-specification-map.json'
with open(spec_map_path, 'w') as spec_map_file:
    json.dump(spec_map, spec_map_file)

In [None]:
# Load the angle data for the single points (the file holds JSON despite its .txt extension).
# The context manager closes the file handle as soon as parsing finishes; the previous
# `json.load(open(...))` form left it open.
with open("./data/angle_indices_for_single_points.txt") as angle_file:
    angle_dict = json.load(angle_file)

In [None]:
# Extent of the "<i>-<j>" names iterated from the single-point dataset.
# NOTE(review): i presumably indexes the molecule / torsion scan and j the grid point of
# that scan — confirm against the dataset.
N_SCANS = 59
N_POINTS_PER_SCAN = 24


def _record_index(record_name):
    """Turn a name such as '3-17' into the integer pair (3, 17)."""
    parts = record_name.split('-')
    return int(parts[0]), int(parts[1])


# For every method in spec_map: download the energies (and dipoles when available), add the
# separately computed dispersion energy where there is one, and write one JSON file per method.
for key, val in spec_map.items():
    print(key)

    dft_energy_spec = val['dft_energy_spec']
    dispersion_energy_spec = val['dispersion_energy_spec']

    # Energies and dipoles of the main calculation, keyed by (i, j).
    dft_energies = {}
    dipoles = {}
    for x in sd.iterate_records(specification_names=[dft_energy_spec]):
        grid_index = _record_index(x[0])
        dft_energies[grid_index] = x[2].return_result
        if 'scf dipole' in x[2].properties:
            dipoles[grid_index] = x[2].properties['scf dipole']
    # Dipoles are reported for this method as soon as at least one record provides one.
    dipole_flag = bool(dipoles)

    # Separately computed dispersion energies, when the method has them.
    disp_energies = {}
    if dispersion_energy_spec:
        for x in sd.iterate_records(specification_names=[dispersion_energy_spec]):
            disp_energies[_record_index(x[0])] = x[2].return_result

    missing_dft = []
    missing_disp = []
    energies_and_dipoles = {}
    for i in range(N_SCANS):
        dft_energy_list = []
        dipole_list = []
        disp_energy_list = []
        total_energy_list = []
        for j in range(N_POINTS_PER_SCAN):
            grid_index = (i, j)
            # Missing records keep the historical placeholders (0.0 energy, [] dipole) so
            # the output layout is unchanged, but they are reported below instead of
            # passing silently.
            if grid_index not in dft_energies:
                missing_dft.append(grid_index)
            dft_energy = dft_energies.get(grid_index, 0.0)
            dft_energy_list.append(dft_energy)
            if dipole_flag:
                dipole_list.append(dipoles.get(grid_index, []))
            else:
                dipole_list.append('N/A')
            if dispersion_energy_spec:
                if grid_index not in disp_energies:
                    missing_disp.append(grid_index)
                disp_energy = disp_energies.get(grid_index, 0.0)
                disp_energy_list.append(disp_energy)
                total_energy_list.append(dft_energy + disp_energy)
            else:
                disp_energy_list.append('N/A')
                total_energy_list.append(dft_energy)

        angles = angle_dict[str(i)]
        # zip() stops at the shortest input, so a wrong-length angle list would silently
        # drop points from the output.
        if len(angles) != N_POINTS_PER_SCAN:
            print('WARNING:', key, 'scan', i, 'has', len(angles), 'angles, expected', N_POINTS_PER_SCAN)
        # Sort every per-point list by angle, keeping the lists aligned with each other.
        angles, total_energy_list, dft_energy_list, disp_energy_list, dipole_list = zip(*sorted(zip(angles, total_energy_list, dft_energy_list, disp_energy_list, dipole_list)))
        energies_and_dipoles[i] = {'angles':angles, 'total energies':total_energy_list, 'dft energies':dft_energy_list, 'dispersion energies': disp_energy_list,  'dipoles': dipole_list}

    if missing_dft:
        print('WARNING:', key, 'has no energy record for', len(missing_dft), 'points (written as 0.0):', missing_dft)
    if missing_disp:
        print('WARNING:', key, 'has no dispersion record for', len(missing_disp), 'points (written as 0.0):', missing_disp)

    # '/' cannot appear in a file name, so it is replaced by '_' in the method key.
    with open('./data/'+key.replace('/','_')+'_single_points_data.json', 'w') as outfile:
        json.dump(energies_and_dipoles, outfile)