### Paths to the databases of the different properties

In [1]:
# Maps each job name to the CSV file (relative to this notebook) that holds
# the data for that job.
job_dict = dict(
    density='../../../Database/density_database.csv',
    viscosity='../../../Database/viscosity_database.csv',
    mp='../../../Database/mp_database.csv',
    tg='../../../Database/tg_database.csv',
    td='../../../Database/td_database.csv',
    ph='../../../Database/pH_database.csv',
    surface_tension='../../../Database/surface_tension_database.csv',
    xTB_DFT='../../../Database_Structure/structure/xTB_DFT/data.csv',
)

### Determine the type of job

In [2]:
# Job to process; it is used as a key into `job_dict` and as the name of the
# output sub-folder, so it must be one of the keys of `job_dict`.
job_type = 'density'

### Match each job-database entry to the index of its corresponding DES

In [3]:
import pandas as pd


def _des_key(frame, row, ratio_column):
    """Return the (HBA SMILES, HBD SMILES, molar ratio as text) key of one row.

    A tuple is used instead of a concatenated string so that two different
    HBA/HBD pairs can never produce the same key (e.g. 'CC' + 'O' versus
    'C' + 'CO' concatenate to the same text).
    """
    return (
        frame['HBA_smiles'][row],
        frame['HBD_smiles'][row],
        str(frame[ratio_column][row]),
    )


# Reference table of DES structures; a DES is identified by its row position.
src_database = pd.read_csv('../../../Database_Structure/DES.csv')

# DES_list keeps the original concatenated-text representation for any code
# that still reads it; _des_position is the lookup actually used for matching
# and stores the FIRST row at which each key appears (same as list.index).
DES_list = []
_des_position = {}
for i in range(len(src_database)):
    key = _des_key(src_database, i, 'molar_ratio')
    if key in _des_position:
        # Duplicate DES definition in DES.csv: report the row of the repeat.
        print(i)
    else:
        _des_position[key] = i
    DES_list.append(key[0] + key[1] + key[2])

# For every row of the selected job database, find the row of the matching
# DES in DES.csv.
index_list = []
job_database = pd.read_csv(job_dict[job_type])
for i in range(len(job_database)):
    key = _des_key(job_database, i, 'molar_ratio (HBA:HBD)')
    if key not in _des_position:
        # Same exception type as list.index, but with enough context to
        # locate the offending row.
        raise ValueError(
            f"Row {i} of {job_dict[job_type]} has no matching DES in DES.csv: {key!r}"
        )
    index_list.append(_des_position[key])

### Save node attributes of DES to a `.json` file

In [13]:
import os
from utils.post_processing import get_disp, get_charge

# Root folder that receives the processed output, one sub-folder per job type.
database_dir = '../../database/'

# Accumulators for the processed values. The same list objects are stored in
# node_attr_dict below, so appending through either name updates both.
chg_list, disp_list = [], []

# One entry per node attribute:
#   list           - accumulator the processed results are appended to
#   process_method - callable applied to each raw file path
#   raw_data_path  - folder containing the raw files
#   raw_data_type  - file extension of the raw files
#   output_path    - JSON file the accumulated list is written to
node_attr_dict = dict(
    charge=dict(
        list=chg_list,
        process_method=get_charge,
        raw_data_path='../../../Database_Structure/MD_simulation/xTB/xTB_MDS_002/multiwfn/charge/',
        raw_data_type='chg',
        output_path=os.path.join(database_dir, f'{job_type}/charge.json'),
    ),
    dispersion=dict(
        list=disp_list,
        process_method=get_disp,
        raw_data_path='../../../Database_Structure/MD_simulation/xTB/xTB_MDS_002/multiwfn/dispersion/',
        raw_data_type='pqr',
        output_path=os.path.join(database_dir, f'{job_type}/dispersion.json'),
    ),
)

In [15]:
import json
import os
import warnings

# For every node attribute, process the raw file of each matched DES and dump
# the collected results to that attribute's JSON file.
for key, spec in node_attr_dict.items():
    values = spec['list']
    # Empty the accumulator in place so that re-running this cell does not
    # append the same results a second time (the list object is shared).
    values.clear()

    missing = []
    for index in index_list:
        # Raw files are numbered from 1, while index_list holds 0-based positions.
        file = os.path.join(
            spec['raw_data_path'],
            f"DES_{index + 1}.{spec['raw_data_type']}"
        )
        if os.path.exists(file):
            values.append(spec['process_method'](file))
        else:
            missing.append(file)

    if missing:
        # Still best-effort (missing files are skipped), but no longer silent:
        # a skipped file means the saved list no longer lines up with index_list.
        warnings.warn(
            f"{len(missing)} raw '{key}' file(s) not found and skipped "
            f"(first: {missing[0]}); the saved list is shorter than index_list."
        )

    # Create the output folder on first use instead of failing in open().
    out_dir = os.path.dirname(spec['output_path'])
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(spec['output_path'], 'w') as op:
        json.dump(values, op)


In [17]:
# Reload every per-attribute JSON file and gather the lists under one dict
# keyed by attribute name.
NODE_ATTR_DICT = {}

for key, spec in node_attr_dict.items():
    with open(spec['output_path']) as op:
        attr_list = json.load(op)
    NODE_ATTR_DICT[key] = attr_list

# All attributes must have the same number of entries; otherwise show the
# lengths and abort before writing the combined file.
length_list = list(map(len, NODE_ATTR_DICT.values()))
lengths_agree = min(length_list) == max(length_list)
if not lengths_agree:
    print(length_list)
    raise RuntimeError('Node attributes are not of the same length.')

# Write the combined attributes next to the per-attribute files.
output_path = os.path.join(database_dir, f'{job_type}/node_attr.json')
with open(output_path, 'w') as op:
    json.dump(NODE_ATTR_DICT, op)