Import and Path

In [None]:
#import packages and set paths
import os, sys
import numpy as np 
from pathlib import Path 
import pandas as pd
import pickle
import dask
from dask import delayed
from dask.distributed import Client, progress, LocalCluster

# Make the project checkout importable so that `mofography` can be found.
# NOTE(review): '.../PoroNet' looks like a placeholder — replace it with the
# actual location of the PoroNet directory before running.
path_to_mgr = Path('.../PoroNet')
sys.path.append(str(path_to_mgr))
import mofography as mgr

Load the MOF structures from the feather file

In [None]:
# Read the table of selected MOFs from the feather file (path is relative to
# the current working directory).
path_to_df = Path('Selected_MOFs_for_H2.feather')
df_Tobacco_training = pd.read_feather(path_to_df)
# Convert the table into the collection of structures that is iterated over
# when the task graphs are built below.
# NOTE(review): presumably one atoms object per DataFrame row — confirm
# against mgr.atoms_from_mofdb_df.
Tobacco_atoms = mgr.atoms_from_mofdb_df(df_Tobacco_training)

Create a workflow for pore graphs (using Dask to perform parallel calculation)

In [None]:
def regions_maxima_radii_from_dgrid(dgrid, mask_thickness, h, threshold_abs):
    """Segment a distance grid and read the grid value at every maximum.

    The segmentation itself is delegated to
    ``mgr.regions_from_dgrid_with_threshold_abs``; ``mask_thickness``, ``h``
    and ``threshold_abs`` are forwarded to it unchanged.

    Returns:
        A ``(regions, maxima, maxima_radii)`` tuple, where ``maxima_radii``
        holds the ``dgrid`` values sampled at the ``maxima`` positions.
    """
    regions, maxima = mgr.regions_from_dgrid_with_threshold_abs(
        dgrid, mask_thickness, h, threshold_abs
    )
    # Each column of `maxima` becomes the index array for one grid axis
    # (assumes `maxima` is an (n_maxima, ndim) integer array — TODO confirm).
    peak_index = tuple(maxima.T)
    maxima_radii = dgrid[peak_index]
    print('made the regions and maxima')
    return regions, maxima, maxima_radii

def make_rag(regions, maxima, maxima_radii, dgrid, mat_atoms, spacing):
    """Build the pore graph ("rag") for one structure and annotate it.

    The graph is created from the connections between ``regions`` and then
    passed through the ``mgr`` annotation helpers in this order: pixel ratio,
    maxima (with their radii and the grid shape), volume.

    Returns:
        The annotated graph returned by the last ``mgr`` helper.
    """
    links = mgr.connections_from_regions_and_dgrid(regions, dgrid, maxima, mat_atoms)
    graph = mgr.rag_from_connections_pixel_multi(
        regions, links, maxima, only_use_internal=False
    )
    graph = mgr.add_pixel_ratio_to_rag(graph, mat_atoms, spacing)
    graph = mgr.add_maxima_to_rag(graph, maxima, maxima_radii, dgrid.shape, mat_atoms)
    graph = mgr.add_volume_to_rag(graph, mat_atoms)
    print('made the rag')
    return graph

def energy_calculation(mat_atoms, spacing, blocksize, forcefield_mixed, cutoff, probe_symbol):
    """Compute an energy grid for ``mat_atoms`` and wrap it in an interpolator.

    All arguments are forwarded positionally to ``mgr.egrid_from_atoms2``
    (with ``return_ncells`` and ``precompute_aabb`` switched off); the grid is
    then passed through ``mgr.egrid_transfer`` before the interpolator is
    built.

    Returns:
        The object returned by ``mgr.get_energy_interpolator``.
    """
    raw_grid = mgr.egrid_from_atoms2(
        mat_atoms,
        spacing,
        blocksize,
        forcefield_mixed,
        cutoff,
        probe_symbol,
        return_ncells=False,
        precompute_aabb=False,
    )
    interpolator = mgr.get_energy_interpolator(mgr.egrid_transfer(raw_grid))
    print('made the einterp')
    return interpolator

def energy_histogram(rag, regions, einterp, pbc_groups):
    """Attach a per-region energy histogram to ``rag``.

    The bin edges are fixed: an open-ended bin below -10, unit-width edges
    from -9 to -1, then 0 and an open-ended bin above it. The histogram
    itself is computed by ``mgr.add_vdw_hist_to_rag_probability_right`` with
    periodic boundary handling enabled.

    Returns:
        The graph returned by the ``mgr`` helper.
    """
    lower_tail = np.array([-np.inf, -10])
    unit_edges = np.linspace(-9, -1, 9)  # -9, -8, ..., -1
    upper_tail = np.array([0, np.inf])
    edges = np.concatenate((lower_tail, unit_edges, upper_tail))
    annotated = mgr.add_vdw_hist_to_rag_probability_right(
        rag,
        regions,
        einterp,
        use_pbc=True,
        pbc_groups=pbc_groups,
        bins_energy=edges,
    )
    print('made the bins and added ehist')
    return annotated

In [None]:
# Build one lazy task graph per MOF. Every call below is wrapped in
# dask.delayed, so this loop only describes the work; nothing is computed here.
delayed_rag_futures = []
for mat_atoms in Tobacco_atoms:
    dgrid = delayed(mgr.dgrid_from_atoms_cpu_no_aabb)(mat_atoms, spacing=0.5)
    regions_maxima_radii = delayed(regions_maxima_radii_from_dgrid)(
        dgrid, mask_thickness=0, h=0.5, threshold_abs=1
    )
    # The delayed tuple has no declared length, so its items are taken by index.
    regions = regions_maxima_radii[0]
    maxima = regions_maxima_radii[1]
    maxima_radii = regions_maxima_radii[2]

    # Graph construction and periodic-boundary grouping both start from the
    # same segmentation result.
    rag = delayed(make_rag)(
        regions=regions,
        maxima=maxima,
        maxima_radii=maxima_radii,
        dgrid=dgrid,
        mat_atoms=mat_atoms,
        spacing=0.5,
    )
    pbc_groups = delayed(mgr.apply_pbc)(
        regions, maxima, maxima_radii, mat_atoms, return_conn=True
    )
    rag = delayed(mgr.add_pbc_to_rag)(rag, pbc_groups)

    # Energy interpolator for the 'H_com' probe, then the per-region histogram.
    einterp = delayed(energy_calculation)(
        mat_atoms,
        spacing=0.5,
        blocksize=20000,
        forcefield_mixed=None,
        cutoff=12.8,
        probe_symbol='H_com',
    )
    rag = delayed(energy_histogram)(
        rag, regions=regions, einterp=einterp, pbc_groups=pbc_groups
    )
    delayed_rag_futures.append(rag)


In [None]:
# Create a Dask distributed client backed by 64 worker processes for parallel
# computation.
# NOTE(review): 64 is hard-coded — adjust n_workers to the number of CPUs
# actually available on the machine running this notebook.
client = Client(n_workers=64)

In [None]:
# Display the client summary as the cell output.
client

In [None]:
# Start the delayed tasks: persist every per-MOF graph and keep the returned
# handles in `futures` (one per entry of delayed_rag_futures, same order).
# NOTE(review): presumably this returns immediately and runs in the background
# because a distributed Client is active — confirm that the client cell above
# was executed first.
futures = dask.persist(*delayed_rag_futures) 

In [None]:
# Check the progress of the persisted tasks (shows a progress display for
# `futures`).
progress(futures)

In [None]:
# Stop the tasks by shutting the client down.
# NOTE: the result-extraction cell further down calls client.compute(futures)
# and therefore needs a live client — run this cell only to abort a run, or
# after the results have been collected and saved.
client.shutdown()

Extraction of results

In [None]:
# Collect the results of the persisted tasks, one entry per MOF and in the
# same order as `futures`. Failed tasks are recorded as None.
from dask.distributed import wait

results = client.compute(futures)
# Block until every future has either finished or errored. Without this, a
# future that is merely still pending would be recorded as None and later be
# counted as a failed MOF.
wait(results)
rags = [r.result() if r.status == 'finished' else None for r in results]

In [None]:
# Check how many MOFs failed (i.e., no pore graph was produced).
# The identity test `is None` is used so that the count does not depend on how
# the graph objects implement `==`.
np.sum([a is None for a in rags])

In [None]:
# Split the results: positions of the failed MOFs (None entries) go to
# none_indices, the successfully built pore graphs go to rag_list (original
# order preserved).
none_indices = [position for position, graph in enumerate(rags) if graph is None]
rag_list = [graph for graph in rags if graph is not None]

In [None]:
# Display the successfully built pore graphs as the cell output.
rag_list

In [None]:
# Store the pore graphs as a pickle file in the current working directory.
# The context manager guarantees the file is flushed and closed even if
# pickling raises part-way through.
with open(Path('Pore_Graph_for_H2.pkl'), 'wb') as pkl_file:
    pickle.dump(rag_list, pkl_file)