# Architector initial examples

This notebook should run on any type of machine, and should also scale to larger clusters for testing MPI threading.

This notebook will set up and execute generation of complexes of the default monodentate-only ligands in Architector (52) with the first-row transition metals (or any subset of them) in CN=6 aqueous environments.

This already amounts to up to 520 Architector runs (52 ligands × 10 metals) that can be done in parallel.

In [66]:
from architector import (build_complex,convert_io_molecule,view_structures) # Secondary_Solv_Shell branch
from architector import io_ptable as io_ptable
from tqdm import tqdm # conda install -c conda-forge tqdm
from pympipool.mpi import PyMPIExecutor # conda install -c conda-forge pympipool
import os
import concurrent.futures 
import mendeleev
import multiprocessing
import copy
import pandas as pd
import time

In [61]:
# Get the CPU count; it is used below as the number of parallel workers.
# NOTE: multiprocessing.cpu_count() reports the CPUs of the local machine. If running
# under Slurm, set this by hand to the total cores available to the MPI job you instantiated.
cpu_count = multiprocessing.cpu_count() 

In [62]:
def calc_architector(input):
    """Run ``build_complex`` on one input and tabulate the generated complexes.

    Parameters
    ----------
    input : dict
        Architector input dictionary, passed unchanged to ``build_complex``.
        (The name shadows the ``input`` builtin; it is kept so that existing
        keyword callers keep working.)

    Returns
    -------
    pandas.DataFrame or None
        One row per entry of the mapping returned by ``build_complex``. Each
        row holds that entry's values (minus ``ase_atoms``) plus:

        * ``gen_unique_name`` -- the entry's key,
        * ``total_walltime`` / ``total_end_to_end_time`` -- wall-clock seconds
          spent in this call up to the return of ``build_complex`` (the same
          value for every row of one call),
        * ``sampled_solvation_shells`` -- a list of mol2 strings, one per
          sampled shell, or ``None`` when the entry has none.

        ``None`` is returned when ``build_complex`` raises; a
        ``RuntimeWarning`` describing the failure is emitted in that case so
        the error is no longer swallowed silently. ``KeyboardInterrupt`` and
        ``SystemExit`` are not intercepted.
    """
    # Local import so this block does not rely on a notebook-level `import warnings`.
    import warnings

    start = time.time()
    # Set these to single thread. Helps A LOT with xtb efficiency.
    for thread_var in ("MKL_NUM_THREADS", "NUMEXPR_NUM_THREADS", "OMP_NUM_THREADS"):
        os.environ[thread_var] = "1"

    try:
        complex_mol = build_complex(input)
    except Exception as err:
        # Best-effort semantics are kept (the caller still gets None), but the
        # cause of the failure is reported instead of being discarded.
        warnings.warn(
            "build_complex failed for input {!r}: {!r}".format(input, err),
            RuntimeWarning,
        )
        return None
    ttime = time.time() - start

    dfrows = []
    for key, val in complex_mol.items():
        # Shallow copy so the dictionaries returned by build_complex are not mutated.
        row = dict(val)
        # ase_atoms is not kept in the results table.
        row.pop("ase_atoms", None)
        row['total_walltime'] = ttime
        row['gen_unique_name'] = key
        row['total_end_to_end_time'] = ttime
        shells = row.get("sampled_solvation_shells")
        if shells is not None:
            # Store each sampled shell as a mol2 string labelled with its
            # conformer index and energy.
            row['sampled_solvation_shells'] = [
                shell.write_mol2(
                    'Conformer={},Energy={}'.format(i, shell.param_dict['energy'])
                )
                for i, shell in enumerate(shells)
            ]
        else:
            row['sampled_solvation_shells'] = None
        dfrows.append(row)
    return pd.DataFrame(dfrows)

In [63]:
# Get the first-row transition metals: the first ten entries of
# io_ptable.transition_metals (Sc through Zn, as displayed below).
metals = io_ptable.transition_metals[0:10]
metals

['Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn']

In [64]:
# Tabulate Architector's built-in ligand dictionary. Transposing puts one ligand per row
# (indexed by ligand name) with the ligand properties as columns.
ligands_df = pd.DataFrame(io_ptable.ligands_dict).T
ligands_df

Unnamed: 0,smiles,coordList,ligType,ligCharge
water,O,[0],mono,0
hydroxyl,[OH-],[0],mono,
methanide,[CH3-],[0],mono,
ethanide,[CH2-]C,[0],mono,
benzyl,[c-]1ccccc1,[0],mono,
...,...,...,...,...
bisterbutylphosphinoeoxidemethane,CC(C)(C)P(=O)(CP(=O)(C(C)(C)C)C(C)(C)C)C(C)(C)C,"[5, 8]",bi_cis,
bisadamphosphinoeoxidemethane,O=P(CP(=O)(C23CC1CC(CC(C1)C2)C3)C56CC4CC(CC(C4...,"[0, 4]",bi_cis,
bisdimethylphosphinesulfidemethane,CP(C)(=S)CP(C)(C)=S,"[3, 8]",bi_cis,
bisdimethylphosphineselenidemethane,CP(C)(=Se)CP(C)(C)=Se,"[3, 8]",bi_cis,


In [40]:
# Keep only the default monodentate ligands, i.e. the rows whose ligType is 'mono'.
is_monodentate = ligands_df['ligType'] == 'mono'
monodf = ligands_df[is_monodentate]
monodf

Unnamed: 0,smiles,coordList,ligType,ligCharge
water,O,[0],mono,0.0
hydroxyl,[OH-],[0],mono,
methanide,[CH3-],[0],mono,
ethanide,[CH2-]C,[0],mono,
benzyl,[c-]1ccccc1,[0],mono,
oxo,[O-2],[0],mono,
hydrogen_peroxide,OO,[0],mono,
oxygen_top,O=O,[0],mono,
hydride,[H-],[0],mono,
sulfide,[S-2],[0],mono,


In [23]:
# Size of the monodentate table as (rows, columns); each row is one ligand.
monodf.shape

(52, 3)

In [25]:
# A row's .name is its index label, i.e. the ligand name that goes into the input dictionary.
monodf.iloc[0].name

'water'

In [41]:
# Set up the input dictionary: an iron centre with coordination number 6,
# the first monodentate ligand of the table, and no extra parameters.
input_dict_example = {
    'core': {'metal': 'Fe', 'coreCN': 6},
    'ligands': [monodf.index[0]],
    'parameters': {},
}
# Runs in just a couple of seconds.
output_example = calc_architector(input_dict_example)

In [42]:
# Display the results table: one row per generated complex (named in gen_unique_name).
output_example

Unnamed: 0,total_charge,xtb_n_unpaired_electrons,calc_n_unpaired_electrons,xtb_total_charge,metal_ox,init_energy,energy,mol2string,init_mol2string,energy_sorted_index,...,core_preprocess_time,symmetry_preprocess_time,total_liggen_time,total_complex_assembly_time,final_relaxation_time,sum_total_conformer_time_spent,total_walltime,gen_unique_name,total_end_to_end_time,sampled_solvation_shells
0,2,4,4,2,2,-892.682242,-892.682242,@<TRIPOS>MOLECULE\noctahedral_0_nunpairedes_4_...,,0,...,0.111207,0.006917,0.059149,0.395733,0.031184,0.769353,5.666265,octahedral_0_nunpairedes_4_charge_2,5.666265,
1,2,4,4,2,2,-892.565967,-892.565967,@<TRIPOS>MOLECULE\ntrigonal_prismatic_0_nunpai...,,0,...,0.111207,0.006917,0.059149,0.380541,0.027977,1.367125,5.666265,trigonal_prismatic_0_nunpairedes_4_charge_2,5.666265,
2,2,4,4,2,2,-892.415213,-892.415213,@<TRIPOS>MOLECULE\npentagonal_pyramidal_0_nunp...,,0,...,0.111207,0.006917,0.059149,0.389528,0.028971,2.163487,5.666265,pentagonal_pyramidal_0_nunpairedes_4_charge_2,5.666265,


In [43]:
# Pass the mol2 strings of the generated complexes to Architector's structure viewer.
view_structures(output_example.mol2string)

In [67]:
# Now, let's do this in embarrassingly parallel fashion with a chunk of ligands with iron:
# each of the first 20 monodentate ligands becomes one independent job.
with PyMPIExecutor(max_workers=cpu_count, cores_per_worker=1) as exe:
    fs_list = []
    for lig_name in monodf.index.values[:20]:
        # Deep-copy so every job gets its own input dictionary.
        tmp_inp = copy.deepcopy(input_dict_example)
        tmp_inp['ligands'] = [lig_name]
        fs_list.append(exe.submit(calc_architector, tmp_inp))
    # Collect the results in completion order, with a progress bar.
    out_dfs = [
        future.result()
        for future in tqdm(concurrent.futures.as_completed(fs_list), total=len(fs_list))
    ]
# Note that this block took ~50 seconds on my laptop (Macbook Pro/M2Max)

100%|███████████████████████████████████████████| 20/20 [00:53<00:00,  2.69s/it]


[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m[0m

In [68]:
# Combine the per-ligand tables into one DataFrame with a fresh 0..N-1 row index.
out_df = pd.concat(out_dfs).reset_index(drop=True)

In [69]:
# Display the combined table: one row per generated complex across all submitted ligands.
out_df

Unnamed: 0,total_charge,xtb_n_unpaired_electrons,calc_n_unpaired_electrons,xtb_total_charge,metal_ox,init_energy,energy,mol2string,init_mol2string,energy_sorted_index,...,core_preprocess_time,symmetry_preprocess_time,total_liggen_time,total_complex_assembly_time,final_relaxation_time,sum_total_conformer_time_spent,total_walltime,gen_unique_name,total_end_to_end_time,sampled_solvation_shells
0,2,4,4,2,2,-892.669267,-892.669267,@<TRIPOS>MOLECULE\noctahedral_0_nunpairedes_4_...,,0,...,0.116500,0.007407,11.143919,0.138660,0.013355,0.353667,5.442197,octahedral_0_nunpairedes_4_charge_2,5.442197,
1,2,4,4,2,2,-892.560732,-892.560732,@<TRIPOS>MOLECULE\ntrigonal_prismatic_0_nunpai...,,0,...,0.116500,0.007407,11.143919,0.193277,0.011160,1.107874,5.442197,trigonal_prismatic_0_nunpairedes_4_charge_2,5.442197,
2,2,4,4,2,2,-892.409045,-892.409045,@<TRIPOS>MOLECULE\npentagonal_pyramidal_0_nunp...,,0,...,0.116500,0.007407,11.143919,0.145919,0.009672,2.114506,5.442197,pentagonal_pyramidal_0_nunpairedes_4_charge_2,5.442197,
3,0,4,4,0,2,-877.757967,-877.757967,@<TRIPOS>MOLECULE\noctahedral_0_nunpairedes_4_...,,0,...,0.108345,0.003912,9.139184,0.143223,0.007961,0.318303,5.793172,octahedral_0_nunpairedes_4_charge_0,5.793172,
4,0,4,4,0,2,-877.280662,-877.280662,@<TRIPOS>MOLECULE\ntrigonal_prismatic_0_nunpai...,,1,...,0.108345,0.003912,9.139184,0.134305,0.007199,0.766230,5.793172,trigonal_prismatic_0_nunpairedes_4_charge_0,5.793172,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65,2,4,4,2,2,-1064.865300,-1064.865300,@<TRIPOS>MOLECULE\npentagonal_pyramidal_0_nunp...,,2,...,0.104814,0.003572,0.878757,0.149838,0.011680,3.368164,12.907034,pentagonal_pyramidal_0_nunpairedes_4_charge_2,12.907034,
66,2,4,4,2,2,-1582.159430,-1582.159430,@<TRIPOS>MOLECULE\nhexagonal_planar_0_nunpaire...,,0,...,0.079605,0.025068,4.208129,0.360765,0.037933,4.256979,37.245130,hexagonal_planar_0_nunpairedes_4_charge_2,37.245130,
67,2,4,4,2,2,-1582.052222,-1582.052222,@<TRIPOS>MOLECULE\noctahedral_0_nunpairedes_4_...,,0,...,0.079605,0.025068,4.208129,0.325062,0.039498,5.155580,37.245130,octahedral_0_nunpairedes_4_charge_2,37.245130,
68,2,4,4,2,2,-1582.010157,-1582.010157,@<TRIPOS>MOLECULE\ntrigonal_prismatic_0_nunpai...,,1,...,0.079605,0.025068,4.208129,0.330265,0.044216,6.984808,37.245130,trigonal_prismatic_0_nunpairedes_4_charge_2,37.245130,


In [70]:
# This produced roughly 70 structures (you may get more or fewer depending on how Architector sampled).
# View a random sample of 10 of them; random_state=42 makes the selection repeatable.
view_structures(out_df.sample(10,random_state=42).mol2string)
# All of these (except the oxo structures) are already relaxed with GFN2-xTB.

In [71]:
# Inspect every field stored for a single structure (the first row of the combined table).
out_df.iloc[0]

total_charge                                                                      2
xtb_n_unpaired_electrons                                                          4
calc_n_unpaired_electrons                                                         4
xtb_total_charge                                                                  2
metal_ox                                                                          2
init_energy                                                             -892.669267
energy                                                                  -892.669267
mol2string                        @<TRIPOS>MOLECULE\noctahedral_0_nunpairedes_4_...
init_mol2string                                                                None
energy_sorted_index                                                               0
total_possible_n_symmetries                                                       1
inputDict                         {'core': {'metal': 'Fe', 'coreCN': 6, 'smi

In [59]:
# # To test on larger systems, just up the number of things requested.
# with PyMPIExecutor(max_workers=cpu_count, cores_per_worker=1) as exe:
#     fs_list = []
#     for lig_name in monodf.index.values: # Iterate over all 52 monodentate ligands
#         for metal in io_ptable.transition_metals[0:10]: # Iterate over first 10 transition metals
#             tmp_inp = copy.deepcopy(input_dict_example)
#             tmp_inp['core']['metal'] = metal
#             tmp_inp['ligands'] = [lig_name]
#             fs = exe.submit(calc_architector,tmp_inp)
#             fs_list.append(fs)
#     out_dfs = []
#     for future in tqdm(concurrent.futures.as_completed(fs_list),total=len(fs_list)): # 520 calculations for testing scaling.
#         out_dfs.append(future.result())
# # Note that this block would take a while on just a laptop (probably ~1 hour).