In [None]:
# Suppress PySCF warning...
import pyscf
pyscf.__config__.B3LYP_WITH_VWN5 = False

from pathlib import Path

# The Mole class is used to define molecular information in PySCF.
from pyscf.gto import Mole
from pyscf import gto

# logger contains definitions of verbosity levels for PySCF.
from pyscf.lib import logger

# Functionality for (state-averaged) CASSCF.
from pyscf.mcscf import CASSCF, state_average_mix
from pyscf.fci.direct_spin1 import FCISolver
from pyscf.fci.addons import fix_spin
from pyscf import dmrgscf
# Wrapper functions to perform selection for variable and fixed active space sizes
from asf.wrapper import find_from_mol, find_from_scf, sized_space_from_mol, sized_space_from_scf

# Various utility functions...
from asf.utility import compare_active_spaces, show_mos_grid, pictures_Jmol

import sys
#######################################################
# !{sys.executable} -m pip install --upgrade AaronTools
#######################################################
#Import packages
import numpy as np
import os
import re
from math import sin, cos, pi
from glob import glob
import subprocess
import pickle
from subprocess import call, check_output
import pandas as pd
# import psi4
from joblib import Parallel,effective_n_jobs,delayed
from time import time
import matplotlib.pyplot as plt
from plumbum.cmd import grep, awk
import json
import shutil
import random
import sklearn
from shutil import copy
import csv
import h5py as h5
import seaborn as sns; sns.set(style="ticks", color_codes=True)

from sklearn.model_selection import train_test_split

# Geom manipulate
from AaronTools.geometry import Geometry
from tqdm import tqdm

In [None]:
# Remember the directory the notebook was started in; later cells join paths onto it
# and chdir back to it.
topdir=os.getcwd()

In [None]:
# Lookup built from key.csv: each value of the 'Numerical' column maps to the
# 'Alphabetical' entry on the same row.
structure_key = (
    pd.read_csv('key.csv')
    .set_index('Numerical')['Alphabetical']
    .to_dict()
)

In [None]:
# Reverse lookups (label -> numerical id), split by id: ids up to 15 land in
# hexacoord, ids from 16 upwards in pentacoord.
hexacoord = {
    label: number
    for number, label in structure_key.items()
    if int(number) <= 15
}
pentacoord = {
    label: number
    for number, label in structure_key.items()
    if int(number) >= 16
}

In [None]:
def clean():
    """Write a reduced copy of every ``structures/*xyz`` geometry into ``cleaned``.

    For each input file the atoms at positions 0, 1, 3, 4 and 5 are left out,
    the structure's base name is printed, and a new ``Geometry`` made of the
    remaining atoms is written under ``cleaned/<base name>``.
    """
    dropped = [0,1,3,4,5]
    for xyz_path in glob('structures/*xyz'):
        geom = Geometry(xyz_path)

        # Keep every atom whose position is not in the drop list.
        kept = []
        for idx in range(geom.num_atoms):
            if idx not in dropped:
                kept.append(geom.atoms[idx])

        label = os.path.basename(xyz_path).replace('.xyz','')
        print(label)
        Geometry(kept).write(os.path.join('cleaned',label))
        

In [None]:
# Sort the cleaned structures into the hexa- and penta-coordinate lists by label.
hexs = []
pents = []
# glob() returns files in filesystem order; sorting keeps the two lists (and any
# later sampling from them) reproducible between runs and machines.
for i in sorted(glob('cleaned/*xyz')):
    stem = os.path.basename(i).replace('.xyz','')
    # '<prefix>-<label>.xyz' is keyed by <label>; a name without a dash is used whole.
    # An explicit length check replaces the former bare ``except:``, which would
    # also have swallowed unrelated errors (including KeyboardInterrupt).
    parts = stem.split('-')
    base = parts[1] if len(parts) > 1 else stem

    # Files whose label is in neither table are skipped.
    if base in pentacoord:
        pents.append(i)
    elif base in hexacoord:
        hexs.append(i)



In [None]:
def find_orbitals(xyzname,coord,basis_file='ano-rcc-vdzp.1.nw'):
    """Work out the active-space bookkeeping for one structure.

    A PySCF ``Mole`` (charge 2, spin 4) is built from the xyz file, which is
    located relative to ``topdir``, using per-element basis sets read from
    ``basis_file``. The electron counts of that molecule are then combined
    with a fixed active-space size.

    Parameters
    ----------
    xyzname : str
        Path of the xyz file, relative to ``topdir``.
    coord : str
        ``'penta'`` selects 20 active electrons in 13 orbitals; any other
        value selects 18 active electrons in 12 orbitals.
    basis_file : str
        Basis-set file handed to ``gto.basis.load`` for every element.

    Returns
    -------
    tuple
        ``(shortname, info)`` where ``shortname`` is the file's base name
        without ``.xyz`` and ``info`` has the keys ``"NACTEL"``,
        ``"act_orbs"``, ``"inactive"`` and ``"2S"``.
    """
    elements = ('H','C','N','O','F','P','Cl','Fe','Br')

    mol = Mole()
    mol.atom = os.path.join(topdir,xyzname)
    mol.basis = {symbol: gto.basis.load(basis_file,symbol) for symbol in elements}
    mol.charge = 2
    mol.spin = 4
    # Set mol.verbose = logger.INFO to enable printing of SCF iterations and further output.
    mol.verbose = logger.NOTE
    mol.build()

    # Alpha / beta electron counts of the built molecule.
    n_alpha, n_beta = mol.nelec
    two_S = n_alpha - n_beta
    total_elec = n_alpha + n_beta

    # Fixed active-space size per coordination type.
    if coord=='penta':
        NACTEL, act_orbs = 20, 13
    else:
        NACTEL, act_orbs = 18, 12

    # Electrons outside the active space, counted in doubly occupied orbitals
    # (the 2S terms of the original expression cancel exactly).
    inactive = (total_elec - NACTEL) // 2

    shortname = os.path.basename(xyzname).replace('.xyz','')
    info = {"NACTEL":NACTEL,"act_orbs":act_orbs,"inactive":inactive,"2S":two_S}
    return (shortname,info)

In [None]:
# Load the saved active-space tables (structure name -> settings dict).
with open('penta_dict.json') as f:
    penta_dict = json.loads(f.read())

with open('hexa_dict.json') as f:
    hexa_dict = json.loads(f.read())

# How the two JSON files were produced originally (kept for reference):
# pents_pulled = random.sample(pents, 10)
# hexs_pulled = random.sample(hexs, 10)
# penta_dict = dict([find_orbitals(i,'penta') for i in pents_pulled])
# hexa_dict = dict([find_orbitals(i,'hexa') for i in hexs_pulled])
# with open('hexa_dict.json','w') as f:
#     json.dump(hexa_dict,f)

# with open('penta_dict.json','w') as f:
#     json.dump(penta_dict,f)

# Single table for both sets; on a shared key the hexa entry wins.
merged_dict = dict(penta_dict)
merged_dict.update(hexa_dict)

In [None]:
# os.mkdir('cluster_data')

In [None]:
# Print the lines of active_spaces.txt as a Python list.
with open('active_spaces.txt') as f:
    print(list(f))

In [None]:
# Scan grid: 100 evenly spaced values from 1 to 3 (both ends included),
# and the basis-set label written into the generated inputs.
radius_range = np.linspace(start=1, stop=3, num=100)
basis_set = "ANO-RCC-VDZP"
# topdir=os.path.join(os.getcwd(),'cluster_data')

In [None]:
def gen_gateway(name,basis_set):
    """Return the ``&GATEWAY`` section of the input file.

    The section points at ``<name>.xyz`` for coordinates, uses ``basis_set``
    as the basis label, sets ``Group = nosymm`` and ``Expert``, and ends with
    a blank line.
    """
    lines = [
        '&GATEWAY ',
        f'coord={name}.xyz',
        f'Basis = {basis_set}',
        'Group = nosymm',
        'Expert',
        'End of Input',
        '',
    ]
    return '\n'.join(lines) + '\n'

def gen_seward():
    """Return the (option-free) ``&SEWARD`` section, ending with a blank line."""
    return '&SEWARD\nEnd of Input\n\n'

def gen_motra(name):
    """Return the ``&MOTRA`` section with ``Frozen=0``.

    After the module, four ``GMJ_*`` integral CSV files are copied from
    ``$WorkDir`` to ``$CurrDir``, each prefixed with ``<name>.``.
    """
    stems = ('GMJ_one_int_indx','GMJ_one_int','GMJ_two_int_indx','GMJ_two_int')
    copy_lines = [
        f'>>> COPY $WorkDir/{stem}.csv $CurrDir/{name}.{stem}.csv'
        for stem in stems
    ]
    return '\n'.join(['&MOTRA','Frozen=0',*copy_lines,'']) + '\n'



def gen_scf(name,spin):
    """Return the ``&SCF`` section (UHF, charge 2).

    The value written under ``spin`` is ``spin + 1``; the caller in this
    notebook passes the ``'2S'`` entry, so the written number is 2S + 1.
    The section ends by copying ``<name>.scf.h5`` from ``$WorkDir`` to
    ``$CurrDir``.
    """
    lines = [
        '&SCF &END',
        'UHF',
        'charge',
        '2',
        'spin',
        f'{spin + 1}',
        f'>>> COPY $WorkDir/{name}.scf.h5 $CurrDir/',
        '',
    ]
    return '\n'.join(lines) + '\n'


def gen_rasscf(name,e,o,i,spin,previous=None):
    """Return the ``&RASSCF`` section of the input file.

    Parameters
    ----------
    name : str
        Prefix of the files copied back to ``$CurrDir`` after the module.
    e : int
        Number of active electrons, written as ``NACTEL <e> 0 0``.
    o : int
        Number of orbitals written under ``RAS2``.
    i : int
        Number of orbitals written under ``Inactive``.
    spin : int
        Written as ``spin + 1`` under the ``spin`` keyword (the caller in this
        notebook passes the ``'2S'`` entry).
    previous : str or None
        Orbital file to start from. When given, a ``FileOrb`` entry naming it
        is inserted after the title; when ``None`` no ``FileOrb`` entry is
        written.

    Returns
    -------
    str
        The section text, including the trailing ``>>> COPY`` lines and a
        final blank line.
    """
    start_string="""&RASSCF &END
Title= RASSCF
"""
    # Identity test against the None singleton (was ``!= None``).
    if previous is not None:
        fileorb=f"""FileOrb
{previous}
"""
    else:
        fileorb=''

    end_string=f"""
NACTEL
{e} 0 0
Inactive
{i}
RAS2
{o}
Symmetry
1
charge
2
spin
{spin + 1}
orblisting
all
ITERation
200 100
CIMX
200
SDAV
500

>>> COPY $WorkDir/{name}.rasscf.h5 $CurrDir/
>>> COPY $WorkDir/GMJ_Fock_MO.csv $CurrDir/{name}.GMJ_Fock_MO.csv

"""
    return start_string+fileorb+end_string

def gen_caspt2():
    """Return the ``&CASPT2`` section (``Frozen`` 0, ``Imaginary Shift`` 0.0).

    The module is followed by two ``foreach`` blocks that copy the
    ``GMJ_RHS_*``, ``GMJ_IVECW_*`` and ``GMJ_e2_*`` CSV files from
    ``$WorkDir`` to ``$CurrDir`` whenever the matching ``GMJ_e2_*`` file
    exists: one block over the labels B,E,F,G,H combined with P,M, and one
    over the labels A,C,D.
    """
    kinds = ('RHS','IVECW','e2')

    def copies(suffix):
        # One COPY directive per file kind for the given loop-variable suffix.
        return [
            '>>> COPY $WorkDir/GMJ_' + kind + '_' + suffix + '.csv'
            + ' $CurrDir/GMJ_' + kind + '_' + suffix + '.csv'
            for kind in kinds
        ]

    header = ['&CASPT2 &END','Frozen ','0','Imaginary Shift','0.0','']

    pair_suffix = '${i}_${j}'
    pair_block = [
        '>>foreach i in (B,E,F,G,H)',
        '>>foreach j in (P,M)',
        '>>if ( -FILE GMJ_e2_' + pair_suffix + '.csv )',
        *copies(pair_suffix),
        '>>endif',
        '>>enddo',
        '>>enddo',
        '',
    ]

    single_suffix = '$i'
    single_block = [
        '>>foreach i in (A,C,D)',
        '>>if ( -FILE GMJ_e2_' + single_suffix + '.csv )',
        *copies(single_suffix),
        '>>endif',
        '>>enddo',
        '',
    ]

    return '\n'.join(header + pair_block + single_block) + '\n'






In [None]:
# Return to the notebook's root directory before any files are generated.
os.chdir(topdir)

In [None]:
def gen_run(name,r,path):
    """Write the Slurm submission script ``run.sh`` into ``path``.

    Parameters
    ----------
    name : str
        Structure name; used in the job name and as the stem of the
        ``<name>.input`` / ``<name>.output`` files passed to ``pymolcas``.
    r : float or str
        Scan value appended to the job name. Numeric values are written with
        two decimals, the same format used for the per-radius directory names
        elsewhere in this notebook; anything that cannot be converted to a
        float is used as-is.
    path : str
        Directory in which ``run.sh`` is created (it must already exist).

    Notes
    -----
    The generated script records the exit status of ``pymolcas`` before the
    scratch directory is removed. Previously ``$?`` was read after ``rm -r``,
    so the final message reported the clean-up's status instead of the
    calculation's.
    """
    # Keep the job name consistent with the two-decimal directory naming.
    try:
        r_label = f"{float(r):.2f}"
    except (TypeError, ValueError):
        r_label = str(r)

    with open(os.path.join(path,'run.sh'),'w') as g:
        g.write(f"""#!/bin/bash
#This file is a submission script to request the ISAAC resources from Slurm
#SBATCH --account=ACF-UTK0022             # The project account to be charged
#SBATCH --job-name={name}_{r_label}		       #The name of the job
#SBATCH --nodes=1                     # Number of nodes
#SBATCH --ntasks-per-node=16          # cpus per node
#SBATCH --partition=condo-kvogiatz            # If not specified then default is "campus"
#SBATCH --qos=condo-kvogiatz
#SBATCH --time=1-00:00:00             # Wall time (days-hh:mm:ss)
#SBATCH --error=job.e%J	       # The file where run time errors will be dumped
#SBATCH --output=job.o%J	       # The file where the output of the terminal will be dumped



# PATCH!
# MOVE TO WORKING DIR
echo $SLURM_SUBMIT_DIR
cd $SLURM_SUBMIT_DIR
pwd
# DEFINE WorkDir
mkdir $SLURM_SUBMIT_DIR/$SLURM_JOBID
export WorkDir=$SLURM_SUBMIT_DIR/$SLURM_JOBID
export MOLCAS_WORKDIR="/lustre/isaac/scratch/gjones39"
export MOLCAS="/lustre/isaac/proj/UTK0022/GMJ/OpenMolcas/build"
echo $WorkDir



module purge
module load intel-compilers/2021.2.0
module load hdf5/1.10.8-intel
module load cmake/3.23.2-intel
# DEFINE WorkDir
echo "Starting at $(date)"
echo "Running on hosts: $SLURM_NODELIST"
echo "Running on $SLURM_NNODES nodes."
echo "Running $SLURM_NTASKS tasks."
echo "Current working directory is $(pwd)"

# THE COMMAND
#python3 Data_Script.py
conda activate /lustre/isaac/proj/UTK0022/GMJ/modules/anaconda3/envs/ddcaspt2
pymolcas {name}.input -oe {name}.output
status=$?
# CLEAN-UP AND EXIT
rm -r $SLURM_SUBMIT_DIR/$SLURM_JOBID
echo "Program finished with exit code $status at: $(date)"
""")

In [None]:
# Show the root directory under which 'cluster_data' is written.
topdir

In [None]:
def gen_input(k,v):
    """Generate the per-radius input tree for one structure.

    For every value in ``radius_range`` a directory
    ``<topdir>/cluster_data/<k>/<r:.2f>/`` is created containing the distorted
    geometry, the ``<k>.input`` file (GATEWAY, SEWARD, MOTRA, SCF, RASSCF and
    CASPT2 sections, in that order) and a ``run.sh`` submission script.

    Parameters
    ----------
    k : str
        Structure name; ``cleaned/<k>.xyz`` under ``topdir`` is the starting
        geometry.
    v : dict
        Active-space settings with the keys ``'NACTEL'``, ``'act_orbs'``,
        ``'inactive'`` and ``'2S'``.

    Notes
    -----
    * Every radius after the first starts its RASSCF from the ``.RasOrb`` file
      of the previous radius directory (``../<previous r>/<k>.RasOrb``), so
      the generated jobs are presumably meant to be run in increasing-radius
      order - confirm against the submission workflow.
    * NOTE(review): the MOTRA section is written before SCF/RASSCF; confirm
      this module order is what the modified OpenMolcas build expects.
    * The working directory is always restored to ``topdir`` on exit, even if
      a step raises, so a failure cannot leave later calls running inside a
      radius directory.
    """
    # Create structure subdirectory. makedirs also creates 'cluster_data'
    # itself on a fresh checkout and tolerates re-runs.
    struct_dir = os.path.join(topdir,'cluster_data',k)
    os.makedirs(struct_dir, exist_ok=True)

    # Get original structure. Resolved against topdir so the lookup does not
    # depend on the current working directory.
    xyzname = os.path.join(topdir,'cleaned',f'{k}.xyz')
    geom = Geometry(xyzname)
    iron = geom.find('Fe')
    # Atoms within 1.8 of the iron; presumably this selects only the oxo
    # ligand - TODO confirm for every structure.
    oxo = geom.get_near(iron,1.8)

    try:
        # tqdm wraps the range itself (not enumerate) so the bar knows its length.
        for idxr, r in enumerate(tqdm(radius_range)):
            # Create radius subdirectory
            rad_dir = os.path.join(struct_dir,f"{r:.2f}")
            os.makedirs(rad_dir, exist_ok=True)

            # Write xyz
            geom.change_distance(iron,oxo,r)
            geom.write(os.path.join(rad_dir,f'{k}'))

            # The first radius has no predecessor; later ones reuse the
            # orbitals of the previous radius directory.
            if idxr==0:
                previous = None
            else:
                previous = os.path.join(f"../{float(radius_range[idxr-1]):.2f}",f"{k}.RasOrb")

            sections = [
                gen_gateway(k,basis_set),
                gen_seward(),
                gen_motra(k),
                gen_scf(k,v['2S']),
                gen_rasscf(k,v['NACTEL'],v['act_orbs'],v['inactive'],v['2S'],previous=previous),
                gen_caspt2(),
            ]
            # Binary mode keeps the '\n' line endings untouched on every platform.
            with open(os.path.join(rad_dir,f'{k}.input'),'wb') as g:
                g.write(''.join(sections).encode())

            gen_run(k,r,rad_dir)
            os.chdir(rad_dir)
            # print(f"Moving to {rad_dir}")
            # call(['pymolcas','-new','-clean',f'{k}.input', '-oe', f'{k}.output'])
            # os.chdir(struct_dir)
    finally:
        os.chdir(topdir)

In [None]:
# Build the input tree for every structure in the merged active-space table.
for struct_name, space_info in merged_dict.items():
    gen_input(struct_name, space_info)