In [1]:
import sys
# !{sys.executable} -m pip install plumbum h5py seaborn 
#######################################################
#Import packages
import numpy as np
import os
os.environ['MOLCAS']='/home/grierjones/Test/build'


import re
from math import sin, cos, pi
from glob import glob
import subprocess
import pickle
from subprocess import call, check_output
import pandas as pd
# import psi4
from joblib import Parallel,effective_n_jobs,delayed
from time import time
import matplotlib.pyplot as plt
from plumbum.cmd import grep, awk

import shutil
import random
import sklearn
from shutil import copy
import csv
import h5py as h5
import seaborn as sns; sns.set(style="ticks", color_codes=True)

from sklearn.model_selection import train_test_split
from time import perf_counter


# Geom manipulate
from AaronTools.geometry import Geometry

from mendeleev import element

# Suppress PySCF warning...
import pyscf
pyscf.__config__.B3LYP_WITH_VWN5 = False

from pathlib import Path

# The Mole class is used to define molecular information in PySCF.
from pyscf.gto import Mole

# logger contains definitions of verbosity levels for PySCF.
from pyscf.lib import logger

# Functionality for (state-averaged) CASSCF.
from pyscf.mcscf import CASSCF, state_average_mix
from pyscf.fci.direct_spin1 import FCISolver
from pyscf.fci.addons import fix_spin

# Wrapper functions to perform selection for variable and fixed active space sizes
from asf.wrapper import find_from_mol, find_from_scf, sized_space_from_mol, sized_space_from_scf

# Various utility functions...
from asf.utility import compare_active_spaces, show_mos_grid, pictures_Jmol

import sys



In [2]:
#######################################################
# Orbital labels
## Inactive i,j
## Active t,u,v
## Virtual a,b

## Type 1: IA->AA
## Type 2: II->AA (P)
## Type 3: II->AA (M)
## Type 4: AA->VA
## Type 5: IA->VA/AV
## Type 6: II->AV (P)
## Type 7: II->AV (M)
## Type 8: AA->VV (P)
## Type 9: AA->VV (M)
## Type 10: IA->VV (P)
## Type 11: IA->VV (M)
## Type 12: II->VV (P)
## Type 13: II->VV (M)

## A: IA->AA
## B: II->AA
## C: AA->VA
## D: IA->VA/AV
## E: II->AV
## F: AA->VV
## G: IA->VV 
## H: II->VV
#######################################################


In [3]:
# Delete excessive extra files
def del_useless():
    '''
    Delete the extra files

    Walks the current working directory recursively and removes every file
    whose name contains at least one of the marker substrings below.
    Directories themselves are left in place. Returns None.
    '''
    markers = ('status','GssOrb','LprOrb','LoProp','guessorb','xmldump','RasOrb','SpdOrb')
    for root, dirs, files in os.walk(os.getcwd()):
        for file in files:
            # any() removes each file exactly once; removing once per matching
            # marker raises FileNotFoundError for names that contain two
            # markers (e.g. "x.guessorb.status").
            if any(marker in file for marker in markers):
                os.remove(os.path.join(root,file))

In [4]:
# When restarting a set of calculations, just clear everything out
def clean_dir():
    '''
    Remove generated output directories from the current working directory.

    Deletes the directories named Fock, hdf5, e2, Labels and Coords, plus any
    directory whose name contains 'dir'. Regular files and all other
    directories are left untouched. Returns None.
    '''
    generated = {'Fock', 'hdf5', 'e2', 'Labels', 'Coords'}
    for item in os.scandir(path=os.getcwd()):
        if not item.is_dir():
            continue
        # Names are relative to the cwd, which is the directory being scanned.
        if item.name in generated or 'dir' in item.name:
            shutil.rmtree(item.name)
                


In [5]:
# Run this before clean_dir: it copies the xyz files out of Coords/ into the working directory, because clean_dir deletes Coords/
def pull_xyz():
    '''
    Copy xyz files from ./Coords into the current working directory.

    For every entry of the notebook-level iterable ``struct_name`` the file
    Coords/<entry> is copied to ./<entry>, unless ./<entry> already exists.
    Nothing is copied when ./Coords does not exist. Returns None.
    '''
    cwd = os.getcwd()
    coords_dir = os.path.join(cwd, 'Coords')
    # The Coords check does not depend on the loop variable, so test it once.
    if not os.path.exists(coords_dir):
        return
    for i in struct_name:
        target = os.path.join(cwd, i)
        if not os.path.exists(target):
            shutil.copy(os.path.join(coords_dir, i), target)


In [6]:
def gen_gateway(name,basis_set):
    '''
    Return the &GATEWAY section of the input file as a string.

    name      : stem of the coordinate file; written as "coord=<name>.xyz".
    basis_set : text written after "Basis = ".
    '''
    lines = [
        '&GATEWAY ',  # the trailing blank is part of the emitted text
        f'coord={name}.xyz',
        f'Basis = {basis_set}',
        'Group = nosymm',
        'Expert',
        'End of Input',
        '',
        '',  # two empty entries -> the section ends with one blank line
    ]
    return '\n'.join(lines)

def gen_seward():
    '''
    Return the &SEWARD section of the input file (no options, followed by a
    blank line) as a string.
    '''
    return (
        '&SEWARD\n'
        'End of Input\n'
        '\n'
    )

def gen_motra(name):
    '''
    Return the &MOTRA section of the input file as a string.

    The section sets Frozen=0 and adds one ">>> COPY" line per integral csv
    file, copying $WorkDir/<file>.csv to $CurrDir/<name>.<file>.csv.
    '''
    parts = ['&MOTRA', 'Frozen=0']
    for stem in ('GMJ_one_int_indx', 'GMJ_one_int', 'GMJ_two_int_indx', 'GMJ_two_int'):
        parts.append(f'>>> COPY $WorkDir/{stem}.csv $CurrDir/{name}.{stem}.csv')
    # One newline ends the last COPY line, the second leaves a blank line.
    return '\n'.join(parts) + '\n\n'



def gen_scf(name,spin):
    '''
    Return the &SCF section (UHF, charge 2) of the input file as a string.

    name : stem used in the COPY line for <name>.scf.h5.
    spin : number; spin + 1 is written on the line after the "spin" keyword
           (presumably spin is 2S, making spin + 1 the multiplicity - confirm).
    '''
    spin_value = spin + 1
    section = (
        "&SCF &END\n"
        "UHF\n"
        "charge\n"
        "2\n"
        "spin\n"
        f"{spin_value}\n"
        f">>> COPY $WorkDir/{name}.scf.h5 $CurrDir/\n"
        "\n"
    )
    return section


def gen_rasscf(name,e,o,i,spin,previous=None):
    '''
    Return the &RASSCF section of the input file as a string.

    name     : stem used in the COPY lines (<name>.rasscf.h5, <name>.GMJ_Fock_MO.csv).
    e        : written as "<e> 0 0" after NACTEL.
    o        : written after RAS2.
    i        : written after Inactive.
    spin     : spin + 1 is written after the "spin" keyword.
    previous : when given, a "FileOrb" keyword followed by this value is
               inserted right after the title; otherwise it is left out.
    '''
    header = ["&RASSCF &END", "Title= RASSCF"]
    start_orbitals = ["FileOrb", f"{previous}"] if previous != None else []
    keywords = [
        "",
        "NACTEL", f"{e} 0 0",
        "Inactive", f"{i}",
        "RAS2", f"{o}",
        "Symmetry", "1",
        "charge", "2",
        "spin", f"{spin + 1}",
        "orblisting", "all",
        "ITERation", "200 100",
        "CIMX", "200",
        "SDAV", "500",
        "",
        f">>> COPY $WorkDir/{name}.rasscf.h5 $CurrDir/",
        f">>> COPY $WorkDir/GMJ_Fock_MO.csv $CurrDir/{name}.GMJ_Fock_MO.csv",
        "",
        "",  # two empty entries -> the section ends with one blank line
    ]
    return "\n".join(header + start_orbitals + keywords)

def gen_caspt2():
    '''
    Return the &CASPT2 section of the input file as a string.

    The section sets "Frozen" to 0 and "Imaginary Shift" to 0.0, followed by
    two ">>foreach" blocks that copy the GMJ_RHS / GMJ_IVECW / GMJ_e2 csv
    files from $WorkDir to $CurrDir: one block for the cases B, E, F, G, H
    combined with P and M, one for the cases A, C, D.
    '''
    def copy_lines(tag):
        # One COPY directive per csv family for the given loop-variable tag.
        return [
            f">>> COPY $WorkDir/GMJ_{family}_{tag}.csv $CurrDir/GMJ_{family}_{tag}.csv"
            for family in ("RHS", "IVECW", "e2")
        ]

    header = [
        "&CASPT2 &END",
        "Frozen ",  # the trailing blank is part of the emitted text
        "0",
        "Imaginary Shift",
        "0.0",
        "",
    ]
    paired_cases = [
        ">>foreach i in (B,E,F,G,H)",
        ">>foreach j in (P,M)",
        ">>if ( -FILE GMJ_e2_${i}_${j}.csv )",
        *copy_lines("${i}_${j}"),
        ">>endif",
        ">>enddo",
        ">>enddo",
        "",
    ]
    single_cases = [
        ">>foreach i in (A,C,D)",
        ">>if ( -FILE GMJ_e2_$i.csv )",
        *copy_lines("$i"),
        ">>endif",
        ">>enddo",
        "",
    ]
    return "\n".join(header + paired_cases + single_cases) + "\n"






In [7]:
# Show the current working directory as the cell output.
os.getcwd()

'/home/grierjones/DDCASPT2/iron_oxo_DDCASPT2'

In [8]:
# Basis-set label; gen_data passes it to gen_gateway for the "Basis = ..." line.
basis_set='ANO-RCC-MB'

In [9]:
# Working directory at the time this cell runs. gen_data joins it into the path
# of the previous step's RasOrb file and changes back to it after each run.
top=os.getcwd()
# top="/lustre/isaac/proj/UTK0022/GMJ/DDCASPT2/ozone"

In [48]:
# Starting structure for the scan.
xyzname = 'opt_OPBE_react/gmj_1.xyz'
geom = Geometry(xyzname)

# Atoms moved by gen_data: the Fe match and whatever get_near returns for a
# cutoff of 1.8 around it (presumably the oxo atom, cutoff in Angstrom - confirm).
iron = geom.find('Fe')
oxo = geom.get_near(iron, 1.8)

# Lower end of the scan: mean of the Fe and O Cordero covalent radii divided
# by 100 (presumably pm -> Angstrom - confirm the unit against mendeleev).
min_radii = sum(element(symbol).covalent_radius_cordero for symbol in ('Fe', 'O')) / 2 / 100


In [47]:
# Scan grid: 100 evenly spaced values from min_radii to 3.
radius_range = np.linspace(start=min_radii, stop=3, num=100)

# Deterministic interleaved split of the grid values: even positions go to the
# training set, odd positions to the test set.
# Alternative kept for reference: train_test_split(radius_range, test_size=0.3, random_state=0)
train_ind = radius_range[::2]
test_ind = radius_range[1::2]
print(f"{len(train_ind)} {len(test_ind)}")

# Persist both sets so they can be reloaded without recomputing the split.
with open('train_ind.pickle', mode='wb') as handle:
    pickle.dump(train_ind, handle, protocol=pickle.HIGHEST_PROTOCOL)
with open('test_ind.pickle', mode='wb') as handle:
    pickle.dump(test_ind, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Read them straight back and report the sizes again as a round-trip check.
with open('test_ind.pickle', mode='rb') as handle:
    test_ind = pickle.load(handle)
with open('train_ind.pickle', mode='rb') as handle:
    train_ind = pickle.load(handle)

print(f"{len(train_ind)} {len(test_ind)}")


50 50
50 50


In [None]:

def gen_data(verbose=True, spin=None):
    '''
    Build and run one pymolcas calculation per value in radius_range.

    For every radius r the function
      1. creates <dirname>/<dirname>_<r:.2f>/ (dirname is the xyz file stem),
      2. calls geom.change_distance(iron, oxo, r) and writes the geometry,
      3. writes <name>.input with the GATEWAY, SEWARD, MOTRA, SCF, RASSCF and
         CASPT2 sections, in that order,
      4. runs pymolcas inside the sub-directory,
      5. rewrites every *csv file found there as a zip-compressed csv under
         the same file name.

    From the second radius on, the RASSCF section gets a FileOrb entry that
    points at the previous radius' <name>.RasOrb file below ``top``.

    Relies on the notebook-level names xyzname, radius_range, geom, iron, oxo,
    basis_set and top. Sub-directories are created relative to the current
    working directory while the RasOrb path is built from ``top``, so call
    this with the working directory equal to ``top``.

    Parameters
    ----------
    verbose : bool
        Print progress messages.
    spin : number
        Forwarded to gen_scf and gen_rasscf, which write spin + 1 into the
        input. Both helpers require it, so it has to be supplied here; no
        default is assumed because nothing in this notebook fixes the value.

    Raises
    ------
    TypeError
        If spin is not supplied.
    '''
    if spin is None:
        # Fail before any directory or file is created instead of half-way
        # through writing the first input file.
        raise TypeError("gen_data() needs an explicit spin (e.g. gen_data(spin=...)); "
                        "it is forwarded to gen_scf and gen_rasscf")

    dirname=os.path.basename(xyzname).replace(".xyz","")
    if not os.path.exists(dirname):
        if verbose:
            print(f'Making {dirname}')
        os.mkdir(dirname)

    # RasOrb file of the previous radius; None for the first point, so the
    # first RASSCF run starts without a FileOrb entry.
    previous=None
    for r in radius_range:
        t0=perf_counter()
        name=f"{dirname}_{float(r):.2f}"
        subdirname=os.path.join(dirname,name)

        # Create files
        if not os.path.exists(subdirname):
            if verbose:
                print(f'Making {subdirname}')
            os.mkdir(subdirname)

        # Write xyz
        geom.change_distance(iron,oxo,r)
        # NOTE(review): confirm that Geometry.write does not append its own
        # ".xyz" to this path; GATEWAY expects <name>.xyz in the sub-directory.
        geom.write(os.path.join(subdirname,f'{name}.xyz'))

        if verbose:
            print(f"xyz created")

        # Write input
        with open(os.path.join(subdirname,f'{name}.input'),'wb') as g:
            g.write(gen_gateway(name,basis_set).encode())
            g.write(gen_seward().encode())
            g.write(gen_motra(name).encode())
            g.write(gen_scf(name,spin).encode())
            # Active space: e=4, o=3, i=10 (NACTEL, RAS2 and Inactive entries).
            g.write(gen_rasscf(name,4,3,10,spin,previous=previous).encode())
            g.write(gen_caspt2().encode())

        if verbose:
            print(f"Input created")

        # Change dir
        if verbose:
            print(f"Changing directories to {subdirname}")
        os.chdir(subdirname)
        try:
            # Run
            status=call(['pymolcas','-new','-clean',f'{name}.input', '-oe', f'{name}.output'])
            if status!=0:
                # Non-fatal: the scan continues with the next radius.
                print(f"pymolcas exited with status {status} for {name}")
            if verbose:
                print(f"Calculation ran in {perf_counter()-t0:.2f} seconds")

            # Rewrite each csv in place as a zip archive that keeps the .csv
            # name; reading it back later needs compression='zip'.
            for c in glob("*csv"):
                if 'e2' in c or 'IVECW' in c:
                    pd.read_csv(c).to_csv(c,compression='zip')
                else:
                    pd.read_csv(c,header=None).to_csv(c,compression='zip')
            if verbose:
                print("Compression done")
        finally:
            # Back to top dir, also when the run or the compression raised, so
            # the notebook is never left inside a sub-directory.
            if os.getcwd()!=top:
                if verbose:
                    print(f"Moving back to {top}")
                os.chdir(top)
        print()

        previous=os.path.join(top,dirname,name,f"{name}.RasOrb")


# In[12]:



In [None]:

def _collect_energies(dirname, pattern, awk_program):
    '''
    Return a DataFrame with columns radius/energy, one row per radius_range entry.

    The energy is obtained by piping ``grep -i <pattern> <output file>`` into
    ``awk <awk_program>`` and converting the result to float. A radius whose
    output cannot be read or parsed gets the energy 0.
    '''
    rows=[]
    for r in radius_range:
        name=f"{dirname}_{float(r):.2f}"
        output=os.path.join(dirname,name,f'{name}.output')
        try:
            value=float((grep['-i', pattern, output] | awk[awk_program])())
        except Exception:
            # Missing file, failed command or non-numeric text: keep the 0
            # placeholder so every radius still has a row.
            value=0
        rows.append([r,value])
    return pd.DataFrame(rows,columns=['radius','energy'])


def gen_energy(dirname=None):
    '''
    Extract CASPT2, CASSCF and E2 energies from the pymolcas output files.

    Writes <dirname>/CASPT2.csv, <dirname>/CASSCF.csv and <dirname>/E2.csv,
    each with the columns radius and energy (0 marks a radius whose output
    could not be parsed). Relies on the notebook-level names radius_range and,
    when dirname is not given, xyzname. Returns None.

    Parameters
    ----------
    dirname : str, optional
        Directory holding the per-radius sub-directories. Defaults to the stem
        of xyzname, which is the directory name gen_data creates.
    '''
    if dirname is None:
        # Same derivation as in gen_data. Reading dirname before assigning it
        # inside this function raises UnboundLocalError.
        dirname=os.path.basename(xyzname).replace(".xyz","")

    _collect_energies(dirname,'::    CASPT2','{print $NF }').to_csv(f'{dirname}/CASPT2.csv')
    _collect_energies(dirname,'::    RASSCF root number  1','{print $8 }').to_csv(f'{dirname}/CASSCF.csv')
    _collect_energies(dirname,'E2 (Variational):','{print $NF }').to_csv(f'{dirname}/E2.csv')



In [None]:
# gen_data()
# del_useless()

In [None]:
# gen_energy()

In [None]:
# Seven colours from seaborn's 'rocket' palette (not used by the cells shown here).
cmap=sns.color_palette('rocket',7)

In [None]:
# gen_energy()
# fig,ax=plt.subplots(2,2,figsize=(10,6),sharex=True)
# dirname only exists as a local inside gen_data; derive it here the same way
# so that this cell also runs on a fresh kernel.
dirname=os.path.basename(xyzname).replace(".xyz","")

# Energy tables written by gen_energy (columns: radius, energy).
CASSCF=pd.read_csv(f"{dirname}/CASSCF.csv",index_col=0)
CASPT2=pd.read_csv(f"{dirname}/CASPT2.csv",index_col=0)

# Both curves are shifted so that the last scan point sits at zero.
plt.plot(CASSCF['radius'],CASSCF['energy']-CASSCF.iloc[-1]['energy'],label='CASSCF')
plt.plot(CASPT2['radius'],CASPT2['energy']-CASPT2.iloc[-1]['energy'],label='CASPT2')
# NOTE(review): the title says O3, but the scanned structure is read from
# xyzname and moves the Fe/oxo atoms - confirm the intended title.
plt.title('O$_{3}$')
plt.xlabel("Radius (Å)")
plt.ylabel("Energy (E$_{h}$)")
plt.legend()
plt.tight_layout()
plt.savefig('energies.png',dpi=300,bbox_inches='tight')
plt.show()