In [None]:
import sys
# !{sys.executable} -m pip install --upgrade  xeus-python notebook
#######################################################
#Import packages
import numpy as np
import os
# Export the MOLCAS location and scratch directory before anything is launched
# (presumably read by the `pymolcas` driver invoked later — TODO confirm).
# NOTE(review): the build path is an absolute, user-specific location; adjust it per machine.
os.environ['MOLCAS']='/home/grierjones/Test/build'
os.environ['MOLCAS_WORKDIR']='/tmp'
import re
from math import sin, cos, pi
from glob import glob
import subprocess
import pickle
from subprocess import call, check_output
import pandas as pd
# import psi4
from joblib import Parallel,effective_n_jobs,delayed
from time import time
import matplotlib.pyplot as plt
from plumbum.cmd import grep, awk

import shutil
import random
import sklearn
from shutil import copy
import csv
import h5py as h5
import seaborn as sns; sns.set(style="ticks", color_codes=True)

from sklearn.model_selection import train_test_split

In [None]:
#######################################################
# Orbital labels
## Inactive i,j
## Active t,u,v
## Virtual a,b

## Type 1: IA->AA
## Type 2: II->AA (P)
## Type 3: II->AA (M)
## Type 4: AA->VA
## Type 5: IA->VA/AV
## Type 6: II->AV (P)
## Type 7: II->AV (M)
## Type 8: AA->VV (P)
## Type 9: AA->VV (M)
## Type 10: IA->VV (P)
## Type 11: IA->VV (M)
## Type 12: II->VV (P)
## Type 13: II->VV (M)

## A: IA->AA
## B: II->AA
## C: AA->VA
## D: IA->VA/AV
## E: II->AV
## F: AA->VV
## G: IA->VV 
## H: II->VV
#######################################################


In [None]:
# Delete excessive extra files
def del_useless():
    '''
    Delete the extra files below the current working directory.

    Walks ``os.getcwd()`` recursively and removes every file whose name
    contains at least one of the marker substrings in ``markers``.

    Returns
    -------
    None
    '''
    markers = ('status','GssOrb','LprOrb','LoProp','guessorb','xmldump','RasOrb','SpdOrb')
    for root, dirs, files in os.walk(os.getcwd()):
        for file in files:
            # Check all markers in one test so that a name matching several of
            # them is removed exactly once; a second os.remove on the same path
            # would raise FileNotFoundError.
            if any(marker in file for marker in markers):
                os.remove(os.path.join(root,file))

In [None]:
# When restarting a set of calculations, just clear everything out
def clean_dir():
    '''
    Remove output folders from the current working directory.

    Every immediate sub-directory named 'Fock', 'hdf5', 'e2', 'Labels' or
    'Coords', and every sub-directory whose name contains 'dir', is deleted
    recursively. Plain files and other directories are left alone.
    '''
    exact_names = ('Fock', 'hdf5', 'e2', 'Labels', 'Coords')
    for item in os.scandir(path=os.getcwd()):
        if not item.is_dir():
            continue
        folder = item.name
        if folder in exact_names or 'dir' in folder:
            shutil.rmtree(folder)
                


In [None]:
def gen_gateway(name,basis_set):
    '''
    Build the &GATEWAY section of the input file.

    name: stem of the geometry file; the section refers to ``{name}.xyz``.
    basis_set: label written after ``Basis =``.
    Returns the section text, ending with one blank line.
    '''
    xyz_file = f'{name}.xyz'
    section_lines = [
        '&GATEWAY ',
        f'coord={xyz_file}',
        f'Basis = {basis_set}',
        'Group = nosymm',
        'Expert',
        'End of Input',
        '',
    ]
    return ''.join(line + '\n' for line in section_lines)

def gen_seward():
    '''
    Build the &SEWARD section of the input file.

    The section carries no keywords: just the header, ``End of Input`` and a
    trailing blank line.
    '''
    header, footer = '&SEWARD', 'End of Input'
    return '\n'.join((header, footer, '', ''))

def gen_motra(name):
    '''
    Build the &MOTRA section of the input file.

    After the keywords, one ``>>> COPY`` command per integral dump is emitted;
    each copies ``$WorkDir/<stem>.csv`` to ``$CurrDir/{name}.<stem>.csv``.

    name: prefix given to the copied files.
    Returns the section text, ending with one blank line.
    '''
    integral_stems = ('GMJ_one_int_indx', 'GMJ_one_int', 'GMJ_two_int_indx', 'GMJ_two_int')
    copy_commands = [
        f'>>> COPY $WorkDir/{stem}.csv $CurrDir/{name}.{stem}.csv'
        for stem in integral_stems
    ]
    section_lines = ['&MOTRA', 'Frozen=0', 'LUMORB', *copy_commands, '']
    return ''.join(line + '\n' for line in section_lines)

def gen_scf(name):
    '''
    Build the &SCF section of the input file.

    The section consists of the header and a ``>>> COPY`` command that copies
    ``$WorkDir/{name}.scf.h5`` into ``$CurrDir/``, followed by a blank line.
    '''
    copy_command = '>>> COPY $WorkDir/{}.scf.h5 $CurrDir/'.format(name)
    return '&SCF &END\n' + copy_command + '\n\n'


def gen_rasscf(name,electrons,occupied,inactive,symmetry=1,spin=1,previous=None):
    """
    Build the &RASSCF section of the input file.

    Parameters
    ----------
    name :
        Job name; used in the trailing COPY commands
        (``{name}.rasscf.h5``, ``{name}.GMJ_Fock_MO.csv``,
        ``{name}.GMJ_PT2_Fock_MO.csv``).
    electrons :
        Written on the line after ``NACTEL`` as ``{electrons} 0 0``.
    occupied :
        Written on the line after ``RAS2``.
    inactive :
        Written on the line after ``Inactive``.
    symmetry, spin :
        Written after the ``Symmetry`` and ``Spin`` keywords (default 1 each).
    previous :
        Optional value written after a ``FileOrb`` keyword, inserted right
        after the title. ``None`` (the default) omits the keyword.

    Returns
    -------
    str
        The section text, ending with one blank line.
    """
    start_string="""&RASSCF &END
Title= RASSCF
"""
    # Identity test against the None sentinel: `!= None` goes through the
    # argument's own __ne__, which may be overridden.
    if previous is not None:
        fileorb=f"""FileOrb
{previous}
"""
    else:
        fileorb=''

    end_string=f"""NACTEL
{electrons} 0 0
Inactive
{inactive}
RAS2
{occupied}
Symmetry
{symmetry}
Spin
{spin}
orblisting
all
ITERation
200 100
CIMX
200
SDAV
500

>>> COPY $WorkDir/{name}.rasscf.h5 $CurrDir/
>>> COPY $WorkDir/GMJ_Fock_MO.csv $CurrDir/{name}.GMJ_Fock_MO.csv
>>> COPY $WorkDir/GMJ_PT2_Fock_MO.csv $CurrDir/{name}.GMJ_PT2_Fock_MO.csv

"""
    return start_string+fileorb+end_string

def gen_caspt2():
    '''
    Build the &CASPT2 section of the input file.

    After the keywords, two ``foreach`` blocks are emitted. The first loops
    over cases B, E, F, G, H and over P/M; the second loops over cases A, C, D.
    Inside an ``if ( -FILE GMJ_e2_<case>.csv )`` guard, each block copies the
    RHS, IVECW and e2 CSV files of the case from ``$WorkDir`` to ``$CurrDir``.

    Returns the section text, ending with one blank line.
    '''
    def copy_commands(case):
        # One COPY line per dumped quantity, same file name on both sides.
        return [
            f'>>> COPY $WorkDir/GMJ_{kind}_{case}.csv $CurrDir/GMJ_{kind}_{case}.csv'
            for kind in ('RHS', 'IVECW', 'e2')
        ]

    paired_case = '${i}_${j}'
    single_case = '$i'
    section_lines = [
        '&CASPT2 &END',
        'Frozen ',
        '0',
        '',
        '>>foreach i in (B,E,F,G,H)',
        '>>foreach j in (P,M)',
        f'>>if ( -FILE GMJ_e2_{paired_case}.csv )',
        *copy_commands(paired_case),
        '>>endif',
        '>>enddo',
        '>>enddo',
        '',
        '>>foreach i in (A,C,D)',
        f'>>if ( -FILE GMJ_e2_{single_case}.csv )',
        *copy_commands(single_case),
        '>>endif',
        '>>enddo',
        '',
    ]
    return ''.join(line + '\n' for line in section_lines)





In [None]:
# Working directory at the time this cell runs.
top = os.getcwd()
# Basis-set label passed to the input generator.
basis_set = "ANO-RCC-VDZP"

In [None]:
# Write H2.xyz: atom count, empty comment line, then two H atoms on the z axis
# (z = 0 and z = 0.94).
with open('H2.xyz', 'w') as f:
    f.write('2\n\n')
    for j in (0, 1):
        f.write('H {:>8f} {:>8f} {:>8f}\n'.format(0, 0, j*0.94))

In [None]:
def write_input(path,basis_set,name,electrons,occupied,inactive,previous=None):
    """
    Write the complete input file to ``{path}/{name}.input``.

    The sections are written in this order: GATEWAY, SEWARD, SCF, RASSCF,
    MOTRA, CASPT2. ``basis_set`` goes to the GATEWAY section; ``electrons``,
    ``occupied``, ``inactive`` and ``previous`` are forwarded to ``gen_rasscf``.
    """
    target = os.path.join(path,f'{name}.input')
    # Binary mode with explicit encoding: no newline translation is applied.
    with open(target,'wb') as g:
        sections = (
            gen_gateway(name,basis_set),
            gen_seward(),
            gen_scf(name),
            gen_rasscf(name,electrons,occupied,inactive,previous=previous),
            gen_motra(name),
            gen_caspt2(),
        )
        for section in sections:
            g.write(section.encode())

In [None]:
# Generate H2.input in the current directory: 2 active electrons, 2 active
# orbitals, no inactive orbitals, no starting orbital file.
write_input(
    path='./',
    basis_set=basis_set,
    name='H2',
    electrons=2,
    occupied=2,
    inactive=0,
    previous=None,
)

In [None]:
# Launch pymolcas on H2.input; '-oe H2.output' presumably redirects stdout and
# stderr to that file (TODO confirm against `pymolcas --help`).
# NOTE(review): the exit status is only displayed as the cell result, not checked.
pymolcas_cmd = ['pymolcas', '-new', '-clean', 'H2.input', '-oe', 'H2.output']
call(pymolcas_cmd)

In [None]:
# Load the PT2 Fock elements.
# Column list as originally documented:
# IT,IU,F(global index),FI(global index),fa(global index),d(global index)
# The file is read twice as raw binary (np.fromfile with the default sep=''),
# once reinterpreted as int and once as float; both views are reshaped to
# 6 columns. Columns 0-2 are taken from the int view, columns 3-5 from the
# float view.
# NOTE(review): the column list above names two indices and four values, but
# the code treats three columns as integers — confirm the on-disk layout.
pt2fock = 'H2.GMJ_PT2_Fock_MO.csv'
pt2fock_idx = np.fromfile(pt2fock, dtype=int).reshape(-1, 6)[:, :3]
pt2fock_values = np.fromfile(pt2fock, dtype=float).reshape(-1, 6)[:, 3:]
pt2fock_stacked = np.concatenate((pt2fock_idx, pt2fock_values), axis=1)

In [None]:
# Read the CASSCF Fock dump as a flat array of raw binary floats
# (np.fromfile with sep='' does not parse text, despite the .csv suffix).
CASSCF_fock = np.fromfile('H2.GMJ_Fock_MO.csv', dtype=float)

In [None]:
# Load one-electron integrals (flat arrays of raw binary floats).
oneelint = np.fromfile('H2.GMJ_one_int.csv', dtype=float)
# NOTE(review): the index files are read as float as well, whereas the PT2 Fock
# indices are read with dtype=int — confirm the on-disk type of the *_indx files.
oneelint_idx = np.fromfile('H2.GMJ_one_int_indx.csv', dtype=float)

# Load two-electron integrals (same format and same caveat for the index file).
twoelint = np.fromfile('H2.GMJ_two_int.csv', dtype=float)
twoelint_idx = np.fromfile('H2.GMJ_two_int_indx.csv', dtype=float)

In [None]:
# Grab rasscf and scf hdf5 data
# NOTE(review): both handles stay open after this cell; close them once no
# later cell needs them.
rasscf_h5 = h5.File('H2.rasscf.h5', 'r')
scf_h5 = h5.File('H2.scf.h5', 'r')

# Dataset names of the RASSCF file, plus two of its attributes.
datasetNames = list(rasscf_h5.keys())
NBAS = rasscf_h5.attrs['NBAS']
NACTEL = rasscf_h5.attrs['NACTEL']

#Keys: MO_VECTORS, MO_ENERGIES, MO_OCCUPATIONS
casMO_dict = {k:np.array(rasscf_h5[k]) for k in datasetNames if "MO" in k}
# Iterate over the SCF file's own keys: indexing it with names taken from the
# RASSCF file raises KeyError for any "MO" dataset that exists only there, and
# silently skips "MO" datasets that exist only in the SCF file.
scfMO_dict = {k:np.array(scf_h5[k]) for k in scf_h5.keys() if "MO" in k}

In [None]:
# Get two-electron indices
def load_rhs_labels(pattern="GMJ_RHS_*.csv"):
    """
    Read the labels stored in the first column of every RHS CSV file.

    Each file matching ``pattern`` is globbed and parsed once (the original
    cell globbed and read every file twice). The dictionary key is the file
    name up to its first '.', with the "GMJ_RHS_" prefix removed. Zero padding
    inside each label is stripped (e.g. 'A001' -> 'A1').

    Parameters
    ----------
    pattern : str
        Glob pattern, resolved relative to the current working directory.

    Returns
    -------
    (dict, dict)
        ``full`` maps case -> list of complete labels;
        ``pairs`` maps case -> list of labels truncated to their first two
        '_'-separated fields.
    """
    zero_padding = re.compile(r'(?<!\d)0+(\d+)')
    full, pairs = {}, {}
    for rhs_file in glob(pattern):
        case = rhs_file.split('.')[0].replace("GMJ_RHS_","")
        raw_labels = pd.read_csv(rhs_file,header=None)[0].values
        labels = [zero_padding.sub(r'\1', label) for label in raw_labels]
        full[case] = labels
        pairs[case] = ['_'.join(label.split('_')[0:2]) for label in labels]
    return full, pairs


two_el_ex_labels, pair_labels = load_rhs_labels()

In [None]:
# Total of all entries in the C and F_P e2 tables; the first line of each file
# is skipped as a header.
sum(
    np.genfromtxt(e2_file, skip_header=True).flatten().sum()
    for e2_file in ('GMJ_e2_C.csv', 'GMJ_e2_F_P.csv')
)

In [None]:
def strip(lst):
    '''
    Remove zero padding from the numbers in an underscore-separated label.

    lst: the label as a single string (despite the parameter name),
         e.g. 'A001_S010'.
    Returns the label with leading zeros dropped from each field,
    e.g. 'A1_S10'. A zero that follows another digit is kept.
    '''
    cleaned = []
    for field in lst.split('_'):
        cleaned.append(re.sub(r'(?<!\d)0+(\d+)', r'\1', field))
    return '_'.join(cleaned)


Dimension check for DDCASPT2: verify the ordering of the pair energies.
The notation below follows a mix of the papers and the code.

A (IA->AA): \ TIUV \ E$_{ti}$ E$_{uv}$ \ pqrs=tiuv=0123 \
B_P (II->AA) (P): \ IJTU \ E$_{ti}$ E$_{uj}$ \ pqrs=tiuj=2031 \
B_M (II->AA) (M): \ IJTU \ E$_{ti}$ E$_{uj}$ \ pqrs=tiuj=2031 \
C (AA->VA): \ UVAT \ E$_{at}$ E$_{uv}$ \ pqrs=atuv=2301 \
D (IA->VA/AV): \ IUAT/IUTA \ E$_{ai}$ E$_{tu}$/E$_{ti}$ E$_{au}$ \ pqrs=(a/t)i(t/a)u=2031 \
E_P (II->AV) (P): \ IJAT \ E$_{ti}$ E$_{aj}$ \ pqrs=tiaj=3021 \
E_M (II->AV) (M): \ IJAT \ E$_{ti}$ E$_{aj}$ \ pqrs=tiaj=3021 \
F_P (AA->VV) (P): \ TUAB \ E$_{at}$ E$_{bu}$ \ pqrs=atbu=2031 \
F_M (AA->VV) (M): \ TUAB \ E$_{at}$ E$_{bu}$ \ pqrs=atbu=2031 \
G_P (IA->VV) (P): \ ITAB \ E$_{ai}$ E$_{bt}$ \ pqrs=aibt=2031 \
G_M (IA->VV) (M): \ ITAB \ E$_{ai}$ E$_{bt}$ \ pqrs=aibt=2031 \
H_P (II->VV) (P): \ IJAB \ E$_{ai}$ E$_{bj}$ \ pqrs=aibj=2031 \
H_M (II->VV) (M): \ IJAB \ E$_{ai}$ E$_{bj}$ \ pqrs=aibj=2031 \


In [None]:
# Case C right-hand side: first column holds the labels (used as index).
RHS = pd.read_csv('GMJ_RHS_C.csv',sep=',',header=None,index_col=0)
# Drop the zero padding from every label.
RHS.index = list(map(strip,RHS.index))
# Raw string for the separator regex: '\s' in a plain literal is an invalid
# escape sequence (SyntaxWarning on Python >= 3.12).
# The first line of the file is skipped.
IVEC = pd.read_csv('GMJ_IVECW_C.csv',sep=r'\s+',header=None,skiprows=[0])
# e2 values for case C; the first line of the file is skipped as a header.
e2 = np.genfromtxt('GMJ_e2_C.csv',skip_header=True)

In [None]:
# Two-column view: stripped RHS labels next to the e2 values.
np.concatenate(
    (np.array(RHS.index).reshape(-1, 1), e2.reshape(-1, 1)),
    axis=1,
)

In [None]:
# Read the whitespace-separated IVECW table (first line skipped), cast it to
# float64 and drop every column that contains a NaN.
# Raw string for the separator regex: '\s' in a plain literal is an invalid
# escape sequence (SyntaxWarning on Python >= 3.12).
ordered=pd.read_csv('GMJ_IVECW_C.csv',sep=r'\s+', skiprows=[0],header=None).astype(np.float64).dropna(axis=1)

# Renumber the surviving columns 0..n-1.
ordered.columns=list(range(len(ordered.columns)))

# Flatten to a Series indexed by (row, column).
ordered=ordered.stack()

# RHS table for the same case, indexed by the zero-stripped labels.
df=pd.read_csv('GMJ_RHS_C.csv',header=None,delimiter=',',index_col=0)
df.index=list(map(strip,df.index))
# Attach labels to positions by matching on value: sort the stacked entries by
# value, give them the labels of `df` sorted by its column 1, then restore the
# (row, column) order.
# NOTE(review): this assumes both files hold the same values in a one-to-one
# correspondence (same count, ties resolved identically) — confirm.
merged=(
    ordered.reset_index()
    .sort_values(by=0)
    .set_index(df.sort_values(by=1).index)
    .sort_values(['level_0','level_1'])
)

In [None]:
# Display the label-indexed table built in the previous cell.
merged

In [None]:
# Output file named in the pymolcas call ('-oe' argument); the cells below parse it.
path_check = 'H2.output'

In [None]:
def _grep_float(pattern, awk_program):
    """
    Run ``grep -i <pattern> path_check | awk <awk_program>`` and return the
    output converted to float (fails if the pipeline prints more than one value).
    """
    pipeline = grep['-i', pattern, path_check] | awk[awk_program]
    return float(pipeline())


# Last field of the 'E2 (Variational):' line.
E2 = _grep_float('E2 (Variational):', '{print $NF }')
# Eighth field of the 'RASSCF root number  1' summary line.
CASSCF_E = _grep_float('::    RASSCF root number  1', '{print $8 }')
# Last field of the '::    CASPT2' summary line.
CASPT2_E = _grep_float('::    CASPT2', '{print $NF }')

In [None]:

def read_orbital_count(label, output_path):
    """
    Return the integer that ends the last line of ``output_path`` mentioning ``label``.

    Pure-Python replacement for ``grep -i '<label>' <file> | tail -n 1``:
    no shell is spawned, the path is not interpolated into a command line,
    and error text can no longer be mistaken for program output.

    The match is case-insensitive and must start at a word boundary, so
    'Active orbitals' does not also match 'Inactive orbitals'.

    Parameters
    ----------
    label : str
        Literal text to look for.
    output_path : str
        Path of the output file to scan.

    Returns
    -------
    int
        The last whitespace-separated field of the last matching line.

    Raises
    ------
    ValueError
        If no line matches, or the last field is not an integer.
    """
    pattern = re.compile(r'\b' + re.escape(label), re.IGNORECASE)
    last_match = None
    with open(output_path, errors='replace') as output_file:
        for line in output_file:
            if pattern.search(line):
                last_match = line
    if last_match is None:
        raise ValueError(f"'{label}' not found in {output_path}")
    return int(last_match.split()[-1])


# Number of frozen orbitals
fro=read_orbital_count('Frozen orbitals', path_check)
# Number of inactive orbitals
inact=read_orbital_count('Inactive orbitals', path_check)
# Number of active orbitals
act=read_orbital_count('Active orbitals', path_check)
# Number of secondary orbitals
virt=read_orbital_count('Secondary orbitals', path_check)
# Number of basis functions for sanity check
bas_check=read_orbital_count('Number of basis functions', path_check)

# One label per orbital, numbered from 1 within each class and listed in the
# order frozen (F), inactive (I), active (A), secondary (S).
Basis_Indices=[
    f'{prefix}{k+1}'
    for prefix, count in (('F', fro), ('I', inact), ('A', act), ('S', virt))
    for k in range(count)
]

In [None]:
# Show the generated orbital labels (F = frozen, I = inactive, A = active, S = secondary).
Basis_Indices

In [None]:
# Split every label of df into its '_'-separated fields (one row per label).
indexarr = np.array([label.split('_') for label in df.index])

In [None]:
# Number of distinct values in each of the four label fields.
tuple(np.unique(indexarr[:, field]).shape for field in range(4))

In [None]:
# Raw array of the zero-stripped labels that index df.
df.index.values

In [None]:
# Labels split into fields and ordered by field 2, then 3, then 0, then 1
# (string comparison).
sorted(
    (label.split('_') for label in df.index.values),
    key=lambda parts: (parts[2], parts[3], parts[0], parts[1]),
)