In [1]:
# Importing the libraies
import psi4
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsRegressor
from helper_CC_ML_old import *
import matplotlib.pyplot as plt
import os
MLt2=0

In [2]:
# This box contains all the features the CCSD module logs for us to make predictions
# Ultimately, just make sure you run this every time and all is well.

features = ['Evir1', 'Hvir1', 'Jvir1', 'Kvir1', 'Evir2', 'Hvir2', 'Jvir2', 'Kvir2', 'Eocc1', 'Jocc1', 'Kocc1', 'Hocc1',
            'Eocc2', 'Jocc2', 'Kocc2', 'Hocc2', 'Jia1', 'Jia2', 'Kia1', 'Kia2', 'diag', 'orbdiff', 'doublecheck', 't2start', 't2mag', 't2sign', 'Jia1mag', 'Jia2mag', 'Kia1mag', 'Kia2mag']
'''
Key:
Letters:
E-Energy of the orbital
H-1e contribution to the orbital energy
J-Coulombic contribution to orbital energy
K-Exchange contribution to orbital energy
Placement:
occ or virt, you get this..
Number:
is it electron one or two from the two electron excitation


Jia1- coulomb integral between orbital occ1 and vir1
Jia2 " but 2
Kia1 - exchange integral between orbital 
Kia2 Same but exchange integral
diag - is it on the diagonal, aka, are the two excited electrons going to the same orbital **this is important fyi
orbdiff - (Evir2 + Evir1 - Eocc1 - Eocc2)
doublecheck - full 2electron integral
t2start - INITIAL MP2 amplitude **this is the inital guess
t2mag - np.log10(np.absolute(t2start)) ~ this is going to be a common trend, since it is more straightforward for ML algorithms to understand
t2sign - (t2start > 1)? 
Jia1mag - np.log10(np.absolute(feature))
Jia2mag np.log10(np.absolute(feature))
Kia1mag  np.log10(np.absolute(feature))
Kia2mag np.log10(np.absolute(feature))

'''


factors=(1,1.25,1.5,2,5,10,100,1000)

factor= np.zeros((len(features)))
finalfactor=factor
factor[2]=3
factor[6]=3
factor[12]=4
factor[8]=4
factor[16]=1
factor[17]=1
factor[20]=5
factor[21]=5
factor[22]=6
factor[23]=6
factor[24]=5
factor[25]=1
factor=factor.astype(int)

for j in range(0,len(features)):
    a=factor[j]
    finalfactor[j]=factors[a]


In [3]:
# This function extract the features and the t2 amplitudes from the training set.

def GetAmps(Foldername, occ=False, vir=False):
    i=1
    for filename in os.listdir(str(Foldername)):
            psi4.core.clean()
            file_path=str(str(Foldername)+filename)
            text = open(file_path, 'r').read()
            mol = psi4.geometry(text)
            psi4.core.clean()


            psi4.set_options({'basis':        'cc-pVDZ',#'6-31g',
                              'scf_type':     'pk',
                              'reference':    'rhf',
                              'mp2_type':     'conv',
                              'e_convergence': 1e-8,

                              'd_convergence': 1e-8})

            A=HelperCCEnergy(mol)

            A.compute_energy()
            matrixsize=A.nocc*A.nocc*A.nvirt*A.nvirt
            Bigmatrix=np.zeros([matrixsize, len(features)])
            for x in range(0,len(features)):
                Bigmatrix[:, x]=getattr(A, features[x]).reshape(matrixsize)
            Bigamp=A.t2.reshape(matrixsize,1)
            if i==1:
                Bigfeatures=Bigmatrix
                Bigamps=Bigamp
                i=2
            else:
                Bigfeatures=np.vstack((Bigfeatures,Bigmatrix))
                Bigamps=np.vstack((Bigamps,Bigamp))

    array=Bigfeatures
    finalamps=Bigamps
    
    return array,finalamps


# Error Calculation

Error is calculated using following equation:

$Error = |Final Energy - Start Energy|$

Start Energy = Energy calculated by substituting the predicted $t_{2}$ amplitude to CCSD energy equation
Final Energy = Energy calculated by substituting the optimized $t_{2}$ amplitude to CCSD energy equation

In [4]:
# This function retrives the features from the test set and then predict the t2 amplitudes.
# It will input the predicted amplitudes to the CCSD energy equation and optimize those amplitudes.

def Test(Foldername, occ=False, vir=False):
    steps=list()
    difference=list()
    supalist=list()
    startenergy=list()
    finalenergy=list()
    filenames=list()
    rhfenergy=list()
    OH_distance_list = list()
    for filename in os.listdir(Foldername):
                psi4.core.clean()
                filenames.append(filename)
                print ("filename is "+filename)
                file_path=str(Foldername+filename)
                xyz_file = open(file_path, 'r')
                text = xyz_file.read()
                xyz_file.close()
                
                xyz_file = open(file_path, 'r')
                text_lines = xyz_file.readlines()
                xyz_file.close()
                
                print(file_path)
                
                mol = psi4.geometry(text)

                psi4.set_options({'basis':        'cc-pVDZ',
                                  'scf_type':     'pk',
                                  'maxiter':      1000,
                                  'reference':    'rhf',
                                  'mp2_type':     'conv',
                                  'e_convergence': 1e-8,
                                  'd_convergence': 1e-8})
                
                MLt2=0
                A=HelperCCEnergy(mol)
                
                matrixsize=A.nocc*A.nocc*A.nvirt*A.nvirt
                Xnew=np.zeros([1,matrixsize,len(features)])
                for x in range (0,len(features)):
                    Xnew[0,:,x]=getattr(A, features[x]).reshape(matrixsize)

                Xnew=np.reshape(Xnew, (matrixsize,len(features)))


                X_new_scaled= scaler.transform(Xnew)
                X_newer_scaled= X_new_scaled
                
                for x in range (0,len(features)):
                    X_newer_scaled[:,x] *= finalfactor[x]



                ynew2=knn.predict(X_newer_scaled)
                MLt2=ynew2.reshape(A.nocc,A.nocc,A.nvirt,A.nvirt)
                A.t2=MLt2
                
                OH_distance = float(text_lines[1].split()[2])
                
                A.compute_t1()
                A.compute_energy()
                rhfenergy.append(A.rhf_e)
                startenergy.append(A.StartEnergy)
                finalenergy.append(A.FinalEnergy)
                OH_distance_list.append(OH_distance)
                
    
    startEn = np.add(np.array(startenergy),np.array(rhfenergy))
    finalEn = np.add(np.array(finalenergy),np.array(rhfenergy))
    difference.append(sum( np.abs(np.asarray(startenergy) - np.asarray(finalenergy))) /len(startenergy))
    differences = np.abs(np.asarray(startenergy) - np.asarray(finalenergy))
    
    print('-------------------------------------------------------------------------------------------------------')
    print ('Average Error: ')
    print (difference)
    
    return(startEn, finalEn, OH_distance_list)

In [5]:
# Extracting features from training molecules

X_train,y_train=GetAmps('Water/Regular/Water5/')

Computing RHF reference.

Scratch directory: /tmp/

*** tstart() called on utkinternet.desktops.utk.edu
*** at Fri Dec 10 15:20:45 2021

   => Loading Basis Set <=

    Name: CC-PVDZ
    Role: ORBITAL
    Keyword: BASIS
    atoms 1   entry O          line   198 file /Users/voglab04/miniconda3/envs/voglab/share/psi4/basis/cc-pvdz.gbs 
    atoms 2-3 entry H          line    22 file /Users/voglab04/miniconda3/envs/voglab/share/psi4/basis/cc-pvdz.gbs 


         ---------------------------------------------------------
                                   SCF
               by Justin Turney, Rob Parrish, Andy Simmonett
                          and Daniel G. A. Smith
                              RHF Reference
                        1 Threads,    500 MiB Core
         ---------------------------------------------------------

  ==> Geometry <==

    Molecular point group: c1
    Full point group: C2v

    Geometry (in Angstrom), charge = 0, multiplicity = 1:

       Center              X   


  psi4.set_module_options('SCF', {'SCF_TYPE': 'PK'})

  psi4.set_module_options('SCF', {'E_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'D_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'MAXITER': 100000})


  ==> Integral Setup <==

  Using in-core PK algorithm.
   Calculation information:
      Number of atoms:                   3
      Number of AO shells:              12
      Number of primitives:             32
      Number of atomic orbitals:        25
      Number of basis functions:        24

      Integral cutoff                 1.00e-12
      Number of threads:                 1

  Performing in-core PK
  Using 90300 doubles for integral storage.
  We computed 3081 shell quartets total.
  Whereas there are 3081 unique shell quartets.

  ==> DiskJK: Disk-Based J/K Matrices <==

    J tasked:                  Yes
    K tasked:                  Yes
    wK tasked:                  No
    Memory [MiB]:              375
    Schwarz Cutoff:          1E-12

    OpenMP threads:              1

  Minimum eigenvalue in the overlap matrix is 2.6472818251E-02.
  Reciprocal condition number of the overlap matrix is 6.6664559791E-03.
    Using symmetric orthogonalization.

  ==> Pre-Iteration


  psi4.set_module_options('SCF', {'SCF_TYPE': 'PK'})

  psi4.set_module_options('SCF', {'E_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'D_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'MAXITER': 100000})


   => Loading Basis Set <=

    Name: CC-PVDZ
    Role: ORBITAL
    Keyword: BASIS
    atoms 1   entry O          line   198 file /Users/voglab04/miniconda3/envs/voglab/share/psi4/basis/cc-pvdz.gbs 
    atoms 2-3 entry H          line    22 file /Users/voglab04/miniconda3/envs/voglab/share/psi4/basis/cc-pvdz.gbs 


         ---------------------------------------------------------
                                   SCF
               by Justin Turney, Rob Parrish, Andy Simmonett
                          and Daniel G. A. Smith
                              RHF Reference
                        1 Threads,    500 MiB Core
         ---------------------------------------------------------

  ==> Geometry <==

    Molecular point group: c1
    Full point group: C2v

    Geometry (in Angstrom), charge = 0, multiplicity = 1:

       Center              X                  Y                   Z               Mass       
    ------------   -----------------  -----------------  -----------------


  psi4.set_module_options('SCF', {'SCF_TYPE': 'PK'})

  psi4.set_module_options('SCF', {'E_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'D_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'MAXITER': 100000})


  ==> Integral Setup <==

  Using in-core PK algorithm.
   Calculation information:
      Number of atoms:                   3
      Number of AO shells:              12
      Number of primitives:             32
      Number of atomic orbitals:        25
      Number of basis functions:        24

      Integral cutoff                 1.00e-12
      Number of threads:                 1

  Performing in-core PK
  Using 90300 doubles for integral storage.
  We computed 3081 shell quartets total.
  Whereas there are 3081 unique shell quartets.

  ==> DiskJK: Disk-Based J/K Matrices <==

    J tasked:                  Yes
    K tasked:                  Yes
    wK tasked:                  No
    Memory [MiB]:              375
    Schwarz Cutoff:          1E-12

    OpenMP threads:              1

  Minimum eigenvalue in the overlap matrix is 3.8245127842E-03.
  Reciprocal condition number of the overlap matrix is 8.1454376710E-04.
    Using symmetric orthogonalization.

  ==> Pre-Iteration


  psi4.set_module_options('SCF', {'SCF_TYPE': 'PK'})

  psi4.set_module_options('SCF', {'E_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'D_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'MAXITER': 100000})


  ==> Integral Setup <==

  Using in-core PK algorithm.
   Calculation information:
      Number of atoms:                   3
      Number of AO shells:              12
      Number of primitives:             32
      Number of atomic orbitals:        25
      Number of basis functions:        24

      Integral cutoff                 1.00e-12
      Number of threads:                 1

  Performing in-core PK
  Using 90300 doubles for integral storage.
  We computed 3081 shell quartets total.
  Whereas there are 3081 unique shell quartets.

  ==> DiskJK: Disk-Based J/K Matrices <==

    J tasked:                  Yes
    K tasked:                  Yes
    wK tasked:                  No
    Memory [MiB]:              375
    Schwarz Cutoff:          1E-12

    OpenMP threads:              1

  Minimum eigenvalue in the overlap matrix is 1.1814038197E-01.
  Reciprocal condition number of the overlap matrix is 4.7492563267E-02.
    Using symmetric orthogonalization.

  ==> Pre-Iteration


  psi4.set_module_options('SCF', {'SCF_TYPE': 'PK'})

  psi4.set_module_options('SCF', {'E_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'D_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'MAXITER': 100000})



         ---------------------------------------------------------
                                   SCF
               by Justin Turney, Rob Parrish, Andy Simmonett
                          and Daniel G. A. Smith
                              RHF Reference
                        1 Threads,    500 MiB Core
         ---------------------------------------------------------

  ==> Geometry <==

    Molecular point group: c1
    Full point group: C2v

    Geometry (in Angstrom), charge = 0, multiplicity = 1:

       Center              X                  Y                   Z               Mass       
    ------------   -----------------  -----------------  -----------------  -----------------
         O            0.000000000000     0.000000000000    -0.142513736185    15.994914619570
         H            0.000000000000    -1.644634313387     1.130898206286     1.007825032230
         H            0.000000000000     1.644634313387     1.130898206286     1.007825032230

  Running in 

CCSD Iteration  14: CCSD correlation = -0.367683508287414   dE =  2.02655E-06   DIIS = 7
CCSD Iteration  15: CCSD correlation = -0.367682818943335   dE =  6.89344E-07   DIIS = 7
CCSD Iteration  16: CCSD correlation = -0.367682980364846   dE = -1.61422E-07   DIIS = 7
CCSD Iteration  17: CCSD correlation = -0.367682981214001   dE = -8.49155E-10   DIIS = 7
CCSD Iteration  18: CCSD correlation = -0.367683018237548   dE = -3.70235E-08   DIIS = 7
CCSD Iteration  19: CCSD correlation = -0.367683014039160   dE =  4.19839E-09   DIIS = 7

CCSD has converged in 0.586 seconds!


In [6]:
len(os.listdir('Water/Regular/Water5/'))

5

In [7]:
# import sys
# !{sys.executable} -m pip install pickle5
# import pickle5 as pickle
import pickle
with open('X_train.pickle', 'wb') as handle:
    pickle.dump(X_train, handle, protocol=pickle.HIGHEST_PROTOCOL)
with open('y_train.pickle', 'wb') as handle:
    pickle.dump(y_train, handle, protocol=pickle.HIGHEST_PROTOCOL)


In [8]:
X_train.shape,y_train.shape[0]/5

((45125, 30), 9025.0)

In [9]:
# Scale all data before using them as features

scaler = MinMaxScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)

# This multiplies by the weighting vector from the start
for a in range(0,len(features)):
    X_train_scaled[:,a] *= finalfactor[a]

# This trains the model with our data    
knn=(KNeighborsRegressor(n_neighbors=1, p=2).fit(X_train_scaled,y_train)  )


In [None]:
# Predicting t2 amplitudes

startEnergy, finalEnergy, OH_distance = Test('Water/Water100/')

filename is Water33.xyz
Water/Water100/Water33.xyz
Computing RHF reference.

Scratch directory: /tmp/

*** tstart() called on utkinternet.desktops.utk.edu
*** at Fri Dec 10 15:20:56 2021

   => Loading Basis Set <=

    Name: CC-PVDZ
    Role: ORBITAL
    Keyword: BASIS
    atoms 1   entry O          line   198 file /Users/voglab04/miniconda3/envs/voglab/share/psi4/basis/cc-pvdz.gbs 
    atoms 2-3 entry H          line    22 file /Users/voglab04/miniconda3/envs/voglab/share/psi4/basis/cc-pvdz.gbs 


         ---------------------------------------------------------
                                   SCF
               by Justin Turney, Rob Parrish, Andy Simmonett
                          and Daniel G. A. Smith
                              RHF Reference
                        1 Threads,    500 MiB Core
         ---------------------------------------------------------

  ==> Geometry <==

    Molecular point group: c1
    Full point group: C2v

    Geometry (in Angstrom), charge = 0,


  psi4.set_module_options('SCF', {'SCF_TYPE': 'PK'})

  psi4.set_module_options('SCF', {'E_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'D_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'MAXITER': 100000})


  ==> Integral Setup <==

  Using in-core PK algorithm.
   Calculation information:
      Number of atoms:                   3
      Number of AO shells:              12
      Number of primitives:             32
      Number of atomic orbitals:        25
      Number of basis functions:        24

      Integral cutoff                 1.00e-12
      Number of threads:                 1

  Performing in-core PK
  Using 90300 doubles for integral storage.
  We computed 3081 shell quartets total.
  Whereas there are 3081 unique shell quartets.

  ==> DiskJK: Disk-Based J/K Matrices <==

    J tasked:                  Yes
    K tasked:                  Yes
    wK tasked:                  No
    Memory [MiB]:              375
    Schwarz Cutoff:          1E-12

    OpenMP threads:              1

  Minimum eigenvalue in the overlap matrix is 3.4131936647E-02.
  Reciprocal condition number of the overlap matrix is 9.1974684720E-03.
    Using symmetric orthogonalization.

  ==> Pre-Iteration


  psi4.set_module_options('SCF', {'SCF_TYPE': 'PK'})

  psi4.set_module_options('SCF', {'E_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'D_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'MAXITER': 100000})


  ==> Integral Setup <==

  Using in-core PK algorithm.
   Calculation information:
      Number of atoms:                   3
      Number of AO shells:              12
      Number of primitives:             32
      Number of atomic orbitals:        25
      Number of basis functions:        24

      Integral cutoff                 1.00e-12
      Number of threads:                 1

  Performing in-core PK
  Using 90300 doubles for integral storage.
  We computed 3081 shell quartets total.
  Whereas there are 3081 unique shell quartets.

  ==> DiskJK: Disk-Based J/K Matrices <==

    J tasked:                  Yes
    K tasked:                  Yes
    wK tasked:                  No
    Memory [MiB]:              375
    Schwarz Cutoff:          1E-12

    OpenMP threads:              1

  Minimum eigenvalue in the overlap matrix is 2.7681754290E-02.
  Reciprocal condition number of the overlap matrix is 7.0504458938E-03.
    Using symmetric orthogonalization.

  ==> Pre-Iteration


  psi4.set_module_options('SCF', {'SCF_TYPE': 'PK'})

  psi4.set_module_options('SCF', {'E_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'D_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'MAXITER': 100000})


  ==> Integral Setup <==

  Using in-core PK algorithm.
   Calculation information:
      Number of atoms:                   3
      Number of AO shells:              12
      Number of primitives:             32
      Number of atomic orbitals:        25
      Number of basis functions:        24

      Integral cutoff                 1.00e-12
      Number of threads:                 1

  Performing in-core PK
  Using 90300 doubles for integral storage.
  We computed 3081 shell quartets total.
  Whereas there are 3081 unique shell quartets.

  ==> DiskJK: Disk-Based J/K Matrices <==

    J tasked:                  Yes
    K tasked:                  Yes
    wK tasked:                  No
    Memory [MiB]:              375
    Schwarz Cutoff:          1E-12

    OpenMP threads:              1

  Minimum eigenvalue in the overlap matrix is 2.6712092369E-02.
  Reciprocal condition number of the overlap matrix is 6.7419038943E-03.
    Using symmetric orthogonalization.

  ==> Pre-Iteration


  psi4.set_module_options('SCF', {'SCF_TYPE': 'PK'})

  psi4.set_module_options('SCF', {'E_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'D_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'MAXITER': 100000})



*** tstart() called on utkinternet.desktops.utk.edu
*** at Fri Dec 10 15:21:39 2021

   => Loading Basis Set <=

    Name: CC-PVDZ
    Role: ORBITAL
    Keyword: BASIS
    atoms 1   entry O          line   198 file /Users/voglab04/miniconda3/envs/voglab/share/psi4/basis/cc-pvdz.gbs 
    atoms 2-3 entry H          line    22 file /Users/voglab04/miniconda3/envs/voglab/share/psi4/basis/cc-pvdz.gbs 


         ---------------------------------------------------------
                                   SCF
               by Justin Turney, Rob Parrish, Andy Simmonett
                          and Daniel G. A. Smith
                              RHF Reference
                        1 Threads,    500 MiB Core
         ---------------------------------------------------------

  ==> Geometry <==

    Molecular point group: c1
    Full point group: C2v

    Geometry (in Angstrom), charge = 0, multiplicity = 1:

       Center              X                  Y                   Z               


  psi4.set_module_options('SCF', {'SCF_TYPE': 'PK'})

  psi4.set_module_options('SCF', {'E_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'D_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'MAXITER': 100000})



*** tstart() called on utkinternet.desktops.utk.edu
*** at Fri Dec 10 15:21:52 2021

   => Loading Basis Set <=

    Name: CC-PVDZ
    Role: ORBITAL
    Keyword: BASIS
    atoms 1   entry O          line   198 file /Users/voglab04/miniconda3/envs/voglab/share/psi4/basis/cc-pvdz.gbs 
    atoms 2-3 entry H          line    22 file /Users/voglab04/miniconda3/envs/voglab/share/psi4/basis/cc-pvdz.gbs 


         ---------------------------------------------------------
                                   SCF
               by Justin Turney, Rob Parrish, Andy Simmonett
                          and Daniel G. A. Smith
                              RHF Reference
                        1 Threads,    500 MiB Core
         ---------------------------------------------------------

  ==> Geometry <==

    Molecular point group: c1
    Full point group: C2v

    Geometry (in Angstrom), charge = 0, multiplicity = 1:

       Center              X                  Y                   Z               


  psi4.set_module_options('SCF', {'SCF_TYPE': 'PK'})

  psi4.set_module_options('SCF', {'E_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'D_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'MAXITER': 100000})


*** at Fri Dec 10 15:22:04 2021

   => Loading Basis Set <=

    Name: CC-PVDZ
    Role: ORBITAL
    Keyword: BASIS
    atoms 1   entry O          line   198 file /Users/voglab04/miniconda3/envs/voglab/share/psi4/basis/cc-pvdz.gbs 
    atoms 2-3 entry H          line    22 file /Users/voglab04/miniconda3/envs/voglab/share/psi4/basis/cc-pvdz.gbs 


         ---------------------------------------------------------
                                   SCF
               by Justin Turney, Rob Parrish, Andy Simmonett
                          and Daniel G. A. Smith
                              RHF Reference
                        1 Threads,    500 MiB Core
         ---------------------------------------------------------

  ==> Geometry <==

    Molecular point group: c1
    Full point group: C2v

    Geometry (in Angstrom), charge = 0, multiplicity = 1:

       Center              X                  Y                   Z               Mass       
    ------------   -----------------  ---

CCSD Iteration  10: CCSD correlation = -0.209805054233386   dE =  1.01395E-08   DIIS = 7

CCSD has converged in 0.161 seconds!
filename is Water18.xyz
Water/Water100/Water18.xyz
Computing RHF reference.

Scratch directory: /tmp/

*** tstart() called on utkinternet.desktops.utk.edu
*** at Fri Dec 10 15:22:16 2021

   => Loading Basis Set <=

    Name: CC-PVDZ
    Role: ORBITAL
    Keyword: BASIS
    atoms 1   entry O          line   198 file /Users/voglab04/miniconda3/envs/voglab/share/psi4/basis/cc-pvdz.gbs 
    atoms 2-3 entry H          line    22 file /Users/voglab04/miniconda3/envs/voglab/share/psi4/basis/cc-pvdz.gbs 


         ---------------------------------------------------------
                                   SCF
               by Justin Turney, Rob Parrish, Andy Simmonett
                          and Daniel G. A. Smith
                              RHF Reference
                        1 Threads,    500 MiB Core
         ------------------------------------------------


  psi4.set_module_options('SCF', {'SCF_TYPE': 'PK'})

  psi4.set_module_options('SCF', {'E_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'D_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'MAXITER': 100000})


  ==> Integral Setup <==

  Using in-core PK algorithm.
   Calculation information:
      Number of atoms:                   3
      Number of AO shells:              12
      Number of primitives:             32
      Number of atomic orbitals:        25
      Number of basis functions:        24

      Integral cutoff                 1.00e-12
      Number of threads:                 1

  Performing in-core PK
  Using 90300 doubles for integral storage.
  We computed 3081 shell quartets total.
  Whereas there are 3081 unique shell quartets.

  ==> DiskJK: Disk-Based J/K Matrices <==

    J tasked:                  Yes
    K tasked:                  Yes
    wK tasked:                  No
    Memory [MiB]:              375
    Schwarz Cutoff:          1E-12

    OpenMP threads:              1

  Minimum eigenvalue in the overlap matrix is 1.9277241342E-02.
  Reciprocal condition number of the overlap matrix is 4.5455377270E-03.
    Using symmetric orthogonalization.

  ==> Pre-Iteration


  psi4.set_module_options('SCF', {'SCF_TYPE': 'PK'})

  psi4.set_module_options('SCF', {'E_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'D_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'MAXITER': 100000})


   => Loading Basis Set <=

    Name: CC-PVDZ
    Role: ORBITAL
    Keyword: BASIS
    atoms 1   entry O          line   198 file /Users/voglab04/miniconda3/envs/voglab/share/psi4/basis/cc-pvdz.gbs 
    atoms 2-3 entry H          line    22 file /Users/voglab04/miniconda3/envs/voglab/share/psi4/basis/cc-pvdz.gbs 


         ---------------------------------------------------------
                                   SCF
               by Justin Turney, Rob Parrish, Andy Simmonett
                          and Daniel G. A. Smith
                              RHF Reference
                        1 Threads,    500 MiB Core
         ---------------------------------------------------------

  ==> Geometry <==

    Molecular point group: c1
    Full point group: C2v

    Geometry (in Angstrom), charge = 0, multiplicity = 1:

       Center              X                  Y                   Z               Mass       
    ------------   -----------------  -----------------  -----------------

CCSD Iteration   9: CCSD correlation = -0.197581832727528   dE =  6.39568E-09   DIIS = 7
CCSD Iteration  10: CCSD correlation = -0.197581831886541   dE =  8.40987E-10   DIIS = 7

CCSD has converged in 0.180 seconds!
filename is Water31.xyz
Water/Water100/Water31.xyz
Computing RHF reference.

Scratch directory: /tmp/

*** tstart() called on utkinternet.desktops.utk.edu
*** at Fri Dec 10 15:22:50 2021

   => Loading Basis Set <=

    Name: CC-PVDZ
    Role: ORBITAL
    Keyword: BASIS
    atoms 1   entry O          line   198 file /Users/voglab04/miniconda3/envs/voglab/share/psi4/basis/cc-pvdz.gbs 
    atoms 2-3 entry H          line    22 file /Users/voglab04/miniconda3/envs/voglab/share/psi4/basis/cc-pvdz.gbs 


         ---------------------------------------------------------
                                   SCF
               by Justin Turney, Rob Parrish, Andy Simmonett
                          and Daniel G. A. Smith
                              RHF Reference
                   


  psi4.set_module_options('SCF', {'SCF_TYPE': 'PK'})

  psi4.set_module_options('SCF', {'E_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'D_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'MAXITER': 100000})


  ==> Integral Setup <==

  Using in-core PK algorithm.
   Calculation information:
      Number of atoms:                   3
      Number of AO shells:              12
      Number of primitives:             32
      Number of atomic orbitals:        25
      Number of basis functions:        24

      Integral cutoff                 1.00e-12
      Number of threads:                 1

  Performing in-core PK
  Using 90300 doubles for integral storage.
  We computed 3081 shell quartets total.
  Whereas there are 3081 unique shell quartets.

  ==> DiskJK: Disk-Based J/K Matrices <==

    J tasked:                  Yes
    K tasked:                  Yes
    wK tasked:                  No
    Memory [MiB]:              375
    Schwarz Cutoff:          1E-12

    OpenMP threads:              1

  Minimum eigenvalue in the overlap matrix is 3.1835208031E-02.
  Reciprocal condition number of the overlap matrix is 8.4163710958E-03.
    Using symmetric orthogonalization.

  ==> Pre-Iteration


  psi4.set_module_options('SCF', {'SCF_TYPE': 'PK'})

  psi4.set_module_options('SCF', {'E_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'D_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'MAXITER': 100000})



*** tstart() called on utkinternet.desktops.utk.edu
*** at Fri Dec 10 15:23:09 2021

   => Loading Basis Set <=

    Name: CC-PVDZ
    Role: ORBITAL
    Keyword: BASIS
    atoms 1   entry O          line   198 file /Users/voglab04/miniconda3/envs/voglab/share/psi4/basis/cc-pvdz.gbs 
    atoms 2-3 entry H          line    22 file /Users/voglab04/miniconda3/envs/voglab/share/psi4/basis/cc-pvdz.gbs 


         ---------------------------------------------------------
                                   SCF
               by Justin Turney, Rob Parrish, Andy Simmonett
                          and Daniel G. A. Smith
                              RHF Reference
                        1 Threads,    500 MiB Core
         ---------------------------------------------------------

  ==> Geometry <==

    Molecular point group: c1
    Full point group: C2v

    Geometry (in Angstrom), charge = 0, multiplicity = 1:

       Center              X                  Y                   Z               

CCSD Iteration   8: CCSD correlation = -0.204263137736069   dE = -3.87170E-08   DIIS = 7
CCSD Iteration   9: CCSD correlation = -0.204263144234649   dE = -6.49858E-09   DIIS = 7

CCSD has converged in 0.190 seconds!
filename is Water21.xyz
Water/Water100/Water21.xyz
Computing RHF reference.

Scratch directory: /tmp/

*** tstart() called on utkinternet.desktops.utk.edu
*** at Fri Dec 10 15:23:26 2021




  psi4.set_module_options('SCF', {'SCF_TYPE': 'PK'})

  psi4.set_module_options('SCF', {'E_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'D_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'MAXITER': 100000})


   => Loading Basis Set <=

    Name: CC-PVDZ
    Role: ORBITAL
    Keyword: BASIS
    atoms 1   entry O          line   198 file /Users/voglab04/miniconda3/envs/voglab/share/psi4/basis/cc-pvdz.gbs 
    atoms 2-3 entry H          line    22 file /Users/voglab04/miniconda3/envs/voglab/share/psi4/basis/cc-pvdz.gbs 


         ---------------------------------------------------------
                                   SCF
               by Justin Turney, Rob Parrish, Andy Simmonett
                          and Daniel G. A. Smith
                              RHF Reference
                        1 Threads,    500 MiB Core
         ---------------------------------------------------------

  ==> Geometry <==

    Molecular point group: c1
    Full point group: C2v

    Geometry (in Angstrom), charge = 0, multiplicity = 1:

       Center              X                  Y                   Z               Mass       
    ------------   -----------------  -----------------  -----------------


  psi4.set_module_options('SCF', {'SCF_TYPE': 'PK'})

  psi4.set_module_options('SCF', {'E_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'D_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'MAXITER': 100000})



*** tstart() called on utkinternet.desktops.utk.edu
*** at Fri Dec 10 15:23:43 2021

   => Loading Basis Set <=

    Name: CC-PVDZ
    Role: ORBITAL
    Keyword: BASIS
    atoms 1   entry O          line   198 file /Users/voglab04/miniconda3/envs/voglab/share/psi4/basis/cc-pvdz.gbs 
    atoms 2-3 entry H          line    22 file /Users/voglab04/miniconda3/envs/voglab/share/psi4/basis/cc-pvdz.gbs 


         ---------------------------------------------------------
                                   SCF
               by Justin Turney, Rob Parrish, Andy Simmonett
                          and Daniel G. A. Smith
                              RHF Reference
                        1 Threads,    500 MiB Core
         ---------------------------------------------------------

  ==> Geometry <==

    Molecular point group: c1
    Full point group: C2v

    Geometry (in Angstrom), charge = 0, multiplicity = 1:

       Center              X                  Y                   Z               

CCSD Iteration   8: CCSD correlation = -0.215602538622494   dE =  3.76766E-07   DIIS = 7
CCSD Iteration   9: CCSD correlation = -0.215602502154049   dE =  3.64684E-08   DIIS = 7
CCSD Iteration  10: CCSD correlation = -0.215602496321025   dE =  5.83302E-09   DIIS = 7
CCSD Iteration  11: CCSD correlation = -0.215602495301936   dE =  1.01909E-09   DIIS = 7

CCSD has converged in 0.228 seconds!
filename is Water34.xyz
Water/Water100/Water34.xyz
Computing RHF reference.

Scratch directory: /tmp/

*** tstart() called on utkinternet.desktops.utk.edu
*** at Fri Dec 10 15:24:00 2021

   => Loading Basis Set <=

    Name: CC-PVDZ
    Role: ORBITAL
    Keyword: BASIS
    atoms 1   entry O          line   198 file /Users/voglab04/miniconda3/envs/voglab/share/psi4/basis/cc-pvdz.gbs 
    atoms 2-3 entry H          line    22 file /Users/voglab04/miniconda3/envs/voglab/share/psi4/basis/cc-pvdz.gbs 


         ---------------------------------------------------------
                                  


  psi4.set_module_options('SCF', {'SCF_TYPE': 'PK'})

  psi4.set_module_options('SCF', {'E_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'D_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'MAXITER': 100000})


  ==> Integral Setup <==

  Using in-core PK algorithm.
   Calculation information:
      Number of atoms:                   3
      Number of AO shells:              12
      Number of primitives:             32
      Number of atomic orbitals:        25
      Number of basis functions:        24

      Integral cutoff                 1.00e-12
      Number of threads:                 1

  Performing in-core PK
  Using 90300 doubles for integral storage.
  We computed 3081 shell quartets total.
  Whereas there are 3081 unique shell quartets.

  ==> DiskJK: Disk-Based J/K Matrices <==

    J tasked:                  Yes
    K tasked:                  Yes
    wK tasked:                  No
    Memory [MiB]:              375
    Schwarz Cutoff:          1E-12

    OpenMP threads:              1

  Minimum eigenvalue in the overlap matrix is 3.5347583968E-02.
  Reciprocal condition number of the overlap matrix is 9.6172242981E-03.
    Using symmetric orthogonalization.

  ==> Pre-Iteration


  psi4.set_module_options('SCF', {'SCF_TYPE': 'PK'})

  psi4.set_module_options('SCF', {'E_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'D_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'MAXITER': 100000})


*** at Fri Dec 10 15:24:18 2021

   => Loading Basis Set <=

    Name: CC-PVDZ
    Role: ORBITAL
    Keyword: BASIS
    atoms 1   entry O          line   198 file /Users/voglab04/miniconda3/envs/voglab/share/psi4/basis/cc-pvdz.gbs 
    atoms 2-3 entry H          line    22 file /Users/voglab04/miniconda3/envs/voglab/share/psi4/basis/cc-pvdz.gbs 


         ---------------------------------------------------------
                                   SCF
               by Justin Turney, Rob Parrish, Andy Simmonett
                          and Daniel G. A. Smith
                              RHF Reference
                        1 Threads,    500 MiB Core
         ---------------------------------------------------------

  ==> Geometry <==

    Molecular point group: c1
    Full point group: C2v

    Geometry (in Angstrom), charge = 0, multiplicity = 1:

       Center              X                  Y                   Z               Mass       
    ------------   -----------------  ---


  psi4.set_module_options('SCF', {'SCF_TYPE': 'PK'})

  psi4.set_module_options('SCF', {'E_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'D_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'MAXITER': 100000})


   => Loading Basis Set <=

    Name: CC-PVDZ
    Role: ORBITAL
    Keyword: BASIS
    atoms 1   entry O          line   198 file /Users/voglab04/miniconda3/envs/voglab/share/psi4/basis/cc-pvdz.gbs 
    atoms 2-3 entry H          line    22 file /Users/voglab04/miniconda3/envs/voglab/share/psi4/basis/cc-pvdz.gbs 


         ---------------------------------------------------------
                                   SCF
               by Justin Turney, Rob Parrish, Andy Simmonett
                          and Daniel G. A. Smith
                              RHF Reference
                        1 Threads,    500 MiB Core
         ---------------------------------------------------------

  ==> Geometry <==

    Molecular point group: c1
    Full point group: C2v

    Geometry (in Angstrom), charge = 0, multiplicity = 1:

       Center              X                  Y                   Z               Mass       
    ------------   -----------------  -----------------  -----------------


  psi4.set_module_options('SCF', {'SCF_TYPE': 'PK'})

  psi4.set_module_options('SCF', {'E_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'D_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'MAXITER': 100000})


  ==> Integral Setup <==

  Using in-core PK algorithm.
   Calculation information:
      Number of atoms:                   3
      Number of AO shells:              12
      Number of primitives:             32
      Number of atomic orbitals:        25
      Number of basis functions:        24

      Integral cutoff                 1.00e-12
      Number of threads:                 1

  Performing in-core PK
  Using 90300 doubles for integral storage.
  We computed 3081 shell quartets total.
  Whereas there are 3081 unique shell quartets.

  ==> DiskJK: Disk-Based J/K Matrices <==

    J tasked:                  Yes
    K tasked:                  Yes
    wK tasked:                  No
    Memory [MiB]:              375
    Schwarz Cutoff:          1E-12

    OpenMP threads:              1

  Minimum eigenvalue in the overlap matrix is 2.2979827569E-02.
  Reciprocal condition number of the overlap matrix is 5.5994210071E-03.
    Using symmetric orthogonalization.

  ==> Pre-Iteration


  psi4.set_module_options('SCF', {'SCF_TYPE': 'PK'})

  psi4.set_module_options('SCF', {'E_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'D_CONVERGENCE': 10e-10})

  psi4.set_module_options('SCF', {'MAXITER': 100000})


*** at Fri Dec 10 15:25:14 2021

   => Loading Basis Set <=

    Name: CC-PVDZ
    Role: ORBITAL
    Keyword: BASIS
    atoms 1   entry O          line   198 file /Users/voglab04/miniconda3/envs/voglab/share/psi4/basis/cc-pvdz.gbs 
    atoms 2-3 entry H          line    22 file /Users/voglab04/miniconda3/envs/voglab/share/psi4/basis/cc-pvdz.gbs 


         ---------------------------------------------------------
                                   SCF
               by Justin Turney, Rob Parrish, Andy Simmonett
                          and Daniel G. A. Smith
                              RHF Reference
                        1 Threads,    500 MiB Core
         ---------------------------------------------------------

  ==> Geometry <==

    Molecular point group: c1
    Full point group: C2v

    Geometry (in Angstrom), charge = 0, multiplicity = 1:

       Center              X                  Y                   Z               Mass       
    ------------   -----------------  ---

In [None]:
len(os.listdir('Water/Water100/'))

In [None]:
Foldername
steps=list()
difference=list()
supalist=list()
startenergy=list()
finalenergy=list()
filenames=list()
rhfenergy=list()
OH_distance_list = list()
for filename in os.listdir(Foldername):
            psi4.core.clean()
            filenames.append(filename)
            print ("filename is "+filename)
            file_path=str(Foldername+filename)
            xyz_file = open(file_path, 'r')
            text = xyz_file.read()
            xyz_file.close()

            xyz_file = open(file_path, 'r')
            text_lines = xyz_file.readlines()
            xyz_file.close()

            print(file_path)

            mol = psi4.geometry(text)

            psi4.set_options({'basis':        'cc-pVDZ',
                              'scf_type':     'pk',
                              'maxiter':      1000,
                              'reference':    'rhf',
                              'mp2_type':     'conv',
                              'e_convergence': 1e-8,
                              'd_convergence': 1e-8})

            MLt2=0
            A=HelperCCEnergy(mol)

            matrixsize=A.nocc*A.nocc*A.nvirt*A.nvirt
            Xnew=np.zeros([1,matrixsize,len(features)])
            for x in range (0,len(features)):
                Xnew[0,:,x]=getattr(A, features[x]).reshape(matrixsize)

            Xnew=np.reshape(Xnew, (matrixsize,len(features)))


            X_new_scaled= scaler.transform(Xnew)
            X_newer_scaled= X_new_scaled

            for x in range (0,len(features)):
                X_newer_scaled[:,x] *= finalfactor[x]



            ynew2=knn.predict(X_newer_scaled)
            MLt2=ynew2.reshape(A.nocc,A.nocc,A.nvirt,A.nvirt)
            A.t2=MLt2

            OH_distance = float(text_lines[1].split()[2])

            A.compute_t1()
            A.compute_energy()
            rhfenergy.append(A.rhf_e)
            startenergy.append(A.StartEnergy)
            finalenergy.append(A.FinalEnergy)
            OH_distance_list.append(OH_distance)


startEn = np.add(np.array(startenergy),np.array(rhfenergy))
finalEn = np.add(np.array(finalenergy),np.array(rhfenergy))
difference.append(sum( np.abs(np.asarray(startenergy) - np.asarray(finalenergy))) /len(startenergy))
differences = np.abs(np.asarray(startenergy) - np.asarray(finalenergy))

print('-------------------------------------------------------------------------------------------------------')
print ('Average Error: ')
print (difference)


In [None]:
# Plotting the start energy and final energy against bond distance

zipped_lists = zip(OH_distance, startEnergy, finalEnergy)
sorted_pairs = sorted(zipped_lists)

tuples = zip(*sorted_pairs)
BondDistance, StartEnergy, FinalEnergy = [ list(tuple) for tuple in  tuples]

plt.title('Energy vs Bond Distance')
plt.xlabel('Bond distance ($\AA$)')
plt.ylabel('Energy ($E_{h}$)')
plt.plot(BondDistance, StartEnergy, label = 'Start Energy')
plt.plot(BondDistance, FinalEnergy, label = 'Final Energy')
plt.legend(loc="upper right")
plt.show()