# AILFT CASSCF + NEVPT2 Parser

In [1]:
import re
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Run settings: the structure being parsed and the files read and written.

coord_number = 2
structure_name = "CoOO"

# Output file that the rest of the notebook parses.
input_path = f"/run/media/henrique/Henrique/Calculos/Co/Done/FieldScan/{structure_name}/{structure_name}.out"

# Destination paths (they are consumed outside this cell).
result_path = f"/home/henrique/Coding/PhDUFF/Databases/{structure_name}.csv"
save_to_global = "/home/henrique/Coding/PhDUFF/Databases/big_data.csv"
save_to_pickle = f"/home/henrique/Coding/PhDUFF/Databases/{structure_name}.pkl"

In [3]:
# Read the whole output file into memory. Where the file states that there is
# no D-tensor, substitute zero-valued "D" and "E/D" lines for that message.
with open(input_path, "r") as output_handle:
    input_file = output_handle.read().replace(
        "# The ground state multiplicity is smaller than 3. No D-tensor.",
        "D   =  0.0  cm-1\nE/D =    0.0")

In [85]:
# Function to fill the lists


def dataparser(my_pattern, list_name, type_of_data="string", g_number=0, text=None):
    """Append one regex group from every match of ``my_pattern`` to ``list_name``.

    Args:
        my_pattern (re.Pattern): Compiled regular expression. Flags such as
            ``re.MULTILINE`` must be given to ``re.compile``, not here.
        list_name (list): List extended in place, one item per match.
        type_of_data (str or type, optional): ``"string"`` (or ``str``) stores
            the captured text as ``str``; any other value (e.g. ``"float"``)
            converts it with ``float``. Defaults to ``"string"``.
        g_number (int, optional): Group number taken from each match object.
            Defaults to 0 (the whole match).
        text (str, optional): Text to search. Defaults to the module-level
            ``input_file``.

    Raises:
        ValueError: If a captured group cannot be converted to ``float``.
    """
    if text is None:
        text = input_file
    convert = str if type_of_data in ("string", str) else float
    # The second positional argument of a compiled pattern's ``finditer`` is
    # the start position, not a flags value. Passing ``re.MULTILINE`` (== 8)
    # there skipped the first 8 characters of the text, so search from 0.
    for match in my_pattern.finditer(text):
        list_name.append(convert(match.group(g_number)))

## Cartesian and Internal Coordinates

In [5]:
# Patterns for the text printed under the two coordinate headers. The lazy
# body stops at the first blank line that is followed by a dashed rule.
pattCartCoord = re.compile(
    r"(CARTESIAN COORDINATES \(ANGSTROEM\)\n-+\n)(\s\s.*?)(\n\n-+)", re.DOTALL)  # body = group 2
pattIntCoord = re.compile(
    r"INTERNAL COORDINATES \(ANGSTROEM\)\n-+\n(\s.*?)()\n\n-+", re.DOTALL)  # body = group 1

# One raw text block per match.
cartCoord, intCoord = [], []

dataparser(my_pattern=pattCartCoord, list_name=cartCoord,
           type_of_data="string", g_number=2)
dataparser(my_pattern=pattIntCoord, list_name=intCoord,
           type_of_data="string", g_number=1)

## Coordination number & name

In [6]:
# Repeat the two scalar settings once per parsed Cartesian-coordinate block.
coord_number_lst = [coord_number for _ in cartCoord]
name_list = [structure_name for _ in cartCoord]

## Bonds

In [7]:
# Fifth whitespace-separated field on the second line of each
# internal-coordinate block, converted to float.
bonds1 = [float(block.split("\n")[1].split()[4]) for block in intCoord]

In [8]:
# Fifth whitespace-separated field on the third line of each
# internal-coordinate block, converted to float.
bonds2 = [float(block.split("\n")[2].split()[4]) for block in intCoord]

In [9]:
# Sixth whitespace-separated field on the third line of each
# internal-coordinate block, converted to float.
angles = [float(block.split("\n")[2].split()[5]) for block in intCoord]

## CASSCF Transition Energies

---

### CASSCF Lowest Roots

In [10]:
pattCASLowestRoot = re.compile(
    r"((SA-CASSCF TRANSITION ENERGIES\n-+\n\n)(LOWEST ROOT \(((.*),(.*))\) =\s+(.* Eh)\s+(.* eV)))")  # groups used: 6 and 7

# Group 6 is the text after the comma inside "LOWEST ROOT (...)"; its second
# whitespace-separated token is stored as an int.
CASlowestRoots = []
dataparser(pattCASLowestRoot, CASlowestRoots, "string", 6)
CASlowestRootsMult = [int(entry.split()[1]) for entry in CASlowestRoots]

# Group 7 is "<value> Eh". Start from a fresh list so that the group-6
# strings are not mixed in, then keep the leading number.
CASlowestRoots = []
dataparser(pattCASLowestRoot, CASlowestRoots, "string", 7)
CASlowestRootsEh = [float(entry.split()[0]) for entry in CASlowestRoots]

### CASSCF Transitions

In [11]:
CAStrans = []

# Group 2 is the text between the "STATE ... cm**-1" header line and the
# dashed rule that precedes "DENSITY MATRIX".
pattCAStrans = re.compile(
    r"(SA-CASSCF TRANSITION ENERGIES\n-+\n\nLOWEST.*\n\nSTATE.*cm\*\*-1\n)([\s\S]*?)(\n\n-+\nDENSITY MATRIX)")

dataparser(my_pattern=pattCAStrans, list_name=CAStrans,
           type_of_data="string", g_number=2)

### CASSCF Molecular energies

#### CASSCF One electron energies

In [12]:
# Group 2 = label, group 3 = "<value> Eh"
pattOneElecEnerg = re.compile(r"((One electron energy)\s+:\s+(.+ Eh))")

oneElecEnergy = []
dataparser(pattOneElecEnerg, oneElecEnergy, "string", 3)

# Keep only the leading number of each captured string.
oneElecEnergyEh = [float(entry.split()[0]) for entry in oneElecEnergy]

#### CASSCF Two electron energies

In [13]:
# Group 2 = label, group 3 = "<value> Eh"
pattTwoElecEnerg = re.compile(r"((Two electron energy)\s+:\s+(.+ Eh))")

twoElecEnergy = []
dataparser(pattTwoElecEnerg, twoElecEnergy, "string", 3)

# Keep only the leading number of each captured string.
twoElecEnergyEh = [float(entry.split()[0]) for entry in twoElecEnergy]

#### CASSCF Nuclear repulsion

In [14]:
# Group 2 = label, group 3 = "<value> Eh"
pattNucRep = re.compile(r"((Nuclear repulsion energy)\s+:\s+(.+ Eh))")

nucRepulsion = []
dataparser(pattNucRep, nucRepulsion, "string", 3)

# Keep only the leading number of each captured string.
nucRepulsionEh = [float(entry.split()[0]) for entry in nucRepulsion]

#### CASSCF Kinetic energy

In [15]:
# Group 2 = label, group 3 = "<value> Eh"
pattKinEner = re.compile(r"((Kinetic energy)\s+:\s+(.+ Eh))")

kinEnergy = []
dataparser(pattKinEner, kinEnergy, "string", 3)

# Keep only the leading number of each captured string.
kinEnergyEh = [float(entry.split()[0]) for entry in kinEnergy]

#### CASSCF Potential energy

In [16]:
# Group 2 = label, group 3 = "<value> Eh"
pattPotEner = re.compile(r"((Potential energy)\s+:\s+(.+ Eh))")

potEnergy = []
dataparser(pattPotEner, potEnergy, "string", 3)

# Keep only the leading number of each captured string.
potEnergyEh = [float(entry.split()[0]) for entry in potEnergy]

#### CASSCF Virial Ratio

In [17]:
# Group 2 = label, group 3 = rest of the line after the colon
pattVirialRatio = re.compile(r"((Virial ratio)\s+:\s+(.+))")

virialRatio = []
dataparser(pattVirialRatio, virialRatio, "string", 3)

# Keep only the first whitespace-separated token of each captured string.
virialRatioEh = [float(entry.split()[0]) for entry in virialRatio]

#### CASSCF Core energy

In [18]:
# Group 2 = label, group 3 = rest of the line after the colon
pattCoreEnerg = re.compile(r"((Core energy)\s+:\s+(.+))")

coreEnergy = []
dataparser(pattCoreEnerg, coreEnergy, "string", 3)

# Keep only the first whitespace-separated token of each captured string.
coreEnergyEh = [float(entry.split()[0]) for entry in coreEnergy]

### CASSCF Spin Orbit Coupling

#### CASSCF SOC Lowest Eigenvalue

In [19]:

# Group 1 is the number printed before " Eh" on the
# "Lowest eigenvalue of the SOC matrix:" line.
# NOTE(review): the match starts at any occurrence of "CASSCF", unlike the
# other SOC patterns, which anchor on "QDPT WITH CASSCF DIAGONAL ENERGIES".
# Confirm that this cannot pick up a "Lowest eigenvalue" line that belongs
# to a different section.
pattCASLowestEigen = re.compile(
    r"CASSCF[\s\S]*?Lowest eigenvalue of the SOC matrix:\s+(.*) Eh")  # Group 1

SOCCASLowestEigenv = []  # parsed directly as float
dataparser(pattCASLowestEigen, SOCCASLowestEigenv, "float", 1)

# `SOCCASLowestEigenvEh` is the list that goes into `parameters`. It used to
# be created empty and never filled, which left that column without data.
SOCCASLowestEigenvEh = list(SOCCASLowestEigenv)

#### CASSCF SOC Energy Stabilization

In [20]:
# Group 2 is the rest of the line after "Energy stabilization:".
pattCASEnerStab = re.compile(
    r"QDPT WITH CASSCF DIAGONAL ENERGIES([\s\S]*?)Energy stabilization:\s+(.*)")

SOCCASEnergyStab = []  # raw captured text
dataparser(pattCASEnerStab, SOCCASEnergyStab, "string", 2)

# Keep only the leading number of each captured string.
SOCCASEnergyStabcm = [float(entry.split()[0]) for entry in SOCCASEnergyStab]

#### CASSCF Kramers

In [21]:
CASKramers = []  # one raw text block per match

# Group 2 is the text between the "Eigenvalues:" header line and the
# "The threshold for printing is 0.0100" line.
pattCASKramers = re.compile(
    r"QDPT WITH CASSCF DIAGONAL ENERGIES([\s\S]*?)Eigenvalues:.*\n([\s\S]*?)The threshold for printing is 0.0100")

dataparser(my_pattern=pattCASKramers, list_name=CASKramers,
           type_of_data="string", g_number=2)

#### CASSCF SOC Spin Eigenstates

In [22]:
# Group 1 is the text after the first line that follows "Eigenvectors:", up
# to the next "Center of nuclear charge".
# Both wildcards are lazy. The greedy form ran from the first header to the
# LAST "threshold"/"Center of nuclear charge" markers in the file. That gave
# at most one match and could capture text from a later section. The sibling
# Kramers pattern is lazy as well.
pattCASSpinStates = re.compile(
    r"QDPT WITH CASSCF DIAGONAL ENERGIES[\s\S]*?The threshold for printing is 0\.0100\nEigenvectors:\n.*([\s\S]*?)Center of nuclear charge")  # Group 1

CASSpinStates = []  # one raw text block per match

dataparser(pattCASSpinStates, CASSpinStates, "string", 1)

### CASSCF Tensors

#### CASSCF 2nd Order Perturbation Theory D Tensor

In [23]:
# Group 1 is the number printed between "D =" and " cm-1" after "2ND ORDER".
pattCAS2NDDTensor = re.compile(
    r"QDPT WITH CASSCF DIAGONAL ENERGIES[\s\S]*?2ND ORDER[\s\S]*?D\s+=\s+(.*) cm-1")  # Group 1

CAS2PTD = []

# Pass the string "float" like every other call does. The bare `float` type
# only worked because any value other than "string" selects the float branch.
dataparser(pattCAS2NDDTensor, CAS2PTD, "float", 1)

#### CASSCF 2nd Order Perturbation Theory E/D

In [24]:
CAS2PTD_E = []

# Group 1 is the rest of the line after "E/D =" following "2ND ORDER".
pattCAS2NDE_D = re.compile(
    r"QDPT WITH CASSCF DIAGONAL ENERGIES[\s\S]*?2ND ORDER[\s\S]*?E\/D\s+=\s+(.*)")

dataparser(my_pattern=pattCAS2NDE_D, list_name=CAS2PTD_E,
           type_of_data="float", g_number=1)

#### CASSCF Effective Spin Hamiltonian Theory D Tensor

In [25]:
CASHSED = []

# Group 2 is the number between "D =" and " cm-1" that follows the
# "(EFFECTIVE HAMILTONIAN SPIN-ORBIT COUPLING CONTRIBUTION" heading.
pattCASEFFDTensor = re.compile(
    r"QDPT WITH CASSCF DIAGONAL ENERGIES[\s\S]+?(\(EFFECTIVE HAMILTONIAN SPIN-ORBIT COUPLING CONTRIBUTION[\s\S]+?)D\s+=\s+(.*) cm-1")

dataparser(my_pattern=pattCASEFFDTensor, list_name=CASHSED,
           type_of_data="float", g_number=2)

#### CASSCF Effective Spin Hamiltonian Theory E/D

In [26]:
CASHSED_E = []

# Group 1 (the only group) is the rest of the line after "E/D =" following
# the "EFFECTIVE HAMILTONIAN SPIN-ORBIT COUPLING CONTRIBUTION" heading.
pattCASEFFE_D = re.compile(
    r"QDPT WITH CASSCF DIAGONAL ENERGIES[\s\S]+?EFFECTIVE HAMILTONIAN SPIN-ORBIT COUPLING CONTRIBUTION[\s\S]+?E\/D\s+=\s+(.*)")

dataparser(my_pattern=pattCASEFFE_D, list_name=CASHSED_E,
           type_of_data="float", g_number=1)

### CASSCF G-tensors

#### CASSCF gx, gy, gz and g iso

In [27]:
# Groups: 1 = gx, 2 = gy, 3 = gz, 4 = g iso
pattCASGFactors = re.compile(
    r"CASSCF[\s\S]+?ELECTRONIC G-MATRIX FROM EFFECTIVE HAMILTONIAN[\s\S*]+?g-factors:\n\s+(\S+)\s+(\S+)\s+(\S+)\s+iso =\s+(\S+)")

CASgx, CASgy, CASgz, CASgiso = [], [], [], []

# Fill one list per capture group, in group order.
for group_index, g_values in enumerate((CASgx, CASgy, CASgz, CASgiso), start=1):
    dataparser(pattCASGFactors, g_values, "float", group_index)

### CASSCF Slater-Condon parameters: F0dd, F2dd, F4dd

In [28]:
# In each pattern: group 1 = a.u., group 2 = eV, group 3 = cm**-1
pattCASF0dd = re.compile(
    r"AILFT MATRIX ELEMENTS \(CASSCF\)\n[\s\S*]*?F0dd\s+=\s+(.*) a\.u\. =\s+(.*)\s+eV\s+=\s+(.*)\s+cm\*\*-1")
pattCASF2dd = re.compile(
    r"AILFT MATRIX ELEMENTS \(CASSCF\)\n[\s\S*]*?F2dd\s+=\s+(.*) a\.u\. =\s+(.*)\s+eV\s+=\s+(.*)\s+cm\*\*-1")
pattCASF4dd = re.compile(
    r"AILFT MATRIX ELEMENTS \(CASSCF\)\n[\s\S*]*?F4dd\s+=\s+(.*) a\.u\. =\s+(.*)\s+eV\s+=\s+(.*)\s+cm\*\*-1")

CASF0dd, CASF2dd, CASF4dd = [], [], []

# Store the cm**-1 value (group 3) of each parameter.
for slater_pattern, slater_values in ((pattCASF0dd, CASF0dd),
                                      (pattCASF2dd, CASF2dd),
                                      (pattCASF4dd, CASF4dd)):
    dataparser(slater_pattern, slater_values, "float", 3)

### CASSCF Racah parameters: A, B, C

In [29]:
# In the A/B/C patterns: group 1 = a.u., group 2 = eV, group 3 = cm**-1
pattCASRacahA = re.compile(
    r"AILFT MATRIX ELEMENTS \(CASSCF\)\n[\s\S*]*?A\s+=\s+(.*) a\.u\. =\s+(.*)\s+eV\s+=\s+(.*)\s+cm\*\*-1")
pattCASRacahB = re.compile(
    r"AILFT MATRIX ELEMENTS \(CASSCF\)\n[\s\S*]*?B\s+=\s+(.*) a\.u\. =\s+(.*)\s+eV\s+=\s+(.*)\s+cm\*\*-1")
pattCASRacahC = re.compile(
    r"AILFT MATRIX ELEMENTS \(CASSCF\)\n[\s\S*]*?C\s+=\s+(.*) a\.u\. =\s+(.*)\s+eV\s+=\s+(.*)\s+cm\*\*-1")
# This pattern is compiled here but not passed to dataparser in this cell.
pattCASC_B = re.compile(
    r"AILFT MATRIX ELEMENTS \(CASSCF\)\n[\s\S*]*?C/B\s+=\s+(.*)")

CASRacahA, CASRacahB, CASRacahC = [], [], []

# Store the cm**-1 value (group 3) of each parameter.
for racah_pattern, racah_values in ((pattCASRacahA, CASRacahA),
                                    (pattCASRacahB, CASRacahB),
                                    (pattCASRacahC, CASRacahC)):
    dataparser(racah_pattern, racah_values, "float", 3)

### CASSCF d-orbitals

In [30]:
# One pattern per table row (1-5) found after "The ligand field one electron".
# In every pattern: group 1 = eV, 2 = cm-1, 3 = dxy, 4 = dyz, 5 = dz2,
# 6 = dxz, 7 = dx2-y2
pattCASdOrb1 = re.compile(
    r"AILFT MATRIX ELEMENTS \(CASSCF\)\n[\s\S*]*?The ligand field one electron[\s\S*]*?\s\s\s\s1\s+(\S*)\s+(\S*)\s+(\S*)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)")
pattCASdOrb2 = re.compile(
    r"AILFT MATRIX ELEMENTS \(CASSCF\)\n[\s\S*]*?The ligand field one electron[\s\S*]*?\s\s\s\s2\s+(\S*)\s+(\S*)\s+(\S*)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)")
pattCASdOrb3 = re.compile(
    r"AILFT MATRIX ELEMENTS \(CASSCF\)\n[\s\S*]*?The ligand field one electron[\s\S*]*?\s\s\s\s3\s+(\S*)\s+(\S*)\s+(\S*)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)")
pattCASdOrb4 = re.compile(
    r"AILFT MATRIX ELEMENTS \(CASSCF\)\n[\s\S*]*?The ligand field one electron[\s\S*]*?\s\s\s\s4\s+(\S*)\s+(\S*)\s+(\S*)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)")
pattCASdOrb5 = re.compile(
    r"AILFT MATRIX ELEMENTS \(CASSCF\)\n[\s\S*]*?The ligand field one electron[\s\S*]*?\s\s\s\s5\s+(\S*)\s+(\S*)\s+(\S*)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)")

# Seven independent lists per row, one for each capture group.
(CASdOrb1eV, CASdOrb1cm, CASdOrb1xy, CASdOrb1yz,
 CASdOrb1z2, CASdOrb1xz, CASdOrb1x2y2) = ([] for _ in range(7))
(CASdOrb2eV, CASdOrb2cm, CASdOrb2xy, CASdOrb2yz,
 CASdOrb2z2, CASdOrb2xz, CASdOrb2x2y2) = ([] for _ in range(7))
(CASdOrb3eV, CASdOrb3cm, CASdOrb3xy, CASdOrb3yz,
 CASdOrb3z2, CASdOrb3xz, CASdOrb3x2y2) = ([] for _ in range(7))
(CASdOrb4eV, CASdOrb4cm, CASdOrb4xy, CASdOrb4yz,
 CASdOrb4z2, CASdOrb4xz, CASdOrb4x2y2) = ([] for _ in range(7))
(CASdOrb5eV, CASdOrb5cm, CASdOrb5xy, CASdOrb5yz,
 CASdOrb5z2, CASdOrb5xz, CASdOrb5x2y2) = ([] for _ in range(7))

# Rows are processed 1 to 5; within a row the lists are filled in group order.
for orbital_pattern, orbital_columns in (
        (pattCASdOrb1, (CASdOrb1eV, CASdOrb1cm, CASdOrb1xy, CASdOrb1yz,
                        CASdOrb1z2, CASdOrb1xz, CASdOrb1x2y2)),
        (pattCASdOrb2, (CASdOrb2eV, CASdOrb2cm, CASdOrb2xy, CASdOrb2yz,
                        CASdOrb2z2, CASdOrb2xz, CASdOrb2x2y2)),
        (pattCASdOrb3, (CASdOrb3eV, CASdOrb3cm, CASdOrb3xy, CASdOrb3yz,
                        CASdOrb3z2, CASdOrb3xz, CASdOrb3x2y2)),
        (pattCASdOrb4, (CASdOrb4eV, CASdOrb4cm, CASdOrb4xy, CASdOrb4yz,
                        CASdOrb4z2, CASdOrb4xz, CASdOrb4x2y2)),
        (pattCASdOrb5, (CASdOrb5eV, CASdOrb5cm, CASdOrb5xy, CASdOrb5yz,
                        CASdOrb5z2, CASdOrb5xz, CASdOrb5x2y2))):
    for group_index, column in enumerate(orbital_columns, start=1):
        dataparser(orbital_pattern, column, "float", group_index)

### CASSCF Spin-Orbit Coupling parameters and Zeta

In [31]:

# "a": group 1 = rest of the line after "a =".
pattSOCa = re.compile(
    r"SPIN ORBIT COUPLING \(based on CASSCF orbitals\)[\s\S*]*?a\s+=\s+(.*)")
# "b" and "constant zeta": group 1 = eV, group 2 = cm**-1
pattSOCb = re.compile(
    r"SPIN ORBIT COUPLING \(based on CASSCF orbitals\)[\s\S*]*?b\s+=\s+(\S*)\s+eV\s+=\s+(\S+)\s+cm\*\*-1")
pattSOCZeta = re.compile(
    r"SPIN ORBIT COUPLING \(based on CASSCF orbitals\)[\s\S*]*?constant zeta\s+=\s+(\S+)\s+eV\s+=\s+(\S+)\s+cm\*\*-1")

CASSOCa, CASSOCb, CASSOCZeta = [], [], []

# (pattern, destination list, capture group to store)
for soc_pattern, soc_values, group_index in ((pattSOCa, CASSOCa, 1),
                                             (pattSOCb, CASSOCb, 2),
                                             (pattSOCZeta, CASSOCZeta, 2)):
    dataparser(soc_pattern, soc_values, "float", group_index)

## NEVPT2

---

### NEVPT2 Transition energies

In [32]:
pattNEVPT2LowestRoot = re.compile(
    r"(( NEVPT2 TRANSITION ENERGIES\n-+\n\n)(LOWEST ROOT \(((.*),(.*))\) =\s+(.* Eh)\s+(.* eV)))")  # groups used: 6 and 7
pattNEVPT2trans = re.compile(
    r"( NEVPT2 TRANSITION ENERGIES\n-+\n\nLOWEST ROOT.*\n\nSTATE ROOT MULT  DE/a\.u\.     DE/eV    DE/cm\**-1)([\s\S]*?)(\n\n-+\n NEVPT2 CORRECTION TO THE TRANSITION ENERGY)")  # group 2

# Group 6 is the text after the comma inside "LOWEST ROOT (...)"; its second
# whitespace-separated token is stored as an int.
NEVPT2LowestRoot = []
dataparser(pattNEVPT2LowestRoot, NEVPT2LowestRoot, "string", 6)
NEVPT2lowestRootsMult = [int(entry.split()[1]) for entry in NEVPT2LowestRoot]

# Group 7 is "<value> Eh". Start from a fresh list so that the group-6
# strings are not mixed in, then keep the leading number.
NEVPT2LowestRoot = []
dataparser(pattNEVPT2LowestRoot, NEVPT2LowestRoot, "string", 7)
NEVPT2lowestRootsEh = [float(entry.split()[0]) for entry in NEVPT2LowestRoot]

# Raw text captured by group 2 of the transitions pattern.
NEVPT2trans = []
dataparser(pattNEVPT2trans, NEVPT2trans, "string", 2)

### NEVPT2 Spin-Orbit Coupling

#### NEVPT2 Lowest eigenvalue

In [33]:

# Group 1 is the text printed before " Eh" on the
# "Lowest eigenvalue of the SOC matrix:" line.
pattNEVPT2LowestEigen = re.compile(
    r"NEVPT2[\s\S]*?Lowest eigenvalue of the SOC matrix:\s+(.*) Eh")

SOCNEVPT2LowestEigenv = []  # raw captured text
dataparser(pattNEVPT2LowestEigen, SOCNEVPT2LowestEigenv, "string", 1)

# Keep only the first whitespace-separated token, as a float.
SOCNEVPT2LowestEigenvEh = [float(entry.split()[0])
                           for entry in SOCNEVPT2LowestEigenv]


#### NEVPT2 Stabilization Energy

In [34]:
# Group 2 is the rest of the line after "Energy stabilization:".
pattNEVPT2EnerStab = re.compile(
    r"QDPT WITH NEVPT2 DIAGONAL ENERGIES([\s\S]*?)Energy stabilization:\s+(.*)")

SOCNEVPT2EnergyStab = []  # raw captured text
dataparser(pattNEVPT2EnerStab, SOCNEVPT2EnergyStab, "string", 2)

# Keep only the leading number of each captured string.
SOCNEVPT2EnergyStabcm = [float(entry.split()[0])
                         for entry in SOCNEVPT2EnergyStab]

#### NEVPT2 Kramers

In [35]:
NEVPT2Kramers = []  # one raw text block per match

# Group 2 is the text between the "Eigenvalues:" header line and the
# "The threshold for printing is 0.0100" line.
pattNEVPT2Kramers = re.compile(
    r"QDPT WITH NEVPT2 DIAGONAL ENERGIES([\s\S]*?)Eigenvalues:.*\n([\s\S]*?)The threshold for printing is 0.0100")

dataparser(my_pattern=pattNEVPT2Kramers, list_name=NEVPT2Kramers,
           type_of_data="string", g_number=2)

#### NEVPT2 SOC Spin eigenstates

In [36]:
# Group 1 is the text after the first line that follows "Eigenvectors:", up
# to the next "Center of nuclear charge".
# Both wildcards are lazy. The greedy form ran from the first header to the
# LAST "threshold"/"Center of nuclear charge" markers in the file, which
# gave at most one match for the whole file. The sibling Kramers pattern is
# lazy as well.
pattNEVPT2SpinStates = re.compile(
    r"QDPT WITH NEVPT2 DIAGONAL ENERGIES[\s\S]*?The threshold for printing is 0\.0100\nEigenvectors:\n.*([\s\S]*?)Center of nuclear charge")  # Group 1

NEVPT2SpinStates = []  # one raw text block per match

dataparser(pattNEVPT2SpinStates, NEVPT2SpinStates, "string", 1)

### NEVPT2 Tensors

#### NEVPT2 2nd Order Perturbation Theory D

In [37]:
NEVPT22PTD = []

# Group 1 is the number between "D =" and " cm-1" after "2ND ORDER".
pattNEVPT22NDDTensor = re.compile(
    r"QDPT WITH NEVPT2 DIAGONAL ENERGIES[\s\S]+?2ND ORDER[\s\S]+?D\s+=\s+(.*) cm-1")

dataparser(my_pattern=pattNEVPT22NDDTensor, list_name=NEVPT22PTD,
           type_of_data="float", g_number=1)

#### NEVPT2 2nd Order Perturbation Theory E/D

In [38]:
NEVPT22PTD_E = []

# Group 1 is the rest of the line after "E/D =" following "2ND ORDER".
pattNEVPT22NDE_D = re.compile(
    r"QDPT WITH NEVPT2 DIAGONAL ENERGIES[\s\S]+?2ND ORDER[\s\S]+?E\/D\s+=\s+(.*)")

dataparser(my_pattern=pattNEVPT22NDE_D, list_name=NEVPT22PTD_E,
           type_of_data="float", g_number=1)

#### NEVPT2 Effective Spin Hamiltonian Theory D

In [39]:
NEVPT2HSED = []

# Group 1 is the number between "D =" and " cm-1" that follows the
# "EFFECTIVE HAMILTONIAN SPIN-ORBIT COUPLING" heading.
pattNEVPT2EFFDTensor = re.compile(
    r"QDPT WITH NEVPT2 DIAGONAL ENERGIES[\s\S]+?EFFECTIVE HAMILTONIAN SPIN-ORBIT COUPLING[\s\S]+?D\s+=\s+(.*) cm-1")

dataparser(my_pattern=pattNEVPT2EFFDTensor, list_name=NEVPT2HSED,
           type_of_data="float", g_number=1)

#### NEVPT2 Effective Spin Hamiltonian Theory E/D

In [40]:
NEVPT2HSED_E = []

# Group 1 is the rest of the line after "E/D =" following the
# "EFFECTIVE HAMILTONIAN SPIN-ORBIT COUPLING" heading.
pattNEVPT2EFFE_D = re.compile(
    r"QDPT WITH NEVPT2 DIAGONAL ENERGIES[\s\S]+?EFFECTIVE HAMILTONIAN SPIN-ORBIT COUPLING[\s\S]+?E\/D\s+=\s+(.*)")

dataparser(my_pattern=pattNEVPT2EFFE_D, list_name=NEVPT2HSED_E,
           type_of_data="float", g_number=1)

### NEVPT2 G-tensors

#### NEVPT2 gx, gy, gz and g iso

In [41]:
# Groups: 1 = gx, 2 = gy, 3 = gz, 4 = g iso
pattNEVPT2GFactors = re.compile(
    r"QDPT WITH NEVPT2 DIAGONAL ENERGIES[\s\S]+?ELECTRONIC G-MATRIX FROM EFFECTIVE HAMILTONIAN[\s\S*]+?g-factors:\n\s+(\S+)\s+(\S+)\s+(\S+)\s+iso =\s+(\S+)")

NEVPT2gx, NEVPT2gy, NEVPT2gz, NEVPT2giso = [], [], [], []

# Fill one list per capture group, in group order.
for group_index, g_values in enumerate(
        (NEVPT2gx, NEVPT2gy, NEVPT2gz, NEVPT2giso), start=1):
    dataparser(pattNEVPT2GFactors, g_values, "float", group_index)

## NEVPT2 AILFT Data

### NEVPT2 Slater-Condon parameters: F0dd, F2dd, F4dd

In [42]:
# In each pattern: group 1 = a.u., group 2 = eV, group 3 = cm**-1
pattNEVPT2F0dd = re.compile(
    r"AILFT MATRIX ELEMENTS \(NEVPT2\)\n[\s\S*]*?F0dd\s+=\s+(.*) a\.u\. =\s+(.*)\s+eV\s+=\s+(.*)\s+cm\*\*-1")
pattNEVPT2F2dd = re.compile(
    r"AILFT MATRIX ELEMENTS \(NEVPT2\)\n[\s\S*]*?F2dd\s+=\s+(.*) a\.u\. =\s+(.*)\s+eV\s+=\s+(.*)\s+cm\*\*-1")
pattNEVPT2F4dd = re.compile(
    r"AILFT MATRIX ELEMENTS \(NEVPT2\)\n[\s\S*]*?F4dd\s+=\s+(.*) a\.u\. =\s+(.*)\s+eV\s+=\s+(.*)\s+cm\*\*-1")

NEVPT2F0dd, NEVPT2F2dd, NEVPT2F4dd = [], [], []

# Store the cm**-1 value (group 3) of each parameter.
for slater_pattern, slater_values in ((pattNEVPT2F0dd, NEVPT2F0dd),
                                      (pattNEVPT2F2dd, NEVPT2F2dd),
                                      (pattNEVPT2F4dd, NEVPT2F4dd)):
    dataparser(slater_pattern, slater_values, "float", 3)

### NEVPT2 Racah parameters

In [43]:
# In the A/B/C patterns: group 1 = a.u., group 2 = eV, group 3 = cm**-1
pattNEVPT2RacahA = re.compile(
    r"AILFT MATRIX ELEMENTS \(NEVPT2\)\n[\s\S*]*?A\s+=\s+(.*) a\.u\. =\s+(.*)\s+eV\s+=\s+(.*)\s+cm\*\*-1")
pattNEVPT2RacahB = re.compile(
    r"AILFT MATRIX ELEMENTS \(NEVPT2\)\n[\s\S*]*?B\s+=\s+(.*) a\.u\. =\s+(.*)\s+eV\s+=\s+(.*)\s+cm\*\*-1")
pattNEVPT2RacahC = re.compile(
    r"AILFT MATRIX ELEMENTS \(NEVPT2\)\n[\s\S*]*?C\s+=\s+(.*) a\.u\. =\s+(.*)\s+eV\s+=\s+(.*)\s+cm\*\*-1")
# C/B: group 1 = rest of the line after "C/B ="
pattNEVPT2C_B = re.compile(
    r"AILFT MATRIX ELEMENTS \(NEVPT2\)\n[\s\S*]*?C/B\s+=\s+(.*)")

NEVPT2RacahA, NEVPT2RacahB, NEVPT2RacahC, NEVPT2RacahCB = [], [], [], []

# (pattern, destination list, capture group to store)
for racah_pattern, racah_values, group_index in (
        (pattNEVPT2RacahA, NEVPT2RacahA, 3),
        (pattNEVPT2RacahB, NEVPT2RacahB, 3),
        (pattNEVPT2RacahC, NEVPT2RacahC, 3),
        (pattNEVPT2C_B, NEVPT2RacahCB, 1)):
    dataparser(racah_pattern, racah_values, "float", group_index)

### NEVPT2 d-orbitals

In [44]:

# One pattern per table row (1-5) found after "The ligand field one electron".
# In every pattern: group 1 = eV, 2 = cm-1, 3 = dxy, 4 = dyz, 5 = dz2,
# 6 = dxz, 7 = dx2-y2
pattNEVPT2dOrb1 = re.compile(
    r"AILFT MATRIX ELEMENTS \(NEVPT2\)\n[\s\S*]*?The ligand field one electron[\s\S*]*?\s\s\s\s1\s+(\S*)\s+(\S*)\s+(\S*)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)")
pattNEVPT2dOrb2 = re.compile(
    r"AILFT MATRIX ELEMENTS \(NEVPT2\)\n[\s\S*]*?The ligand field one electron[\s\S*]*?\s\s\s\s2\s+(\S*)\s+(\S*)\s+(\S*)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)")
pattNEVPT2dOrb3 = re.compile(
    r"AILFT MATRIX ELEMENTS \(NEVPT2\)\n[\s\S*]*?The ligand field one electron[\s\S*]*?\s\s\s\s3\s+(\S*)\s+(\S*)\s+(\S*)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)")
pattNEVPT2dOrb4 = re.compile(
    r"AILFT MATRIX ELEMENTS \(NEVPT2\)\n[\s\S*]*?The ligand field one electron[\s\S*]*?\s\s\s\s4\s+(\S*)\s+(\S*)\s+(\S*)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)")
pattNEVPT2dOrb5 = re.compile(
    r"AILFT MATRIX ELEMENTS \(NEVPT2\)\n[\s\S*]*?The ligand field one electron[\s\S*]*?\s\s\s\s5\s+(\S*)\s+(\S*)\s+(\S*)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)")

# Seven independent lists per row, one for each capture group.
(NEVPT2dOrb1eV, NEVPT2dOrb1cm, NEVPT2dOrb1xy, NEVPT2dOrb1yz,
 NEVPT2dOrb1z2, NEVPT2dOrb1xz, NEVPT2dOrb1x2y2) = ([] for _ in range(7))
(NEVPT2dOrb2eV, NEVPT2dOrb2cm, NEVPT2dOrb2xy, NEVPT2dOrb2yz,
 NEVPT2dOrb2z2, NEVPT2dOrb2xz, NEVPT2dOrb2x2y2) = ([] for _ in range(7))
(NEVPT2dOrb3eV, NEVPT2dOrb3cm, NEVPT2dOrb3xy, NEVPT2dOrb3yz,
 NEVPT2dOrb3z2, NEVPT2dOrb3xz, NEVPT2dOrb3x2y2) = ([] for _ in range(7))
(NEVPT2dOrb4eV, NEVPT2dOrb4cm, NEVPT2dOrb4xy, NEVPT2dOrb4yz,
 NEVPT2dOrb4z2, NEVPT2dOrb4xz, NEVPT2dOrb4x2y2) = ([] for _ in range(7))
(NEVPT2dOrb5eV, NEVPT2dOrb5cm, NEVPT2dOrb5xy, NEVPT2dOrb5yz,
 NEVPT2dOrb5z2, NEVPT2dOrb5xz, NEVPT2dOrb5x2y2) = ([] for _ in range(7))

# Rows are processed 1 to 5; within a row the lists are filled in group order.
for orbital_pattern, orbital_columns in (
        (pattNEVPT2dOrb1, (NEVPT2dOrb1eV, NEVPT2dOrb1cm, NEVPT2dOrb1xy, NEVPT2dOrb1yz,
                           NEVPT2dOrb1z2, NEVPT2dOrb1xz, NEVPT2dOrb1x2y2)),
        (pattNEVPT2dOrb2, (NEVPT2dOrb2eV, NEVPT2dOrb2cm, NEVPT2dOrb2xy, NEVPT2dOrb2yz,
                           NEVPT2dOrb2z2, NEVPT2dOrb2xz, NEVPT2dOrb2x2y2)),
        (pattNEVPT2dOrb3, (NEVPT2dOrb3eV, NEVPT2dOrb3cm, NEVPT2dOrb3xy, NEVPT2dOrb3yz,
                           NEVPT2dOrb3z2, NEVPT2dOrb3xz, NEVPT2dOrb3x2y2)),
        (pattNEVPT2dOrb4, (NEVPT2dOrb4eV, NEVPT2dOrb4cm, NEVPT2dOrb4xy, NEVPT2dOrb4yz,
                           NEVPT2dOrb4z2, NEVPT2dOrb4xz, NEVPT2dOrb4x2y2)),
        (pattNEVPT2dOrb5, (NEVPT2dOrb5eV, NEVPT2dOrb5cm, NEVPT2dOrb5xy, NEVPT2dOrb5yz,
                           NEVPT2dOrb5z2, NEVPT2dOrb5xz, NEVPT2dOrb5x2y2))):
    for group_index, column in enumerate(orbital_columns, start=1):
        dataparser(orbital_pattern, column, "float", group_index)

# Data engineering

---

In [45]:
# Ordered collection of the lists filled by the parsing cells above
# (130 entries).
# NOTE(review): presumably paired by position with the labels in `names`,
# so an entry added here needs a matching label there. Confirm where the
# table is built.
parameters = [name_list,
              cartCoord,
              intCoord,
              coord_number_lst,
              bonds1,
              bonds2,
              angles,
              CASlowestRootsMult,
              CASlowestRootsEh,
              CAStrans,
              oneElecEnergyEh,
              twoElecEnergyEh,
              nucRepulsionEh,
              kinEnergyEh,
              potEnergyEh,
              virialRatioEh,
              coreEnergyEh,
              SOCCASLowestEigenvEh,
              SOCCASEnergyStabcm,
              CASKramers,
              CASSpinStates,
              CAS2PTD,
              CAS2PTD_E,
              CASHSED,
              CASHSED_E,
              CASgx,
              CASgy,
              CASgz,
              CASgiso,
              CASF0dd,
              CASF2dd,
              CASF4dd,
              CASRacahA,
              CASRacahB,
              CASRacahC,
              CASdOrb1eV,
              CASdOrb1cm,
              CASdOrb1xy,
              CASdOrb1yz,
              CASdOrb1z2,
              CASdOrb1xz,
              CASdOrb1x2y2,
              CASdOrb2eV,
              CASdOrb2cm,
              CASdOrb2xy,
              CASdOrb2yz,
              CASdOrb2z2,
              CASdOrb2xz,
              CASdOrb2x2y2,
              CASdOrb3eV,
              CASdOrb3cm,
              CASdOrb3xy,
              CASdOrb3yz,
              CASdOrb3z2,
              CASdOrb3xz,
              CASdOrb3x2y2,
              CASdOrb4eV,
              CASdOrb4cm,
              CASdOrb4xy,
              CASdOrb4yz,
              CASdOrb4z2,
              CASdOrb4xz,
              CASdOrb4x2y2,
              CASdOrb5eV,
              CASdOrb5cm,
              CASdOrb5xy,
              CASdOrb5yz,
              CASdOrb5z2,
              CASdOrb5xz,
              CASdOrb5x2y2,
              CASSOCa,
              CASSOCb,
              CASSOCZeta,
              NEVPT2lowestRootsMult,
              NEVPT2lowestRootsEh,
              NEVPT2trans,
              SOCNEVPT2LowestEigenvEh,
              SOCNEVPT2EnergyStabcm,
              NEVPT2Kramers,
              NEVPT2SpinStates,
              NEVPT22PTD,
              NEVPT22PTD_E,
              NEVPT2HSED,
              NEVPT2HSED_E,
              NEVPT2gx,
              NEVPT2gy,
              NEVPT2gz,
              NEVPT2giso,
              NEVPT2F0dd,
              NEVPT2F2dd,
              NEVPT2F4dd,
              NEVPT2RacahA,
              NEVPT2RacahB,
              NEVPT2RacahC,
              NEVPT2RacahCB,
              NEVPT2dOrb1eV,
              NEVPT2dOrb1cm,
              NEVPT2dOrb1xy,
              NEVPT2dOrb1yz,
              NEVPT2dOrb1z2,
              NEVPT2dOrb1xz,
              NEVPT2dOrb1x2y2,
              NEVPT2dOrb2eV,
              NEVPT2dOrb2cm,
              NEVPT2dOrb2xy,
              NEVPT2dOrb2yz,
              NEVPT2dOrb2z2,
              NEVPT2dOrb2xz,
              NEVPT2dOrb2x2y2,
              NEVPT2dOrb3eV,
              NEVPT2dOrb3cm,
              NEVPT2dOrb3xy,
              NEVPT2dOrb3yz,
              NEVPT2dOrb3z2,
              NEVPT2dOrb3xz,
              NEVPT2dOrb3x2y2,
              NEVPT2dOrb4eV,
              NEVPT2dOrb4cm,
              NEVPT2dOrb4xy,
              NEVPT2dOrb4yz,
              NEVPT2dOrb4z2,
              NEVPT2dOrb4xz,
              NEVPT2dOrb4x2y2,
              NEVPT2dOrb5eV,
              NEVPT2dOrb5cm,
              NEVPT2dOrb5xy,
              NEVPT2dOrb5yz,
              NEVPT2dOrb5z2,
              NEVPT2dOrb5xz,
              NEVPT2dOrb5x2y2
              ]

In [46]:
# Column labels (130 entries), listed in the same order as the lists in
# `parameters`.
# NOTE(review): presumably used as the column names for those lists, and some
# labels are repeated verbatim in the `numeric` list below, so renaming one
# needs a matching change there. Confirm where the table is built.
names = ["Structure name",
         "Cartesian coordinates",
         "Internal coordinates",
         "Coordination number",
         "Bond lengths 1",
         "Bond lengths 2",
         "Bond angles",
         "CAS root Mult",
         "CAS GS energy (Eh)",
         "CAS transition energies",
         "CAS 1 el energy (Eh)",
         "CAS 2 el energy (Eh)",
         "CAS nucl. repulsion (Eh)",
         "Kinetic energy (Eh)",
         "Potential energy (Eh)",
         "Virial ratio (Eh)",
         "Core energy (Eh)",
         "SOC CAS lowest eigenv. (Eh)",
         "SOC CAS stab. energy (cm-1)",
         "CAS Kramers",
         "CAS Ms states",
         "CAS 2PT D",
         "CAS 2PT E/D",
         "CAS Heff D",
         "CAS Heff E/D",
         "CAS gx",
         "CAS gy",
         "CAS gz",
         "CAS giso",
         "CAS F0dd",
         "CAS F2dd",
         "CAS F4dd",
         "CAS Racah A",
         "CAS Racah B",
         "CAS Racah C",
         "CAS d-orb1 (eV)",
         "CAS d-orb1 (cm-1)",
         "CAS d-orb1 (xy)",
         "CAS d-orb1 (yz)",
         "CAS d-orb1 (z2)",
         "CAS d-orb1 (xz)",
         "CAS d-orb1 (x2y2)",
         "CAS d-orb2 (eV)",
         "CAS d-orb2 (cm-1)",
         "CAS d-orb2 (xy)",
         "CAS d-orb2 (yz)",
         "CAS d-orb2 (z2)",
         "CAS d-orb2 (xz)",
         "CAS d-orb2 (x2y2)",
         "CAS d-orb3 (eV)",
         "CAS d-orb3 (cm-1)",
         "CAS d-orb3 (xy)",
         "CAS d-orb3 (yz)",
         "CAS d-orb3 (z2)",
         "CAS d-orb3 (xz)",
         "CAS d-orb3 (x2y2)",
         "CAS d-orb4 (eV)",
         "CAS d-orb4 (cm-1)",
         "CAS d-orb4 (xy)",
         "CAS d-orb4 (yz)",
         "CAS d-orb4 (z2)",
         "CAS d-orb4 (xz)",
         "CAS d-orb4 (x2y2)",
         "CAS d-orb5 (eV)",
         "CAS d-orb5 (cm-1)",
         "CAS d-orb5 (xy)",
         "CAS d-orb5 (yz)",
         "CAS d-orb5 (z2)",
         "CAS d-orb5 (xz)",
         "CAS d-orb5 (x2y2)",
         "CAS SOC a",
         "CAS SOC b",
         "CAS SOC Zeta",
         "NEVPT2 root Mult",
         "NEVPT2 GS energy (Eh)",
         "NEVPT2 transition energies",
         "SOC NEVPT2 lowest eigenv. (Eh)",
         "SOC NEVPT2 stab. energy (cm-1)",
         "NEVPT2 Kramers",
         "NEVPT2 Ms states",
         "NEVPT2 2PT D",
         "NEVPT2 2PT E/D",
         "NEVPT2 Heff D",
         "NEVPT2 Heff E/D",
         "NEVPT2 gx",
         "NEVPT2 gy",
         "NEVPT2 gz",
         "NEVPT2 giso",
         "NEVPT2 F0dd",
         "NEVPT2 F2dd",
         "NEVPT2 F4dd",
         "NEVPT2 Racah A",
         "NEVPT2 Racah B",
         "NEVPT2 Racah C",
         "NEVPT2 Racah C/B",
         "NEVPT2 d-orb1 (eV)",
         "NEVPT2 d-orb1 (cm-1)",
         "NEVPT2 d-orb1 (xy)",
         "NEVPT2 d-orb1 (yz)",
         "NEVPT2 d-orb1 (z2)",
         "NEVPT2 d-orb1 (xz)",
         "NEVPT2 d-orb1 (x2y2)",
         "NEVPT2 d-orb2 (eV)",
         "NEVPT2 d-orb2 (cm-1)",
         "NEVPT2 d-orb2 (xy)",
         "NEVPT2 d-orb2 (yz)",
         "NEVPT2 d-orb2 (z2)",
         "NEVPT2 d-orb2 (xz)",
         "NEVPT2 d-orb2 (x2y2)",
         "NEVPT2 d-orb3 (eV)",
         "NEVPT2 d-orb3 (cm-1)",
         "NEVPT2 d-orb3 (xy)",
         "NEVPT2 d-orb3 (yz)",
         "NEVPT2 d-orb3 (z2)",
         "NEVPT2 d-orb3 (xz)",
         "NEVPT2 d-orb3 (x2y2)",
         "NEVPT2 d-orb4 (eV)",
         "NEVPT2 d-orb4 (cm-1)",
         "NEVPT2 d-orb4 (xy)",
         "NEVPT2 d-orb4 (yz)",
         "NEVPT2 d-orb4 (z2)",
         "NEVPT2 d-orb4 (xz)",
         "NEVPT2 d-orb4 (x2y2)",
         "NEVPT2 d-orb5 (eV)",
         "NEVPT2 d-orb5 (cm-1)",
         "NEVPT2 d-orb5 (xy)",
         "NEVPT2 d-orb5 (yz)",
         "NEVPT2 d-orb5 (z2)",
         "NEVPT2 d-orb5 (xz)",
         "NEVPT2 d-orb5 (x2y2)"
         ]

## Numerical data

In [47]:
# Columns of `df` that are cast to float further down in the notebook.
# The five d-orbital blocks share the same seven fields, so their names are
# generated instead of being spelled out one by one.
numeric = (
    [
        "Coordination number", "Bond lengths 1", "Bond lengths 2", "Bond angles",
        "CAS root Mult", "CAS GS energy (Eh)",
        "CAS 1 el energy (Eh)", "CAS 2 el energy (Eh)", "CAS nucl. repulsion (Eh)",
        "Kinetic energy (Eh)", "Potential energy (Eh)", "Virial ratio (Eh)", "Core energy (Eh)",
        "SOC CAS lowest eigenv. (Eh)", "SOC CAS stab. energy (cm-1)",
        "CAS 2PT D", "CAS 2PT E/D", "CAS Heff D", "CAS Heff E/D",
        "CAS gx", "CAS gy", "CAS gz", "CAS giso",
        "CAS F0dd", "CAS F2dd", "CAS F4dd",
        "CAS Racah A", "CAS Racah B", "CAS Racah C",
    ]
    + [
        "CAS d-orb{} ({})".format(orbital, field)
        for orbital in range(1, 6)
        for field in ("eV", "cm-1", "xy", "yz", "z2", "xz", "x2y2")
    ]
    + ["CAS SOC a", "CAS SOC b", "CAS SOC Zeta"]
    + [
        "NEVPT2 root Mult", "NEVPT2 GS energy (Eh)",
        "SOC NEVPT2 lowest eigenv. (Eh)", "SOC NEVPT2 stab. energy (cm-1)",
        "NEVPT2 2PT D", "NEVPT2 2PT E/D", "NEVPT2 Heff D", "NEVPT2 Heff E/D",
        "NEVPT2 gx", "NEVPT2 gy", "NEVPT2 gz", "NEVPT2 giso",
        "NEVPT2 F0dd", "NEVPT2 F2dd", "NEVPT2 F4dd",
        "NEVPT2 Racah A", "NEVPT2 Racah B", "NEVPT2 Racah C", "NEVPT2 Racah C/B",
    ]
    + [
        "NEVPT2 d-orb{} ({})".format(orbital, field)
        for orbital in range(1, 6)
        for field in ("eV", "cm-1", "xy", "yz", "z2", "xz", "x2y2")
    ]
)

## Categorical data

In [48]:
# Columns of `df` that are cast to str further down in the notebook: the
# structure label, the two coordinate blocks and, for each method, the three
# text-valued result blocks.
categ = ["Structure name", "Cartesian coordinates", "Internal coordinates"] + [
    "{} {}".format(method, block)
    for method in ("CAS", "NEVPT2")
    for block in ("transition energies", "Kramers", "Ms states")
]

### DataFrame creation and type setting

In [49]:
# Build the table from `parameters` and flip it so that each entry of
# `parameters` becomes a column, then label the columns with `names`.
# NOTE(review): presumably `parameters` holds one list per quantity, in the
# same order as `names` - confirm where both are defined.
df = pd.DataFrame(parameters).T
df.columns = names

In [50]:
# Cast each group of columns to its target dtype: numeric columns to float
# first, then categorical/text columns to str.
for column_group, target_type in ((numeric, float), (categ, str)):
    df[column_group] = df[column_group].astype(target_type)

# Data Selection

## CAS Only

In [51]:
# Column selection restricted to structure/geometry and CAS quantities
# (both text and numeric columns).
names_cas = (
    [
        "Structure name", "Cartesian coordinates", "Internal coordinates",
        "Coordination number", "Bond lengths 1", "Bond lengths 2", "Bond angles",
        "CAS root Mult", "CAS GS energy (Eh)", "CAS transition energies",
        "CAS 1 el energy (Eh)", "CAS 2 el energy (Eh)", "CAS nucl. repulsion (Eh)",
        "Kinetic energy (Eh)", "Potential energy (Eh)", "Virial ratio (Eh)", "Core energy (Eh)",
        "SOC CAS lowest eigenv. (Eh)", "SOC CAS stab. energy (cm-1)",
        "CAS Kramers", "CAS Ms states",
        "CAS 2PT D", "CAS 2PT E/D", "CAS Heff D", "CAS Heff E/D",
        "CAS gx", "CAS gy", "CAS gz", "CAS giso",
        "CAS F0dd", "CAS F2dd", "CAS F4dd",
        "CAS Racah A", "CAS Racah B", "CAS Racah C",
    ]
    + [
        "CAS d-orb{} ({})".format(orbital, field)
        for orbital in range(1, 6)
        for field in ("eV", "cm-1", "xy", "yz", "z2", "xz", "x2y2")
    ]
    + ["CAS SOC a", "CAS SOC b", "CAS SOC Zeta"]
)

In [52]:
# Numeric-only subset of the CAS selection (the text columns are left out).
cas_numeric = (
    [
        "Coordination number", "Bond lengths 1", "Bond lengths 2", "Bond angles",
        "CAS root Mult", "CAS GS energy (Eh)",
        "CAS 1 el energy (Eh)", "CAS 2 el energy (Eh)", "CAS nucl. repulsion (Eh)",
        "Kinetic energy (Eh)", "Potential energy (Eh)", "Virial ratio (Eh)", "Core energy (Eh)",
        "SOC CAS lowest eigenv. (Eh)", "SOC CAS stab. energy (cm-1)",
        "CAS 2PT D", "CAS 2PT E/D", "CAS Heff D", "CAS Heff E/D",
        "CAS gx", "CAS gy", "CAS gz", "CAS giso",
        "CAS F0dd", "CAS F2dd", "CAS F4dd",
        "CAS Racah A", "CAS Racah B", "CAS Racah C",
    ]
    + [
        "CAS d-orb{} ({})".format(orbital, field)
        for orbital in range(1, 6)
        for field in ("eV", "cm-1", "xy", "yz", "z2", "xz", "x2y2")
    ]
    + ["CAS SOC a", "CAS SOC b", "CAS SOC Zeta"]
)

In [53]:
# Reduced numeric CAS selection: for each d-orbital only the "(cm-1)" energy
# column is kept; the "(eV)" and per-orbital component columns are dropped.
cas_numeric_nod = (
    [
        "Coordination number", "Bond lengths 1", "Bond lengths 2", "Bond angles",
        "CAS root Mult", "CAS GS energy (Eh)",
        "CAS 1 el energy (Eh)", "CAS 2 el energy (Eh)", "CAS nucl. repulsion (Eh)",
        "Kinetic energy (Eh)", "Potential energy (Eh)", "Virial ratio (Eh)", "Core energy (Eh)",
        "SOC CAS lowest eigenv. (Eh)", "SOC CAS stab. energy (cm-1)",
        "CAS 2PT D", "CAS 2PT E/D", "CAS Heff D", "CAS Heff E/D",
        "CAS gx", "CAS gy", "CAS gz", "CAS giso",
        "CAS F0dd", "CAS F2dd", "CAS F4dd",
        "CAS Racah A", "CAS Racah B", "CAS Racah C",
    ]
    + ["CAS d-orb{} (cm-1)".format(orbital) for orbital in range(1, 6)]
    + ["CAS SOC a", "CAS SOC b", "CAS SOC Zeta"]
)

## NEVPT2 Only

In [54]:
# Column selection restricted to structure/geometry and NEVPT2 quantities
# (both text and numeric columns).
names_nev = (
    [
        "Structure name", "Cartesian coordinates", "Internal coordinates",
        "Coordination number", "Bond lengths 1", "Bond lengths 2", "Bond angles",
        "NEVPT2 root Mult", "NEVPT2 GS energy (Eh)", "NEVPT2 transition energies",
        "SOC NEVPT2 lowest eigenv. (Eh)", "SOC NEVPT2 stab. energy (cm-1)",
        "NEVPT2 Kramers", "NEVPT2 Ms states",
        "NEVPT2 2PT D", "NEVPT2 2PT E/D", "NEVPT2 Heff D", "NEVPT2 Heff E/D",
        "NEVPT2 gx", "NEVPT2 gy", "NEVPT2 gz", "NEVPT2 giso",
        "NEVPT2 F0dd", "NEVPT2 F2dd", "NEVPT2 F4dd",
        "NEVPT2 Racah A", "NEVPT2 Racah B", "NEVPT2 Racah C", "NEVPT2 Racah C/B",
    ]
    + [
        "NEVPT2 d-orb{} ({})".format(orbital, field)
        for orbital in range(1, 6)
        for field in ("eV", "cm-1", "xy", "yz", "z2", "xz", "x2y2")
    ]
)

In [55]:
# Numeric-only subset of the NEVPT2 selection (the text columns are left out).
# NOTE(review): unlike `cas_numeric`, the per-orbital "(eV)" columns are not
# listed here - confirm whether that omission is intentional.
nev_numeric = (
    [
        "Coordination number", "Bond lengths 1", "Bond lengths 2", "Bond angles",
        "NEVPT2 root Mult", "NEVPT2 GS energy (Eh)",
        "SOC NEVPT2 lowest eigenv. (Eh)", "SOC NEVPT2 stab. energy (cm-1)",
        "NEVPT2 2PT D", "NEVPT2 2PT E/D", "NEVPT2 Heff D", "NEVPT2 Heff E/D",
        "NEVPT2 gx", "NEVPT2 gy", "NEVPT2 gz", "NEVPT2 giso",
        "NEVPT2 F0dd", "NEVPT2 F2dd", "NEVPT2 F4dd",
        "NEVPT2 Racah A", "NEVPT2 Racah B", "NEVPT2 Racah C", "NEVPT2 Racah C/B",
    ]
    + [
        "NEVPT2 d-orb{} ({})".format(orbital, field)
        for orbital in range(1, 6)
        for field in ("cm-1", "xy", "yz", "z2", "xz", "x2y2")
    ]
)

In [56]:
# Reduced numeric NEVPT2 selection: for each d-orbital only the "(cm-1)"
# energy column is kept; the per-orbital component columns are dropped.
nev_numeric_nod = (
    [
        "Coordination number", "Bond lengths 1", "Bond lengths 2", "Bond angles",
        "NEVPT2 root Mult", "NEVPT2 GS energy (Eh)",
        "SOC NEVPT2 lowest eigenv. (Eh)", "SOC NEVPT2 stab. energy (cm-1)",
        "NEVPT2 2PT D", "NEVPT2 2PT E/D", "NEVPT2 Heff D", "NEVPT2 Heff E/D",
        "NEVPT2 gx", "NEVPT2 gy", "NEVPT2 gz", "NEVPT2 giso",
        "NEVPT2 F0dd", "NEVPT2 F2dd", "NEVPT2 F4dd",
        "NEVPT2 Racah A", "NEVPT2 Racah B", "NEVPT2 Racah C", "NEVPT2 Racah C/B",
    ]
    + ["NEVPT2 d-orb{} (cm-1)".format(orbital) for orbital in range(1, 6)]
)

In [57]:
# Persist the assembled DataFrame in three places.
# 1) CSV at `result_path` (written with header and index).
df.to_csv(result_path)
# 2) Append the same rows, without a header line, to the CSV at `save_to_global`.
# NOTE(review): because of mode="a", re-running this cell appends the same
# rows again - guard or deduplicate if the cell may be executed more than once.
df.to_csv(save_to_global, mode="a", header=False)
# 3) Pickle at `save_to_pickle`.
df.to_pickle(save_to_pickle)

In [58]:
# Show the assembled DataFrame as the cell output.
df

Unnamed: 0,Structure name,Cartesian coordinates,Internal coordinates,Coordination number,Bond lengths 1,Bond lengths 2,Bond angles,CAS root Mult,CAS GS energy (Eh),CAS transition energies,...,NEVPT2 d-orb4 (z2),NEVPT2 d-orb4 (xz),NEVPT2 d-orb4 (x2y2),NEVPT2 d-orb5 (eV),NEVPT2 d-orb5 (cm-1),NEVPT2 d-orb5 (xy),NEVPT2 d-orb5 (yz),NEVPT2 d-orb5 (z2),NEVPT2 d-orb5 (xz),NEVPT2 d-orb5 (x2y2)
0,CoOO,Co 0.000000 0.000000 0.000000\n O...,Co 0 0 0 0.000000000000 0.000...,2.0,1.8,1.8,180.0,4.0,-1532.920533,1: 1 4 0.000032 0.001 7.1\...,...,,,,,,,,,,
1,CoOO,Co 0.000000 0.000000 0.000000\n O...,Co 0 0 0 0.000000000000 0.000...,2.0,1.8,1.9,180.0,4.0,-1532.927177,1: 1 4 0.000022 0.001 4.8\...,...,,,,,,,,,,
2,CoOO,Co 0.000000 0.000000 0.000000\n O...,Co 0 0 0 0.000000000000 0.000...,2.0,1.8,2.0,180.0,4.0,-1532.927659,1: 1 4 0.000015 0.000 3.4\...,...,,,,,,,,,,
3,CoOO,Co 0.000000 0.000000 0.000000\n O...,Co 0 0 0 0.000000000000 0.000...,2.0,1.8,2.1,180.0,4.0,-1532.924461,1: 1 4 0.000013 0.000 2.8\...,...,,,,,,,,,,
4,CoOO,Co 0.000000 0.000000 0.000000\n O...,Co 0 0 0 0.000000000000 0.000...,2.0,1.8,2.2,180.0,4.0,-1532.919167,1: 1 4 0.000013 0.000 2.8\...,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2495,CoOO,Co 0.000000 0.000000 0.000000\n O...,Co 0 0 0 0.000000000000 0.000...,2.0,2.7,2.3,60.0,4.0,-1532.846109,1: 1 4 0.000153 0.004 33.5\...,...,,,,,,,,,,
2496,CoOO,Co 0.000000 0.000000 0.000000\n O...,Co 0 0 0 0.000000000000 0.000...,2.0,2.7,2.4,60.0,4.0,-1532.840809,1: 1 4 0.000125 0.003 27.4\...,...,,,,,,,,,,
2497,CoOO,Co 0.000000 0.000000 0.000000\n O...,Co 0 0 0 0.000000000000 0.000...,2.0,2.7,2.5,60.0,4.0,-1532.835551,1: 1 4 0.000109 0.003 23.9\...,...,,,,,,,,,,
2498,CoOO,Co 0.000000 0.000000 0.000000\n O...,Co 0 0 0 0.000000000000 0.000...,2.0,2.7,2.6,60.0,4.0,-1532.830556,1: 1 4 0.000102 0.003 22.4\...,...,,,,,,,,,,


# DFT Parser

In [83]:
import re
import pandas as pd

In [2]:
# Location of the DFT output file to be parsed.
input_path = "/home/henrique/Coding/PhDUFF/Parser/CoOOO.out"

# Read the whole output into one string; every regex below searches this text.
with open(input_path) as dft_output:
    input_file = dft_output.read()

In [86]:
# Function to fill the lists


def dataparser(my_pattern, list_name, type_of_data="string", g_number=0, text=None):
    """Append every regex match found in the parsed output to ``list_name``.

    Args:
        my_pattern (re.Pattern): Compiled regular expression. Any flags must be
            given to ``re.compile``; they cannot be passed at search time.
        list_name (list): List that is extended in place with one entry per match.
        type_of_data (str, optional): ``"string"`` stores each match as ``str``;
            any other value (conventionally ``"float"``) converts it with
            ``float``. Defaults to ``"string"``.
        g_number (int, optional): Number of the capture group to store.
            Defaults to 0 (the whole match).
        text (str, optional): Text to search. Defaults to ``None``, in which
            case the module-level ``input_file`` is searched.

    Returns:
        None. The results are appended to ``list_name``.

    Raises:
        ValueError: If a float conversion is requested and a matched group is
            not a valid number.
    """
    source = input_file if text is None else text
    convert = str if type_of_data == "string" else float
    # Pattern.finditer(string, pos, endpos): a second positional argument is a
    # start offset, not a flag. The previous call passed re.MULTILINE (== 8)
    # there, which silently skipped the first 8 characters of the text.
    list_name.extend(
        convert(match.group(g_number)) for match in my_pattern.finditer(source)
    )

In [87]:
# Cartesian coordinates: the table body is capture group 2, between the
# dashed line under the heading and the blank line + dashed line that follow.
pattCartCoord = re.compile(
    r"(CARTESIAN COORDINATES \(ANGSTROEM\)\n-+\n)(\s\s.*?)(\n\n-+)", re.DOTALL)
cartCoord = []
dataparser(pattCartCoord, cartCoord, type_of_data="string", g_number=2)

# Internal coordinates: the table body is capture group 1.
pattIntCoord = re.compile(
    r"INTERNAL COORDINATES \(ANGSTROEM\)\n-+\n(\s.*?)()\n\n-+", re.DOTALL)
intCoord = []
dataparser(pattIntCoord, intCoord, type_of_data="string", g_number=1)

In [98]:
# Captures the decimal number printed after "Deviation :" (group 1).
# The decimal point is matched literally; the previous `\D` accepted any
# non-digit separator, which float() could not always convert.
pattSpinDeviation = re.compile(r"Deviation\s+:\s+(\d+\.\d+)")

# Pattern.finditer(string, pos, endpos): flags belong in re.compile(). Passing
# re.MULTILINE (== 8) as the second argument started the scan at offset 8.
spinDeviation = [
    float(match.group(1)) for match in pattSpinDeviation.finditer(input_file)
]

In [94]:
# Each match starts at a line of the form "<n>( 1)" and extends lazily up to
# the first following "( 0)" entry, through the "eV" that ends that line.
pattSOMOLUMO = re.compile(r"\s+\d+\(\s+1\)[\s\S]*?(\(\s0\).*eV)")

# Pattern.finditer(string, pos, endpos): flags belong in re.compile(). Passing
# re.MULTILINE (== 8) as the second argument started the scan at offset 8.
SOMOLUMO = [match.group(0) for match in pattSOMOLUMO.finditer(input_file)]

# For every matched block take lines 1-4 and convert the 4th whitespace-
# separated token of each line to float.
# NOTE(review): this assumes exactly three "( 1)" lines followed by the "( 0)"
# line, and that token 3 is the orbital energy in a.u. - confirm against the
# output format before using it on systems with a different number of SOMOs.
SOMOLUMOlist = [
    [float(block_lines[line_no].split()[3]) for line_no in range(1, 5)]
    for block_lines in (block.split("\n") for block in SOMOLUMO)
]

# Gaps between the last of the four levels and each of the three before it.
SOMO_LUMOgap = [levels[3] - levels[2] for levels in SOMOLUMOlist]
SOMO1_LUMOgap = [levels[3] - levels[1] for levels in SOMOLUMOlist]
SOMO2_LUMOgap = [levels[3] - levels[0] for levels in SOMOLUMOlist]

In [95]:
# Three signed decimal numbers following "Total Dipole Moment :" (groups 1-3).
# The decimal points are escaped: an unescaped "." matches any character, so
# the previous pattern could also accept text that is not a decimal number.
pattDipoleMoment = re.compile(
    r"Total Dipole Moment\s+:\s+(-?\d+\.\d+)\s+(-?\d+\.\d+)\s+(-?\d+\.\d+)"
)

# One (x, y, z) tuple of floats per match, then split into per-axis lists.
dipoleComponents = [
    tuple(float(value) for value in match.groups())
    for match in pattDipoleMoment.finditer(input_file)
]
dipoleMomentX = [component[0] for component in dipoleComponents]
dipoleMomentY = [component[1] for component in dipoleComponents]
dipoleMomentZ = [component[2] for component in dipoleComponents]

In [96]:
# Signed decimal number following "D   =" (group 1). The decimal point is
# escaped: an unescaped "." matches any character, not only the point.
pattDtensorDFT = re.compile(r"D\s\s\s=\s+(-?\d+\.\d+)")

# NOTE(review): an output section without a "D   =" line yields no entry, so
# this list may end up shorter than the other per-structure lists - confirm.
DtensorDFT = [
    float(match.group(1)) for match in pattDtensorDFT.finditer(input_file)
]

In [102]:
# One list per parsed DFT quantity; this becomes the data of `df_dft`.
# The (misspelled) name is kept because later cells refer to it.
pametersDFT = [
    cartCoord,
    intCoord,
    spinDeviation,
    SOMO_LUMOgap,
    SOMO1_LUMOgap,
    SOMO2_LUMOgap,
    dipoleMomentX,
    dipoleMomentY,
    dipoleMomentZ,
    DtensorDFT,
]

In [117]:
# Column labels for the DFT table, one per parsed quantity.
dftNames = (
    [
        "DFT cart. coord",
        "DFT int coord",
        "Spin deviation",
        "SOMO-LUMO gap (a.u)",
        "SOMO-1-LUMO gap",
        "SOMO-2-LUMO gap",
    ]
    + ["Dipole moment {}".format(axis) for axis in ("X", "Y", "Z")]
    + ["DFT D"]
)

In [114]:
# Build the DFT table in a single cell so that it can be re-run safely.
# Previously `df_dft = df_dft.transpose()` lived in its own cell; running that
# cell a second time flipped the frame back and broke the column assignment.
# After the transpose each entry of `pametersDFT` is a column, labelled by the
# matching entry of `dftNames`.
df_dft = pd.DataFrame(pametersDFT).transpose()
df_dft.columns = dftNames

In [119]:
# Show the DFT table as the cell output.
df_dft

Unnamed: 0,DFT cart. coord,DFT int coord,Spin deviation,SOMO-LUMO gap,SOMO-1-LUMO gap,SOMO-2-LUMO gap,Dipole moment X,Dipole moment Y,Dipole moment Z,DFT D
0,Co 0.000000 0.000000 0.000000\n O...,Co 0 0 0 0.000000000000 0.000...,0.002741,0.754769,0.799982,0.818617,0.73737,0.00101,0.00022,12.4592
1,Co 0.000000 0.000000 0.000000\n O...,Co 0 0 0 0.000000000000 0.000...,0.002728,0.759317,0.801882,0.820084,0.73798,0.02313,-0.14678,11.572
2,Co 0.000000 0.000000 0.000000\n O...,Co 0 0 0 0.000000000000 0.000...,0.00271,0.771684,0.807258,0.825151,0.74018,0.07353,-0.27773,10.3318
3,Co 0.000000 0.000000 0.000000\n O...,Co 0 0 0 0.000000000000 0.000...,0.004035,0.808651,0.822278,0.828396,0.75421,0.10964,-0.33578,8.73293
4,Co 0.000000 0.000000 0.000000\n O...,Co 0 0 0 0.000000000000 0.000...,0.00365,0.815905,0.82606,0.840039,0.74436,0.20193,-0.42857,9.40897
...,...,...,...,...,...,...,...,...,...,...
95,Co 0.000000 0.000000 0.000000\n O...,Co 0 0 0 0.000000000000 0.000...,0.002513,0.786829,0.805835,0.825885,-0.23576,0.15914,-0.33938,5.35872
96,Co 0.000000 0.000000 0.000000\n O...,Co 0 0 0 0.000000000000 0.000...,0.002523,0.789995,0.804919,0.827778,-0.21066,0.21324,-0.36572,4.22296
97,Co 0.000000 0.000000 0.000000\n O...,Co 0 0 0 0.000000000000 0.000...,0.002544,0.792762,0.804507,0.829741,-0.18043,0.26644,-0.37303,-3.38417
98,Co 0.000000 0.000000 0.000000\n O...,Co 0 0 0 0.000000000000 0.000...,0.002603,0.794674,0.804031,0.830394,-0.14028,0.30627,-0.36514,-3.42954
