In [1]:
import re

In [None]:
def parse_blocks(data):
    
    info = {}
    
    if '$XVEC' in data:

        block = data.split('$XVEC\n')[1].split('$END\n')[0]
        lines = block.strip().split('\n')

        excited_state_energy = []
        excited_state_vector = []

    # Regular expression patterns
    state_pattern = re.compile(r'STATE #\s+(\d+)\s+ENERGY =\s+([-+]?\d+\.\d*(?:E[-+]?\d+)?)')
    vector_pattern = re.compile(r'[-+]?\d+\.\d*(?:E[-+]?\d+)?')

    for line in lines:
        state_match = state_pattern.search(line)
        if state_match:
            if current_state_vector:  # save previous state's vector before starting a new one
                state_data[-1]['vector'] = current_state_vector
                current_state_vector = []
            state_num = int(state_match.group(1))
            energy = float(state_match.group(2))
            state_data.append({'state_num': state_num, 'energy': energy})
        else:
            vector_parts = vector_pattern.findall(line)
            current_state_vector.extend([float(i) for i in vector_parts])

    if current_state_vector:  # save last state's vector
        state_data[-1]['vector'] = current_state_vector

    return state_data

In [None]:
def parse_xvec_block(data):

    block = data.split('$XVEC\n')[1].split('$END\n')[0]
    lines = block.split('\n')

    xvectors = []
    energies = []

    pattern = re.compile(r'[-+]?\d+\.\d*(?:E[-+]?\d+)?|\d+')
    pattern = re.compile(r'[A-Za-z]+|[-+]?\d*\.\d+|\d+')
    
    for line in lines:
        parts = pattern.findall(line)

        if not parts: # skip if line empty. sometime need
            continue

        # Reminder about GAMESS
        # Orbital starts from two integer, but energy starts from float point value.
        if parts[0].isdigit() and parts[1].isdigit():
            xvectors_parts = [float(i) for i in parts[2:]]
            if len(orbitals) > int(parts[0]) - 1:
                xvectors[int(parts[0]) - 1].extend(orbital_parts)
            else:
                xvectors.append(orbital_parts)
        else:
            energies.extend([float(i) for i in parts])

    return orbitals, energies

In [3]:
def parse_data_block(data):

    block = data.split('$DATA\n')[1].split('$END\n')[0]
    lines = block.split('\n')

    geometry = []

    pattern = re.compile(r'[-+]?\d*\.\d+|\d+')            # get with atomic number as float point
#   pattern = re.compile(r'[A-Za-z]+|[-+]?\d*\.\d+|\d+')  # get as previous and also short name of element.

    for line in lines:

        parts = pattern.findall(line)

        if not parts:
            continue

#       parts[1] = int(float(parts[1])) # get as integer atomic number ( second patern )
        parts[0] = int(float(parts[0])) # get as integer atomic number ( first patern )

        geometry.append(parts)

    return geometry

In [22]:
def parse_vec_block(data):

    block = data.split('$VEC\n')[1].split('$END\n')[0]
    lines = block.split('\n')

    orbitals = []
    energies = []

    pattern = re.compile(r'[-+]?\d+\.\d*(?:E[-+]?\d+)?|\d+')

    for line in lines:
        parts = pattern.findall(line)

        if not parts: # skip if line empty. sometime need
            continue

        # Reminder about GAMESS
        # Orbital starts from two integer, but energy starts from float point value.
        if parts[0].isdigit() and parts[1].isdigit():
            orbital_parts = [float(i) for i in parts[2:]]
            if len(orbitals) > int(parts[0]) - 1:
                orbitals[int(parts[0]) - 1].extend(orbital_parts)
            else:
                orbitals.append(orbital_parts)
        else:
            energies.extend([float(i) for i in parts])

    return orbitals, energies

In [4]:
def parse_grad_block(data):

    block = data.split('$GRAD  \n')[1].split('$END\n')[0]
    lines = block.split('\n')

    gradient = []

    pattern = re.compile(r'[A-Za-z]+|[-+]?\d*\.\d+|\d+')

    for line in lines:
        parts = pattern.findall(line)

        if not parts:
            continue

        gradient.append(parts)

    return gradient

In [5]:
def parse_nact_block(data):

    if '$NACT' in data:
        block = data.split('$NACT\n')[1].split('$END\n')[0]
        lines = block.split('\n')

        nact = []

        pattern = re.compile(r'[-+]?\d*\.\d+|\d+')

        for line in lines:
            parts = pattern.findall(line)

            if not parts:
                continue

            nact.append(parts)

        return nact
    else:
        return None

In [6]:
def parse_total_energy(data):

    pattern = re.compile(r"TOTAL ENERGY\s*=\s*(-?\d+\.\d+)")

    match = pattern.search(data)

    return float(match.group(1))

In [7]:
def save_to_file(output_file_path, geometry, orbitals, energies, gradient, total_energy, nact):
    with open(output_file_path, 'w') as file:
        file.write("Geometry:\n")
        for geo in geometry:
            geo_str = [x for x in geo]
            file.write(str(geo_str) + '\n')

        file.write("Orbitals:\n")
        for orbital in orbitals:
            orbital_str = ["{:.10f}".format(x) for x in orbital]
            file.write(str(orbital_str) + '\n')

        file.write("Orbtal_Energies:\n")
        energy_str = [x for x in energies]
        file.write(str(energy_str) + '\n')

        file.write("Gradient:\n")
        for grad in gradient:
            grad_str = [x for x in grad]
            file.write(str(grad_str) + '\n')

        file.write("Total_Energy:\n")
        file.write(str(total_energy) + '\n')

        if nact != None:
            file.write("NACT:\n")
            for nac in nact:
                nact_str = [x for x in nac]
                file.write(str(nact_str) + '\n')

In [8]:
file_path = "butadiene-gradnac-mrsf-singlet_step_1.wrf"
output_file_path = "output_data.txt"
print(file_path)
print(output_file_path)

In [31]:
with open(file_path, 'r') as file:
    data = file.read()
orbitals, energies = parse_vec_block(data)
geometry = parse_data_block(data)
gradient = parse_grad_block(data)
total_energy = parse_total_energy(data)
nact = parse_nact_block(data)
save_to_file(output_file_path, geometry, orbitals, energies, gradient, total_energy, nact)

In [32]:
file_input = open("input-gms-mrsf.inc", 'r').read()
print(file_input)

 $contrl
   runtyp=gradnac
   scftyp=rohf
   mult=3
   dfttyp=bhhlyp
   icharg=0
   tddft=mrsf
   maxit=200
   coord=unique
 $end
 $dft
   sg1=.t.
 $end
 $scf
   dirscf=.t.
   diis=.t.
   soscf=.f.
   fdiff=.f.
   damp=.f.
   shift=.f.
   nconv=6
   npunch=2
 $end
 $tddft
   nstate=3
   iroot=2
   tammd=.t.
   mult=1
 $end
 $basis
   gbasis=n31
   ngauss=6
   ndfunc=1
 $end
 $system
   timlim=999999100
   mwords=500
 $end



In [33]:
def parse_xvec_block(data):
    if '$XVEC' not in data:
        return None, None

    block = data.split('$XVEC\n')[1].split('$END\n')[0]
    lines = block.strip().split('\n')
    
    print(block)

    state_data = []
    current_state_vector = []

    # Regular expression patterns
    state_pattern = re.compile(r'STATE #\s+(\d+)\s+ENERGY =\s+([-+]?\d+\.\d*(?:E[-+]?\d+)?)')
    vector_pattern = re.compile(r'[-+]?\d+\.\d*(?:E[-+]?\d+)?')

    for line in lines:
        state_match = state_pattern.search(line)
        if state_match:
            if current_state_vector:  # save previous state's vector before starting a new one
                state_data[-1]['vector'] = current_state_vector
                current_state_vector = []
            state_num = int(state_match.group(1))
            energy = float(state_match.group(2))
            state_data.append({'state_num': state_num, 'energy': energy})
        else:
            vector_parts = vector_pattern.findall(line)
            current_state_vector.extend([float(i) for i in vector_parts])

    if current_state_vector:  # save last state's vector
        state_data[-1]['vector'] = current_state_vector

    return state_data

In [34]:
X =parse_xvec_block(data)


 STATE #   1  ENERGY =      -78.4942523570
     0.00007728    -0.00005901     0.00210555    -0.00816243     0.01548138
    -0.00388690    -0.00040649    -0.00047558    -0.99115282     0.00003085
     0.00000988     0.00179708    -0.00063090     0.00001474     0.00120604
    -0.01136870    -0.11708484     0.00017805    -0.00013118     0.00162159
    -0.00985612     0.00025269    -0.00223715     0.00292083     0.00233860
    -0.00411609     0.00007951    -0.00004867     0.00086026    -0.00431659
    -0.00037563    -0.00086172    -0.00581872    -0.00468033    -0.00159025
    -0.00019692    -0.00021745    -0.00982550    -0.00155590     0.00124297
     0.00408728    -0.00056562    -0.00127431     0.00016420     0.00000876
     0.00000691     0.00045466    -0.00010988     0.00458287    -0.00092074
    -0.00024973    -0.00047291    -0.00739116     0.00013003     0.00011076
     0.00551014    -0.00018378    -0.00011098     0.00222421    -0.00009426
    -0.00025277    -0.00115908    -0.00004226

In [38]:
result = parse_xvec_block(data)

for state_info in result:
    print("State #:", state_info['state_num'])
    print("Energy:", state_info['energy'])
    print("Vector:", state_info['vector'])
    print('---')

 STATE #   1  ENERGY =      -78.4942523570
     0.00007728    -0.00005901     0.00210555    -0.00816243     0.01548138
    -0.00388690    -0.00040649    -0.00047558    -0.99115282     0.00003085
     0.00000988     0.00179708    -0.00063090     0.00001474     0.00120604
    -0.01136870    -0.11708484     0.00017805    -0.00013118     0.00162159
    -0.00985612     0.00025269    -0.00223715     0.00292083     0.00233860
    -0.00411609     0.00007951    -0.00004867     0.00086026    -0.00431659
    -0.00037563    -0.00086172    -0.00581872    -0.00468033    -0.00159025
    -0.00019692    -0.00021745    -0.00982550    -0.00155590     0.00124297
     0.00408728    -0.00056562    -0.00127431     0.00016420     0.00000876
     0.00000691     0.00045466    -0.00010988     0.00458287    -0.00092074
    -0.00024973    -0.00047291    -0.00739116     0.00013003     0.00011076
     0.00551014    -0.00018378    -0.00011098     0.00222421    -0.00009426
    -0.00025277    -0.00115908    -0.00004226