In [2]:
import os
import time
import pandas as pd
from tqdm import tqdm
import shutil

In [3]:
# Multiplicative factor applied by bohr_to_angstrom (angstrom per bohr).
bohr_to_angstrom_value = 0.52917721092


def bohr_to_angstrom(x):
    """Scale ``x`` by ``bohr_to_angstrom_value`` and return the result.

    ``x`` may be anything that supports multiplication by a float
    (a plain number, a pandas Series, ...).
    """
    converted = x * bohr_to_angstrom_value
    return converted

In [4]:
def read_xyz(filename, name_4_column, n_header_lines=11):
    """Read an xyz-style grid file into its header lines and a DataFrame.

    Parameters
    ----------
    filename : str or path-like
        File to read.
    name_4_column : str
        Column name given to the fourth value of every data line.
    n_header_lines : int, optional
        Number of leading lines treated as header (default 11, the value
        that used to be hard-coded).

    Returns
    -------
    header : list of str
        The first ``n_header_lines`` lines, unmodified (newlines included).
    df : pandas.DataFrame
        One row per non-blank data line with columns ``x``, ``y``, ``z`` and
        ``name_4_column``; the index is the row position and is named ``idx``.

    Raises
    ------
    ValueError
        If one of the first four fields of a data line is not a valid float.
    """
    header = []
    coords = []

    with open(filename, 'r') as f:
        for no, line in enumerate(f):
            if no < n_header_lines:
                header.append(line)
                continue

            fields = line.split()
            # A blank line (typically a trailing newline at the end of the
            # file) would otherwise turn into an all-NaN row in the frame.
            if not fields:
                continue
            # Only the first four whitespace-separated values are used.
            coords.append([float(value) for value in fields[:4]])

    # Build the frame after the file has been closed.
    df = pd.DataFrame(coords, columns=['x', 'y', 'z', name_4_column])
    df.index.name = 'idx'

    return header, df

In [5]:
def search_file(filename, path):
    """Walk ``path`` and return the full path of the first ``filename`` found.

    Directories are visited in ``os.walk`` order. ``None`` is returned when
    no directory below ``path`` contains a file with that name.
    """
    hits = (
        os.path.join(folder, filename)
        for folder, _subdirs, entries in os.walk(path)
        if filename in entries
    )
    return next(hits, None)

In [6]:
def save_xyz(path, df, header):
    """Write ``header`` followed by the rows of ``df`` to ``path``.

    The file is first (re)created with every header entry converted to
    ``str`` and written back to back. The frame is then appended as
    tab-separated values, without column names or index, floats formatted
    as ``%.8f``.
    """
    with open(path, 'w') as out:
        out.writelines(str(entry) for entry in header)

    df.to_csv(
        path,
        sep='\t',
        float_format='%.8f',
        index=False,
        header=False,
        mode='a',
    )

In [7]:
def copy_file_to_folder(from_file, to_file, filename):
    """Copy ``from_file`` into the folder ``to_file`` under the name ``filename``.

    Despite its name, ``to_file`` is the destination *directory*; the copy is
    written to ``to_file/filename`` via ``shutil.copy``.
    """
    destination = os.path.join(to_file, filename)
    shutil.copy(from_file, destination)

In [8]:
def create_directory(path):
    """Create directory ``path`` (including missing parents) if it does not exist.

    Calling it for an already existing directory is a no-op.

    Raises
    ------
    FileExistsError
        If ``path`` exists but is not a directory.
    """
    # exist_ok=True replaces the former "check, then create" sequence, which
    # could fail if the directory appeared between the two steps and which
    # silently accepted a regular file sitting at ``path``.
    os.makedirs(path, exist_ok=True)

In [13]:
### Bohr to Angstrom
# For every entry below `in_path`: merge the density (td.xyz) and potential
# (tp.xyz) grids, convert the x/y/z columns with bohr_to_angstrom, and write
# the result next to copies of coord.xyz and dscf.out under `out_path`.
in_path = os.path.join(os.getcwd(), 'charge_0')
# NOTE(review): absolute, machine-specific output location — consider moving
# it to a configuration cell.
out_path = '/home/xb/uni/MA/14_esp_mp2_opti/pot_den_angstrom'

file_1 = 'coord.xyz'
file_2 = 'dscf.out'
file_3 = 'td.xyz'
file_4 = 'tp.xyz'

folder_list = os.listdir(in_path)

with tqdm(folder_list, total=len(folder_list)) as pbar:
    for f in pbar:
        current_dir = os.path.join(in_path, f)

        # Output name is the entry name without its last four characters.
        # NOTE(review): assumes every entry ends in a 4-character suffix — confirm.
        entry_name = f[:-4]
        target_dir = os.path.join(out_path, entry_name)

        # Locate all inputs first so that a missing file is reported clearly
        # and before any output for this entry has been created.
        found = {
            wanted: search_file(wanted, current_dir)
            for wanted in (file_1, file_2, file_3, file_4)
        }
        missing = [wanted for wanted, location in found.items() if location is None]
        if missing:
            raise FileNotFoundError(
                f'{", ".join(missing)} not found below {current_dir}'
            )
        coord = found[file_1]
        dscf = found[file_2]

        header, density = read_xyz(found[file_3], 'density')
        _, potential = read_xyz(found[file_4], 'potential')

        # Replace the last header line by a description of the merged columns.
        header = header[:-1] + ['# cartesian coordinates x,y,z | density | potential\n']

        # combine coord, density and potential in one dataframe; rows are
        # matched on the row index and on the exact x/y/z values.
        df = density.merge(potential, on=['idx', 'x', 'y', 'z'], how='inner')

        # convert bohr to angstrom (one vectorised multiplication instead of
        # three element-wise .apply calls)
        df[['x', 'y', 'z']] = bohr_to_angstrom(df[['x', 'y', 'z']])

        create_directory(target_dir)
        copy_file_to_folder(coord, target_dir, 'coord.xyz')
        copy_file_to_folder(dscf, target_dir, 'dscf.out')

        save_xyz(os.path.join(target_dir, f'{entry_name}.xyz'), df, header)

100%|██████████| 134/134 [09:15<00:00,  4.14s/it]
