## Processes XES and XANES .dat files into spectra

In [2]:
import numpy as np
import os
import shutil
import subprocess
import timeit

# The .dat files must already be in the input directories configured below!
### See TEST_XESdir, TEST_XANESdir, XESdir and XANESdir in the next cell.

In [3]:
# Category labels; the compounds of category t are listed in
# Categories/Type{t}/Type{t}.txt, one name per line.
Types = np.array([1,2,3,4,5])

# Working directory at the time this cell runs; every path below is relative to it.
ROOTDIR = os.getcwd()
TYPEdir = "Categories/"

# Raw .dat inputs for the test compounds, one folder per spectrum type.
TEST_XESdir = "Data/dat_files/TESTdatfiles/XES/"
TEST_XANESdir = "Data/dat_files/TESTdatfiles/XANES/"

# Raw .dat inputs for the training compounds, one folder per spectrum type.
# XANESdir previously pointed at the XES folder, so XANES runs read XES inputs.
XESdir = "Data/dat_files/TRAINdatfiles/XES/"
XANESdir = "Data/dat_files/TRAINdatfiles/XANES/"

## Run this once at the very beginning to create the per-type output folders:
# for t in Types:
#     os.makedirs(f"Data/Type{t}", exist_ok=True)

## Get lists of compounds to process

In [4]:
# Compound names of the test set: every '.dat' file in the test XES folder,
# with the '.dat' text removed from its file name.
test_list = [
    entry.replace('.dat', '')
    for entry in os.listdir(TEST_XESdir)
    if entry.endswith('.dat')
]
# Report how many test compounds were found.
print(len(test_list))

53


In [5]:
# xes_list = [f.replace('.dat','') for f in os.listdir(XESdir) if f.endswith('.dat')]
# xanes_list = [f.replace('.dat','') for f in os.listdir(XANESdir) if f.endswith('.dat')]

# Test data processing

In [6]:
def make_test_directories(c):
    """Create the output folders for one test compound.

    Creates ``Data/TEST/<c>/XANES`` and ``Data/TEST/<c>/XES`` relative to the
    current working directory.

    The folders are built from full paths instead of stepping through them
    with ``os.chdir``, so a failure part-way through can no longer leave the
    process stranded in a different working directory. Missing parent
    folders are created and already existing folders are accepted.

    Parameters
    ----------
    c : str
        Compound name, used as the folder name.
    """
    compound_dir = os.path.join('Data', 'TEST', c)
    for spectrum_type in ('XANES', 'XES'):
        os.makedirs(os.path.join(compound_dir, spectrum_type), exist_ok=True)

def process_TEST(process_list=test_list, mode='XES'):
    """Process the test-set .dat files of one spectrum type.

    For every compound ``c`` in ``process_list``:

    1. ``<input dir><c>.dat`` is copied into the working directory, where
       the input dir is ``TEST_XESdir`` or ``TEST_XANESdir`` depending on
       ``mode``.
    2. ``tddftoutputparser.py`` is run on the copy. In XANES mode the
       compound's XES .dat file is temporarily placed next to it as
       ``xes_<c>.dat``.
    3. ``<c>.processedspectrum`` (if the parser wrote one) and the copied
       ``<c>.dat`` are moved into ``Data/TEST/<c>/<mode>/``, replacing any
       files of the same name already there.

    A running count of finished compounds is printed on a single line.

    Parameters
    ----------
    process_list : list of str
        Compound names (file names without the ``.dat`` extension).
    mode : str
        ``'XES'`` or ``'XANES'``.

    Returns
    -------
    None
        The function returns early, after printing a message, when ``mode``
        is invalid or as soon as a compound's .dat file cannot be found.
    """
    # grab appropriate directories
    if mode == 'XES':
        directory = TEST_XESdir
    elif mode == 'XANES':
        directory = TEST_XANESdir
    else:
        print(f"Invalid mode {mode}. Must be 'XES' or 'XANES'.")
        return

    for i, c in enumerate(process_list, start=1):

        # a missing input stops the whole run
        if not os.path.exists(f'{directory}{c}.dat'):
            print(f"Cannot locate {directory}{c}.dat")
            return

        # make directories in TEST folder
        if not os.path.exists(f'Data/TEST/{c}'):
            make_test_directories(c)
        out_dir = f'Data/TEST/{c}/{mode}'
        # the compound folder may already exist without this mode's subfolder
        os.makedirs(out_dir, exist_ok=True)

        # the parser is run on a working copy in the current directory
        shutil.copyfile(f'{directory}{c}.dat', f'{c}.dat')

        if mode == 'XES':
            subprocess.call(['python', 'tddftoutputparser.py', '-f', f'{c}.dat',
                             '-l', '0.6', '-g', '0.3', '-emin', '2445', '-emax', '2480', '-eshift',
                             '-mode', 'XES'])
        else:
            # XANES: the XES file is provided alongside as xes_<c>.dat
            shutil.copyfile(f'{TEST_XESdir}{c}.dat', f'xes_{c}.dat')
            try:
                subprocess.call(['python', 'tddftoutputparser.py', '-f', f'{c}.dat', '-eshift', '-lb',
                                 '-mode', 'XANES'])
            finally:
                # never leave the temporary XES copy behind, even on error
                os.remove(f'xes_{c}.dat')

        # check spectrum was correctly processed
        if os.path.exists(f'{c}.processedspectrum'):
            # if an old processed spectrum exists, remove it first before replacing
            if os.path.exists(f'{out_dir}/{c}.processedspectrum'):
                os.remove(f'{out_dir}/{c}.processedspectrum')
            # move processed spectrum file
            shutil.move(f'{c}.processedspectrum', f'{out_dir}/{c}.processedspectrum')
        else:
            print("\t ! No processed spectrum file")

        # check if dat file is already in the data directory
        if os.path.exists(f'{out_dir}/{c}.dat'):
            os.remove(f'{out_dir}/{c}.dat')
        # now move copied .dat file over
        shutil.move(f'{c}.dat', f'{out_dir}/{c}.dat')

        print(f'{i}\r', end="")

In [9]:
# Time one full pass over the test set in XES mode.
start = timeit.default_timer()
process_TEST(process_list=test_list, mode='XES')
stop = timeit.default_timer()

# Wall-clock duration, converted from seconds to minutes.
elapsed_min = (stop - start)/60
print(f"Runtime: {elapsed_min} min")

Runtime: 1.3627255200000004 min


In [8]:
# Time one full pass over the test set in XANES mode.
start = timeit.default_timer()
process_TEST(process_list=test_list, mode='XANES')
stop = timeit.default_timer()

# Wall-clock duration, converted from seconds to minutes.
elapsed_min = (stop - start)/60
print(f"Runtime: {elapsed_min} min")

Runtime: 1.7043436800000034 min


# Training Data Processing

In [7]:
def make_train_directories(c, t):
    """Create the output folders for one training compound.

    Creates ``Data/Type<t>/<c>/XANES`` and ``Data/Type<t>/<c>/XES`` relative
    to the current working directory.

    The folders are built from full paths instead of stepping through them
    with ``os.chdir``, so a failure part-way through can no longer leave the
    process stranded in a different working directory. Missing parent
    folders are created and already existing folders are accepted.

    Parameters
    ----------
    c : str
        Compound name, used as the folder name.
    t : int
        Category number; selects the ``Data/Type<t>`` parent folder.
    """
    compound_dir = os.path.join('Data', f'Type{t}', c)
    for spectrum_type in ('XANES', 'XES'):
        os.makedirs(os.path.join(compound_dir, spectrum_type), exist_ok=True)

def process_spectra(process_list=None, mode='XES'):
    """Process the training-set .dat files of one spectrum type.

    The compounds of each category ``t`` in ``Types`` are read from
    ``<TYPEdir>Type<t>/Type<t>.txt`` (one name per line; blank lines are
    skipped). For every selected compound ``c``:

    1. The input .dat file is copied into the working directory. It is taken
       from the raw input folder (``XESdir`` / ``XANESdir``) if present
       there, otherwise from a previous run's output in
       ``Data/Type<t>/<c>/<mode>/``.
    2. ``tddftoutputparser.py`` is run on the copy. In XANES mode the
       compound's XES .dat file is temporarily placed next to it as
       ``xes_<c>.dat``.
    3. ``<c>.processedspectrum`` (if the parser wrote one) and the copied
       ``<c>.dat`` are moved into ``Data/Type<t>/<c>/<mode>/``, replacing
       any files of the same name already there.

    A running count of finished compounds is printed on a single line.

    Parameters
    ----------
    process_list : list of str or None
        Compound names to process; ``None`` processes every listed compound.
    mode : str
        ``'XES'`` or ``'XANES'``.

    Returns
    -------
    None
        The function returns early, after printing a message, when ``mode``
        is invalid or as soon as a compound's .dat file is in neither of the
        two expected locations.
    """
    # grab appropriate directories
    if mode == 'XES':
        directory = XESdir
    elif mode == 'XANES':
        directory = XANESdir
    else:
        print(f"Invalid mode {mode}. Must be 'XES' or 'XANES'.")
        return

    i = 1
    for t in Types:

        file_name = f"{TYPEdir}Type{t}/Type{t}.txt"
        # read the names up front so the category file is closed again
        # before the long-running processing starts
        with open(file_name, 'r') as file:
            compounds = [line.rstrip('\n') for line in file]

        for c in compounds:
            # a blank line is not a compound; it must not abort the run
            if not c.strip():
                continue
            # process everything if no list
            if process_list is not None and c not in process_list:
                continue

            out_dir = f'Data/Type{t}/{c}/{mode}'

            # not processed yet
            if os.path.exists(f'{directory}{c}.dat'):
                # directories need to be created
                if not os.path.exists(f'Data/Type{t}/{c}'):
                    make_train_directories(c, t)
                # the compound folder may already exist without this mode's subfolder
                os.makedirs(out_dir, exist_ok=True)
                shutil.copyfile(f'{directory}{c}.dat', f'{c}.dat')
                processed = False

            # grab already processed dat file
            elif os.path.exists(f'{out_dir}/{c}.dat'):
                shutil.copyfile(f'{out_dir}/{c}.dat', f'{c}.dat')
                processed = True

            # data file not in the two expected locations
            else:
                print(f"Cannot find {directory}{c}.dat")
                return

            if mode == 'XES':
                subprocess.call(['python', 'tddftoutputparser.py', '-f', f'{c}.dat',
                                 '-l', '0.6', '-g', '0.3', '-emin', '2445', '-emax', '2480', '-eshift',
                                 '-mode', 'XES'])
            else:
                # copy xes over as well (original note: to normalize by k alpha)
                if processed:
                    xes_dir = f'Data/Type{t}/{c}/XES/'
                else:
                    xes_dir = XESdir
                shutil.copyfile(f'{xes_dir}{c}.dat', f'xes_{c}.dat')
                try:
                    subprocess.call(['python', 'tddftoutputparser.py', '-f', f'{c}.dat', '-eshift', '-lb',
                                     '-mode', 'XANES'])
                finally:
                    # never leave the temporary XES copy behind, even on error
                    os.remove(f'xes_{c}.dat')

            # check spectrum was correctly processed
            if os.path.exists(f'{c}.processedspectrum'):
                # if an old processed spectrum exists, remove it first before replacing
                if os.path.exists(f'{out_dir}/{c}.processedspectrum'):
                    os.remove(f'{out_dir}/{c}.processedspectrum')
                # move processed spectrum file
                shutil.move(f'{c}.processedspectrum', f'{out_dir}/{c}.processedspectrum')
            else:
                print("\t ! No processed spectrum file")

            # check if dat file is already in the data directory
            if os.path.exists(f'{out_dir}/{c}.dat'):
                os.remove(f'{out_dir}/{c}.dat')
            # now move copied .dat file over
            shutil.move(f'{c}.dat', f'{out_dir}/{c}.dat')

            print(f'{i}\r', end="")
            i += 1

In [10]:
# Time one full pass over the training set in XES mode.
# To process a single compound instead, pass a list, e.g.
#     process_spectra(process_list=['2-Fluorothiophenol'], mode='XES')
start = timeit.default_timer()
process_spectra(mode='XES')
stop = timeit.default_timer()

# Wall-clock duration, converted from seconds to minutes.
elapsed_min = (stop - start)/60
print(f"Runtime: {elapsed_min} min")

Runtime: 22.770089465 min


In [5]:
# Time one full pass over the training set in XANES mode.
# To process a single compound instead, pass a list, e.g.
#     process_spectra(process_list=['2-Fluorothiophenol'], mode='XANES')
start = timeit.default_timer()
process_spectra(mode='XANES')
stop = timeit.default_timer()

# Wall-clock duration, converted from seconds to minutes.
elapsed_min = (stop - start)/60
print(f"Runtime: {elapsed_min} min")

Runtime: 21.659081800000003 min
