In [None]:
import os
import time
import numpy as np
from ase.io import read
from dscribe.descriptors import SOAP

In [None]:
# Show which ASE version is installed (the bare expression is rendered as the cell output).
import ase
ase.__version__

## System of interest name

In [None]:
# Root folder of the system of interest and an optional sub-folder (leave empty for none).
SYS_NAME_DIR = 'ExampleCase-300K-E=0.05/'
SUB_DIR = ''

# Trajectories live in '<system>/traj_reduced/[<sub-folder>]'; an empty SUB_DIR simply
# contributes nothing to the path, so no branching is required.
SYS_TRAJ_DIR = SYS_NAME_DIR + 'traj_reduced/' + (SUB_DIR or '')

## Reading trajectories files

* #### traj reading parameters
The range goes from `START` to `END - 1`: `END` itself is not included!

In [None]:
# Frame selection applied to every trajectory: START, START+STRIDE, ... up to (not including) END.
traj_reading_prmters = {
    'START': 0,
    'END': 20000,
    'STRIDE': 50,
}
print(traj_reading_prmters)

* #### read and store traj files (and box if present)

In [None]:
# Regular files found directly inside the trajectory folder (sub-folders are skipped).
_traj_dir_files = [
    name for name in os.listdir(SYS_TRAJ_DIR)
    if os.path.isfile(os.path.join(SYS_TRAJ_DIR, name))
]

# Split by extension; both lists are sorted by name because later cells pair
# XYZ_TRAJ[j] with BOX[j] by position.
XYZ_TRAJ = sorted(name for name in _traj_dir_files if name.endswith('.xyz'))
BOX = sorted(name for name in _traj_dir_files if name.endswith('.box'))

print(f"Traj files {len(XYZ_TRAJ)}:\n{XYZ_TRAJ}")
print(f"\nBox files (for pbc) {len(BOX)}:\n{BOX}")

* #### traj reader

    - `traj` is already cropped following `START,END,STRIDE`
    - `boxfile` is not cropped and needs to be matched

The code does the corrections, but care is advised anyway!

In [None]:
def traj_reader(wdir,trajfile,**kwargs):
    """Read the selected frames of one xyz trajectory file with `ase.io.read`.

    Parameters
    ----------
    wdir : str
        Folder that contains the trajectory file.
    trajfile : str
        Name of the xyz file inside `wdir`.
    **kwargs
        Must provide `START`, `END` and `STRIDE`. They are used as a Python
        slice (`START:END:STRIDE`), so `END` is not included. `END` may be
        None to read up to the last frame; `START` and `STRIDE` are still
        honoured in that case.

    Returns
    -------
    The value returned by `ase.io.read` for a slice index (the selected frames).

    Raises
    ------
    KeyError
        If `START`, `END` or `STRIDE` is missing from `kwargs`.
    """
    print(f"--- Reading {trajfile}")

    # A slice object expresses the same selection as the "START:END:STRIDE" string,
    # and also stays valid when END is None (the string form would not).
    frame_selection = slice(kwargs['START'], kwargs['END'], kwargs['STRIDE'])

    # os.path.join works whether or not `wdir` ends with a path separator.
    return read(os.path.join(wdir, trajfile), index=frame_selection, format="xyz")

# add pbc correction

def traj_pbc(wdir,traj,boxfile,**kwargs):
    """Set the cell of every frame from a box file and switch on PBC in x, y and z.

    `traj` is expected to be already cropped with `START:END:STRIDE`, while the
    box file holds one row per frame of the *uncropped* trajectory. Frame `k`
    of `traj` therefore receives row `START + k*STRIDE` of the box file.

    Parameters
    ----------
    wdir : str
        Folder that contains the box file.
    traj : sequence
        Frames exposing `set_cell` and `set_pbc`; they are modified in place.
    boxfile : str
        Name of the text file (readable by `numpy.loadtxt`) inside `wdir`.
    **kwargs
        Must provide `START` and `STRIDE` (None is treated as 0 and 1
        respectively, like a slice). `END` is accepted but not needed: the
        number of frames is taken from `traj` itself.

    Returns
    -------
    The same `traj` object, after the in-place update.

    Raises
    ------
    IndexError
        If the box file has fewer rows than required by the frames in `traj`.
    """
    print(f"--- Applying PBC {boxfile}\n")

    start = kwargs['START'] or 0
    stride = kwargs['STRIDE'] or 1

    # ndmin=2 keeps a one-row box file indexable by row.
    box_rows = np.loadtxt(os.path.join(wdir, boxfile), ndmin=2)

    # Loop over the frames actually present instead of int((END-START)/STRIDE):
    # that expression rounds down and would skip the last frame whenever
    # (END-START) is not a multiple of STRIDE.
    for ts, frame in enumerate(traj):
        # the box row is selected according to the real (uncropped) frame index
        row = start + ts*stride
        if row >= len(box_rows):
            raise IndexError(
                f"{boxfile} has {len(box_rows)} rows but frame {ts} needs row {row}"
            )
        frame.set_cell(box_rows[row])
        frame.set_pbc([1, 1, 1])
    return traj

def get_multipos(pos,**kwargs):
    """Repeat `pos` once per frame selected by `START:END:STRIDE`.

    Parameters
    ----------
    pos
        Positions entry to use for every frame; the same object is repeated,
        it is not copied.
    **kwargs
        Must provide integer `START`, `END` and `STRIDE`.

    Returns
    -------
    list
        `[pos, pos, ...]` with one entry per index in `range(START, END, STRIDE)`.
    """
    START=kwargs['START']
    END=kwargs['END']
    STRIDE=kwargs['STRIDE']
    # len(range(...)) matches the number of frames a START:END:STRIDE slice yields,
    # including the last partial stride that int((END-START)/STRIDE) would drop.
    n_frames = len(range(START, END, STRIDE))
    # Use the `pos` argument (the original read an unrelated global named POS).
    return [pos for _ in range(n_frames)]

def get_soap_descriptor(**kwargs):
    """Build a dscribe `SOAP` descriptor, forwarding all keyword arguments unchanged."""
    descriptor = SOAP(**kwargs)
    return descriptor

In [None]:
# Read every trajectory file (in sorted order) into a dict keyed 'syst_0', 'syst_1', ...
# and report the wall-clock time spent.
t0 = time.time()
ASE_TRAJ = {
    f"syst_{num}": traj_reader(SYS_TRAJ_DIR, traj, **traj_reading_prmters)
    for num, traj in enumerate(XYZ_TRAJ)
}
t1 = time.time()
print(f"-->\tTIME= {np.round(t1-t0, 2)} s \n")

In [None]:
# Attach the periodic cell to every trajectory when box files were found.
if BOX:
    # Trajectories and box files are paired by position in their sorted lists, so the
    # counts must agree. Check up front rather than failing (or mis-pairing) mid-loop
    # after some trajectories have already been modified.
    if len(BOX) != len(XYZ_TRAJ):
        raise ValueError(
            f"Found {len(XYZ_TRAJ)} trajectory files but {len(BOX)} box files; "
            "they are matched one-to-one by sorted name"
        )

    for j,pbc in enumerate(ASE_TRAJ):
        print(f"Updating traj {pbc} = {XYZ_TRAJ[j]}")
        ASE_TRAJ[pbc] = traj_pbc(SYS_TRAJ_DIR,ASE_TRAJ[pbc],BOX[j],**traj_reading_prmters)
else:
    print("No PBC correction provided for the system")

In [None]:
# Display the loaded trajectories: a dict mapping 'syst_<n>' to the frames read for that file.
ASE_TRAJ

## SOAP descriptor calculation

* #### SOAP parameters and modes

In [None]:
SOAP_MODE = 'SINGLE' # MULTI or SINGLE
SOAP_AVE = False

# Positions passed to the descriptor in 'MULTI' mode (the same entry is reused for every
# frame). It must be set before switching SOAP_MODE to 'MULTI', for example:
# POS = [p for p in range(1,4608,4)]
POS = None

# Fail here, before any expensive work, if the mode is misspelled.
if SOAP_MODE not in ('MULTI', 'SINGLE'):
    raise ValueError(f"Unknown SOAP_MODE {SOAP_MODE!r}: expected 'MULTI' or 'SINGLE'")

soap_input_parameters = dict(species = ["N"],
                             periodic = False,
                             rcut = 65,
                             nmax = 8,
                             lmax = 8,
                             average = 'off')

# Box files were found, so the trajectories carry a cell: use a periodic descriptor.
if BOX:
    soap_input_parameters.update(periodic = True)

if SOAP_AVE:
    soap_input_parameters.update(average = 'inner')

print(soap_input_parameters)

if SOAP_MODE == 'MULTI':
    # Without this guard the cell dies with a NameError on a fresh kernel.
    if POS is None:
        raise ValueError("SOAP_MODE = 'MULTI' requires POS to be defined")
    MULTI_SOAP = get_multipos(POS,**traj_reading_prmters)
    print("'MULTI' option ON")

* #### create the soap dir for storing the files if it's not already there

In [None]:
# Output folder: '<system>/soap_files/[<sub-folder>]rcut<rcut>/'. An empty SUB_DIR adds
# nothing to the path, so a single expression covers both cases.
_rcut_folder = f"rcut{soap_input_parameters['rcut']}/"
SOAP_DIR = SYS_NAME_DIR + 'soap_files/' + (SUB_DIR or '') + _rcut_folder

# Create the folder only when it is missing, and say which case applied.
if os.path.exists(SOAP_DIR):
    print(f"Folder already exist!\n{SOAP_DIR}")
else:
    os.makedirs(SOAP_DIR)
    print(f"Created folder\n{SOAP_DIR}")

* #### output init

In [None]:
import datetime
import json

# Log file name carries a timestamp with minute resolution.
today = datetime.datetime.now().strftime("%b-%d-%Y-%H-%M")
fname_output = f"soap.output_{today}"

# Header of the log: trajectory selection, SOAP parameters and the chosen mode.
_log_header = [
    "Traj reading values:\n",
    json.dumps(traj_reading_prmters),
    "\nSOAP parameters:\n",
    json.dumps(soap_input_parameters),
    "\n",
    f"\n SOAP MODE: {SOAP_MODE}",
]

# Mode 'w' starts a fresh file (an existing log with the same name is overwritten).
with open(SOAP_DIR+fname_output, 'w') as log_file:
    log_file.writelines(_log_header)

* #### SOAP calculations following instructions

In [None]:
dscr_soap = get_soap_descriptor(**soap_input_parameters)

# Resolve the mode once, before the loop. An unknown mode used to fall through both
# branches and save a stale (or undefined) `soap_tmp`; now it stops before any output.
if SOAP_MODE == 'MULTI':
    mode_message = "!Multi position calculation"
    create_kwargs = dict(positions=MULTI_SOAP)
elif SOAP_MODE == 'SINGLE':
    mode_message = "!Single position calculation"
    create_kwargs = dict()
else:
    raise ValueError(f"Unknown SOAP_MODE {SOAP_MODE!r}: expected 'MULTI' or 'SINGLE'")

# File-name pieces shared by every saved array.
save_prefix = "SOAP_AVE_" if SOAP_AVE else "SOAP_"
save_suffix = (f"_rcut{soap_input_parameters['rcut']}"
               f"_trj{traj_reading_prmters['START']}"
               f"-{traj_reading_prmters['END']}"
               f"-{traj_reading_prmters['STRIDE']}")

for i,soap in enumerate(ASE_TRAJ):
    print(f"Calc for {soap} : {XYZ_TRAJ[i]}")
    # The log is reopened in append mode so each entry is on disk as soon as it is written.
    with open(SOAP_DIR+fname_output, 'a') as file:
        file.write(f"\nCalc for {soap} : {XYZ_TRAJ[i]}\n")

    print(mode_message)
    t0 = time.time()
    soap_tmp = dscr_soap.create(ASE_TRAJ[soap], **create_kwargs)
    t1 = time.time()

    print(f"-->\tTIME= {np.round(t1-t0, 2)} s \n")
    with open(SOAP_DIR+fname_output, 'a') as file:
        file.write(f"-->\tTIME= {np.round(t1-t0, 2)} s \n")

    # XYZ_TRAJ entries all end in '.xyz'; [:-4] strips that extension.
    # np.save appends '.npy' to the given name.
    np.save(SOAP_DIR + save_prefix + XYZ_TRAJ[i][:-4] + save_suffix, soap_tmp)