In [119]:
# autoreload to cast any changes to the module files into the notebook
%load_ext autoreload
%autoreload 2
import re, math, os, csv, timeit
from datetime import date 

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [120]:
# configurations
# this script is applicable only to ring molecules: compute() also measures
# the bond that closes the ring, from carbon NUM_CARBON back to carbon 1
NUM_CARBON = 8
NUM_ATOMS = NUM_CARBON + 1*4 + 2*4  # C*8 + H*4 + (2H)*4 = 20

# represent C-H bonds as two parallel lists:
# carbon_list[i] is bonded to hydrogen_list[i]
# (compute() uses each number as an index into a trajectory block)
# C6-H9
# C7-H10
# C8-H11
# C1-H12
# C5-H13 & C5-H14
# C4-H15 & C4-H16
# C3-H17 & C3-H20
# C2-H18 & C2-H19
carbon_list = [6,7,8,1,5,5,4,4,3,3,2,2]
hydrogen_list = [9,10,11,12,13,14,15,16,17,20,18,19]

In [121]:
# Program flow
# --------------------------- Preprocess Data -------------------------
# loop file 
    # catch patterns
    # read 20 lines into a block (list of lines/strings)
    # append to list_of_blocks
    
# --------------------------- Perform Computation -------------------------
# loop through list of blocks 
# for index in len(list_of_blocks)
    # compute(block)
        
    # create new entries in list_of_dictionaries "Traj : 1, C1-C2: (double), C2-C3 (double), ... "
        
# --------------------------- Write to csv ---------------------------

In [122]:
# --------------------------- Preprocess Data -------------------------
# loop file 
    # catch patterns
    # read 20 lines into a block (list of lines/strings)
    # append to list_of_blocks
    
# Input: filename
# Output: List_of_blocks_in_file, each block representing 1 directory
def preprocess_data(fileName):
    """Read a trajectory file and split it into one block per trajectory.

    Args:
        fileName: Path of the UTF-8 text file to read.

    Returns:
        A list of blocks in file order, one for every line that matches
        ``TRAJ<digits>``. Each block is the value returned by
        ``read_a_trajectory`` for that header line.
    """
    # Context manager: the handle is closed even if reading raises.
    with open(fileName, "r", encoding='utf-8') as searchfile:
        line_list = searchfile.readlines()

    # Drop the atom-count lines. Comparing the stripped text removes them
    # regardless of surrounding whitespace or a missing final newline.
    atom_count_text = str(NUM_ATOMS)
    line_list = [line for line in line_list if line.strip() != atom_count_text]

    list_of_blocks_in_file = []
    for index, line in enumerate(line_list):
        if re.search(r'TRAJ\d+', line):
            # Every header line starts a new block.
            list_of_blocks_in_file.append(read_a_trajectory(index, line_list))
    return list_of_blocks_in_file
    
# create new block / read a trajectory
# Input: index, list_of_lines_in_file
# Output: a block that ends at the next line matching the TRAJ regex, or at EOF
def read_a_trajectory(index, line_list):
    """Collect the block that starts at the header line ``line_list[index]``.

    Args:
        index: Position in ``line_list`` of the trajectory header line.
        line_list: The lines to scan, as strings.

    Returns:
        A list whose first element is the header line without its trailing
        newline, followed by one ``[x, y, z]`` list of floats for every
        coordinate line up to (not including) the next line that matches
        ``TRAJ<digits>``, or up to the end of ``line_list``.

    Raises:
        ValueError: If a non-blank line does not hold exactly four
            whitespace-separated fields, or x/y/z cannot be read as floats.
    """
    header = line_list[index]
    # Only drop the last character when it really is a newline; the final
    # line of a file may not have one.
    if header.endswith('\n'):
        header = header[:-1]

    list_of_coords_per_block = [header]
    for line in line_list[index + 1:]:
        if re.search(r'TRAJ\d+', line):
            break
        if not line.strip():
            # Blank lines carry no coordinates; skip them instead of failing
            # on the four-field unpack below.
            continue
        _atom, x, y, z = line.split()
        list_of_coords_per_block.append([float(x), float(y), float(z)])

    return list_of_coords_per_block

In [123]:
# --------------------------- Perform Computation -------------------------
# compute block 
# Input: block
# Output: dictionary_for_trajectory - in format "TRAJ : 1, C1-C2: (double), C2-C3 (double), ... "

def compute(block):
    """Compute every C-C and C-H bond length of one trajectory block.

    Args:
        block: A list whose element 0 is the trajectory header string and
            whose element i (i >= 1) is the [x, y, z] coordinate list of
            atom number i.

    Returns:
        A dictionary with the key 'TRAJ' (the first run of digits found in
        the header, as a string) followed by one entry per bond, keyed
        "C<i>-C<j>" or "C<i>-H<j>" and holding the bond length.

    Raises:
        IndexError: If the header contains no digits, or the block holds
            fewer atoms than the configured indices require.
    """
    dictionary_for_trajectory = {}
    # Raw string: '\d' inside a normal string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    dictionary_for_trajectory['TRAJ'] = re.findall(r'\d+', block[0])[0]
    # ---------------- C - C ----------------
    for carbon in range(1, NUM_CARBON + 1):
        # The last carbon bonds back to carbon 1, which closes the ring.
        neighbour = 1 if carbon == NUM_CARBON else carbon + 1
        bond_title = "C{}-C{}".format(carbon, neighbour)
        dictionary_for_trajectory[bond_title] = compute_bondlength(block[carbon], block[neighbour])
    # ---------------- C - H ----------------
    for carbon, hydro in zip(carbon_list, hydrogen_list):
        bond_title = "C{}-H{}".format(carbon, hydro)
        dictionary_for_trajectory[bond_title] = compute_bondlength(block[carbon], block[hydro])
    return dictionary_for_trajectory

# compute bondlength
# Input: 2 lines of coordinates
# Output: bondlength (float type)
def compute_bondlength(*coords):
    """Return the Euclidean distance between the first two coordinate triples.

    Args:
        *coords: Indexable coordinates; only the first two arguments are
            used, and of each only the elements at positions 0, 1 and 2.

    Returns:
        The distance as a float.
    """
    first, second = coords[0], coords[1]
    delta_x = first[0] - second[0]
    delta_y = first[1] - second[1]
    delta_z = first[2] - second[2]
    return math.sqrt(delta_x**2 + delta_y**2 + delta_z**2)

In [124]:
# compute C-C-C angles
# Input: 3 lines of coordinates (one per carbon atom)
# Output: angle (degree / radian)
def compute_C_C_C_angle(*coords):
    """Parse coordinate lines; the angle itself is not computed yet.

    Args:
        *coords: Strings holding four whitespace-separated fields each
            (atom label, x, y, z).

    Returns:
        None. The parsed [x, y, z] float lists are built locally and
        discarded.

    NOTE(review): unfinished - no angle is calculated or returned.
    """
    split_lines = (text.split() for text in coords)
    coordinates = [[float(x), float(y), float(z)] for _atom, x, y, z in split_lines]
        
# compute dihedral angles
# https://azevedolab.net/resources/dihedral_angle.pdf

In [125]:



# This one starts with two cross products to get a vector perpendicular to
# b2 and b1 and another perpendicular to b2 and b3. The angle between those vectors
# is the dihedral angle.

def compute_dihedral_angle(*coords):
    """Placeholder for the dihedral-angle computation.

    Accepts any number of positional arguments, ignores them, and
    returns None.
    """
    return None
# def dihedral3(p):
#     b = p[:-1] - p[1:]
#     b[0] *= -1
#     v = np.array( [np.cross(v,b[1]) for v in [b[0], b[2]] ] )
#     # Normalize vectors
#     v /= np.sqrt(np.einsum('...i,...i', v, v)).reshape(-1,1)
#     return np.degrees(np.arccos( v[0].dot(v[1]) ))

In [126]:
def compute_geometric_params(inputFile):
    """Compute the bond lengths of every trajectory in a file and save them as CSV.

    The result is written to ./output/<today>_trajectory_bondlength_2.csv
    (the folder is created when missing). The file has one row per
    trajectory; its columns are the keys of the first trajectory's
    dictionary.

    Args:
        inputFile: Path of the trajectory file to process.

    Raises:
        ValueError: If no trajectory is found in ``inputFile``. Raised
            before the output folder or file is created.
    """
    # --------------------------- Preprocess Data -------------------------
    list_of_blocks_in_file = preprocess_data(inputFile)
    # --------------------------- Perform Computation -------------------------
    # One dictionary per trajectory: "TRAJ : 1, C1-C2: (double), C2-C3 (double), ... "
    list_of_traj_dictionaries = [compute(block) for block in list_of_blocks_in_file]
    if not list_of_traj_dictionaries:
        # Without this guard an empty CSV would be created first and the
        # header lookup below would then fail on the empty list.
        raise ValueError("no trajectory found in {}".format(inputFile))
    # --------------------------- Write to csv ---------------------------
    date_computed = str(date.today())
    file_path = os.path.join(".", "output", date_computed + "_trajectory_bondlength_2" + ".csv")
    os.makedirs(os.path.dirname(file_path), exist_ok=True) # create 'output' folder if not existed yet

    with open(file_path, 'w', newline='', encoding='utf-8') as csvFile:
        field_names = list(list_of_traj_dictionaries[0].keys())
        writer = csv.DictWriter(csvFile, fieldnames=field_names, dialect='excel')

        writer.writeheader()
        writer.writerows(list_of_traj_dictionaries)

In [127]:
def main():
    """Run compute_geometric_params on ./input/IC-angs-conflow.xyz."""
    input_path = os.path.join('.', 'input', 'IC-angs-conflow.xyz')
    compute_geometric_params(input_path)


if __name__ == "__main__":
    main()

In [128]:
# references
'''
http://google.github.io/styleguide/pyguide.html
https://visualgit.readthedocs.io/en/latest/pages/naming_convention.html
https://developers.google.com/edu/python/regular-expressions
'''

'\nhttp://google.github.io/styleguide/pyguide.html\nhttps://visualgit.readthedocs.io/en/latest/pages/naming_convention.html\nhttps://developers.google.com/edu/python/regular-expressions\n'