In [343]:
# In this workshop, we will again use the water.xyz file.

import os
import numpy
xyz_file_path = os.path.join('CMS Workshp (MolSSI data)', 'water.xyz')
xyz_file = numpy.genfromtxt(fname=xyz_file_path, skip_header=2, dtype='unicode')

# Column 0 holds the atom labels; the remaining columns are x, y, z.
# Everything is read as text, so the coordinate columns are cast to float.
molecule = xyz_file[:, 0]
coordinates = xyz_file[:, 1:].astype(numpy.float64)

# One row per atom (O, H1, H2 for this water file).
row_num = coordinates.shape[0]

# Every index pair is generated, but only pairs with i_1 < i_2 are processed,
# so each pair of atoms is measured exactly once and never against itself.
for i_1 in range(row_num):
    for i_2 in range(row_num):
        if i_1 >= i_2:
            continue
        # Per-axis differences between the two atoms (first three coordinate columns).
        x_distances, y_distances, z_distances = coordinates[i_1, :3] - coordinates[i_2, :3]
        bond_len = numpy.sqrt(x_distances**2 + y_distances**2 + z_distances**2)
        # Only distances in (0, 1.5] are reported as bonds.
        if 0 < bond_len <= 1.5:
            real_bond_len = bond_len
            print(F'{molecule[i_1]} to {molecule[i_2]}:', real_bond_len)


O to H1: 0.9690005374652793
O to H2: 0.9690003348647513


In [345]:
# Let's try the same calculation using a function.

import os
import numpy
# Read the water geometry as text, then split it into labels and numeric coordinates.
xyz_file_path = os.path.join('CMS Workshp (MolSSI data)', 'water.xyz')
xyz_file = numpy.genfromtxt(fname=xyz_file_path, skip_header=2, dtype='unicode')
molecule, coordinates = xyz_file[:, 0], xyz_file[:, 1:].astype(numpy.float64)
#print(coordinates)
# 1st column is x_coordinates, 2nd column is y, 3rd column is z.
# 1st row is for O_atoms, 2nd for H1, and 3rd for H2

# In below code, atom1 can be O, H1 or H2 and same for atom2.
def atomic_dist(atom1_coord, atom2_coord):
    """Return the straight-line distance between two three-dimensional points.

    Each argument is indexed at positions 0, 1 and 2 (x, y, z); any further
    entries are ignored.
    """
    # Difference along each axis, then the usual square-root of the summed squares.
    dx, dy, dz = (atom1_coord[axis] - atom2_coord[axis] for axis in range(3))
    return numpy.sqrt(dx**2 + dy**2 + dz**2)
# Using a function like atomic_dist(atom1_coord, atom2_coord) adds flexibility and reusability (for multiple files).
# In the above code, we just defined the formula for calculating a bond length, which remains the same for every single file.

# Now we need code for each specific file that reads its data and calculates bond_len using the above formula.
# This code simply calls the function "atomic_dist", passing the x, y, z coordinates of two atoms
# (coordinates[i_1] and coordinates[i_2]) from the file as arguments.

# One row per atom in the coordinate table.
row_num = coordinates.shape[0]

# Every index pair is generated, but only pairs with i_1 < i_2 are processed,
# so each pair of atoms is measured exactly once.
for i_1 in range(row_num):
    for i_2 in range(row_num):
        if i_1 >= i_2:
            continue
        bond_len = atomic_dist(coordinates[i_1], coordinates[i_2])
        # Only distances in (0, 1.5] are reported as bonds.
        if 0 < bond_len <= 1.5:
            real_bond_len = bond_len
            print(F'{molecule[i_1]} to {molecule[i_2]}:', real_bond_len)


O to H1: 0.9690005374652793
O to H2: 0.9690003348647513


In [93]:
# Next, let’s write another function that checks to see if a particular bond distance represents a bond.
# We can call this function "bond_check"

def bond_check(real_bond_len):
    """Return True when the distance is greater than 0 and at most 1.5, else False."""
    within_range = 0 < real_bond_len <= 1.5
    # bool() keeps the result a plain Python bool even for numpy scalar inputs.
    return bool(within_range)
# `real_bond_len` is not defined in this cell: it is the module-level variable assigned
# inside the bond loops of the earlier cells (the last distance that passed the bond test),
# so one of those cells must have been run before this one.
result = bond_check(real_bond_len)
print (result)

True


In [101]:
# We can re-write the above function as follows:

# Bond-length limits, kept at module level so they can be changed in one place.
min_length = 0
max_length = 1.5


def bond_check(real_bond_len):
    """Return True when min_length < real_bond_len <= max_length, else False.

    The two limits are the module-level variables of the same names and are
    looked up each time the function is called.
    """
    if not real_bond_len > min_length:
        return False
    return bool(real_bond_len <= max_length)
# `real_bond_len` comes from the bond loop of an earlier cell (it is not defined here);
# the limits used by bond_check are the module-level min_length / max_length set above.
result = bond_check(real_bond_len)
print (result)

True


In [111]:
# Show the docstring of the distance function.
# NOTE(review): no function named `distances` is defined in the cells shown here (the
# distance helper is defined as `atomic_dist`), so this call presumably relies on an older
# definition still present in the kernel — confirm, and switch to help(atomic_dist) if so.
help(distances)

Help on function distances in module __main__:

distances(atom1_coord, atom2_coord)
    Purpose of this function: Calculate the distance between two three-dimensional points.



In [115]:
# We can also define default values, which are used whenever the caller does not pass these arguments explicitly:

# min_length = 0
# max_length = 1.5

def bond_check(real_bond_len, min_length = 0 , max_length = 0.5):
    """Return True when min_length < real_bond_len <= max_length, else False.

    Both limits are optional; when omitted they default to 0 and 0.5.
    """
    # bool() keeps the result a plain Python bool even for numpy scalar inputs.
    return bool(min_length < real_bond_len <= max_length)
# Only the distance is passed, so the defaults from the signature above (0 and 0.5) apply.
# `real_bond_len` is the value left behind by the bond loop of an earlier cell.
result = bond_check(real_bond_len)
print (result)

False


In [131]:
# We can also run the function like this:

def bond_check(real_bond_len, min_length = 0 , max_length = 0.5):
    """Tell whether a distance lies in the half-open range (min_length, max_length].

    Returns True when real_bond_len is greater than min_length and no larger
    than max_length; otherwise False. The limits default to 0 and 0.5.
    """
    return True if (real_bond_len > min_length and real_bond_len <= max_length) else False
# Call with a literal distance; max_length falls back to its default of 0.5, so 1.5 is rejected.
result = bond_check(1.5)
print(result)

# or, equivalently, print the return value directly without storing it:
print(bond_check(1.5))

# A default can be overridden for a single call by passing it as a keyword argument;
# with max_length=1.5 the same distance is accepted.

print(bond_check(1.5 , max_length=1.5))


False
False
True


In [347]:

import os
import numpy
# Read the water geometry as text, then split it into labels and numeric coordinates.
xyz_file_path = os.path.join('CMS Workshp (MolSSI data)', 'water.xyz')
xyz_file = numpy.genfromtxt(fname=xyz_file_path, skip_header=2, dtype='unicode')
molecule, coordinates = xyz_file[:, 0], xyz_file[:, 1:].astype(numpy.float64)
#print(coordinates)
# 1st column is x_coordinates, 2nd column is y, 3rd column is z.
# 1st row is for O_atoms, 2nd for H1, and 3rd for H2

# In below code, atom1 can be O, H1 or H2 and same for atom2.
def atomic_dist(atom1_coord, atom2_coord):
    """Compute the distance between two points given by their x, y, z values.

    Only indices 0, 1 and 2 of each argument are read.
    """
    deltas = [atom1_coord[axis] - atom2_coord[axis] for axis in (0, 1, 2)]
    squared_sum = deltas[0]**2 + deltas[1]**2 + deltas[2]**2
    return numpy.sqrt(squared_sum)
# Using a function like atomic_dist(atom1_coord, atom2_coord) adds flexibility and reusability (for multiple files).
# In the above code, we just defined the formula for calculating a bond length, which remains the same for every single file.

def bond_check(bond_len):
    """Return True when bond_len is greater than 0 and at most 1.5, else False."""
    if not bond_len > 0:
        return False
    return bool(bond_len <= 1.5)

# Now we need a code for each specific file to read its data for bond_len calculation using the above formula.
# This code simply calls the function "atomic_dist" and utilizes the x, y, z coordinates of two atoms 
# (coordinates[i_1] and coordinates[i_2])from the file as arguments.

# One row per atom in the coordinate table.
row_num = len(coordinates[:,0])

# Start the inner index just past the outer one so every unordered pair of atoms is
# visited exactly once (no self-pairs, no duplicates, no wasted iterations).
for i_1 in range(row_num):
    for i_2 in range(i_1 + 1, row_num):
        bond_len = atomic_dist(coordinates[i_1], coordinates[i_2])
        # Test truthiness rather than `is True`: an identity check would silently skip
        # any truthy result that is not the bool singleton (e.g. a numpy boolean).
        if bond_check(bond_len):
            print(F'{molecule[i_1]} to {molecule[i_2]}:', bond_len)


O to H1: 0.9690005374652793
O to H2: 0.9690003348647513


In [349]:
# Let's write a function "open_xyz" which takes an xyz file as a parameter and returns the symbols and coordinates.

def open_xyz(xyz_file_path):
    """Read an xyz file and return its atom labels and coordinates.

    Parameters
    ----------
    xyz_file_path : str or path-like
        File to read. The first two lines are skipped; each remaining line is
        expected to hold a label followed by numeric columns.

    Returns
    -------
    molecule : numpy.ndarray of str
        First column of the file (one label per atom).
    coordinates : numpy.ndarray of float64
        Remaining columns converted to floats, one row per atom.
    """
    any_xyz_file = numpy.genfromtxt(fname=xyz_file_path, skip_header=2, dtype='unicode')
    # genfromtxt collapses a file with a single atom line into a 1-D array;
    # restore the (rows, columns) layout so the column slicing below still works.
    any_xyz_file = numpy.atleast_2d(any_xyz_file)
    molecule = any_xyz_file[:, 0]
    coordinates = any_xyz_file[:, 1:].astype(numpy.float64)
    return molecule, coordinates

# Now let's use the function: point "xyz_file_path" at the benzene file and print what open_xyz returns.
# open_xyz returns two arrays (labels, coordinates), so print shows them together as a tuple.
# Note that this rebinds the module-level xyz_file_path to the benzene file.
xyz_file_path = os.path.join('CMS Workshp (MolSSI data)', 'benzene.xyz')
print(open_xyz(xyz_file_path ))  

# or
# print(open_xyz(xyz_file_path = os.path.join('CMS Workshp (MolSSI data)', 'benzene.xyz')))

# or
# xyz_file_path = os.path.join('CMS Workshp (MolSSI data)', 'benzene.xyz')
# result = open_xyz(xyz_file_path)
# print(result)



(array(['C', 'H', 'C', 'H', 'C', 'H', 'C', 'H', 'C', 'H', 'C', 'H'],
      dtype='<U8'), array([[ 0.     ,  1.40272,  0.     ],
       [ 0.     ,  2.49029,  0.     ],
       [-1.21479,  0.70136,  0.     ],
       [-2.15666,  1.24515,  0.     ],
       [-1.21479, -0.70136,  0.     ],
       [-2.15666, -1.24515,  0.     ],
       [ 0.     , -1.40272,  0.     ],
       [ 0.     , -2.49029,  0.     ],
       [ 1.21479, -0.70136,  0.     ],
       [ 2.15666, -1.24515,  0.     ],
       [ 1.21479,  0.70136,  0.     ],
       [ 2.15666,  1.24515,  0.     ]]))


In [375]:
# Now let's combine all the above functions to open an xyz file and calculate bond lengths for its atoms:

import os
import numpy

""" Function to get/open the file"""
def open_xyz(xyz_file_path):
    """Read an xyz file and return its atom labels and coordinates.

    Parameters
    ----------
    xyz_file_path : str or path-like
        File to read. The first two lines are skipped; each remaining line is
        expected to hold a label followed by numeric columns.

    Returns
    -------
    molecules : numpy.ndarray of str
        First column of the file (one label per atom).
    coordinates : numpy.ndarray of float64
        Remaining columns converted to floats, one row per atom.
    """
    xyz_file = numpy.genfromtxt(fname=xyz_file_path, skip_header=2, dtype='unicode')
    # genfromtxt collapses a file with a single atom line into a 1-D array;
    # restore the (rows, columns) layout so the column slicing below still works.
    xyz_file = numpy.atleast_2d(xyz_file)
    molecules = xyz_file[:, 0]
    coordinates = xyz_file[:, 1:].astype(numpy.float64)
    return molecules, coordinates

""" Function to calculate the bond_len for any_xyz_file"""
def atomic_dist(atom1_coord, atom2_coord):
    """Calculate the distance between two three-dimensional points.

    Parameters
    ----------
    atom1_coord, atom2_coord : sequence of numbers
        Indexable containers whose entries 0, 1 and 2 are the x, y and z values.

    Returns
    -------
    The square root of the summed squared per-axis differences.
    """
    x_distances = atom1_coord[0] - atom2_coord[0]
    y_distances = atom1_coord[1] - atom2_coord[1]
    z_distances = atom1_coord[2] - atom2_coord[2]
    return numpy.sqrt(x_distances**2 + y_distances**2 + z_distances**2)

""" Function to check if a distance qualifies as a bond define the max/min value for bond_len""" 
def bond_check(bond_len, min_length = 0, max_length = 1.5):
    """Decide whether a distance qualifies as a bond.

    Parameters
    ----------
    bond_len : number
        Distance to test.
    min_length : number, optional
        Exclusive lower limit (default 0).
    max_length : number, optional
        Inclusive upper limit (default 1.5).

    Returns
    -------
    bool
        True when min_length < bond_len <= max_length, otherwise False.
    """
    # Compare against the arguments (not hard-coded 0 and 1.5) so that callers who
    # pass their own limits actually get them applied. bool() keeps the result a
    # plain Python bool even when bond_len is a numpy scalar.
    return bool(bond_len > min_length and bond_len <= max_length)

""" Function to call already defined "atomic_dist" funct for bond_length calculation using xyz file data"""
""" This is the main script to calculate and print bond lengths """
# Inputs: load the benzene geometry. open_xyz returns (labels, coordinates);
# unpacking makes both available to the loop below.
benzene_file = os.path.join('CMS Workshp (MolSSI data)', 'benzene.xyz')
molecules, coordinates = open_xyz(benzene_file)

# Main script: measure every unordered pair of atoms once and print the ones that
# bond_check accepts.
row_num = len(coordinates[:,0])
for i_1 in range(row_num):
    for i_2 in range(i_1 + 1, row_num):
        bond_len = atomic_dist(coordinates[i_1], coordinates[i_2])
        # Truthiness test instead of `is True`, which would skip truthy non-bool results.
        if bond_check(bond_len):
            # Label with `molecules` (the array loaded just above), not a `molecule`
            # variable that only exists if another cell happened to run earlier.
            print(F'{molecules[i_1]} to {molecules[i_2]}:' , bond_len)
                

C to H: 1.08757
C to C: 1.402719000263417
C to C: 1.402719000263417
C to H: 1.087578347062868
C to C: 1.40272
C to H: 1.087578347062868
C to C: 1.402719000263417
C to H: 1.08757
C to C: 1.402719000263417
C to H: 1.087578347062868
C to C: 1.40272
C to H: 1.087578347062868


In [369]:
# Now let's generate a generalized function to open any xyz file and calculate bond lengths for its atoms:

import os
import numpy

""" Function to get/open the file"""
def open_xyz(xyz_file_path):
    """Read an xyz file and return its atom labels and coordinates.

    Parameters
    ----------
    xyz_file_path : str or path-like
        File to read. The first two lines are skipped; each remaining line is
        expected to hold a label followed by numeric columns.

    Returns
    -------
    molecules : numpy.ndarray of str
        First column of the file (one label per atom).
    coordinates : numpy.ndarray of float64
        Remaining columns converted to floats, one row per atom.
    """
    xyz_file = numpy.genfromtxt(fname=xyz_file_path, skip_header=2, dtype='unicode')
    # genfromtxt collapses a file with a single atom line into a 1-D array;
    # restore the (rows, columns) layout so the column slicing below still works.
    xyz_file = numpy.atleast_2d(xyz_file)
    molecules = xyz_file[:, 0]
    coordinates = xyz_file[:, 1:].astype(numpy.float64)
    return molecules, coordinates

""" Function to calculate the bond_len for any_xyz_file"""
def atomic_dist(atom1_coord, atom2_coord):
    """Calculate the distance between two three-dimensional points.

    Parameters
    ----------
    atom1_coord, atom2_coord : sequence of numbers
        Indexable containers whose entries 0, 1 and 2 are the x, y and z values.

    Returns
    -------
    The square root of the summed squared per-axis differences.
    """
    x_distances = atom1_coord[0] - atom2_coord[0]
    y_distances = atom1_coord[1] - atom2_coord[1]
    z_distances = atom1_coord[2] - atom2_coord[2]
    return numpy.sqrt(x_distances**2 + y_distances**2 + z_distances**2)

""" Function to check if a distance qualifies as a bond define the max/min value for bond_len""" 
def bond_check(bond_len, min_length = 0, max_length = 1.5):
    """Decide whether a distance qualifies as a bond.

    Parameters
    ----------
    bond_len : number
        Distance to test.
    min_length : number, optional
        Exclusive lower limit (default 0).
    max_length : number, optional
        Inclusive upper limit (default 1.5).

    Returns
    -------
    bool
        True when min_length < bond_len <= max_length, otherwise False.
    """
    # Compare against the arguments (not hard-coded 0 and 1.5) so that callers who
    # pass their own limits actually get them applied. bool() keeps the result a
    # plain Python bool even when bond_len is a numpy scalar.
    return bool(bond_len > min_length and bond_len <= max_length)

""" Function to call already defined "atomic_dist" funct for bond_length calculation using xyz file data"""
""" This is the main script to calculate and print bond lengths """

# Main Script: We have to create a function to utilize it for multiple files.
def print_bond_len(atom_molecules , atom_coordinates):
    """Print the distance of every atom pair that bond_check accepts as a bond.

    Parameters
    ----------
    atom_molecules : sequence of str
        One label per atom; its length sets how many atoms are scanned.
    atom_coordinates : indexable
        atom_coordinates[i] is passed to atomic_dist as the position of atom i.

    Returns
    -------
    None. One line "<label1> to <label2>: <distance>" is printed per accepted pair.
    """
    row_num = len(atom_molecules)
    for i_1 in range(row_num):
        # Start just past i_1 so each unordered pair is visited exactly once.
        for i_2 in range(i_1 + 1, row_num):
            bond_len = atomic_dist(atom_coordinates[i_1], atom_coordinates[i_2])
            # Truthiness test instead of `is True`, which would silently skip any
            # truthy result that is not the bool singleton (e.g. a numpy boolean).
            if bond_check(bond_len):
                print(F'{atom_molecules[i_1]} to {atom_molecules[i_2]}:' , bond_len)

## Main script to define file_path for handling multiple files:

# A dictionary is defined where "water_file or benzene_file" are keys and file paths are the values.
# Map a short name for each molecule to the location of its xyz file.
both_xyz_file_paths = dict(
    water_file=os.path.join('CMS Workshp (MolSSI data)', 'water.xyz'),
    benzene_file=os.path.join('CMS Workshp (MolSSI data)', 'benzene.xyz'),
)

# Go through the (name, path) pairs in turn; each file has to be loaded inside the
# loop because its data is needed for that iteration's report.
for filenames, file_paths in both_xyz_file_paths.items():
    print(F'Printing bonds for {filenames}, from "{file_paths}"')
    molecules, coordinates = open_xyz(file_paths)
    print_bond_len(molecules, coordinates)


# Why is the function defined as print_bond_len(atom_molecules, atom_coordinates) but called as print_bond_len(molecules, coordinates)?
# atom_molecules & atom_coordinates are just placeholders (parameters), while molecules & coordinates hold the actual data.
# When calling the function, you pass actual data (called arguments) to replace the placeholders.

# Similarly, in "molecules, coordinates = open_xyz(file_paths)" the variable containing the actual data
# is "file_paths", not "xyz_file_path", which is only the placeholder name used inside open_xyz.
# If you pass xyz_file_path instead, it will take the value left over from a previous cell, where it pointed to the benzene file.



Printing bonds for water_file, from "CMS Workshp (MolSSI data)/water.xyz"
O to H1: 0.9690005374652793
O to H2: 0.9690003348647513
Printing bonds for benzene_file, from "CMS Workshp (MolSSI data)/benzene.xyz"
C to H: 1.08757
C to C: 1.402719000263417
C to C: 1.402719000263417
C to H: 1.087578347062868
C to C: 1.40272
C to H: 1.087578347062868
C to C: 1.402719000263417
C to H: 1.08757
C to C: 1.402719000263417
C to H: 1.087578347062868
C to C: 1.40272
C to H: 1.087578347062868


In [367]:
# Let's use glob to print bonds for all the xyz files.

import os
import numpy
import glob

""" Function to get/open the file"""
def open_xyz(xyz_file_path):
    """Read an xyz file and return its atom labels and coordinates.

    Parameters
    ----------
    xyz_file_path : str or path-like
        File to read. The first two lines are skipped; each remaining line is
        expected to hold a label followed by numeric columns.

    Returns
    -------
    molecules : numpy.ndarray of str
        First column of the file (one label per atom).
    coordinates : numpy.ndarray of float64
        Remaining columns converted to floats, one row per atom.
    """
    xyz_file = numpy.genfromtxt(fname=xyz_file_path, skip_header=2, dtype='unicode')
    # genfromtxt collapses a file with a single atom line into a 1-D array;
    # restore the (rows, columns) layout so the column slicing below still works.
    xyz_file = numpy.atleast_2d(xyz_file)
    molecules = xyz_file[:, 0]
    coordinates = xyz_file[:, 1:].astype(numpy.float64)
    return molecules, coordinates

""" Function to calculate the bond_len for any_xyz_file"""
def atomic_dist(atom1_coord, atom2_coord):
    """Calculate the distance between two three-dimensional points.

    Parameters
    ----------
    atom1_coord, atom2_coord : sequence of numbers
        Indexable containers whose entries 0, 1 and 2 are the x, y and z values.

    Returns
    -------
    The square root of the summed squared per-axis differences.
    """
    x_distances = atom1_coord[0] - atom2_coord[0]
    y_distances = atom1_coord[1] - atom2_coord[1]
    z_distances = atom1_coord[2] - atom2_coord[2]
    return numpy.sqrt(x_distances**2 + y_distances**2 + z_distances**2)

""" Function to check if a distance qualifies as a bond define the max/min value for bond_len""" 
def bond_check(bond_len, min_length = 0, max_length = 1.5):
    """Decide whether a distance qualifies as a bond.

    Parameters
    ----------
    bond_len : number
        Distance to test.
    min_length : number, optional
        Exclusive lower limit (default 0).
    max_length : number, optional
        Inclusive upper limit (default 1.5).

    Returns
    -------
    bool
        True when min_length < bond_len <= max_length, otherwise False.
    """
    # Compare against the arguments (not hard-coded 0 and 1.5) so that callers who
    # pass their own limits actually get them applied. bool() keeps the result a
    # plain Python bool even when bond_len is a numpy scalar.
    return bool(bond_len > min_length and bond_len <= max_length)

""" Function to call already defined "atomic_dist" funct for bond_length calculation using xyz file data"""
""" This is the main script to calculate and print bond lengths """

# Main Script: We have to create a function to utilize it for multiple files.
def print_bond_len(atom_molecules , atom_coordinates):
    """Print the distance of every atom pair that bond_check accepts as a bond.

    Parameters
    ----------
    atom_molecules : sequence of str
        One label per atom; its length sets how many atoms are scanned.
    atom_coordinates : indexable
        atom_coordinates[i] is passed to atomic_dist as the position of atom i.

    Returns
    -------
    None. One line "<label1> to <label2>: <distance>" is printed per accepted pair.
    """
    row_num = len(atom_molecules)
    for i_1 in range(row_num):
        # Start just past i_1 so each unordered pair is visited exactly once.
        for i_2 in range(i_1 + 1, row_num):
            bond_len = atomic_dist(atom_coordinates[i_1], atom_coordinates[i_2])
            # Truthiness test instead of `is True`, which would silently skip any
            # truthy result that is not the bool singleton (e.g. a numpy boolean).
            if bond_check(bond_len):
                print(F'{atom_molecules[i_1]} to {atom_molecules[i_2]}:' , bond_len)

## Main script to define file_path for handling multiple files:

# Collect the path of every .xyz file in the data folder into a list.
# glob returns matches in arbitrary order, so sort them to keep the printed report
# in the same order on every run and every machine.
all_xyz_file_paths = sorted(glob.glob('CMS Workshp (MolSSI data)/*.xyz'))

# Iterate over the list of paths, loading each file and printing its bonds.
for file_paths in all_xyz_file_paths:
    print(F'Printing bonds from "{file_paths}"')
    molecules, coordinates = open_xyz(file_paths)  # Loaded inside the loop: one file per iteration.
    print_bond_len(molecules , coordinates)

    

Printing bonds from "CMS Workshp (MolSSI data)/benzene.xyz"
C to H: 1.08757
C to C: 1.402719000263417
C to C: 1.402719000263417
C to H: 1.087578347062868
C to C: 1.40272
C to H: 1.087578347062868
C to C: 1.402719000263417
C to H: 1.08757
C to C: 1.402719000263417
C to H: 1.087578347062868
C to C: 1.40272
C to H: 1.087578347062868
Printing bonds from "CMS Workshp (MolSSI data)/buckminsterfullerene.xyz"
C to C: 1.453387123240054
C to C: 1.453387123240054
C to C: 1.395483919649381
C to C: 1.4533298490019395
C to C: 1.3954210045717386
C to C: 1.4534
C to C: 1.3953961229701048
C to C: 1.4533298490019395
C to C: 1.3953961229701048
C to C: 1.3954210045717386
C to C: 1.4533897412600656
C to C: 1.4533707372862574
C to C: 1.3954903367633902
C to C: 1.4533327595564616
C to C: 1.4533679678594817
C to C: 1.4533707372862574
C to C: 1.4533897412600656
C to C: 1.3954
C to C: 1.4533663646857937
C to C: 1.4533897412600656
C to C: 1.4533663646857937
C to C: 1.4533679678594817
C to C: 1.3954903367633902
C