In [None]:
"""This notebook acts as a place to test the geometry
module of kaplan.

Here, the caffeine molecule is read from
../testfiles/caffeine.xyz, relative to the directory this
notebook is run from. It is converted
into an Xyz object using vetee. Then, openbabel is used
to convert this geometry into zmatrix format. Numpy is
used to randomly generate a list of dihedral angles. Then,
these dihedral angles replace the original dihedral
angles found in the zmatrix. Lastly, the zmatrix with
the updated dihedral angles is converted into xyz
(cartesian) coordinates using openbabel. The final
format of the xyz coordinates is as a list of lists,
where we have:
result = [[a1,x1,y1,z1], [a2,x2,y2,z2], ..., [an,xn,yn,zn]]

In result, the a's are atom strings (for example "Sn" would
be Tin) and the x's, y's, and z's are components of the
cartesian coordinates for that atom.

Note: pybel and openbabel are related; pybel is a Python
wrapper around openbabel that offers a simpler syntax.

"""

In [24]:
import pybel
import os
import vetee
import openbabel
import numpy as np
from vetee.gaussian_options import periodic_table

In [2]:
# A notebook has no __file__ variable, so the data file is located relative to
# the kernel's current working directory (symlinks resolved). This assumes the
# kernel was started from the directory that contains this notebook.
notebook = os.path.realpath(os.getcwd())
# The test geometry lives in the sibling "testfiles" directory.
caffeine = os.path.join(notebook, "..", "testfiles", "caffeine.xyz")
print(notebook)
# Sanity check: prints True when the input file can be found.
print(os.path.isfile(caffeine))

/home/jenefre/Documents/python/pybel
True


In [3]:
# Read the whole xyz file into one string with a single call instead of
# concatenating it line by line; the resulting text is identical.
with open(caffeine, 'r') as f:
    caff_str = f.read()
print(caff_str)

24
caffeine from pubchem
O    0.4700    2.5688    0.0006
O   -3.1271   -0.4436   -0.0003
N   -0.9686   -1.3125    0.0000
N    2.2182    0.1412   -0.0003
N   -1.3477    1.0797   -0.0001
N    1.4119   -1.9372    0.0002
C    0.8579    0.2592   -0.0008
C    0.3897   -1.0264   -0.0004
C    0.0307    1.4220   -0.0006
C   -1.9061   -0.2495   -0.0004
C    2.5032   -1.1998    0.0003
C   -1.4276   -2.6960    0.0008
C    3.1926    1.2061    0.0003
C   -2.2969    2.1881    0.0007
H    3.5163   -1.5787    0.0008
H   -1.0451   -3.1973   -0.8937
H   -2.5186   -2.7596    0.0011
H   -1.0447   -3.1963    0.8957
H    4.1992    0.7801    0.0002
H    3.0468    1.8092   -0.8992
H    3.0466    1.8083    0.9004
H   -1.8087    3.1651   -0.0003
H   -2.9322    2.1027    0.8881
H   -2.9346    2.1021   -0.8849



In [4]:
# Parse the raw xyz text read above into a pybel molecule.
caff_mol = pybel.readstring("xyz", caff_str)

In [5]:
# Print pybel's one-line text form of the molecule (in the recorded output:
# a SMILES-like string followed by the title from the xyz file).
print(caff_mol)

O=c1n(c(=O)n(c2ncn(c12)C)C)C	caffeine from pubchem



In [6]:
# Load the same xyz file through vetee as an Xyz object.
vmol = vetee.xyz.Xyz(caffeine)
# Charge and multiplicity are set by hand here; both values are passed on to
# openbabel when the OBMol is built from vmol.
vmol.charge = 0
vmol.multip = 1

In [7]:
# Build an openbabel molecule from the vetee geometry (approach taken from vetee).
obmol = openbabel.OBMol()
for atom in vmol.coords:
    # each entry of vmol.coords is indexed as [symbol, x, y, z]
    symbol, xyz = atom[0], atom[1:4]
    obatom = openbabel.OBAtom()
    # periodic_table (imported from vetee.gaussian_options) is given the
    # element symbol here and its result is used as the atomic number
    obatom.SetAtomicNum(periodic_table(symbol))
    obatom.SetVector(*xyz)
    obmol.AddAtom(obatom)

# carry over the molecule-level data: total charge, spin multiplicity and title
obmol.SetTotalCharge(vmol.charge)
obmol.SetTotalSpinMultiplicity(vmol.multip)
obmol.SetTitle(vmol.comments)

# wrap the OBMol in a pybel Molecule so pybel's write() can be used on it
pybelmol = pybel.Molecule(obmol)

In [8]:
# Serialise the molecule in openbabel's "gzmat" format. The recorded output
# shows a z-matrix: a connectivity section that refers to named variables
# (r = bond length, a = angle, d = dihedral), followed by a "Variables:"
# section that assigns each variable its value.
zmatrix = pybelmol.write("gzmat")
print(zmatrix)

#Put Keywords Here, check Charge and Multiplicity.

 caffeine from pubchem

0  1
O
O  1  r2
N  2  r3  1  a3
N  1  r4  2  a4  3  d4
N  3  r5  1  a5  2  d5
N  4  r6  1  a6  2  d6
C  4  r7  1  a7  2  d7
C  7  r8  4  a8  1  d8
C  1  r9  2  a9  3  d9
C  2  r10  1  a10  3  d10
C  6  r11  4  a11  1  d11
C  3  r12  2  a12  1  d12
C  4  r13  1  a13  2  d13
C  5  r14  2  a14  1  d14
H  11  r15  6  a15  4  d15
H  12  r16  3  a16  2  d16
H  12  r17  3  a17  2  d17
H  12  r18  3  a18  2  d18
H  13  r19  4  a19  1  d19
H  13  r20  4  a20  1  d20
H  13  r21  4  a21  1  d21
H  14  r22  5  a22  2  d22
H  14  r23  5  a23  2  d23
H  14  r24  5  a24  2  d24
Variables:
r2= 4.6919
r3= 2.3268
a3=  61.87
r4= 2.9916
a4=  85.81
d4=   0.02
r5= 2.4221
a5=  29.34
d5=   0.01
r6= 2.2293
a6= 123.04
d6= 359.96
r7= 1.3654
a7=  49.28
d7=   0.03
r8= 1.3682
a8= 105.05
d8= 180.06
r9= 1.2281
a9=  29.10
d9=   0.10
r10= 1.2363
a10=  30.91
d10= 359.97
r11= 1.3171
a11=  34.75
d11= 180.06
r12= 1.4577
a12=  93.57
d12= 179.96
r13=

In [17]:
# Number of dihedral variables to randomise: the z-matrix printed for the
# 24-atom caffeine file defines d4 through d24, i.e. 21 dihedrals.
NUM_DIHEDRALS = 21
# Fixed seed so that Restart & Run All regenerates the same angles.
DIHEDRAL_SEED = 0
dihedral_rng = np.random.RandomState(DIHEDRAL_SEED)
# Whole-degree angles drawn uniformly from [0, 360) in one vectorised call;
# tolist() converts the array into a plain list of Python ints.
dihedrals = dihedral_rng.randint(0, 360, size=NUM_DIHEDRALS).tolist()
print(dihedrals)

[130, 116, 127, 355, 161, 145, 25, 109, 277, 164, 207, 240, 282, 116, 20, 292, 186, 83, 240, 177, 45]


In [22]:
# Split the z-matrix text into lines so that individual variable assignments
# can be rewritten in place.
zmatrix_list = zmatrix.split('\n')

# Find the dihedral assignments first. Only lines of the form "d<digits>=..."
# count, so an unrelated line that merely starts with 'd' and contains '='
# is not overwritten.
dihedral_lines = []
for i, line in enumerate(zmatrix_list):
    name, sep, _ = line.partition('=')
    name = name.strip()
    if sep and name.startswith('d') and name[1:].isdigit():
        dihedral_lines.append(i)

# Fail up front with a clear message instead of an IndexError raised part-way
# through the rewrite, which would leave zmatrix_list half-updated.
if len(dihedral_lines) > len(dihedrals):
    raise ValueError(
        "z-matrix defines {} dihedral variables but only {} replacement "
        "values were generated".format(len(dihedral_lines), len(dihedrals))
    )

# Keep everything up to and including '=' and substitute the new angle.
# Surplus entries in dihedrals, if any, are left unused.
for i, angle in zip(dihedral_lines, dihedrals):
    line = zmatrix_list[i]
    zmatrix_list[i] = line[:line.index('=') + 1] + str(angle)
    print(zmatrix_list[i])

# Number of dihedral variables that were replaced.
dihedral_num = len(dihedral_lines)

d4=130
d5=116
d6=127
d7=355
d8=161
d9=145
d10=25
d11=109
d12=277
d13=164
d14=207
d15=240
d16=282
d17=116
d18=20
d19=292
d20=186
d21=83
d22=240
d23=177
d24=45


In [23]:
# Reassemble the edited lines into a single z-matrix string, using the same
# '\n' separator the text was split on.
new_zmatrix = '\n'.join(zmatrix_list)
print(new_zmatrix)

#Put Keywords Here, check Charge and Multiplicity.

 caffeine from pubchem

0  1
O
O  1  r2
N  2  r3  1  a3
N  1  r4  2  a4  3  d4
N  3  r5  1  a5  2  d5
N  4  r6  1  a6  2  d6
C  4  r7  1  a7  2  d7
C  7  r8  4  a8  1  d8
C  1  r9  2  a9  3  d9
C  2  r10  1  a10  3  d10
C  6  r11  4  a11  1  d11
C  3  r12  2  a12  1  d12
C  4  r13  1  a13  2  d13
C  5  r14  2  a14  1  d14
H  11  r15  6  a15  4  d15
H  12  r16  3  a16  2  d16
H  12  r17  3  a17  2  d17
H  12  r18  3  a18  2  d18
H  13  r19  4  a19  1  d19
H  13  r20  4  a20  1  d20
H  13  r21  4  a21  1  d21
H  14  r22  5  a22  2  d22
H  14  r23  5  a23  2  d23
H  14  r24  5  a24  2  d24
Variables:
r2= 4.6919
r3= 2.3268
a3=  61.87
r4= 2.9916
a4=  85.81
d4=130
r5= 2.4221
a5=  29.34
d5=116
r6= 2.2293
a6= 123.04
d6=127
r7= 1.3654
a7=  49.28
d7=355
r8= 1.3682
a8= 105.05
d8=161
r9= 1.2281
a9=  29.10
d9=145
r10= 1.2363
a10=  30.91
d10=25
r11= 1.3171
a11=  34.75
d11=109
r12= 1.4577
a12=  93.57
d12=277
r13= 1.4434
a13=  78.22
d13=164
r14= 1.45

In [28]:
# Parse the edited z-matrix back into a molecule so that the Cartesian
# coordinates of its atoms can be read off. Note that this rebinds caff_mol,
# which previously held the molecule parsed from the original xyz text.
caff_mol = pybel.readstring("gzmat", new_zmatrix)

# One [symbol, x, y, z] entry per atom; periodic_table is given the atomic
# number and supplies the first element of each entry (an element symbol in
# the recorded output).
result_geom = [
    [periodic_table(atom.atomicnum)] + list(atom.coords[:3])
    for atom in caff_mol.atoms
]

In [29]:
# Show the final geometry: a list of [symbol, x, y, z] lists, one per atom.
print(result_geom)

[['O', 0.0, 0.0, 0.0], ['O', 4.6919, 0.0, 0.0], ['N', 3.5948750014016104, 1.2564623099511773e-16, 2.051958672208142], ['N', 0.21857865669282045, 2.285573406140432, -1.917823802121036], ['N', 1.503252913986564, 1.0666955765784696, 1.457109111104285], ['N', -0.8142802326941884, 2.317781994753094, -3.8931575995926977], ['C', 1.1816502665747366, 1.6053279268547342, -1.2292917969456587], ['C', 2.348523652532836, 1.8743823744892163, -1.8910979961843668], ['C', 1.0730796671098546, 0.3425791267036132, -0.48925369695321963], ['C', 3.63118518260272, -0.2683947951742589, 0.5755744956799438], ['C', -0.7833230730049165, 2.8289306558630303, -2.679682791238815], ['C', 3.3957192270227066, 1.4440268975054498, 2.0484108824277607], ['C', -1.1575842207564815, 1.8861139074952171, -2.0910617993172953], ['C', 2.1442235004775005, 2.26951321337531, 1.9785905871549023], ['H', -1.5899054244761082, 2.7864514805518152, -1.960322665877337], ['H', 4.333460915826014, 1.9322688136865867, 2.331203591605515], ['H', 2.33