-
Notifications
You must be signed in to change notification settings - Fork 0
/
sdf_parser.py
48 lines (43 loc) · 1.65 KB
/
sdf_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
## a script to convert simple molecules from mol to csv
import json
import pandas as pd
colors = json.loads(open("colors.json").read())
colors = {key:[e[0]/255,e[1]/255,e[2]/255] for key,e in colors.items()}
## normalize colors
## make a rep of the molecule using https://en.wikipedia.org/wiki/Chemical_table_file
# seems to be based on leading spaces
with open("ATP_model.sdf","r") as phile:
atom_block =[]
bond_block = []
for lineind,line in enumerate(phile):
# number of leading white space chars
for i,char in enumerate(line):
if not char.isspace():
break
print(lineind,i,line)
if i == 1:
# we are in atom counter
pass
elif i == 3:
## fancy trick for removing consecutive white spaces
atom_block.append(' '.join(line.split()).split())
elif i == 2:
bond_block.append(' '.join(line.split()).split())
print(atom_block)
print()
print(bond_block)
# loop over bonds make an entry for each
# this is complete with starting position and color and connection index
# TODO think about how to prevent redrawing molecules that have multiple bonds
rows = []
for bond in bond_block:
start_ind = int(bond[0])
end_ind = int(bond[1])
bond_type = int(bond[2])
start_atom = atom_block[start_ind][:4]
start_color = color[start_atom[3]]
end_atom = atom_block[end_ind][:4]
# could include something to indicate whether the start atom need's being drawn the first time or not?
row_data = dict(
X =
)