/
site_water_analysis.py
624 lines (584 loc) · 33.6 KB
/
site_water_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
import sys
import time
import datetime
import os
import numpy as np
from scipy import stats, spatial
import mdtraj as md
import parmed as pmd
from progressbar import Bar, Percentage, ProgressBar, ETA
from water_analysis import WaterAnalysis, NeighborSearch, function_timer
#import _sstmap_ext_old as calc_old
import _sstmap_ext as calc
class SiteWaterAnalysis(WaterAnalysis):
"""
"""
def __init__(self, topology_file, trajectory, start_frame=0, num_frames=0,
ligand_file=None, cluster_center_file=None,
desmond_helper_file=None, prefix="test"):
super(SiteWaterAnalysis, self).__init__(topology_file, trajectory,
start_frame, num_frames, desmond_helper_file)
self.prefix = prefix
self.site_waters = None
if cluster_center_file == None and ligand_file == None:
sys.exit("Please provide either a ligand file for clustering or\
a cluster center file to generate hydration sites.")
if ligand_file != None:
cluster_coords, self.site_waters = self.generate_clusters(ligand_file)
self.hsa_data, self.hsa_dict = self.initialize_site_data(cluster_coords)
if cluster_center_file != None:
clusters_pdb_file = md.load_pdb(cluster_center_file)
cluster_coords = md.utils.in_units_of(clusters_pdb_file.xyz[0, :, :], "nanometers", "angstroms")
self.hsa_data, self.hsa_dict = self.initialize_site_data(cluster_coords)
def initialize_site_data(self, cluster_coords):
self.data_titles = ["index", "x", "y", "z",
"nwat", "occupancy",
"Esw", "EswLJ", "EswElec", "Eww", "EwwLJ", "EwwElec", "Etot", "Ewwnbr",
"TSsw", "TSww", "TStot",
"Nnbrs", "Nhbww", "Nhbsw", "Nhbtot",
"f_hb_ww", "f_enc",
"Acc_ww", "Don_ww", "Acc_sw", "Don_sw",
"solute_acceptors", "solute_donors"]
n_sites = cluster_coords.shape[0]
site_array = np.zeros((n_sites, len(self.data_titles)))
site_dict = {}
for site_i in xrange(n_sites):
site_array[site_i, 0] = site_i
site_array[site_i, 1] = cluster_coords[site_i, 0]
site_array[site_i, 2] = cluster_coords[site_i, 1]
site_array[site_i, 3] = cluster_coords[site_i, 2]
site_dict[site_i] = [[] for i in range(len(self.data_titles))]
return site_array, site_dict
@function_timer
def generate_clusters(self, ligand_file):
# Obtain binding site solute atoms using ligand atom coordinates
ligand = md.load_pdb(ligand_file)
ligand_coords = md.utils.in_units_of(ligand.xyz[0, :, :], "nanometers", "angstroms", inplace=True)
first_frame = md.load_frame(self.trajectory, 0, top=self.topology_file)
solute_pos = md.utils.in_units_of(first_frame.xyz[0, self.non_water_atom_ids, :], "nanometers", "angstroms")
search_space = NeighborSearch(solute_pos, 5.0)
near_indices = search_space.query_nbrs_multiple_points(ligand_coords)
binding_site_atom_indices = [self.non_water_atom_ids[nbr_index] for nbr_index in near_indices]
# Obtain water molecules solvating the binding site
stride = 10
print "Reading in trajectory for clustering."
trj = md.load(self.trajectory, top=self.topology)
trj_short = trj[self.start_frame:self.start_frame + trj.n_frames:stride]
print "Obtaining a superconfiguration of all water molecules found in the binding site throught the trajectory."
binding_site_waters = md.compute_neighbors(trj_short, 0.50, binding_site_atom_indices, haystack_indices=self.wat_oxygen_atom_ids)
# generate a list of all waters with their frame ids
water_id_frame_list = [(i, nbr) for i in range(len(binding_site_waters)) for nbr in binding_site_waters[i]]
# Set up clustering loop
print "Performing clustering on the superconfiguration."
cutoff = trj_short.n_frames * 2 * 0.1401
if np.ceil(cutoff) - cutoff <= 0.5:
cutoff = np.ceil(cutoff)
else:
cutoff = np.floor(cutoff)
n_wat = 3 * cutoff
cluster_list = []
cluster_iter = 0
sphere_radius = 1.0
# Build KDTree and get initial neighbor count for all waters
water_coordinates = np.ma.array([trj_short.xyz[wat[0], wat[1], :] for wat in water_id_frame_list], mask=False)*10.0
tree = spatial.cKDTree(water_coordinates)
nbr_list = tree.query_ball_point(water_coordinates, sphere_radius)
nbr_count_list = np.ma.array([len(nbrs) for nbrs in nbr_list], mask=False)
# Clustering loop
initial_time = time.time()
while n_wat > cutoff:
cluster_iter += 1
# get water with max nbrs and retrieve its nbrs, which are later marked for exclusion
print "\tCluster iteration: ", cluster_iter
max_index = np.argmax(nbr_count_list)
to_exclude = np.array(nbr_list[max_index])
# set current water count to current neighbors plus one for the water itself
n_wat = len(to_exclude) + 1
cluster_list.append(water_id_frame_list[max_index])
# Mask current water and its nbrs so that they are not considered in the next iteration
nbr_count_list.mask[to_exclude] = True
nbr_count_list.mask[max_index] = True
# Create a version of water coordinates, after deleting the coordinates of waters set for exclsuion
updated_wat_coords = np.delete(water_coordinates, to_exclude, 0)
updated_wat_coords = np.delete(updated_wat_coords, max_index, 0)
# For each members of this cluster, get its neighbors, create a unique set out of this list
nbrs_of_to_exclude = np.unique(np.array([n_excluded for excluded_nbrs in nbr_list[to_exclude] for n_excluded in excluded_nbrs]))
# Remove original members of the cluster from this list to get the final list of waters to update
to_update = np.setdiff1d(nbrs_of_to_exclude, to_exclude)
to_update = np.setdiff1d(to_update, np.asarray(max_index))
# Update the neighbor count for each water in the update list
tree = spatial.cKDTree(updated_wat_coords)
updated_nbr_list = tree.query_ball_point(water_coordinates[to_update], sphere_radius)
# for each updated member, get its original index and update the original neighbor search list
for index, nbrs in enumerate(updated_nbr_list):
if not nbr_count_list.mask[to_update[index]]:
nbr_count_list[to_update[index]] = len(nbrs)
init_cluster_coords = [trj_short.xyz[cluster[0], cluster[1], :]*10.0 for cluster in cluster_list]
print "Initial number of clusters: ", len(init_cluster_coords)
with open("initial_clustercenterfile.pdb", "w") as f:
header = "REMARK Initial number of clusters: {0}\n".format(len(init_cluster_coords))
f.write(header)
for cluster in init_cluster_coords:
pdb_line = "ATOM 1 O WAT A 1 {0[0]:2.3f} {0[1]:2.3f} {0[2]:2.3f} 0.00 0.00\n".format(cluster)
f.write(pdb_line)
print "Refining initial cluster positions by considering %d frames." % trj.n_frames
binding_site_waters = md.compute_neighbors(trj, 0.50, binding_site_atom_indices, haystack_indices=self.wat_oxygen_atom_ids)
water_id_frame_list = [(i, nbr) for i in range(len(binding_site_waters)) for nbr in binding_site_waters[i]]
water_coordinates = np.array([trj.xyz[wat[0], wat[1], :] for wat in water_id_frame_list])*10.0
tree = spatial.cKDTree(water_coordinates)
nbr_list = tree.query_ball_point(init_cluster_coords, sphere_radius)
final_cluster_coords = []
cutoff = int(trj.n_frames * 2 * 0.1401)
if np.ceil(cutoff) - cutoff <= 0.5:
cutoff = np.ceil(cutoff)
else:
cutoff = np.floor(cutoff)
# for each cluster, set cluster center equal to geometric center of all waters in the cluster
site_waters = []
for cluster in nbr_list:
cluster_water_coords = water_coordinates[cluster]
if len(cluster) > cutoff:
waters = [water_id_frame_list[water] for water in cluster]
site_waters.append(waters)
com = np.zeros(3)
masses = np.ones(cluster_water_coords.shape[0])
masses /= masses.sum()
com[:] = water_coordinates[cluster].T.dot(masses)
cluster_center = com[:]
final_cluster_coords.append(cluster_center)
with open("clustercenterfile.pdb", "w") as f:
header = "REMARK Initial number of clusters: {0}\n".format(
len(final_cluster_coords))
f.write(header)
for cluster in final_cluster_coords:
pdb_line = "ATOM 1 O WAT A 1 {0[0]:2.3f} {0[1]:2.3f} {0[2]:2.3f} 0.00 0.00\n".format(cluster)
f.write(pdb_line)
print "Final number of clusters: ", len(final_cluster_coords)
return np.asarray(final_cluster_coords), site_waters
@function_timer
def calculate_site_quantities(self, energy=True, hbonds=True, entropy=True):
'''
TODO: replace TIP3P nbr count constant with variable that depends on water model
'''
pbar = ProgressBar(widgets=[Percentage(), Bar(), ETA()], maxval=self.start_frame + self.num_frames).start()
for frame_i in xrange(self.start_frame, self.start_frame + self.num_frames):
frame = md.load_frame(self.trajectory, frame_i, top=self.topology)
pos = md.utils.in_units_of(frame.xyz, "nanometers", "angstroms")
pbc = md.utils.in_units_of(frame.unitcell_lengths, "nanometers", "angstroms")
oxygen_pos = pos[0, self.wat_oxygen_atom_ids, :]
cluster_search_space = NeighborSearch(oxygen_pos, 1.0)
water_search_space = NeighborSearch(oxygen_pos, 3.5)
for site_i in xrange(self.hsa_data.shape[0]):
wat_O = None
if self.site_waters is not None:
if len(self.site_waters[site_i]) != 0:
if self.site_waters[site_i][0][0] == frame_i:
wat_O = self.site_waters[site_i].pop(0)[1]
self.hsa_data[site_i, 4] += 1
else:
cluster_center_coords = (self.hsa_data[site_i, 1], self.hsa_data[site_i, 2], self.hsa_data[site_i, 3])
nbr_indices = cluster_search_space.query_nbrs_single_point(cluster_center_coords)
cluster_wat_oxygens = [self.wat_oxygen_atom_ids[nbr_index] for nbr_index in nbr_indices]
if len(cluster_wat_oxygens) != 0:
wat_O = cluster_wat_oxygens[0]
self.hsa_data[site_i, 4] += 1
if wat_O is not None and (energy or hbonds):
distance_matrix = np.zeros((self.water_sites, self.all_atom_ids.shape[0]), np.float_)
calc.get_pairwise_distances(np.asarray([site_i, wat_O]), self.all_atom_ids, pos, pbc, distance_matrix)
wat_nbrs = self.wat_oxygen_atom_ids[np.where((distance_matrix[0, :][self.wat_oxygen_atom_ids] <= 3.5) & (distance_matrix[0, :][self.wat_oxygen_atom_ids] > 0.0))]
prot_nbrs = self.non_water_atom_ids[np.where(distance_matrix[0, :][self.non_water_atom_ids] <= 3.5)]
prot_nbrs = np.asarray([prot_nbr for prot_nbr in prot_nbrs if self.topology.atom(prot_nbr).name[0] not in ["C", "H"]])
self.hsa_dict[site_i][17].append(wat_nbrs.shape[0])
#TODO: 5.25 should be replaced with a variable
self.hsa_dict[site_i][22].append(1.0 - (wat_nbrs.shape[0]/5.25))
if energy:
energy_lj, energy_elec = self.calculate_energy(distance_matrix)
e_lj_sw = np.sum(energy_lj[:self.wat_oxygen_atom_ids[0]:])
e_elec_sw = np.sum(energy_elec[:, self.non_water_atom_ids])
e_lj_ww = np.nansum(energy_lj[self.wat_oxygen_atom_ids[0]:])
e_elec_ww = np.sum(energy_elec[:, self.wat_atom_ids[0]:wat_O]) + np.sum(energy_elec[:, wat_O + self.water_sites:])
self.hsa_dict[site_i][7].append(e_lj_sw)
self.hsa_dict[site_i][8].append(e_elec_sw)
self.hsa_dict[site_i][10].append(e_lj_ww)
self.hsa_dict[site_i][11].append(e_elec_ww)
self.hsa_dict[site_i][6].append(e_lj_sw + e_elec_sw)
self.hsa_dict[site_i][9].append(e_lj_ww + e_elec_ww)
self.hsa_dict[site_i][12].append(e_lj_sw + e_elec_sw + e_lj_ww + e_elec_ww)
#print "Solute-water LJ Energy of this water: ", e_lj_sw
#print "Solute-water Elec Energy of this water: ", e_elec_sw
#print "Water-water LJ Energy of this water: ", e_lj_ww
#print "Water-water Elec Energy of this water: ", e_elec_ww
# calc Enbr and other water structure metrics here
for nbr_i in wat_nbrs:
e_nbr_i = 0.0
e_nbr_i += energy_lj[self.wat_oxygen_atom_ids[0]:][(nbr_i - self.wat_oxygen_atom_ids[0])/3]
for i in xrange(self.water_sites):
e_nbr_i += np.sum(energy_elec[:, nbr_i + i])
self.hsa_dict[site_i][13].append(e_nbr_i)
if hbonds:
hb_ww, hb_sw = self.calculate_hydrogen_bonds(frame, wat_O, wat_nbrs, prot_nbrs)
acc_ww = hb_ww[:, 0][np.where(hb_ww[:, 0] == wat_O)].shape[0]
don_ww = hb_ww.shape[0] - acc_ww
acc_sw = hb_sw[:, 0][np.where(hb_sw[:, 0] == wat_O)].shape[0]
don_sw = hb_sw.shape[0] - acc_sw
# FIXME: Spurious atom names showing up in summary
don_sw_ids = hb_sw[:, 1][np.where(hb_sw[:, 0] == wat_O)]
acc_sw_ids = hb_sw[:, 0][np.where(hb_sw[:, 0] != wat_O)]
self.hsa_dict[site_i][18].append(hb_ww.shape[0])
self.hsa_dict[site_i][19].append(hb_sw.shape[0])
self.hsa_dict[site_i][20].append(hb_ww.shape[0] + hb_sw.shape[0])
self.hsa_dict[site_i][23].append(acc_ww)
self.hsa_dict[site_i][24].append(don_ww)
self.hsa_dict[site_i][25].append(acc_sw)
self.hsa_dict[site_i][26].append(don_sw)
self.hsa_dict[site_i][27].extend(acc_sw_ids)
self.hsa_dict[site_i][28].extend(don_sw_ids)
if wat_nbrs.shape[0]!= 0 and hb_ww.shape[0] != 0:
self.hsa_dict[site_i][28].append(wat_nbrs.shape[0]/hb_ww.shape[0])
pbar.update(frame_i)
pbar.finish()
# normalize site quantities
print "Start Normalization"
sphere_volume = (4 / 3) * np.pi
bulk_water_per_site = self.rho_bulk * sphere_volume * self.num_frames
skip_normalization = ["index", "x", "y", "z", "nwat", "occupancy", "gO",
"TSsw", "TSww", "TStot", "solute_acceptors", "solute_donors"]
for site_i in xrange(self.hsa_data.shape[0]):
n_wat = self.hsa_data[site_i, 4]
if n_wat != 0:
self.hsa_data[site_i, 5] = n_wat/(self.start_frame + self.num_frames)
for quantity_i in xrange(len(self.data_titles)):
if self.data_titles[quantity_i] not in skip_normalization:
if self.data_titles[quantity_i] in ["Esw", "EswLJ", "EswElec", "Eww", "EwwLJ", "EwwElec", "Etot"]:
self.hsa_data[site_i, quantity_i] = (np.sum(self.hsa_dict[site_i][quantity_i])/n_wat)*0.5
elif self.data_titles[quantity_i] in ["Ewwnbr"]:
self.hsa_data[site_i, quantity_i] = (np.sum(self.hsa_dict[site_i][quantity_i])/len(self.hsa_dict[site_i][quantity_i]))*0.5
else:
self.hsa_data[site_i, quantity_i] = np.sum(self.hsa_dict[site_i][quantity_i])/n_wat
if self.data_titles[quantity_i] in ["solute_acceptors", "solute_donors"]:
self.hsa_dict[site_i][quantity_i] = np.unique(self.hsa_dict[site_i][quantity_i])
print "End Normalization"
def calculate_angular_structure(self, dist_cutoff=6.0, site_indices=[]):
'''
Returns energetic quantities for each hydration site
'''
pbar = ProgressBar(widgets=[Percentage(), Bar(), ETA()], maxval=self.start_frame + self.num_frames).start()
angular_strucure_data = [[] for site in self.hsa_data]
for i in xrange(self.start_frame, self.start_frame + self.num_frames):
# print "Processing frame: ", i+1, "..."
frame = md.load_frame(self.trajectory, i, top=self.topology)
pos = frame.xyz[0, :, :] * 10.0
pbc = frame.unitcell_lengths[0] * 10.0
#pos = self.trj[i].xyz[0,:,:]*10.0
# obtain coords of O-atoms
oxygen_pos = pos[self.wat_oxygen_atom_ids]
cluster_search_space = NeighborSearch(oxygen_pos, 1.0)
water_search_space = NeighborSearch(oxygen_pos, dist_cutoff)
for site_i, site_data in enumerate(self.hsa_data):
#cluster_center_coords = (site_data["x"], site_data["y"], site_data["z"])
cluster_center_coords = (site_data[1][0], site_data[1][1], site_data[1][2])
nbr_indices = cluster_search_space.query_nbrs_single_point(cluster_center_coords)
cluster_wat_oxygens = [self.wat_oxygen_atom_ids[nbr_index] for nbr_index in nbr_indices]
# begin iterating over water oxygens found in this cluster in
# current frame
for wat_O in cluster_wat_oxygens:
cluster_water_all_atoms = self.topology.select("resid " + str(self.topology.atom(wat_O).residue.index))
nbr_indices = water_search_space.query_point_and_distance(pos[wat_O])
firstshell_wat_oxygens = [self.wat_oxygen_atom_ids[nbr_index[0]] for nbr_index in nbr_indices]
nbr_dist = [nbr_index[1] for nbr_index in nbr_indices]
#nbr_angle = []
if len(firstshell_wat_oxygens) != 0:
nbr_water_residues = [self.topology.select("resid " + str(self.topology.atom(
nbr_wat_O).residue.index)) for nbr_wat_O in firstshell_wat_oxygens]
for idx, water_res in enumerate(nbr_water_residues):
hbangle_combinations = np.asarray([[wat_O, water_res[0], water_res[1]], [wat_O, water_res[0],
water_res[2]], [water_res[0], wat_O, wat_O + 1], [water_res[0], wat_O, wat_O + 2]])
theta_list = md.compute_angles(frame, hbangle_combinations) * DEG_PER_RAD
#nbr_angle.append(np.min(theta_list)) # min angle is a potential Hbond
angular_strucure_data[site_i].append([nbr_dist[idx], np.min(theta_list)])
pbar.update(i + 1)
pbar.finish()
cwd = os.getcwd()
# create directory to store detailed data for individual columns in HSA
directory = cwd + "/" + self.prefix + "_angular_data"
if not os.path.exists(directory):
os.makedirs(directory)
os.chdir(directory)
for site_i, data in enumerate(angular_strucure_data):
with open("%03d_r_theta.txt" % site_i, "w") as f:
for d in data:
line = "{0[0]:.3f} {0[1]:.3f}\n".format(d)
f.write(line)
def calculate_lonranged_ww_energy(self, site_indices=[], shell_radii=[3.5, 5.5, 8.5]):
if len(site_indices) == 0:
site_indices = [int(i) for i in self.hsa_data[:, 0]]
else:
for index in site_indices:
if index > self.hsa_data[:, 0][-1]:
sys.exit("Site %d does not exits, please provide valid site indices." % index)
n_sites = len(site_indices)
n_columns = 2*len(shell_radii)
shells = [(0.0, shell_radii[0])]
for i in xrange(1, len(shell_radii)):
shells.append((shell_radii[i - 1], shell_radii[i]))
shells.append((shell_radii[-1], 100.0))
longranged_data = np.zeros((n_sites, 2*len(shells)))
pbar = ProgressBar(widgets=[Percentage(), Bar(), ETA()], maxval=self.start_frame + self.num_frames).start()
for frame_i in xrange(self.start_frame, self.start_frame + self.num_frames):
frame = md.load_frame(self.trajectory, frame_i, top=self.topology)
pos = md.utils.in_units_of(frame.xyz, "nanometers", "angstroms")
pbc = md.utils.in_units_of(frame.unitcell_lengths, "nanometers", "angstroms")
oxygen_pos = pos[0, self.wat_oxygen_atom_ids, :]
cluster_search_space = NeighborSearch(oxygen_pos, 1.0)
water_search_space = NeighborSearch(oxygen_pos, 3.5)
for index, site_i in enumerate(site_indices):
wat_O = None
if self.site_waters is not None:
if len(self.site_waters[site_i]) != 0:
if self.site_waters[site_i][0][0] == frame_i:
wat_O = self.site_waters[site_i].pop(0)[1]
self.hsa_data[site_i, 4] += 1
else:
cluster_center_coords = (self.hsa_data[site_i, 1], self.hsa_data[site_i, 2], self.hsa_data[site_i, 3])
nbr_indices = cluster_search_space.query_nbrs_single_point(cluster_center_coords)
cluster_wat_oxygens = [self.wat_oxygen_atom_ids[nbr_index] for nbr_index in nbr_indices]
if len(cluster_wat_oxygens) != 0:
wat_O = cluster_wat_oxygens[0]
self.hsa_data[site_i, 4] += 1
if wat_O is not None:
distance_matrix = np.zeros((self.water_sites, self.all_atom_ids.shape[0]), np.float_)
calc.get_pairwise_distances(np.asarray([site_i, wat_O]), self.all_atom_ids, pos, pbc, distance_matrix)
energy_lj, energy_elec = self.calculate_energy(distance_matrix)
for shell_index, shell in enumerate(shells):
wat_nbrs = self.wat_oxygen_atom_ids[np.where((distance_matrix[0, :][self.wat_oxygen_atom_ids] <= shell[1]) & (distance_matrix[0, :][self.wat_oxygen_atom_ids] > shell[0]))]
longranged_data[index, 2*shell_index] += wat_nbrs.shape[0]
for i in xrange(self.water_sites):
longranged_data[index, (2*shell_index) + 1] += np.sum(energy_elec[:, wat_nbrs + i])
pbar.update(frame_i)
pbar.finish()
# normalize site quantities
for index, site_i in enumerate(site_indices):
n_wat = self.hsa_data[site_i, 4]
if n_wat != 0:
longranged_data[index, :] /= n_wat*2.0
# write data
with open(self.prefix + "_longrange_Eww_summary.txt", "w") as f:
header = "index "
formatted_output = "{0:.0f} "
for shell_index, shell in enumerate(shells):
if shell_index + 1 == len(shells):
header += "Nnbr_>" + str(shell_index) + " E_shell_>" + str(shell_index) + "\n"
formatted_output += "{1[%d]:.6f} {1[%d]:.6f}\n" % (2*shell_index, (2*shell_index) + 1)
else:
header += "Nnbr_" + str(shell_index + 1) + " E_shell_" + str(shell_index + 1) + " "
formatted_output += "{1[%d]:.6f} {1[%d]:.6f} " % (2*shell_index, (2*shell_index) + 1)
f.write(header)
for index, site_i in enumerate(site_indices):
n_wat = self.hsa_data[site_i, 4]
site_data_line = formatted_output.format(self.hsa_data[site_i, 0], longranged_data[index, :])
f.write(site_data_line)
def print_system_summary(self):
print "System information:"
print "\tParameter file: %s\n" % self.topology_file
print "\tTrajectory: %s\n" % self.trajectory
print "\tNumber of clusters: %d\n" % len(self.hsa_data)
print "\tPeriodic Box: %s\n" % self.box_type
print "\tFrames: %d, Total Atoms: %d, Waters: %d, Solute Atoms: %d\n" \
% (self.num_frames, self.all_atom_ids.shape[0], self.wat_oxygen_atom_ids.shape[0], self.non_water_atom_ids.shape[0])
print "\tH-bond Donors: %d, H-bond Acceptors: %d, H-bond Donors & Acceptors: %d\n" \
% (self.solute_don_ids.shape[0], self.solute_acc_ids.shape[0], self.solute_acc_don_ids.shape[0])
@function_timer
def write_summary(self):
with open(self.prefix + "_hsa_summary.txt", "w") as f:
header = " ".join(self.data_titles) + "\n"
f.write(header)
# format first six columns
formatted_output = "{0[0]:.0f} {0[1]:.2f} {0[2]:.2f} {0[3]:.2f} {0[4]:.0f} {0[5]:.2f} "
# format site energetic, entropic and structural data
for quantity_i in xrange(6, len(self.data_titles) - 2):
formatted_output += "{0[%d]:.6f} " % quantity_i
# format solute acceptors and donors
formatted_output += "{1} {2}\n"
for site_i in xrange(self.hsa_data.shape[0]):
solute_acceptors = [str(self.topology.atom(acceptor)) for acceptor in self.hsa_dict[site_i][27]]
solute_donors = [str(self.topology.atom(donor)) for donor in self.hsa_dict[site_i][28]]
site_data_line = formatted_output.format(self.hsa_data[site_i, :],
",".join(solute_acceptors),
",".join(solute_donors))
f.write(site_data_line)
@function_timer
def write_data(self):
"""
TODO: output energy quantities in half
"""
skip_write_data = ["x", "y", "z", "nwat", "occupancy", "gO",
"TSsw", "TSww", "TStot", "solute_acceptors", "solute_donors"]
cwd = os.getcwd()
# create directory to store detailed data for individual columns in HSA
directory = cwd + "/" + self.prefix + "_hsa_data"
if not os.path.exists(directory):
os.makedirs(directory)
os.chdir(directory)
# for each cluster, go through time series data
for site_i in xrange(self.hsa_data.shape[0]):
site_index = "%03d_" % site_i
for quantity_i in xrange(len(self.data_titles)):
if self.data_titles[quantity_i] not in skip_write_data and len(
self.hsa_dict[site_i][quantity_i]) != 0:
data_file_name = site_index + self.prefix + "_" + self.data_titles[quantity_i] + ".txt"
with open(data_file_name, "w") as data_file:
data_file.writelines("%s\n" % item for item in self.hsa_dict[site_i][quantity_i])
os.chdir("../")
def plot_enbr_distribution(site_data, data_dir, sites=[], nbr_norm = False):
"""
generate an Enbr plot for an arbitrary list of sites. First site should be the reference system.
sites: a list of keys which represent site labels
data: a dictionary of sites
x_values: data points on x-axis
nbr_norm: Normalize by number of neighbors
outname: name of output file
"""
colors = ["green", "blue", "red", "orange"]
for k in data.keys():
"generating Enbr plot for: ", k
y_values = data[k][0]*data[k][1]
#for val in range(len(x_values)):
# if x_values[val] <= -3.26:
# y_values[val] = 0.0
fig, ax = plt.subplots(1)
fig.set_size_inches(3, 3)
# set some basic settings for fonts and rendering
# set x and y axes limits, hard coded for now
plt.xlim(-4.0, 2.5)
plt.ylim(0.0, 1.0)
# set x and y axes label titles and font sizes
x_label = r'$\mathit{E_{n} (kcal/mol)}$'
y_label = r'$\mathit{\rho(E_{n})}$'
# set some features for the tick marks and labels
ax.xaxis.set_major_formatter(ticker.FormatStrFormatter('%0.1f'))
ax.yaxis.set_major_formatter(ticker.FormatStrFormatter('%0.1f'))
start, end = ax.get_xlim()
ax.xaxis.set_ticks(np.arange(start, end, 2.0))
#start, end = ax.get_ylim()
#ax.yaxis.set_ticks(np.arange(start, end, 0.1))
if nbr_norm:
y_label = r'$\mathit{\rho(E_{n})/N^{nbr}}$'
plt.ylim(0.0, 0.5)
start, end = ax.get_ylim()
ax.yaxis.set_ticks(np.arange(start, end, 0.1))
ax.set_xlabel(x_label, size=14)
ax.set_ylabel(y_label, size=14)
ax.spines["right"].set_visible(False)
ax.spines["top"].set_visible(False)
plt.minorticks_on()
plt.tick_params(which='major', width=1, length=4, direction='in')
plt.tick_params(which='minor', width=1, length=2, direction='in')
plt.tick_params(axis='x', labelsize=12)
plt.tick_params(axis='y', labelsize=12)
ax.yaxis.tick_left()
ax.xaxis.tick_bottom()
# set legend locations
# save figure with optimal settings
plt.tight_layout()
#plt.plot(x_1, y_1, antialiased=True, linewidth=1.0, color="red", label="Methane")
plt.plot(x_values, y_values, antialiased=True, linewidth=1.0, color="red", label=k)
plt.legend(loc='upper right', prop={'size':10}, frameon=False)
plt.savefig(save_dir + "/" + k + "_Enbr_plot.png", dpi=300)
plt.close()
"""
set sites to all sites if sites==[] (take all sites from site_data)
for each site in sites
get enbr_data into an array
construct kernel density estimate
get a range of x_values
evaluate density at x_values
set plot formatting
plot data
save plot
"""
#site_data, data_dir, sites=[],
def plot_rtheta_distribution(rtheta_file, site_data, legend_label, nwat):
integ_counts = 16.3624445886
#integ_counts = 22560
fig = plt.figure()
ax = fig.gca(projection='3d')
theta_data = np.loadtxt(theta_file)
r_data = np.loadtxt(r_file)
Nnbr = len(r_data)/nwat
print nwat, Nnbr
# generate index matrices
X, Y = np.mgrid[0:130:131j, 2.0:5.0:31j]
# generate kernel density estimates
values = np.vstack([theta_data, r_data])
kernel = stats.gaussian_kde(values)
positions = np.vstack([X.ravel(), Y.ravel()])
Z = np.reshape(kernel(positions).T, X.shape)
Z *= integ_counts*0.1
#Z /= integ_counts
sum_counts_kernel = 0
#print kernel.n
# correct Z
for i in xrange(0, Y.shape[1]):
d = Y[0,i]
# get shell_vol
d_low = d - 0.1
vol = (4.0/3.0)*np.pi*(d**3)
vol_low = (4.0/3.0)*np.pi*(d_low**3)
shell_vol = vol - vol_low
counts_bulk = 0.0329*shell_vol
sum_counts_kernel += np.sum(Z[:,i])
#Z[:,i] /= counts_bulk
Z[:,i] = Z[:,i]/counts_bulk
print sum_counts_kernel
ax.plot_surface(X, Y, Z, rstride=1, cstride=1, linewidth=0.5, antialiased=True, alpha=1.0, cmap=cm.coolwarm, label=legend_label)
x_label = r"$\theta^\circ$"
y_label = r"$r (\AA)$"
ax.set_xlabel(x_label)
ax.set_xlim(0, 130)
ax.set_ylabel(y_label)
ax.set_ylim(2.0, 5.0)
z_label = r'$\mathrm{P(\theta, \AA)}$'
ax.set_zlabel(z_label)
ax.legend(legend_label, loc='upper left', prop={'size':6})
ax.set_zlim(0.0, 0.15)
plt.savefig(legend_label + ".png", dpi=300)
plt.close()
def read_hsa_summary(hsa_data_file):
'''
Returns a dictionary with hydration site index as keys and a list of various attributes as values.
Parameters
----------
hsa_data_file : string
Text file containing
Returns
-------
'''
f = open(hsa_data_file, 'r')
data = f.readlines()
hsa_header = data[0]
data_keys = hsa_header.strip("\n").split()
hsa_data = {}
for l in data[1:]:
float_converted_data = [float(x) for x in l.strip("\n").split()[1:27]]
hsa_data[int(l.strip("\n").split()[0])] = float_converted_data
f.close()
return hsa_data
def main():
test_1 = False
if test_1:
hsa = SiteWaterAnalysis("/Users/kamranhaider/arg_gist_calcs/mdtraj/arg.prmtop", "/Users/kamranhaider/arg_gist_calcs/mdtraj/md10ns.ncdf", start_frame=0, num_frames=1000, ligand_file="/Users/kamranhaider/arg_gist_calcs/mdtraj/ligand_arg.pdb", prefix="test_1")
hsa.print_system_summary()
hsa.calculate_site_quantities()
hsa.write_summary()
hsa.write_data()
else:
hsa = SiteWaterAnalysis("/Users/kamranhaider/arg_gist_calcs/mdtraj/arg.prmtop", "/Users/kamranhaider/arg_gist_calcs/mdtraj/md10ns.ncdf", start_frame=0, num_frames=1000, cluster_center_file="/Users/kamranhaider/arg_gist_calcs/03_organizewaters/clustercenterfile.pdb", prefix="test_2")
hsa.print_system_summary()
#hsa.calculate_lonranged_ww_energy([1, 3])
hsa.calculate_site_quantities()
hsa.write_summary()
hsa.write_data()
def entry_point():
main()
if __name__ == '__main__':
entry_point()