# Calculate Solvent Entropy for a capped amino acid (Solvent)

## Load Data

1. Load data into MDAnalysis Universe

In [1]:
import MDAnalysis as mda
# set the working dir to the root of repo inorder to use these path
tprfile = "data/molecules.prmtop"
trrfile = "data/data.trr"
u = mda.Universe(tprfile, trrfile)

In [2]:
import dask
import dask.multiprocessing
dask.config.set(scheduler='multiprocessing')
from dask.distributed import Client, progress
client = Client()
client

2022-07-27 07:35:14,977 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-lyz47d0p', purging


0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 4
Total threads: 16,Total memory: 7.70 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:34737,Workers: 4
Dashboard: http://127.0.0.1:8787/status,Total threads: 16
Started: Just now,Total memory: 7.70 GiB

0,1
Comm: tcp://127.0.0.1:45771,Total threads: 4
Dashboard: http://127.0.0.1:42899/status,Memory: 1.92 GiB
Nanny: tcp://127.0.0.1:41617,
Local directory: /tmp/dask-worker-space/worker-2ddr2vpy,Local directory: /tmp/dask-worker-space/worker-2ddr2vpy

0,1
Comm: tcp://127.0.0.1:38855,Total threads: 4
Dashboard: http://127.0.0.1:40223/status,Memory: 1.92 GiB
Nanny: tcp://127.0.0.1:41809,
Local directory: /tmp/dask-worker-space/worker-v_zyqpab,Local directory: /tmp/dask-worker-space/worker-v_zyqpab

0,1
Comm: tcp://127.0.0.1:46257,Total threads: 4
Dashboard: http://127.0.0.1:35559/status,Memory: 1.92 GiB
Nanny: tcp://127.0.0.1:43237,
Local directory: /tmp/dask-worker-space/worker-yor9ctjy,Local directory: /tmp/dask-worker-space/worker-yor9ctjy

0,1
Comm: tcp://127.0.0.1:41335,Total threads: 4
Dashboard: http://127.0.0.1:46301/status,Memory: 1.92 GiB
Nanny: tcp://127.0.0.1:35259,
Local directory: /tmp/dask-worker-space/worker-sl9eby0f,Local directory: /tmp/dask-worker-space/worker-sl9eby0f


In [3]:
import sys
import logging
import gc

from collections import Counter
from datetime import datetime

from joblib import delayed

from CodeEntropy.poseidon.extractData.readFiles import populateTopology, getCoordsForces, getDistArray
# # Energy is not needed
# from CodeEntropy.poseidon.extractData.readFiles import populateEnergy, UAEnergyGroup
from CodeEntropy.poseidon.extractData.HBRAD import distCutoffNc, UALevelRAD, HBCalc
from CodeEntropy.poseidon.extractData.forceTorques import calculateFTMatrix
from CodeEntropy.poseidon.extractData.dihedrals import calculateDihedrals
from CodeEntropy.poseidon.extractData.nearestNonlike2 import getShellAssignment, moleculePositionRankingRAD 
from CodeEntropy.poseidon.extractData.outputFiles import moleculeObjectPopulation
from CodeEntropy.poseidon.extractData.mainClass import clearClass

from CodeEntropy.poseidon.analysis.populateClasses import classPopulation
from CodeEntropy.poseidon.analysis.EECalculation import processEE
from CodeEntropy.poseidon.analysis.helper import memoryInfo, weightingPopulation

from CodeEntropy.FunctionCollection.PoseidonHelper import frame_iteration

In [4]:
# def load(container, start=0, end=-1, 
#     step=1, pureAtomNum=1, cutShell=None, 
#     excludedResnames=None,
#     water='WAT', verbose=False, thread=4):
#     """
#     Warning!!!

#         This requires a lot of memory to run with very little performance gain.

#     This is a initialization function to collect information from a MDanalysis universe into a data container for analysis using POSEIDON.

#     Parameters
#     ----------
#     container : MDAnalyse.Universe
#         A Universe object will all topology, dihedrals,coordinates and force information Check ``Example/create_new_universe.py`` on how to create a universe from existing data.
#     start : int or None, Optional, default: 0
#         Frame id to start analysis.
#     end : int or None, Optional, default: -1
#         Frame id to end analysis.
#     step : int, Optional, default: 1
#         Steps between frame.
#     pureAtomNum : int, Optional, default: 1
#         Reference molecule resid for pure liquid.
#     cutShell : float or None, Optional, default: 1
#         Explicit cut off shell. Default to None which uses the relative angular distance (RAD) algorithm. See Jonathan Higham and Richard H. Henchman , "Locally adaptive method to define coordination shell", J. Chem. Phys. 145, 084108 (2016)
#     excludedResnames : list of str or None, Optional, default: None
#         List of resnames to exclude from nearest non alike analysis.
#     water : list of str, Optional, default: "WAT"
#         Resname for water molecules.
#     verbose : bool, Optional, default: False
#         Print out progress of each step.
#     thread : int, Optional, default: 4
#         How many multiprocess thread to spawn

#     """
container=u
start=0
end=10
step=1
pureAtomNum=1
cutShell=None
excludedResnames=None,
water='WAT'
verbose=True
thread=4
startTime = datetime.now()
print(startTime)
verbosePrint = print if verbose else lambda *a, **k: None

waterTuple = ('SOL', 'WAT', 'HOH', 'TIP3') #needed for pdb as top file 
if water != 'WAT':
    waterTuple = (water)

iterations = 0

all_data = []

populateTopology(container, all_data, waterTuple)
verbosePrint('TOPOLOGY')
verbosePrint(datetime.now() - startTime)
sys.stdout.flush()


resids = Counter([(i.resname) for i in all_data])
verbosePrint(resids.keys())
if len(resids.keys()) == 1:
    verbosePrint('Pure system with reference ID: %s' % (pureAtomNum))

if excludedResnames != None:
    verbosePrint('EXCLUDED RESNAMES: %s' % (excludedResnames))


dimensions = None
allMoleculeList = []

#fix end frame
if end == -1:
    end = len(container.trajectory)
sys.setrecursionlimit(3000000)
job_list = []
for frame in range(int(start), int(end), int(step)):
    job_list.append(dask.delayed(frame_iteration(
        container, 
        all_data, 
        dimensions, 
        startTime, 
        verbosePrint, 
        waterTuple, 
        cutShell, 
        excludedResnames,
        frame)))
data = dask.compute(*job_list)

print("Done Threading Work")
#print(data)
# data = []
# for frame in range(int(start), int(end), int(step)):
#     data.append(fi_partial(frame))

print(len(data))
for item in data:
    allMoleculeList = moleculeObjectPopulation(item[0], 
            allMoleculeList, item[1], item[2])
# #writing file here 
# with gzip.GzipFile('moleculeListAll.obj', 'wb') as pickleFile:
#     pickle.dump((allMoleculeList), pickleFile, protocol=2)
#     pickleFile.close()

print(datetime.now() - startTime)




2022-07-27 07:35:42.869084
TOPOLOGY
0:00:00.801938
dict_keys(['ACE', 'ARG', 'NME', 'WAT'])
EXCLUDED RESNAMES: None
frame = 0
< Timestep 0 with unit cell dimensions [30.67833  30.40052  29.490604 90.       90.       90.      ] >
COORDS-FORCES
0:00:00.852477
DIH
0:00:00.859203
NEAREST ARRAYS
0:00:01.184122
RAD
0:00:01.840073
HB
0:00:02.094277
NEAREST NON-LIKE ASSIGNMENT
0:00:02.096734
PROX
0:00:02.098553
FTMATRIX
0:00:02.535373
ORIENTS
0:00:02.539700
frame = 1
< Timestep 1 with unit cell dimensions [30.682182 30.404337 29.494312 90.       90.       90.      ] >
COORDS-FORCES
0:00:02.592686
DIH
0:00:02.599517
NEAREST ARRAYS
0:00:02.887840
RAD
0:00:03.648667
HB
0:00:03.896246
NEAREST NON-LIKE ASSIGNMENT
0:00:03.899294
PROX
0:00:03.901879
FTMATRIX
0:00:04.350654
ORIENTS
0:00:04.354834
frame = 2
< Timestep 2 with unit cell dimensions [30.713255 30.435127 29.524178 90.       90.       90.      ] >
COORDS-FORCES
0:00:04.396243
DIH
0:00:04.402641
NEAREST ARRAYS
0:00:04.695812
RAD
0:00:05.377131

  ([[0 H1 1.008 0.11230000108480453 1 ACE [1], 1 CH3 ... ype=float32)],)
Consider scattering large objects ahead of time
with client.scatter to reduce scheduler burden and 
keep data on workers

    future = client.submit(func, big_data)    # bad

    big_future = client.scatter(big_data)     # good
    future = client.submit(func, big_future)  # good


Done Threading Work
10
0:00:33.516714


In [11]:
job_list[0]

Delayed('tuple-ffd34808-f68c-40c6-ad98-900c62642712')

2. Load data into POSEIDON object

In [5]:
from CodeEntropy.ClassCollection.PoseidonClass import Poseidon
startTime = datetime.now()
poseidon_object = Poseidon(container=u, start=0, end=10)
print(datetime.now() - startTime)

2022-07-26 19:28:24.507828
frame = 0
< Timestep 0 with unit cell dimensions [30.67833  30.40052  29.490604 90.       90.       90.      ] >
0:00:02.192713
frame = 1
< Timestep 1 with unit cell dimensions [30.682182 30.404337 29.494312 90.       90.       90.      ] >
0:00:03.918281
frame = 2
< Timestep 2 with unit cell dimensions [30.713255 30.435127 29.524178 90.       90.       90.      ] >
0:00:05.502399
frame = 3
< Timestep 3 with unit cell dimensions [30.598316 30.321232 29.41369  90.       90.       90.      ] >
0:00:06.949409
frame = 4
< Timestep 4 with unit cell dimensions [30.618622 30.341354 29.43321  90.       90.       90.      ] >
0:00:08.473875
frame = 5
< Timestep 5 with unit cell dimensions [30.619095 30.341824 29.433666 90.       90.       90.      ] >
0:00:10.128160
frame = 6
< Timestep 6 with unit cell dimensions [30.738136 30.459784 29.5481   90.       90.       90.      ] >
0:00:11.705685
frame = 7
< Timestep 7 with unit cell dimensions [30.694881 30.416918 29.5065

## Calculate Entropy

### Whole Molecule level

In [3]:
result_wm = poseidon_object.run_analysis(level_list = ['moleculeLevel'], verbose=False, forceUnits="Kcal") # this is because the forces value supplied in this trajectory is in Kcal
print(result_wm)

2022-06-30 14:03:04.107787

solvent: ['WAT', 'wat', 'SOL', 'H2O', 'h2o', 'WAT_O', 'TIP3']

water: ['WAT', 'wat', 'SOL', 'H2O', 'h2o', 'WAT_O', 'TIP3']

1. Populate Dictionaries

memory use: 0.231 GB
0:00:00.001139
memory use: 0.231 GB
0:00:01.374287
0:00:01.374803
memory use: 0.231 GB

Total number of frames: 20.0
Number of atoms in each frame: 916
Number of variables in each list: 19

2. Process Dictionaries
['moleculeLevel']
---level: moleculeLevel


0:00:01.557718
{'moleculeLevel': {'solventData':    nearest assigned shell_num   variable      value count
0      ACE      WAT       1.0  Sor_test2   4.489916   155
1      ACE      WAT       1.0     Strans  47.114194   155
2      ACE      WAT       1.0       Srot  21.518819   155
3      ACE      WAT       1.0      count       7.75   155
4      ARG      WAT       1.0  Sor_test2   4.389286   358
5      ARG      WAT       1.0     Strans  46.659296   358
6      ARG      WAT       1.0       Srot  21.813924   358
7      ARG      WAT       1.0 

  w = w ** 0.5


### Residue Level

In [4]:
result_res = poseidon_object.run_analysis(level_list = ['residLevel_resname'], verbose=False, forceUnits="Kcal") # this is because the forces value supplied in this trajectory is in Kcal
print(result_res)

2022-06-30 14:03:08.891780

solvent: ['WAT', 'wat', 'SOL', 'H2O', 'h2o', 'WAT_O', 'TIP3']

water: ['WAT', 'wat', 'SOL', 'H2O', 'h2o', 'WAT_O', 'TIP3']

1. Populate Dictionaries

memory use: 0.232 GB
0:00:00.001426
memory use: 0.232 GB
0:00:01.297753
0:00:01.298211
memory use: 0.232 GB

Total number of frames: 20.0
Number of atoms in each frame: 916
Number of variables in each list: 19

2. Process Dictionaries
['residLevel_resname']
---level: residLevel_resname


0:00:01.458315
{'residLevel_resname': {'solventData':    nearest assigned shell_num   variable      value count
0    ACE_1      WAT       1.0  Sor_test2   4.489916   155
1    ACE_1      WAT       1.0     Strans  47.114194   155
2    ACE_1      WAT       1.0       Srot  21.518819   155
3    ACE_1      WAT       1.0      count       7.75   155
4    ARG_2      WAT       1.0  Sor_test2   4.389286   358
5    ARG_2      WAT       1.0     Strans  46.659296   358
6    ARG_2      WAT       1.0       Srot  21.813924   358
7    ARG_2     

  w = w ** 0.5


### United Atom Level

In [5]:
result_ua = poseidon_object.run_analysis(level_list = ['atomLevel'], verbose=False, forceUnits="Kcal") # this is because the forces value supplied in this trajectory is in Kcal
print(result_ua)

2022-06-30 14:03:13.104061

solvent: ['WAT', 'wat', 'SOL', 'H2O', 'h2o', 'WAT_O', 'TIP3']

water: ['WAT', 'wat', 'SOL', 'H2O', 'h2o', 'WAT_O', 'TIP3']

1. Populate Dictionaries

memory use: 0.232 GB
0:00:00.002788
memory use: 0.233 GB
0:00:01.388482
0:00:01.388816
memory use: 0.233 GB

Total number of frames: 20.0
Number of atoms in each frame: 916
Number of variables in each list: 19

2. Process Dictionaries
['atomLevel']
---level: atomLevel


0:00:01.590813
{'atomLevel': {'solventData':    nearest assigned shell_num   variable      value count
0    ACE_C    WAT_O       1.0  Sor_test2   3.859492    90
1    ACE_C    WAT_O       1.0     Strans   46.89642    90
2    ACE_C    WAT_O       1.0       Srot  21.477797    90
3    ACE_C    WAT_O       1.0      count        4.5    90
4    ACE_O    WAT_O       1.0  Sor_test2   1.383652    65
5    ACE_O    WAT_O       1.0     Strans  47.442017    65
6    ACE_O    WAT_O       1.0       Srot  21.607751    65
7    ACE_O    WAT_O       1.0      count  

  w = w ** 0.5


### Solute Contact

In [6]:
result_solcon = poseidon_object.run_analysis(level_list = ['soluteContacts'], verbose=False, forceUnits="Kcal") # this is because the forces value supplied in this trajectory is in Kcal
print(result_solcon)

2022-06-30 14:03:19.556689

solvent: ['WAT', 'wat', 'SOL', 'H2O', 'h2o', 'WAT_O', 'TIP3']

water: ['WAT', 'wat', 'SOL', 'H2O', 'h2o', 'WAT_O', 'TIP3']

1. Populate Dictionaries

memory use: 0.233 GB
0:00:00.001247
memory use: 0.233 GB
0:00:00.091347
0:00:00.091990
memory use: 0.233 GB

Total number of frames: 20.0
Number of atoms in each frame: 916
Number of variables in each list: 19

2. Process Dictionaries
['soluteContacts']
---level: soluteContacts


0:00:00.190073
{'soluteContacts': {'solventData':         nearest assigned shell_num   variable      value count
0   ACE_1_ACE_1      WAT       1.0  Sor_test2   0.747622    19
1   ACE_1_ACE_1      WAT       1.0     Strans  54.849501    19
2   ACE_1_ACE_1      WAT       1.0       Srot   21.83436    19
3   ACE_1_ACE_1      WAT       1.0      count       0.95    19
4   ACE_1_ARG_2      WAT       1.0  Sor_test2   0.129079    26
5   ACE_1_ARG_2      WAT       1.0     Strans  53.464565    26
6   ACE_1_ARG_2      WAT       1.0       Srot  21.