# DataSetList

This is an important class in pytraj/cpptraj. It holds the data produced by calculations, such as:
```python
    calc_radgyr(traj, "@CA", dslist=dslist)
```

There are a few methods and attributes of DataSetList that make analyzing data easier.
```python
      dslist.tolist() # try to cast to Python list
      dslist.to_ndarray() # try to cast to numpy array
      dslist.to_dict() # try to cast to Python dict
      dslist.groupby(key, mode='legend') # try to group datasets having `key` in `mode`. (example will be shown later)
```

Note: this notebook is not the final version, and methods may change. :D

## 1. Perform dihedral search

In [1]:
# load sample data from pytraj

from pytraj import load_sample_data

traj = load_sample_data()
print (traj)
print (traj.top)
print (traj.top.residue_names)

<pytraj.TrajectoryIterator with 1 frames: <Topology with 1 mols, 3 residues, 34 atoms, 33 bonds, non-PBC>>
           
<Topology with 1 mols, 3 residues, 34 atoms, 33 bonds, non-PBC>
{'ALA ', 'ALH '}


In [2]:
# let's do dihedral search and hold data in DataSetList object
from pytraj import calculate
# from pytraj.misc import show_code as show

#show(calculate)
dslist = calculate('multidihedral', traj, "", top=traj.top) # search all possible dihedrals
print (dslist)

# dslist behaves similarly to a Python dict and list
print (dslist.keys()) # get `key` for indexing like a dictionary
print (dslist['phi:2']) # print phi for residue 2 (when using string for indexing, the index starts from 1)

<pytraj.DataSetList.DataSetList object at 0x2aaac7323d10>
['psi:1', 'phi:2', 'psi:2', 'omega:2', 'phi:3', 'omega:3']
<pytraj.datasets.DataSet_double.DataSet_double object at 0x2b1d945c48a0>


In [3]:
# behave like a python list
print (dslist[0])

# want to get raw data?
print (dslist[0].tolist())

# convert to real Python dict?
print (dslist.to_dict())

<pytraj.datasets.DataSet_double.DataSet_double object at 0x2b1d945c4900>
[179.99992142498905]
{'phi:3': [-179.99977572713476], 'psi:2': [179.99992587395795], 'phi:2': [-179.99976823544196], 'omega:3': [179.99992169448015], 'psi:1': [179.99992142498905], 'omega:2': [179.99992243336035]}


In [4]:
# group by keyword?
print (dslist.groupby("phi").to_dict())

{'phi:2': [-179.99976823544196], 'phi:3': [-179.99977572713476]}


In [5]:
# get ndarray?
print (dslist.to_ndarray())

[[ 179.99992142]
 [-179.99976824]
 [ 179.99992587]
 [ 179.99992243]
 [-179.99977573]
 [ 179.99992169]]


In [6]:
# remove dataset?
# size before
print (dslist.size)
dslist.remove_set(dslist['psi:2'])

# after
print (dslist.size)
print (dslist.keys())

6
5
['psi:1', 'phi:2', 'omega:2', 'phi:3', 'omega:3']


## 2. Perform DSSP analysis

In [7]:
# let's load DPDP peptide simulation from cpptraj test
# you can find the traj with topology files in either
# 
# $AMBERHOME/AmberTools/test/cpptraj/

# or from cpptraj-dev version in github
# https://github.com/mojyt/cpptraj/tree/master/test/DPDP.*

from pytraj import io
traj = io.load("../tests/data/DPDP.nc", "../tests/data/DPDP.parm7")
print (traj)
print (traj.top.atom_names)
print (traj.top.residue_names)
print (traj.top.n_residues)

<pytraj.Trajectory with 100 frames: <Topology with 1 mols, 22 residues, 332 atoms, 335 bonds, non-PBC>>
           
{'O   ', 'HE2 ', 'CH3 ', 'HG22', 'HZ3 ', 'N   ', 'CE2 ', 'HG12', 'HB3 ', 'HG3 ', 'HN1 ', 'HD21', 'HG21', 'HG1 ', 'HZ1 ', 'OE2 ', 'HD12', 'HA2 ', 'HD23', 'HE1 ', 'HG  ', 'HD3 ', 'HE21', 'HD11', 'CA  ', 'CZ  ', 'HA  ', 'HG23', 'CD  ', 'OE1 ', 'NE2 ', 'HH33', 'HE22', 'OG1 ', 'CE  ', 'HH  ', 'HD22', 'CG2 ', 'HD2 ', 'HH31', 'C   ', 'HB  ', 'HZ  ', 'CD1 ', 'OG  ', 'HD1 ', 'HG2 ', 'HG13', 'HA3 ', 'HZ2 ', 'HN2 ', 'NZ  ', 'CG1 ', 'OH  ', 'CB  ', 'CG  ', 'HH32', 'HE3 ', 'CE1 ', 'HB2 ', 'HD13', 'H   ', 'HG11', 'CD2 '}
{'GLU ', 'GLY ', 'VAL ', 'TYR ', 'GLN ', 'NHE ', 'THR ', 'PRO ', 'ILE ', 'LYS ', 'SER ', 'ACE ', 'LEU ', 'PHE '}
22


In [8]:
# let's do DSSP analysis

dslist = traj.calc_dssp(":2-15", dtype='dataset') # perform analysis for residue 2 to 15 (index starts from 1)
# dtype = 'str' | 'int' | 'dataset'
print (dslist)
print (dslist.keys())

# print average beta content for first 10 frames
print (dslist.groupby("Anti").to_ndarray()[:10])

<pytraj.DataSetList.DataSetList object at 0x2aaac7323d50>
['DSSP_00000[None]', 'DSSP_00000[Para]', 'DSSP_00000[Anti]', 'DSSP_00000[3-10]', 'DSSP_00000[Alpha]', 'DSSP_00000[Pi]', 'DSSP_00000[Turn]', 'DSSP_00000[Bend]', 'VAL:2', 'PHE:3', 'ILE:4', 'THR:5', 'SER:6', 'PRO:7', 'GLY:8', 'LYS:9', 'THR:10', 'TYR:11', 'THR:12', 'GLU:13', 'VAL:14', 'PRO:15']
[ 0.5714286   0.5714286   0.5714286   0.71428573  0.5714286   0.42857143
  0.5714286   0.5714286   0.5714286   0.5714286 ]


In [9]:
# what about a specific residue?
arr = (dslist.groupby("PHE").to_ndarray())

In [10]:
# we need to convert it to something meaningful
from pytraj.common_actions import to_string_ss

print ("PHE")
print (to_string_ss(arr))

print ("")
print ("SER")
print (to_string_ss(dslist.groupby("SER").to_ndarray()))

PHE
['B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', '0', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B']

SER
['S', 'S', '0', 'B', 'T', 'G', '0', 'S', 'S', 'S', 'S', 'S', '0', 'B', 'B', '0', 'S', 'B', 'S', 'S', '0', 'B', '0', 'B', 'B', 'B', '0', 'B', 'B', 'B', 'S', 'S', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', '0', 'B', 'B', 'S', 'B', 'B', 'B', '0', 'B', '0', '0', '0', '0', '0', '0', '0', 'B', 'T', 'G', 'G', 'T', 'G', 'T', 'S', '0', 'G', 'T', '0', '0', 'T', 'G', 'G', 'T', '0', '0', 'S', '0', 'B', 'S', 'G', 'G', 'T', 'T', '0', 'B', '0', '0', 'B', '0', 'T', 'T', 'B', 'B', 'G', 'T', 'B', 'B',

In [11]:
# if you would rather not call the "to_string_ss" method yourself, you can do the following instead

ss_dict = traj.calc_dssp(dtype='ndarray')
print (ss_dict)

[['0' 'B' 'B' ..., 'B' '0' '0']
 ['0' 'B' 'B' ..., 'B' 'B' '0']
 ['0' 'B' 'B' ..., 'B' '0' '0']
 ..., 
 ['0' 'B' 'B' ..., 'B' '0' '0']
 ['0' 'B' 'B' ..., 'B' '0' '0']
 ['0' 'B' 'B' ..., 'B' '0' '0']]


## 3. Perform hbonds analysis

In [12]:
import pytraj.io as io

traj = io.load("../tests/data/tz2.ortho.nc", "../tests/data/tz2.ortho.parm7")
print (traj)

<pytraj.Trajectory with 10 frames: <Topology with 1692 mols, 1704 residues, 5293 atoms, 5300 bonds, PBC with box type = ortho>>
           


In [13]:
# need to do autoimage first
traj.autoimage()

In [14]:
# search all hbonds
dslist = traj.search_hbonds()

In [15]:
dslist.keys()

['HB_00000[UU]',
 'LYS_12@O-SER_1@N-H3',
 'LYS_8@O-GLU_5@N-H',
 'GLU_5@O-LYS_8@N-H',
 'SER_1@O-LYS_12@N-H',
 'GLU_5@OE2-LYS_12@NZ-HZ1',
 'GLU_5@OE1-LYS_12@NZ-HZ1',
 'THR_10@O-THR_3@N-H',
 'TRP_2@O-THR_3@OG1-HG1',
 'THR_3@O-THR_10@N-H']

In [16]:
# total hbonds for each frame
print (dslist[0].tolist())
# equal to dslist['HB_00000[UU]']

[5, 5, 5, 4, 4, 5, 6, 5, 5, 4]


In [17]:
# do basic math
print (dslist.sum())

[48  7  8  3 10  9  2  2  3  4]


In [18]:
# do basic math: exclude first dataset (HB_00000[UU])
dslist[1:].sum()

array([ 7,  8,  3, 10,  9,  2,  2,  3,  4])

In [19]:
dslist[1:].mean()

array([ 0.7,  0.8,  0.3,  1. ,  0.9,  0.2,  0.2,  0.3,  0.4])

In [20]:
# only care about THR residue?
dslist.groupby("THR").mean()

array([ 0.2,  0.3,  0.4])