# DataSetList

This is an important class in pytraj/cpptraj. It holds the data produced by calculations, for example:
```python
    calc_radgyr("@CA", traj, dslist=dslist)
```

There are a few methods and attributes of DataSetList that make analyzing data easier.
```python
      dslist.tolist() # try to cast to Python list
      dslist.to_ndarray() # try to cast to numpy array
      dslist.to_dict() # try to cast to Python Dict
      dslist.groupby(key, mode='legend') # try to group datasets having `key` in `mode`. (example will be shown later)
```

Note: this notebook is not the final version, and the methods may change. :D

## 1. Perform dihedral search

In [1]:
# load sample data from pytraj

from pytraj import load_sample_data

traj = load_sample_data()
print (traj)
print (traj.top)
print (traj.top.get_unique_resname())

TrajReadOnly instance with 1 frames, 34 atoms/frame
           
Topology instance with 34 atoms. ID = 0x2aaac4062810
{'ALA ', 'ALH '}


In [2]:
# let's do dihedral search and hold data in DataSetList object
from pytraj import calculate
dslist = calculate('multidihedral', "", traj) # search all possible dihedrals
print (dslist)

# dslist behaves similarly to a Python dict and list
print (dslist.keys()) # get `key` for indexing like a dictionary
print (dslist['phi:2']) # print phi for residue 2 (when using string for indexing, the index starts from 1)

<pytraj.DataSetList.DataSetList object at 0x2aaac6a06b90>
['psi:1', 'phi:2', 'psi:2', 'omega:2', 'phi:3', 'omega:3']
<pytraj.datasets.DataSet_double.DataSet_double object at 0x2b3e2aa2e8a0>


In [3]:
# behave like a python list
print (dslist[0])

# want to get raw data?
print (dslist[0].tolist())

# convert to real Python dict?
print (dslist.to_dict())

<pytraj.datasets.DataSet_double.DataSet_double object at 0x2b3e2aa2e900>
[179.99992142498905]
{'psi:1': [179.99992142498905], 'phi:3': [-179.99977572713476], 'psi:2': [179.99992587395795], 'omega:2': [179.99992243336035], 'omega:3': [179.99992169448015], 'phi:2': [-179.99976823544196]}


In [4]:
# group by keyword?
print (dslist.groupby("phi").to_dict())

{'phi:2': [-179.99976823544196], 'phi:3': [-179.99977572713476]}


In [5]:
# get ndarray?
print (dslist.to_ndarray())

[[ 179.99992142]
 [-179.99976824]
 [ 179.99992587]
 [ 179.99992243]
 [-179.99977573]
 [ 179.99992169]]


In [6]:
# remove dataset?
# size before
print (dslist.size)
dslist.remove_set(dslist['psi:2'])

# after
print (dslist.size)
print (dslist.keys())

6
5
['psi:1', 'phi:2', 'omega:2', 'phi:3', 'omega:3']


In [7]:
help(dslist)

Help on DataSetList object:

class DataSetList(builtins.object)
 |  Methods defined here:
 |  
 |  __call__(self, /, *args, **kwargs)
 |      Call self as a function.
 |  
 |  __getitem__(...)
 |      return a DataSet instance
 |      Memory view is applied (which mean this new insance is just alias of self[idx])
 |      Should we use a copy instead?
 |  
 |  __iadd__(self, value, /)
 |      Return self+=value.
 |  
 |  __iter__(self, /)
 |      Implement iter(self).
 |  
 |  __len__(self, /)
 |      Return len(self).
 |  
 |  __new__(*args, **kwargs) from builtins.type
 |      Create and return a new object.  See help(type) for accurate signature.
 |  
 |  add_existing_set(...)
 |      DataSetList.add_existing_set(self, DataSet ds)
 |  
 |  add_set(...)
 |      DataSetList.add_set(self, dtype=None, name='', default_name='')
 |  
 |  add_set_aspect(...)
 |      DataSetList.add_set_aspect(self, dtype, name=None, aspect=None)
 |      add new dataset
 |              Paramters
 |          

## 2. Perform DSSP analysis

In [8]:
# let's load DPDP peptide simulation from cpptraj test
# you can find the traj with topology files in either
# 
# $AMBERHOME/AmberTools/test/cpptraj/

# or from cpptraj-dev version in github
# https://github.com/mojyt/cpptraj/tree/master/test/DPDP.*

from pytraj import io
traj = io.load("../tests/data/DPDP.nc", "../tests/data/DPDP.parm7")
print (traj)
print (traj.top.get_unique_atomname())
print (traj.top.get_unique_resname())
print (traj.top.n_residues)

TrajReadOnly instance with 100 frames, 332 atoms/frame
           
{'HA  ', 'OE2 ', 'HA2 ', 'CH3 ', 'HZ3 ', 'HN1 ', 'HZ2 ', 'OE1 ', 'HD11', 'HB3 ', 'HD22', 'C   ', 'HG23', 'HD1 ', 'OH  ', 'HB  ', 'HG11', 'HG1 ', 'HG2 ', 'HE21', 'CE2 ', 'OG  ', 'NE2 ', 'CE1 ', 'CE  ', 'HD2 ', 'HD21', 'CB  ', 'HD3 ', 'HA3 ', 'HH  ', 'HE22', 'HD12', 'HG12', 'HH33', 'HG3 ', 'O   ', 'HE2 ', 'HH31', 'CG1 ', 'CD2 ', 'HH32', 'HD23', 'CZ  ', 'HN2 ', 'NZ  ', 'HG13', 'HE1 ', 'HG  ', 'CD  ', 'HD13', 'HB2 ', 'HZ1 ', 'HE3 ', 'HZ  ', 'CG2 ', 'CA  ', 'N   ', 'CG  ', 'OG1 ', 'CD1 ', 'H   ', 'HG21', 'HG22'}
{'ACE ', 'PHE ', 'GLU ', 'LEU ', 'GLN ', 'SER ', 'LYS ', 'ILE ', 'VAL ', 'PRO ', 'GLY ', 'TYR ', 'NHE ', 'THR '}
22


In [9]:
# let's do DSSP analysis

dslist = traj.calc_dssp(":2-15", dtype='dataset') # perform analysis for residue 2 to 15 (index starts from 1)
# dtype = 'str' | 'int' | 'dataset'
print (dslist)
print (dslist.keys())

# print average beta content for first 10 frames
print (dslist.groupby("Anti").to_ndarray()[:10])

<pytraj.DataSetList.DataSetList object at 0x2aaac8d82ab0>
['DSSP_00000[None]', 'DSSP_00000[Para]', 'DSSP_00000[Anti]', 'DSSP_00000[3-10]', 'DSSP_00000[Alpha]', 'DSSP_00000[Pi]', 'DSSP_00000[Turn]', 'DSSP_00000[Bend]', 'VAL:2', 'PHE:3', 'ILE:4', 'THR:5', 'SER:6', 'PRO:7', 'GLY:8', 'LYS:9', 'THR:10', 'TYR:11', 'THR:12', 'GLU:13', 'VAL:14', 'PRO:15']
[ 0.5714286   0.5714286   0.5714286   0.71428573  0.5714286   0.42857143
  0.5714286   0.5714286   0.5714286   0.5714286 ]


In [10]:
# what about a specific residue?
arr = (dslist.groupby("PHE").to_ndarray())

In [11]:
# we need to convert it to something meaningful
from pytraj.common_actions import to_string_ss

print ("PHE")
print (to_string_ss(arr))

print ("")
print ("SER")
print (to_string_ss(dslist.groupby("SER").to_ndarray()))

PHE
['B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', '0', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B']

SER
['S', 'S', '0', 'B', 'T', 'G', '0', 'S', 'S', 'S', 'S', 'S', '0', 'B', 'B', '0', 'S', 'B', 'S', 'S', '0', 'B', '0', 'B', 'B', 'B', '0', 'B', 'B', 'B', 'S', 'S', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', '0', 'B', 'B', 'S', 'B', 'B', 'B', '0', 'B', '0', '0', '0', '0', '0', '0', '0', 'B', 'T', 'G', 'G', 'T', 'G', 'T', 'S', '0', 'G', 'T', '0', '0', 'T', 'G', 'G', 'T', '0', '0', 'S', '0', 'B', 'S', 'G', 'G', 'T', 'T', '0', 'B', '0', '0', 'B', '0', 'T', 'T', 'B', 'B', 'G', 'T', 'B', 'B',

In [12]:
# if you would rather not use the "to_string_ss" method, you can do the following instead

ss_dict = traj.calc_dssp(dtype='ndarray')
print (ss_dict)

[['0' 'B' 'B' ..., 'B' '0' '0']
 ['0' 'B' 'B' ..., 'B' 'B' '0']
 ['0' 'B' 'B' ..., 'B' '0' '0']
 ..., 
 ['0' 'B' 'B' ..., 'B' '0' '0']
 ['0' 'B' 'B' ..., 'B' '0' '0']
 ['0' 'B' 'B' ..., 'B' '0' '0']]
