From 10fa12e9c94892456d25539fb5d826ef81e57fae Mon Sep 17 00:00:00 2001
From: rasbt <mail@sebastianraschka.com>
Date: Thu, 11 May 2017 14:43:58 -0400
Subject: [PATCH] v0.2.1

---
 biopandas/__init__.py                         |   2 +-
 docs/sources/CHANGELOG.md                     |   2 +-
 docs/sources/CONTRIBUTING.md                  |   6 +-
 .../sources/api_subpackages/biopandas.mol2.md |   2 +-
 docs/sources/api_subpackages/biopandas.pdb.md |   2 +-
 .../api_subpackages/biopandas.testutils.md    |   2 +-
 ...king_with_MOL2_Structures_in_DataFrames.md |  30 ++-
 ...rking_with_PDB_Structures_in_DataFrames.md | 189 +++++++++++-------
 8 files changed, 155 insertions(+), 80 deletions(-)

diff --git a/biopandas/__init__.py b/biopandas/__init__.py
index ec93cfa..2431007 100644
--- a/biopandas/__init__.py
+++ b/biopandas/__init__.py
@@ -4,5 +4,5 @@
 # Project Website: http://rasbt.github.io/biopandas/
 # Code Repository: https://github.com/rasbt/biopandas
 
-__version__ = '0.2.1.dev0'
+__version__ = '0.2.1'
 __author__ = "Sebastian Raschka <mail@sebastianraschka.com>"
diff --git a/docs/sources/CHANGELOG.md b/docs/sources/CHANGELOG.md
index f91b22c..16ffb2a 100755
--- a/docs/sources/CHANGELOG.md
+++ b/docs/sources/CHANGELOG.md
@@ -3,7 +3,7 @@
 The CHANGELOG for the current development version is available at
 [https://github.com/rasbt/biopandas/blob/master/docs/sources/CHANGELOG.md](https://github.com/rasbt/biopandas/blob/master/docs/sources/CHANGELOG.md).
 
-### 0.2.1dev
+### 0.2.1 (2017-05-11)
 
 ##### Downloads
 
diff --git a/docs/sources/CONTRIBUTING.md b/docs/sources/CONTRIBUTING.md
index 070ab57..9e86edc 100755
--- a/docs/sources/CONTRIBUTING.md
+++ b/docs/sources/CONTRIBUTING.md
@@ -262,7 +262,7 @@ For example,
 Please note that documents containing code examples are generated from IPython Notebook files and converted to markdown via
 
 ```bash
-~/github/biopandas/docs/examples$ nbconvert --to markdown <file.ipynb>
+~/github/biopandas/docs/sources/tutorials$ nbconvert --to markdown <file.ipynb>
 ```
 
 The markdown file should be placed into the documentation directory at `biopandas/docs/sources` to build the documentation via  MkDocs.
@@ -349,7 +349,7 @@ $ pip uninstall biopandas
 Consider deploying the package to the PyPI test server first. The setup instructions can be found [here](https://wiki.python.org/moin/TestPyPI).
 
 ```bash
-$ python setup.py sdist upload -r https://testpypi.python.org/pypi
+$ python setup.py sdist bdist_wheel upload -r https://testpypi.python.org/pypi
 ```
 
 Test if it can be installed from there by executing
@@ -367,7 +367,7 @@ $ pip uninstall biopandas
 After this dry-run succeeded, repeat this process using the "real" PyPI:
 
 ```bash
-$ python setup.py sdist upload
+$ python setup.py sdist bdist_wheel upload
 ```
 
 #### 4. Removing the virtual environment
diff --git a/docs/sources/api_subpackages/biopandas.mol2.md b/docs/sources/api_subpackages/biopandas.mol2.md
index a65a4df..69ddfc4 100644
--- a/docs/sources/api_subpackages/biopandas.mol2.md
+++ b/docs/sources/api_subpackages/biopandas.mol2.md
@@ -1,4 +1,4 @@
-biopandas version: 0.2.1.dev0
+biopandas version: 0.2.1
 ## PandasMol2
 
 *PandasMol2()*
diff --git a/docs/sources/api_subpackages/biopandas.pdb.md b/docs/sources/api_subpackages/biopandas.pdb.md
index 3671c7f..bde02f1 100644
--- a/docs/sources/api_subpackages/biopandas.pdb.md
+++ b/docs/sources/api_subpackages/biopandas.pdb.md
@@ -1,4 +1,4 @@
-biopandas version: 0.2.1.dev0
+biopandas version: 0.2.1
 ## PandasPdb
 
 *PandasPdb()*
diff --git a/docs/sources/api_subpackages/biopandas.testutils.md b/docs/sources/api_subpackages/biopandas.testutils.md
index 12229fb..8555212 100644
--- a/docs/sources/api_subpackages/biopandas.testutils.md
+++ b/docs/sources/api_subpackages/biopandas.testutils.md
@@ -1 +1 @@
-biopandas version: 0.2.1.dev0
+biopandas version: 0.2.1
diff --git a/docs/sources/tutorials/Working_with_MOL2_Structures_in_DataFrames.md b/docs/sources/tutorials/Working_with_MOL2_Structures_in_DataFrames.md
index b54364c..c57cf03 100644
--- a/docs/sources/tutorials/Working_with_MOL2_Structures_in_DataFrames.md
+++ b/docs/sources/tutorials/Working_with_MOL2_Structures_in_DataFrames.md
@@ -1,3 +1,31 @@
+
+BioPandas
+
+Author: Sebastian Raschka <mail@sebastianraschka.com>  
+License: BSD 3 clause  
+Project Website: http://rasbt.github.io/biopandas/  
+Code Repository: https://github.com/rasbt/biopandas  
+
+
+```python
+%load_ext watermark
+%watermark -d -u -p pandas,biopandas
+```
+
+    last updated: 2017-04-02 
+    
+    pandas 0.19.2
+    biopandas 0.2.0.dev0
+
+
+
+```python
+from biopandas.mol2 import PandasMol2
+import pandas as pd
+pd.set_option('display.width', 600)
+pd.set_option('display.max_columns', 8)
+```
+
 # Working with MOL2 Structures in DataFrames
 
 The Tripos MOL2 format is a common format for working with small molecules. In this tutorial, we will go over some examples that illustrate how we can use Biopandas' MOL2 DataFrames to analyze molecules conveniently.
@@ -569,7 +597,7 @@ A list of all the allowed atom types that can be found in Tripos MOL2 files is p
     S.3        sulfur sp3
     S.2        sulfur sp2
     S.O        sulfoxide sulfur
-    S.O2       sulfone sulfur
+    S.O2/S.o2  sulfone sulfur
     P.3        phosphorous sp3
     F          fluorine
     H          hydrogen
diff --git a/docs/sources/tutorials/Working_with_PDB_Structures_in_DataFrames.md b/docs/sources/tutorials/Working_with_PDB_Structures_in_DataFrames.md
index 92b7b97..f1bec39 100644
--- a/docs/sources/tutorials/Working_with_PDB_Structures_in_DataFrames.md
+++ b/docs/sources/tutorials/Working_with_PDB_Structures_in_DataFrames.md
@@ -1,3 +1,31 @@
+
+BioPandas
+
+Author: Sebastian Raschka <mail@sebastianraschka.com>  
+License: BSD 3 clause  
+Project Website: http://rasbt.github.io/biopandas/  
+Code Repository: https://github.com/rasbt/biopandas  
+
+
+```python
+%load_ext watermark
+%watermark -d -u -p pandas,biopandas
+```
+
+    last updated: 2017-04-12 
+    
+    pandas 0.19.2
+    biopandas 0.2.1.dev0
+
+
+
+```python
+from biopandas.pdb import PandasPdb
+import pandas as pd
+pd.set_option('display.width', 600)
+pd.set_option('display.max_columns', 8)
+```
+
 # Working with PDB Structures in DataFrames
 
 ## Loading PDB Files
@@ -29,7 +57,7 @@ ppdb.read_pdb('./data/3eiy.pdb')
 
 
 
-    <biopandas.pdb.pandas_pdb.PandasPdb at 0x106795898>
+    <biopandas.pdb.pandas_pdb.PandasPdb at 0x10462bf28>
 
 
 
@@ -45,7 +73,7 @@ ppdb.read_pdb('./data/3eiy.pdb.gz')
 
 
 
-    <biopandas.pdb.pandas_pdb.PandasPdb at 0x106795898>
+    <biopandas.pdb.pandas_pdb.PandasPdb at 0x10462bf28>
 
 
 
@@ -207,7 +235,7 @@ ppdb.df.keys()
 
 
 
-    dict_keys(['HETATM', 'ANISOU', 'ATOM', 'OTHERS'])
+    dict_keys(['ATOM', 'HETATM', 'ANISOU', 'OTHERS'])
 
 
 
@@ -1142,81 +1170,100 @@ Residues in the `residue_name` field can be converted into 1-letter amino acid c
 
 ```python
 from biopandas.pdb import PandasPdb
-ppdb = PandasPdb().read_pdb('./data/3eiy.pdb.gz')
-ppdb.amino3to1()
-# By default, `amino3to1` returns a pandas Series object,
-# and to convert it into a Python list, you can wrap it in list
-# constructor, e.g.,
-# `list(ppdb.amino3to1())`
+ppdb = PandasPdb().fetch_pdb('5mtn')
+sequence = ppdb.amino3to1()
+sequence.tail()
 ```
 
 
 
 
-    0       S
-    6       F
-    17      S
-    23      N
-    31      V
-    38      P
-    45      A
-    50      G
-    54      K
-    63      D
-    71      L
-    79      P
-    86      Q
-    95      D
-    103     F
-    114     N
-    122     V
-    129     I
-    137     I
-    145     E
-    154     I
-    162     P
-    169     A
-    174     Q
-    183     S
-    189     E
-    198     P
-    205     V
-    212     K
-    221     Y
-           ..
-    1100    E
-    1109    K
-    1114    G
-    1118    K
-    1127    W
-    1141    V
-    1148    K
-    1153    V
-    1160    E
-    1169    G
-    1173    W
-    1187    D
-    1195    G
-    1199    I
-    1207    D
-    1215    A
-    1220    A
-    1225    H
-    1235    K
-    1244    E
-    1253    I
-    1261    T
-    1268    D
-    1276    G
-    1280    V
-    1287    A
-    1292    N
-    1300    F
-    1311    K
-    1320    K
-    Name: residue_name, dtype: object
+<div>
+<table border="1" class="dataframe">
+  <thead>
+    <tr style="text-align: right;">
+      <th></th>
+      <th>chain_id</th>
+      <th>residue_name</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <th>1378</th>
+      <td>B</td>
+      <td>I</td>
+    </tr>
+    <tr>
+      <th>1386</th>
+      <td>B</td>
+      <td>N</td>
+    </tr>
+    <tr>
+      <th>1394</th>
+      <td>B</td>
+      <td>Y</td>
+    </tr>
+    <tr>
+      <th>1406</th>
+      <td>B</td>
+      <td>R</td>
+    </tr>
+    <tr>
+      <th>1417</th>
+      <td>B</td>
+      <td>T</td>
+    </tr>
+  </tbody>
+</table>
+</div>
+
+
+
+As shown above, the `amino3to1` method returns a `DataFrame` containing the `chain_id` and `residue_name` of the translated 1-letter amino acids. If you would like to work with the sequence as a Python list of string characters, you can do the following:
+
+
+```python
+sequence_list = list(sequence.loc[sequence['chain_id'] == 'A', 'residue_name'])
+sequence_list[-5:] # last 5 residues of chain A
+```
+
+
+
+
+    ['V', 'R', 'H', 'Y', 'T']
+
+
+
+And if you prefer to work with the sequence as a string, you can use the `join` method: 
 
 
+```python
+''.join(sequence.loc[sequence['chain_id'] == 'A', 'residue_name'])
+```
+
+
+
+
+    'SLEPEPWFFKNLSRKDAERQLLAPGNTHGSFLIRESESTAGSFSLSVRDFDQGEVVKHYKIRNLDNGGFYISPRITFPGLHELVRHYT'
+
+
+
+To iterate over the sequences of multi-chain proteins, you can use the `unique` method as shown below:
+
+
+```python
+for chain_id in sequence['chain_id'].unique():
+    print('\nChain ID: %s' % chain_id)
+    print(''.join(sequence.loc[sequence['chain_id'] == chain_id, 'residue_name']))
+```
+
+    
+    Chain ID: A
+    SLEPEPWFFKNLSRKDAERQLLAPGNTHGSFLIRESESTAGSFSLSVRDFDQGEVVKHYKIRNLDNGGFYISPRITFPGLHELVRHYT
+    
+    Chain ID: B
+    SVSSVPTKLEVVAATPTSLLISWDAPAVTVVYYLITYGETGSPWPGGQAFEVPGSKSTATISGLKPGVDYTITVYAHRSSYGYSENPISINYRT
+
 
 ## Wrapping it up - Saving PDB structures