From 10fa12e9c94892456d25539fb5d826ef81e57fae Mon Sep 17 00:00:00 2001 From: rasbt Date: Thu, 11 May 2017 14:43:58 -0400 Subject: [PATCH] v0.2.1 --- biopandas/__init__.py | 2 +- docs/sources/CHANGELOG.md | 2 +- docs/sources/CONTRIBUTING.md | 6 +- .../sources/api_subpackages/biopandas.mol2.md | 2 +- docs/sources/api_subpackages/biopandas.pdb.md | 2 +- .../api_subpackages/biopandas.testutils.md | 2 +- ...king_with_MOL2_Structures_in_DataFrames.md | 30 ++- ...rking_with_PDB_Structures_in_DataFrames.md | 189 +++++++++++------- 8 files changed, 155 insertions(+), 80 deletions(-) diff --git a/biopandas/__init__.py b/biopandas/__init__.py index ec93cfa..2431007 100644 --- a/biopandas/__init__.py +++ b/biopandas/__init__.py @@ -4,5 +4,5 @@ # Project Website: http://rasbt.github.io/biopandas/ # Code Repository: https://github.com/rasbt/biopandas -__version__ = '0.2.1.dev0' +__version__ = '0.2.1' __author__ = "Sebastian Raschka " diff --git a/docs/sources/CHANGELOG.md b/docs/sources/CHANGELOG.md index f91b22c..16ffb2a 100755 --- a/docs/sources/CHANGELOG.md +++ b/docs/sources/CHANGELOG.md @@ -3,7 +3,7 @@ The CHANGELOG for the current development version is available at [https://github.com/rasbt/biopandas/blob/master/docs/sources/CHANGELOG.md](https://github.com/rasbt/biopandas/blob/master/docs/sources/CHANGELOG.md). -### 0.2.1dev +### 0.2.1 (2017-05-11) ##### Downloads diff --git a/docs/sources/CONTRIBUTING.md b/docs/sources/CONTRIBUTING.md index 070ab57..9e86edc 100755 --- a/docs/sources/CONTRIBUTING.md +++ b/docs/sources/CONTRIBUTING.md @@ -262,7 +262,7 @@ For example, Please note that documents containing code examples are generated from IPython Notebook files and converted to markdown via ```bash -~/github/biopandas/docs/examples$ nbconvert --to markdown +~/github/biopandas/docs/sources/tutorials$ nbconvert --to markdown ``` The markdown file should be placed into the documentation directory at `biopandas/docs/sources` to build the documentation via MkDocs. 
@@ -349,7 +349,7 @@ $ pip uninstall biopandas Consider deploying the package to the PyPI test server first. The setup instructions can be found [here](https://wiki.python.org/moin/TestPyPI). ```bash -$ python setup.py sdist upload -r https://testpypi.python.org/pypi +$ python setup.py sdist bdist_wheel upload -r https://testpypi.python.org/pypi ``` Test if it can be installed from there by executing @@ -367,7 +367,7 @@ $ pip uninstall biopandas After this dry-run succeeded, repeat this process using the "real" PyPI: ```bash -$ python setup.py sdist upload +$ python setup.py sdist bdist_wheel upload ``` #### 4. Removing the virtual environment diff --git a/docs/sources/api_subpackages/biopandas.mol2.md b/docs/sources/api_subpackages/biopandas.mol2.md index a65a4df..69ddfc4 100644 --- a/docs/sources/api_subpackages/biopandas.mol2.md +++ b/docs/sources/api_subpackages/biopandas.mol2.md @@ -1,4 +1,4 @@ -biopandas version: 0.2.1.dev0 +biopandas version: 0.2.1 ## PandasMol2 *PandasMol2()* diff --git a/docs/sources/api_subpackages/biopandas.pdb.md b/docs/sources/api_subpackages/biopandas.pdb.md index 3671c7f..bde02f1 100644 --- a/docs/sources/api_subpackages/biopandas.pdb.md +++ b/docs/sources/api_subpackages/biopandas.pdb.md @@ -1,4 +1,4 @@ -biopandas version: 0.2.1.dev0 +biopandas version: 0.2.1 ## PandasPdb *PandasPdb()* diff --git a/docs/sources/api_subpackages/biopandas.testutils.md b/docs/sources/api_subpackages/biopandas.testutils.md index 12229fb..8555212 100644 --- a/docs/sources/api_subpackages/biopandas.testutils.md +++ b/docs/sources/api_subpackages/biopandas.testutils.md @@ -1 +1 @@ -biopandas version: 0.2.1.dev0 +biopandas version: 0.2.1 diff --git a/docs/sources/tutorials/Working_with_MOL2_Structures_in_DataFrames.md b/docs/sources/tutorials/Working_with_MOL2_Structures_in_DataFrames.md index b54364c..c57cf03 100644 --- a/docs/sources/tutorials/Working_with_MOL2_Structures_in_DataFrames.md +++ 
b/docs/sources/tutorials/Working_with_MOL2_Structures_in_DataFrames.md @@ -1,3 +1,31 @@ + +BioPandas + +Author: Sebastian Raschka +License: BSD 3 clause +Project Website: http://rasbt.github.io/biopandas/ +Code Repository: https://github.com/rasbt/biopandas + + +```python +%load_ext watermark +%watermark -d -u -p pandas,biopandas +``` + + last updated: 2017-04-02 + + pandas 0.19.2 + biopandas 0.2.0.dev0 + + + +```python +from biopandas.mol2 import PandasMol2 +import pandas as pd +pd.set_option('display.width', 600) +pd.set_option('display.max_columns', 8) +``` + # Working with MOL2 Structures in DataFrames The Tripos MOL2 format is a common format for working with small molecules. In this tutorial, we will go over some examples that illustrate how we can use Biopandas' MOL2 DataFrames to analyze molecules conveniently. @@ -569,7 +597,7 @@ A list of all the allowed atom types that can be found in Tripos MOL2 files is p S.3 sulfur sp3 S.2 sulfur sp2 S.O sulfoxide sulfur - S.O2 sulfone sulfur + S.O2/S.o2 sulfone sulfur P.3 phosphorous sp3 F fluorine H hydrogen diff --git a/docs/sources/tutorials/Working_with_PDB_Structures_in_DataFrames.md b/docs/sources/tutorials/Working_with_PDB_Structures_in_DataFrames.md index 92b7b97..f1bec39 100644 --- a/docs/sources/tutorials/Working_with_PDB_Structures_in_DataFrames.md +++ b/docs/sources/tutorials/Working_with_PDB_Structures_in_DataFrames.md @@ -1,3 +1,31 @@ + +BioPandas + +Author: Sebastian Raschka +License: BSD 3 clause +Project Website: http://rasbt.github.io/biopandas/ +Code Repository: https://github.com/rasbt/biopandas + + +```python +%load_ext watermark +%watermark -d -u -p pandas,biopandas +``` + + last updated: 2017-04-12 + + pandas 0.19.2 + biopandas 0.2.1.dev0 + + + +```python +from biopandas.pdb import PandasPdb +import pandas as pd +pd.set_option('display.width', 600) +pd.set_option('display.max_columns', 8) +``` + # Working with PDB Structures in DataFrames ## Loading PDB Files @@ -29,7 +57,7 @@ 
ppdb.read_pdb('./data/3eiy.pdb') - + @@ -45,7 +73,7 @@ ppdb.read_pdb('./data/3eiy.pdb.gz') - + @@ -207,7 +235,7 @@ ppdb.df.keys() - dict_keys(['HETATM', 'ANISOU', 'ATOM', 'OTHERS']) + dict_keys(['ATOM', 'HETATM', 'ANISOU', 'OTHERS']) @@ -1142,81 +1170,100 @@ Residues in the `residue_name` field can be converted into 1-letter amino acid c ```python from biopandas.pdb import PandasPdb -ppdb = PandasPdb().read_pdb('./data/3eiy.pdb.gz') -ppdb.amino3to1() -# By default, `amino3to1` returns a pandas Series object, -# and to convert it into a Python list, you can wrap it in list -# constructor, e.g., -# `list(ppdb.amino3to1())` +ppdb = PandasPdb().fetch_pdb('5mtn') +sequence = ppdb.amino3to1() +sequence.tail() ``` - 0 S - 6 F - 17 S - 23 N - 31 V - 38 P - 45 A - 50 G - 54 K - 63 D - 71 L - 79 P - 86 Q - 95 D - 103 F - 114 N - 122 V - 129 I - 137 I - 145 E - 154 I - 162 P - 169 A - 174 Q - 183 S - 189 E - 198 P - 205 V - 212 K - 221 Y - .. - 1100 E - 1109 K - 1114 G - 1118 K - 1127 W - 1141 V - 1148 K - 1153 V - 1160 E - 1169 G - 1173 W - 1187 D - 1195 G - 1199 I - 1207 D - 1215 A - 1220 A - 1225 H - 1235 K - 1244 E - 1253 I - 1261 T - 1268 D - 1276 G - 1280 V - 1287 A - 1292 N - 1300 F - 1311 K - 1320 K - Name: residue_name, dtype: object +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
chain_idresidue_name
1378BI
1386BN
1394BY
1406BR
1417BT
+
+ + + +As shown above, the `amino3to1` method returns a `DataFrame` containing the `chain_id` and `residue_name` of the translated 1-letter amino acids. If you would like to work with the sequence as a Python list of string characters, you could do the following: + + +```python +sequence_list = list(sequence.loc[sequence['chain_id'] == 'A', 'residue_name']) +sequence_list[-5:] # last 5 residues of chain A +``` + + + + + ['V', 'R', 'H', 'Y', 'T'] + + + +And if you prefer to work with the sequence as a string, you can use the `join` method: +```python +''.join(sequence.loc[sequence['chain_id'] == 'A', 'residue_name']) +``` + + + + + 'SLEPEPWFFKNLSRKDAERQLLAPGNTHGSFLIRESESTAGSFSLSVRDFDQGEVVKHYKIRNLDNGGFYISPRITFPGLHELVRHYT' + + + +To iterate over the sequences of multi-chain proteins, you can use the `unique` method as shown below: + + +```python +for chain_id in sequence['chain_id'].unique(): + print('\nChain ID: %s' % chain_id) + print(''.join(sequence.loc[sequence['chain_id'] == chain_id, 'residue_name'])) +``` + + + Chain ID: A + SLEPEPWFFKNLSRKDAERQLLAPGNTHGSFLIRESESTAGSFSLSVRDFDQGEVVKHYKIRNLDNGGFYISPRITFPGLHELVRHYT + + Chain ID: B + SVSSVPTKLEVVAATPTSLLISWDAPAVTVVYYLITYGETGSPWPGGQAFEVPGSKSTATISGLKPGVDYTITVYAHRSSYGYSENPISINYRT + ## Wrapping it up - Saving PDB structures