diff --git a/README.md b/README.md index 2010d69..1008dee 100644 --- a/README.md +++ b/README.md @@ -35,10 +35,10 @@ So, why don't we take pandas to the structural biology world? Working with molec ![3eiy](./docs/sources/img/index/3eiy.png) ```python -# Initialize a new PandasPDB object +# Initialize a new PandasPdb object # and fetch the PDB file from rcsb.org ->>> from biopandas.pdb import PandasPDB ->>> ppdb = PandasPDB().fetch_pdb('3eiy') +>>> from biopandas.pdb import PandasPdb +>>> ppdb = PandasPdb().fetch_pdb('3eiy') >>> ppdb.df['ATOM'].head() ``` @@ -53,10 +53,10 @@ So, why don't we take pandas to the structural biology world? Working with molec ```python # Load structures from your drive and compute the # Root Mean Square Deviation ->>> from biopandas.pdb import PandasPDB ->>> pl1 = PandasPDB().read_pdb('./docking_pose_1.pdb') ->>> pl2 = PandasPDB().read_pdb('./docking_pose_2.pdb') ->>> r = PandasPDB.rmsd(pl1.df['HETATM'], pl2.df['HETATM'], +>>> from biopandas.pdb import PandasPdb +>>> pl1 = PandasPdb().read_pdb('./docking_pose_1.pdb') +>>> pl2 = PandasPdb().read_pdb('./docking_pose_2.pdb') +>>> r = PandasPdb.rmsd(pl1.df['HETATM'], pl2.df['HETATM'], s='hydrogen', invert=True) >>> print('RMSD: %.4f Angstrom' % r) diff --git a/biopandas/mol2/__init__.py b/biopandas/mol2/__init__.py index ac7c71d..79358e2 100644 --- a/biopandas/mol2/__init__.py +++ b/biopandas/mol2/__init__.py @@ -9,7 +9,7 @@ files in pandas DataFrames. 
""" -from .pandas_mol2 import PandasMOL2 +from .pandas_mol2 import PandasMol2 from .mol2_io import split_multimol2 -__all__ = ["PandasMOL2", "split_multimol2"] +__all__ = ["PandasMol2", "split_multimol2"] diff --git a/biopandas/mol2/pandas_mol2.py b/biopandas/mol2/pandas_mol2.py index 844dbaa..54a671d 100644 --- a/biopandas/mol2/pandas_mol2.py +++ b/biopandas/mol2/pandas_mol2.py @@ -24,7 +24,7 @@ COLUMN_TYPES = (int, str, float, float, float, str, int, str, float) -class PandasMOL2(object): +class PandasMol2(object): """ Object for working with Tripos Mol2 structure files. Attributes diff --git a/biopandas/mol2/tests/test_pandas_mol2.py b/biopandas/mol2/tests/test_pandas_mol2.py index f6fa16c..34ebacc 100644 --- a/biopandas/mol2/tests/test_pandas_mol2.py +++ b/biopandas/mol2/tests/test_pandas_mol2.py @@ -5,7 +5,7 @@ # Code Repository: https://github.com/rasbt/biopandas import os -from biopandas.mol2 import PandasMOL2 +from biopandas.mol2 import PandasMol2 from biopandas.mol2.mol2_io import split_multimol2 this_dir = os.path.dirname(os.path.realpath(__file__)) @@ -17,7 +17,7 @@ def test_read_mol2(): data_path_2 = os.path.join(this_dir, 'data', '40_mol2_files.mol2.gz') for data_path in (data_path_1, data_path_2): - pdmol = PandasMOL2().read_mol2(data_path) + pdmol = PandasMol2().read_mol2(data_path) assert pdmol.df.shape == (65, 9) assert pdmol.code == 'ZINC38611810' @@ -32,7 +32,7 @@ def test_read_mol2_from_list(): data_path = os.path.join(this_dir, 'data', '40_mol2_files.mol2') mol2 = next(split_multimol2(data_path)) - pdmol = PandasMOL2().read_mol2_from_list(mol2_lines=mol2[1], + pdmol = PandasMol2().read_mol2_from_list(mol2_lines=mol2[1], mol2_code=mol2[0]) assert pdmol.df.shape == (65, 9) assert pdmol.code == 'ZINC38611810' @@ -42,8 +42,8 @@ def test_rmsd(): data_path_1 = os.path.join(this_dir, 'data', '1b5e_1.mol2') data_path_2 = os.path.join(this_dir, 'data', '1b5e_2.mol2') - pdmol_1 = PandasMOL2().read_mol2(data_path_1) - pdmol_2 = 
PandasMOL2().read_mol2(data_path_2) + pdmol_1 = PandasMol2().read_mol2(data_path_1) + pdmol_2 = PandasMol2().read_mol2(data_path_2) assert pdmol_1.rmsd(pdmol_1.df, pdmol_2.df, heavy_only=False) == 1.5523 assert pdmol_1.rmsd(pdmol_1.df, pdmol_2.df) == 1.1609 @@ -52,5 +52,5 @@ def test_rmsd(): def test_distance(): data_path = os.path.join(this_dir, 'data', '1b5e_1.mol2') - pdmol = PandasMOL2().read_mol2(data_path) + pdmol = PandasMol2().read_mol2(data_path) assert round(pdmol.distance().values[0], 3) == 31.185 diff --git a/biopandas/pdb/__init__.py b/biopandas/pdb/__init__.py index 614fbea..f11af51 100644 --- a/biopandas/pdb/__init__.py +++ b/biopandas/pdb/__init__.py @@ -9,6 +9,6 @@ files in pandas DataFrames. """ -from .pandas_pdb import PandasPDB +from .pandas_pdb import PandasPdb -__all__ = ["PandasPDB"] +__all__ = ["PandasPdb"] diff --git a/biopandas/pdb/pandas_pdb.py b/biopandas/pdb/pandas_pdb.py index 679ee60..31dd547 100644 --- a/biopandas/pdb/pandas_pdb.py +++ b/biopandas/pdb/pandas_pdb.py @@ -19,7 +19,7 @@ from .engines import amino3to1dict -class PandasPDB(object): +class PandasPdb(object): """ Object for working with Protein Databank structure files. Attributes @@ -117,12 +117,12 @@ def get(self, s, df=None, invert=False): df = self._df['ATOM'] return self._get_dict[s](df, invert=invert) - def impute_element(self, sections=['ATOM', 'HETATM'], inplace=False): + def impute_element(self, sections=('ATOM', 'HETATM'), inplace=False): """Impute element_symbol from atom_name section. Parameters ---------- - sections : iterable (default: ['ATOM', 'HETATM']) + sections : iterable (default: ('ATOM', 'HETATM')) Coordinate sections for which the element symbols should be imputed. 
@@ -181,7 +181,7 @@ def rmsd(df1, df2, s=None, invert=False): """ if df1.shape[0] != df2.shape[0]: raise AttributeError('DataFrames have unequal lengths') - get_dict = PandasPDB._init_get_dict() + get_dict = PandasPdb._init_get_dict() if s: if s not in get_dict.keys(): raise AttributeError('s must be in ' @@ -198,11 +198,11 @@ def rmsd(df1, df2, s=None, invert=False): @staticmethod def _init_get_dict(): """Initialize dictionary for filter operations.""" - get_dict = {'main chain': PandasPDB._get_mainchain, - 'hydrogen': PandasPDB._get_hydrogen, - 'c-alpha': PandasPDB._get_calpha, - 'carbon': PandasPDB._get_carbon, - 'heavy': PandasPDB._get_heavy} + get_dict = {'main chain': PandasPdb._get_mainchain, + 'hydrogen': PandasPdb._get_hydrogen, + 'c-alpha': PandasPdb._get_calpha, + 'carbon': PandasPdb._get_carbon, + 'heavy': PandasPdb._get_heavy} return get_dict @staticmethod diff --git a/biopandas/pdb/tests/test_amino3to1.py b/biopandas/pdb/tests/test_amino3to1.py index 867b52a..16e9dce 100644 --- a/biopandas/pdb/tests/test_amino3to1.py +++ b/biopandas/pdb/tests/test_amino3to1.py @@ -4,14 +4,14 @@ # Project Website: http://rasbt.github.io/biopandas/ # Code Repository: https://github.com/rasbt/biopandas -from biopandas.pdb import PandasPDB +from biopandas.pdb import PandasPdb import os def test_defaults(): TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), 'data', '1t48_995.pdb') - p1t48 = PandasPDB() + p1t48 = PandasPdb() p1t48.read_pdb(TESTDATA_1t48) expect = ['M', 'E', 'M', 'E', 'K', 'E', 'F', 'E', 'Q', 'I', 'D', 'K', 'S', 'G', 'S', 'W', 'A', 'A', diff --git a/biopandas/pdb/tests/test_distance.py b/biopandas/pdb/tests/test_distance.py index 377d5ac..2070e8e 100644 --- a/biopandas/pdb/tests/test_distance.py +++ b/biopandas/pdb/tests/test_distance.py @@ -5,7 +5,7 @@ # Code Repository: https://github.com/rasbt/biopandas import pandas as pd -from biopandas.pdb import PandasPDB +from biopandas.pdb import PandasPdb import os from nose.tools import raises @@ -14,7 +14,7 
@@ def test_equal(): TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), 'data', '1t48_995.pdb') - p1t48 = PandasPDB() + p1t48 = PandasPdb() p1t48.read_pdb(TESTDATA_1t48) dist = p1t48.distance(xyz=(70.785, 15.477, 23.359), record='ATOM') diff --git a/biopandas/pdb/tests/test_impute.py b/biopandas/pdb/tests/test_impute.py index 8c688d9..e035306 100644 --- a/biopandas/pdb/tests/test_impute.py +++ b/biopandas/pdb/tests/test_impute.py @@ -5,7 +5,7 @@ # Code Repository: https://github.com/rasbt/biopandas -from biopandas.pdb import PandasPDB +from biopandas.pdb import PandasPdb import os import numpy as np import pandas as pd @@ -14,7 +14,7 @@ TESTDATA_FILENAME = os.path.join(os.path.dirname(__file__), 'data', '3eiy_stripped_no_ele.pdb') -ppdb = PandasPDB() +ppdb = PandasPdb() ppdb.read_pdb(TESTDATA_FILENAME) diff --git a/biopandas/pdb/tests/test_read_pdb.py b/biopandas/pdb/tests/test_read_pdb.py index e0bdbe2..9d0a420 100644 --- a/biopandas/pdb/tests/test_read_pdb.py +++ b/biopandas/pdb/tests/test_read_pdb.py @@ -5,7 +5,7 @@ # Code Repository: https://github.com/rasbt/biopandas -from biopandas.pdb import PandasPDB +from biopandas.pdb import PandasPdb import os import numpy as np import pandas as pd @@ -50,7 +50,7 @@ def test__read_pdb(): """Test private _read_pdb""" - ppdb = PandasPDB() + ppdb = PandasPdb() txt = ppdb._read_pdb(TESTDATA_FILENAME) print(txt) assert txt == three_eiy @@ -60,7 +60,7 @@ def test_fetch_pdb(): """Test fetch_pdb""" try: - ppdb = PandasPDB() + ppdb = PandasPdb() txt = ppdb._fetch_pdb('3eiy') except HTTPError: pass @@ -73,14 +73,14 @@ def test_fetch_pdb(): def test__read_pdb_gz(): """Test public _read_pdb with gzip files""" - ppdb = PandasPDB() + ppdb = PandasPdb() txt = ppdb._read_pdb(TESTDATA_FILENAME_GZ) assert txt == three_eiy def test__construct_df(): """Test pandas dataframe construction""" - ppdb = PandasPDB() + ppdb = PandasPdb() dfs = ppdb._construct_df(three_eiy.splitlines()) assert set(dfs.keys()) == {'OTHERS', 'ATOM', 'ANISOU', 
'HETATM'} assert set(dfs['ATOM'].columns) == set(ATOM_DF_COLUMNS) @@ -102,7 +102,7 @@ def test__construct_df(): def test_read_pdb(): """Test public read_pdb""" - ppdb = PandasPDB() + ppdb = PandasPdb() ppdb.read_pdb(TESTDATA_FILENAME) assert ppdb.pdb_text == three_eiy assert ppdb.code == '3eiy', ppdb.code @@ -110,7 +110,7 @@ def test_read_pdb(): def test_anisou_input_handling(): """Test public read_pdb""" - ppdb = PandasPDB() + ppdb = PandasPdb() ppdb.read_pdb(TESTDATA_FILENAME2) assert ppdb.pdb_text == four_eiy assert ppdb.code == '4eiy', ppdb.code @@ -118,20 +118,20 @@ def test_anisou_input_handling(): @raises(AttributeError) def test_get_exceptions(): - ppdb = PandasPDB() + ppdb = PandasPdb() ppdb.read_pdb(TESTDATA_FILENAME) ppdb.get('main-chai') def test_get_all(): - ppdb = PandasPDB() + ppdb = PandasPdb() ppdb.read_pdb(TESTDATA_FILENAME) for i in ['c-alpha', 'hydrogen', 'main chain']: ppdb.get(i) def test_get_df(): - ppdb = PandasPDB() + ppdb = PandasPdb() ppdb.read_pdb(TESTDATA_FILENAME) shape = ppdb.get('c-alpha').shape diff --git a/biopandas/pdb/tests/test_rmsd.py b/biopandas/pdb/tests/test_rmsd.py index edb680a..44529ae 100644 --- a/biopandas/pdb/tests/test_rmsd.py +++ b/biopandas/pdb/tests/test_rmsd.py @@ -4,7 +4,7 @@ # Project Website: http://rasbt.github.io/biopandas/ # Code Repository: https://github.com/rasbt/biopandas -from biopandas.pdb import PandasPDB +from biopandas.pdb import PandasPdb import os from nose.tools import raises @@ -18,50 +18,50 @@ TESTDATA_lig2 = os.path.join(os.path.dirname(__file__), 'data', 'lig_conf_2.pdb') -p1t48 = PandasPDB() +p1t48 = PandasPdb() p1t48.read_pdb(TESTDATA_1t48) -p1t49 = PandasPDB() +p1t49 = PandasPdb() p1t49.read_pdb(TESTDATA_1t49) -pl1 = PandasPDB() +pl1 = PandasPdb() pl1.read_pdb(TESTDATA_lig1) -pl2 = PandasPDB() +pl2 = PandasPdb() pl2.read_pdb(TESTDATA_lig2) def test_equal(): - r = PandasPDB.rmsd(p1t48.df['ATOM'], p1t48.df['ATOM'], s=None) + r = PandasPdb.rmsd(p1t48.df['ATOM'], p1t48.df['ATOM'], s=None) 
assert r == 0.000, r @raises(AttributeError) def test_wrong_arg(): - PandasPDB.rmsd(p1t48.df['ATOM'].loc[1:, :], p1t48.df['ATOM'], s='bla') + PandasPdb.rmsd(p1t48.df['ATOM'].loc[1:, :], p1t48.df['ATOM'], s='bla') @raises(AttributeError) def test_incompatible(): - PandasPDB.rmsd(p1t48.df['ATOM'].loc[1:, :], p1t48.df['ATOM'], s=None) + PandasPdb.rmsd(p1t48.df['ATOM'].loc[1:, :], p1t48.df['ATOM'], s=None) @raises(AttributeError) def test_invalid_query(): - PandasPDB.rmsd(p1t48.df['ATOM'].loc[1:, :], p1t48.df['ATOM'], s='bla') + PandasPdb.rmsd(p1t48.df['ATOM'].loc[1:, :], p1t48.df['ATOM'], s='bla') def test_protein(): - r = PandasPDB.rmsd(p1t48.df['ATOM'], p1t49.df['ATOM'], + r = PandasPdb.rmsd(p1t48.df['ATOM'], p1t49.df['ATOM'], s='c-alpha', invert=False) assert r == 0.4785, r def test_ligand(): - r = PandasPDB.rmsd(pl1.df['HETATM'], pl2.df['HETATM'], + r = PandasPdb.rmsd(pl1.df['HETATM'], pl2.df['HETATM'], s='hydrogen', invert=True) assert r == 1.9959, r def test_ligand_default(): - r = PandasPDB.rmsd(pl1.df['HETATM'], pl2.df['HETATM'], + r = PandasPdb.rmsd(pl1.df['HETATM'], pl2.df['HETATM'], s=None) assert r == 2.6444, r diff --git a/biopandas/pdb/tests/test_write_pdb.py b/biopandas/pdb/tests/test_write_pdb.py index 161a415..d8bf949 100644 --- a/biopandas/pdb/tests/test_write_pdb.py +++ b/biopandas/pdb/tests/test_write_pdb.py @@ -4,7 +4,7 @@ # Project Website: http://rasbt.github.io/biopandas/ # Code Repository: https://github.com/rasbt/biopandas -from biopandas.pdb import PandasPDB +from biopandas.pdb import PandasPdb import warnings import pandas as pd import os @@ -27,7 +27,7 @@ def test_defaults(): - ppdb = PandasPDB() + ppdb = PandasPdb() ppdb.read_pdb(TESTDATA_FILENAME) ppdb.to_pdb(path=OUTFILE, records=None) with open(TESTDATA_FILENAME, 'r') as f: @@ -39,7 +39,7 @@ def test_defaults(): def test_nonexpected_column(): - ppdb = PandasPDB() + ppdb = PandasPdb() ppdb.read_pdb(TESTDATA_FILENAME) ppdb.df['HETATM']['test'] = pd.Series('test', 
index=ppdb.df['HETATM'].index) @@ -53,7 +53,7 @@ def test_nonexpected_column(): def test_records(): """Test private _read_pdb.""" - ppdb = PandasPDB() + ppdb = PandasPdb() ppdb.read_pdb(TESTDATA_FILENAME) ppdb.to_pdb(path=OUTFILE, records=['HETATM']) with open(OUTFILE, 'r') as f: @@ -64,7 +64,7 @@ def test_records(): def test_anisou(): """Test writing ANISOU entries.""" - ppdb = PandasPDB() + ppdb = PandasPdb() ppdb.read_pdb(TESTDATA_FILENAME2) ppdb.to_pdb(path=OUTFILE, records=None) with open(OUTFILE, 'r') as f: diff --git a/docs/sources/CHANGELOG.md b/docs/sources/CHANGELOG.md index 7d199f5..662d8a4 100755 --- a/docs/sources/CHANGELOG.md +++ b/docs/sources/CHANGELOG.md @@ -8,14 +8,14 @@ ##### New Features -- Added an `amino3to1` method to `PandasPDB` data frames to convert 3-amino acid letter codes to 1-letter codes. -- Added a `distance` method to `PandasPDB` data frames to compute the Euclidean distance between atoms and a reference point. -- Added the `PandasMOL2` class for working with Tripos MOL2 files in pandas DataFrames +- Added an `amino3to1` method to `PandasPdb` data frames to convert 3-amino acid letter codes to 1-letter codes. +- Added a `distance` method to `PandasPdb` data frames to compute the Euclidean distance between atoms and a reference point. +- Added the `PandasMol2` class for working with Tripos MOL2 files in pandas DataFrames ##### Changes -- Raises a warning if `PandasPDB` is written to PDB and ATOM and HETAM section contains unexpected columns; these columns will now be skipped. +- Raises a warning if `PandasPdb` is written to PDB and ATOM and HETAM section contains unexpected columns; these columns will now be skipped. ##### Bug Fixes @@ -31,13 +31,13 @@ ##### New Features -- Added an `impute_element` method to `PandasPDB` objects to infer the Element Symbol from the Atom Name column. -- Added two new selection types for `PandasPDB` ATOM and HETATM coordinate sections: `'heavy'` and `'carbon'`. 
+- Added an `impute_element` method to `PandasPdb` objects to infer the Element Symbol from the Atom Name column. +- Added two new selection types for `PandasPdb` ATOM and HETATM coordinate sections: `'heavy'` and `'carbon'`. ##### Changes - Include test data in the PyPI package; add install_requires for pandas. -- The `'hydrogen'` atom selection in `PandasPDB` methods is now based on the element type instead of the atom name. +- The `'hydrogen'` atom selection in `PandasPdb` methods is now based on the element type instead of the atom name. - By default, the RMSD is now computed on all atoms unless a specific selection is defined. ##### Bug Fixes diff --git a/docs/sources/api_subpackages/biopandas.mol2.md b/docs/sources/api_subpackages/biopandas.mol2.md index 530cca9..e69772d 100644 --- a/docs/sources/api_subpackages/biopandas.mol2.md +++ b/docs/sources/api_subpackages/biopandas.mol2.md @@ -1,7 +1,7 @@ biopandas version: 0.2.0.dev0 -## PandasMOL2 +## PandasMol2 -*PandasMOL2()* +*PandasMol2()* Object for working with Tripos Mol2 structure files. diff --git a/docs/sources/api_subpackages/biopandas.pdb.md b/docs/sources/api_subpackages/biopandas.pdb.md index 8b3421c..ea85fe4 100644 --- a/docs/sources/api_subpackages/biopandas.pdb.md +++ b/docs/sources/api_subpackages/biopandas.pdb.md @@ -1,7 +1,7 @@ biopandas version: 0.2.0.dev0 -## PandasPDB +## PandasPdb -*PandasPDB()* +*PandasPdb()* Object for working with Protein Databank structure files. diff --git a/docs/sources/index.md b/docs/sources/index.md index ade843a..67aed03 100755 --- a/docs/sources/index.md +++ b/docs/sources/index.md @@ -40,10 +40,10 @@ So, why don't we take pandas to the structural biology world? 
Working with molec ![3eiy](./img/index/3eiy.png) ```python -# Initialize a new PandasPDB object +# Initialize a new PandasPdb object # and fetch the PDB file from rcsb.org ->>> from biopandas.pdb import PandasPDB ->>> ppdb = PandasPDB().fetch_pdb('3eiy') +>>> from biopandas.pdb import PandasPdb +>>> ppdb = PandasPdb().fetch_pdb('3eiy') >>> ppdb.df['ATOM'].head() ``` @@ -57,10 +57,10 @@ So, why don't we take pandas to the structural biology world? Working with molec ```python # Load structures from your drive and compute the # Root Mean Square Deviation ->>> from biopandas.pdb import PandasPDB ->>> pl1 = PandasPDB().read_pdb('./docking_pose_1.pdb') ->>> pl2 = PandasPDB().read_pdb('./docking_pose_2.pdb') ->>> r = PandasPDB.rmsd(pl1.df['HETATM'], pl2.df['HETATM']) +>>> from biopandas.pdb import PandasPdb +>>> pl1 = PandasPdb().read_pdb('./docking_pose_1.pdb') +>>> pl2 = PandasPdb().read_pdb('./docking_pose_2.pdb') +>>> r = PandasPdb.rmsd(pl1.df['HETATM'], pl2.df['HETATM']) >>> print('RMSD: %.4f Angstrom' % r) RMSD: 2.6444 Angstrom ``` diff --git a/docs/sources/tutorials/Working_with_MOL2_Structures_in_DataFrames.ipynb b/docs/sources/tutorials/Working_with_MOL2_Structures_in_DataFrames.ipynb index a954fbc..43a8448 100644 --- a/docs/sources/tutorials/Working_with_MOL2_Structures_in_DataFrames.ipynb +++ b/docs/sources/tutorials/Working_with_MOL2_Structures_in_DataFrames.ipynb @@ -43,7 +43,7 @@ }, "outputs": [], "source": [ - "from biopandas.mol2 import PandasMOL2\n", + "from biopandas.mol2 import PandasMol2\n", "import pandas as pd\n", "pd.set_option('display.width', 600)\n", "pd.set_option('display.max_columns', 8)" @@ -85,9 +85,9 @@ }, "outputs": [], "source": [ - "from biopandas.mol2 import PandasMOL2\n", + "from biopandas.mol2 import PandasMol2\n", "\n", - "pmol = PandasMOL2().read_mol2('./data/1b5e_1.mol2')" + "pmol = PandasMol2().read_mol2('./data/1b5e_1.mol2')" ] }, { @@ -107,14 +107,14 @@ }, "outputs": [], "source": [ - "pmol = 
PandasMOL2().read_mol2('./data/40_mol2_files.mol2.gz')" + "pmol = PandasMol2().read_mol2('./data/40_mol2_files.mol2.gz')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "After the file was succesfully loaded, we have access to the following basic `PandasMOL2` attributes:" + "After the file was succesfully loaded, we have access to the following basic `PandasMol2` attributes:" ] }, { @@ -158,7 +158,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The most interesting and useful attribute, however, is the [`PandasMOL2.df`](../api/biopandas.mol2#pandasmol2df) DataFrame, which contains the ATOM section of the MOL2 structure. Let's print the first 3 lines from the `ATOM` coordinate section to see how it looks like:" + "The most interesting and useful attribute, however, is the [`PandasMol2.df`](../api/biopandas.mol2#pandasmol2df) DataFrame, which contains the ATOM section of the MOL2 structure. Let's print the first 3 lines from the `ATOM` coordinate section to see how it looks like:" ] }, { @@ -258,7 +258,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "`PandasMOL2` expects the MOL2 file to be in the standard Tripos MOL2 format, and most importantly, that the \"@ATOM\" section is consistent with the following format convention:\n", + "`PandasMol2` expects the MOL2 file to be in the standard Tripos MOL2 format, and most importantly, that the \"@ATOM\" section is consistent with the following format convention:\n", "\n", "\n", "> Format:\n", @@ -500,9 +500,9 @@ } ], "source": [ - "from biopandas.pdb import PandasPDB\n", + "from biopandas.pdb import PandasPdb\n", "\n", - "pmol = PandasMOL2()\n", + "pmol = PandasMol2()\n", "pmol.read_mol2('./data/1b5e_1.mol2')\n", "pmol.df.tail(10)" ] @@ -868,7 +868,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 10, @@ -877,9 +877,9 @@ } ], "source": [ - "from biopandas.pdb import PandasPDB\n", + "from biopandas.pdb import PandasPdb\n", "\n", - "pmol = PandasMOL2()\n", + "pmol = 
PandasMol2()\n", "pmol.read_mol2('./data/1b5e_1.mol2')" ] }, @@ -1044,13 +1044,13 @@ } ], "source": [ - "from biopandas.mol2 import PandasMOL2\n", + "from biopandas.mol2 import PandasMol2\n", "\n", - "l_1 = PandasMOL2().read_mol2('./data/1b5e_1.mol2')\n", - "l_2 = PandasMOL2().read_mol2('./data/1b5e_2.mol2')\n", + "l_1 = PandasMol2().read_mol2('./data/1b5e_1.mol2')\n", + "l_2 = PandasMol2().read_mol2('./data/1b5e_2.mol2')\n", "\n", - "r_heavy = PandasMOL2.rmsd(l_1.df, l_2.df)\n", - "r_all = PandasMOL2.rmsd(l_1.df, l_2.df, heavy_only=False)\n", + "r_heavy = PandasMol2.rmsd(l_1.df, l_2.df)\n", + "r_all = PandasMol2.rmsd(l_1.df, l_2.df, heavy_only=False)\n", "\n", "print('Heavy-atom RMSD: %.4f Angstrom' % r_heavy)\n", "print('All-atom RMSD: %.4f Angstrom' % r_all)" @@ -1126,9 +1126,9 @@ } ], "source": [ - "from biopandas.pdb import PandasPDB\n", + "from biopandas.pdb import PandasPdb\n", "\n", - "pmol = PandasMOL2().read_mol2('./data/1b5e_1.mol2')\n", + "pmol = PandasMol2().read_mol2('./data/1b5e_1.mol2')\n", "\n", "keto_coord = pmol.df[pmol.df['atom_type'] == 'O.2'][['x', 'y', 'z']]\n", "keto_coord" @@ -1499,7 +1499,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "As mentioned earlier, `PandasMOL2.read_mol2()` only reads in the first molecule if it is given a multi-MOL2 file. However, if we want to create DataFrames from multiple structures in a MOL2 file, we can use the handy `split_multimol2` generator.\n", + "As mentioned earlier, `PandasMol2.read_mol2()` only reads in the first molecule if it is given a multi-MOL2 file. However, if we want to create DataFrames from multiple structures in a MOL2 file, we can use the handy `split_multimol2` generator.\n", "\n", "The `split_multimol2` generator yields tuples containing the molecule IDs and the MOL2 content as strings in a list -- each line in the MOL2 file is stored as a string in the list." 
] @@ -1535,7 +1535,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We can now use this generator to loop over all files in a multi-MOL2 file and create PandasMOL2 DataFrames. A typical use case would be the filtering of mol2 files by certain properties:" + "We can now use this generator to loop over all files in a multi-MOL2 file and create PandasMol2 DataFrames. A typical use case would be the filtering of mol2 files by certain properties:" ] }, { @@ -1546,7 +1546,7 @@ }, "outputs": [], "source": [ - "pdmol = PandasMOL2()\n", + "pdmol = PandasMol2()\n", "\n", "with open('./data/filtered.mol2', 'w') as f:\n", " for mol2 in split_multimol2('./data/40_mol2_files.mol2'):\n", diff --git a/docs/sources/tutorials/Working_with_PDB_Structures_in_DataFrames.ipynb b/docs/sources/tutorials/Working_with_PDB_Structures_in_DataFrames.ipynb index 6431144..713e979 100644 --- a/docs/sources/tutorials/Working_with_PDB_Structures_in_DataFrames.ipynb +++ b/docs/sources/tutorials/Working_with_PDB_Structures_in_DataFrames.ipynb @@ -43,7 +43,7 @@ }, "outputs": [], "source": [ - "from biopandas.pdb import PandasPDB\n", + "from biopandas.pdb import PandasPdb\n", "import pandas as pd\n", "pd.set_option('display.width', 600)\n", "pd.set_option('display.max_columns', 8)" @@ -67,7 +67,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "There are 2 1/2 ways to load a PDB structure into a `PandasPDB` object.\n" + "There are 2 1/2 ways to load a PDB structure into a `PandasPdb` object.\n" ] }, { @@ -75,7 +75,7 @@ "metadata": {}, "source": [ "#### 1\n", - "PDB files can be directly fetched from The Protein Data Bank at [http://www.rcsb.org](http://www.rcsb.org) via its unique 4-letter after initializing a new [`PandasPDB`](../api/biopandas.pdb#pandaspdb) object and calling the [`fetch_pdb`](../api/biopandas.pdb#pandaspdbfetch_pdb) method:" + "PDB files can be directly fetched from The Protein Data Bank at [http://www.rcsb.org](http://www.rcsb.org) via its unique 4-letter 
after initializing a new [`PandasPdb`](../api/biopandas.pdb#pandaspdb) object and calling the [`fetch_pdb`](../api/biopandas.pdb#pandaspdbfetch_pdb) method:" ] }, { @@ -86,11 +86,11 @@ }, "outputs": [], "source": [ - "from biopandas.pdb import PandasPDB\n", + "from biopandas.pdb import PandasPdb\n", "\n", - "# Initialize a new PandasPDB object\n", + "# Initialize a new PandasPdb object\n", "# and fetch the PDB file from rcsb.org\n", - "ppdb = PandasPDB().fetch_pdb('3eiy')" + "ppdb = PandasPdb().fetch_pdb('3eiy')" ] }, { @@ -112,7 +112,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 4, @@ -143,7 +143,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 5, @@ -205,7 +205,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The most interesting / useful attribute is the [`PandasPDB.df`](../api/biopandas.pdb#pandaspdbdf) DataFrame dictionary though, which gives us access to the PDB files as pandas DataFrames. Let's print the first 3 lines from the `ATOM` coordinate section to see how it looks like:" + "The most interesting / useful attribute is the [`PandasPdb.df`](../api/biopandas.pdb#pandaspdbdf) DataFrame dictionary though, which gives us access to the PDB files as pandas DataFrames. 
Let's print the first 3 lines from the `ATOM` coordinate section to see how it looks like:" ] }, { @@ -373,7 +373,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "After loading a PDB file from rcsb.org or our local drive, the [`PandasPDB.df`](../api/biopandas.pdb/#pandaspdbdf) attribute should contain the following 4 DataFrame objects:" + "After loading a PDB file from rcsb.org or our local drive, the [`PandasPdb.df`](../api/biopandas.pdb/#pandaspdbdf) attribute should contain the following 4 DataFrame objects:" ] }, { @@ -395,8 +395,8 @@ } ], "source": [ - "from biopandas.pdb import PandasPDB\n", - "ppdb = PandasPDB()\n", + "from biopandas.pdb import PandasPdb\n", + "ppdb = PandasPdb()\n", "ppdb.read_pdb('./data/3eiy.pdb')\n", "ppdb.df.keys()" ] @@ -954,8 +954,8 @@ } ], "source": [ - "from biopandas.pdb import PandasPDB\n", - "ppdb = PandasPDB()\n", + "from biopandas.pdb import PandasPdb\n", + "ppdb = PandasPdb()\n", "ppdb.read_pdb('./data/3eiy.pdb.gz')\n", "ppdb.df['ATOM'].head()" ] @@ -1381,8 +1381,8 @@ }, "outputs": [], "source": [ - "from biopandas.pdb import PandasPDB\n", - "ppdb = PandasPDB().read_pdb('./data/3eiy.pdb.gz')" + "from biopandas.pdb import PandasPdb\n", + "ppdb = PandasPdb().read_pdb('./data/3eiy.pdb.gz')" ] }, { @@ -1530,11 +1530,11 @@ } ], "source": [ - "from biopandas.pdb import PandasPDB\n", + "from biopandas.pdb import PandasPdb\n", "\n", - "l_1 = PandasPDB().read_pdb('./data/lig_conf_1.pdb')\n", - "l_2 = PandasPDB().read_pdb('./data/lig_conf_2.pdb')\n", - "r = PandasPDB.rmsd(l_1.df['HETATM'], l_2.df['HETATM'],\n", + "l_1 = PandasPdb().read_pdb('./data/lig_conf_1.pdb')\n", + "l_2 = PandasPdb().read_pdb('./data/lig_conf_2.pdb')\n", + "r = PandasPdb.rmsd(l_1.df['HETATM'], l_2.df['HETATM'],\n", " s=None) # all atoms, including hydrogens\n", "print('RMSD: %.4f Angstrom' % r)" ] @@ -1555,7 +1555,7 @@ } ], "source": [ - "r = PandasPDB.rmsd(l_1.df['HETATM'], l_2.df['HETATM'], \n", + "r = PandasPdb.rmsd(l_1.df['HETATM'], 
l_2.df['HETATM'], \n", " s='carbon') # carbon atoms only\n", "print('RMSD: %.4f Angstrom' % r)" ] @@ -1576,7 +1576,7 @@ } ], "source": [ - "r = PandasPDB.rmsd(l_1.df['HETATM'], l_2.df['HETATM'], \n", + "r = PandasPdb.rmsd(l_1.df['HETATM'], l_2.df['HETATM'], \n", " s='heavy') # heavy atoms only\n", "print('RMSD: %.4f Angstrom' % r)" ] @@ -1613,9 +1613,9 @@ } ], "source": [ - "p_1 = PandasPDB().read_pdb('./data/1t48_995.pdb')\n", - "p_2 = PandasPDB().read_pdb('./data/1t49_995.pdb')\n", - "r = PandasPDB.rmsd(p_1.df['ATOM'], p_2.df['ATOM'], s='heavy')\n", + "p_1 = PandasPdb().read_pdb('./data/1t48_995.pdb')\n", + "p_2 = PandasPdb().read_pdb('./data/1t49_995.pdb')\n", + "r = PandasPdb.rmsd(p_1.df['ATOM'], p_2.df['ATOM'], s='heavy')\n", "print('RMSD: %.4f Angstrom' % r)" ] }, @@ -1642,9 +1642,9 @@ } ], "source": [ - "p_1 = PandasPDB().read_pdb('./data/1t48_995.pdb')\n", - "p_2 = PandasPDB().read_pdb('./data/1t49_995.pdb')\n", - "r = PandasPDB.rmsd(p_1.df['ATOM'], p_2.df['ATOM'], s='main chain')\n", + "p_1 = PandasPdb().read_pdb('./data/1t48_995.pdb')\n", + "p_2 = PandasPdb().read_pdb('./data/1t49_995.pdb')\n", + "r = PandasPdb.rmsd(p_1.df['ATOM'], p_2.df['ATOM'], s='main chain')\n", "print('RMSD: %.4f Angstrom' % r)" ] }, @@ -1677,7 +1677,7 @@ }, "outputs": [], "source": [ - "p_1 = PandasPDB().read_pdb('./data/3eiy.pdb')\n", + "p_1 = PandasPdb().read_pdb('./data/3eiy.pdb')\n", "\n", "reference_point = (9.362, 41.410, 10.542)\n", "distances = p_1.distance(xyz=reference_point, record='ATOM')" @@ -1940,8 +1940,8 @@ } ], "source": [ - "from biopandas.pdb import PandasPDB\n", - "ppdb = PandasPDB().read_pdb('./data/3eiy.pdb.gz')\n", + "from biopandas.pdb import PandasPdb\n", + "ppdb = PandasPdb().read_pdb('./data/3eiy.pdb.gz')\n", "ppdb.amino3to1()\n", "# By default, `amino3to1` returns a pandas Series object,\n", "# and to convert it into a Python list, you can wrap it in list\n", @@ -1978,8 +1978,8 @@ }, "outputs": [], "source": [ - "from biopandas.pdb import PandasPDB\n", - 
"ppdb = PandasPDB().read_pdb('./data/3eiy.pdb.gz')\n", + "from biopandas.pdb import PandasPdb\n", + "ppdb = PandasPdb().read_pdb('./data/3eiy.pdb.gz')\n", "ppdb.df['ATOM'] = ppdb.df['ATOM'][ppdb.df['ATOM']['element_symbol'] != 'H']" ] }, @@ -1987,7 +1987,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We can save the file using the [`PandasPDB.to_pdb`](../api/biopandas.pdb#pandaspdbto_pdb) method:" + "We can save the file using the [`PandasPdb.to_pdb`](../api/biopandas.pdb#pandaspdbto_pdb) method:" ] }, { diff --git a/docs/sources/tutorials/Working_with_PDB_Structures_in_DataFrames.md b/docs/sources/tutorials/Working_with_PDB_Structures_in_DataFrames.md index 661a3d0..43068b0 100644 --- a/docs/sources/tutorials/Working_with_PDB_Structures_in_DataFrames.md +++ b/docs/sources/tutorials/Working_with_PDB_Structures_in_DataFrames.md @@ -3,19 +3,19 @@ ## Loading PDB Files -There are 2 1/2 ways to load a PDB structure into a `PandasPDB` object. +There are 2 1/2 ways to load a PDB structure into a `PandasPdb` object. 
#### 1 -PDB files can be directly fetched from The Protein Data Bank at [http://www.rcsb.org](http://www.rcsb.org) via its unique 4-letter after initializing a new [`PandasPDB`](../api/biopandas.pdb#pandaspdb) object and calling the [`fetch_pdb`](../api/biopandas.pdb#pandaspdbfetch_pdb) method: +PDB files can be directly fetched from The Protein Data Bank at [http://www.rcsb.org](http://www.rcsb.org) via its unique 4-letter after initializing a new [`PandasPdb`](../api/biopandas.pdb#pandaspdb) object and calling the [`fetch_pdb`](../api/biopandas.pdb#pandaspdbfetch_pdb) method: ```python -from biopandas.pdb import PandasPDB +from biopandas.pdb import PandasPdb -# Initialize a new PandasPDB object +# Initialize a new PandasPdb object # and fetch the PDB file from rcsb.org -ppdb = PandasPDB().fetch_pdb('3eiy') +ppdb = PandasPdb().fetch_pdb('3eiy') ``` #### 2 a) @@ -30,7 +30,7 @@ ppdb.read_pdb('./data/3eiy.pdb') - + @@ -46,7 +46,7 @@ ppdb.read_pdb('./data/3eiy.pdb.gz') - + @@ -80,7 +80,7 @@ print('\nRaw PDB file contents:\n\n%s\n...' % ppdb.pdb_text[:1000]) ... -The most interesting / useful attribute is the [`PandasPDB.df`](../api/biopandas.pdb#pandaspdbdf) DataFrame dictionary though, which gives us access to the PDB files as pandas DataFrames. Let's print the first 3 lines from the `ATOM` coordinate section to see how it looks like: +The most interesting / useful attribute is the [`PandasPdb.df`](../api/biopandas.pdb#pandaspdbdf) DataFrame dictionary though, which gives us access to the PDB files as pandas DataFrames. 
Let's print the first 3 lines from the `ATOM` coordinate section to see what it looks like: ```python @@ -195,12 +195,12 @@ Below is an example of how this would look like in an actual PDB file: ATOM 153 CG2AVAL A 25 30.835 18.826 57.661 0.28 13.58 A1 C ATOM 154 CG2BVAL A 25 29.909 16.996 55.922 0.72 13.25 A1 C -After loading a PDB file from rcsb.org or our local drive, the [`PandasPDB.df`](../api/biopandas.pdb/#pandaspdbdf) attribute should contain the following 4 DataFrame objects: +After loading a PDB file from rcsb.org or our local drive, the [`PandasPdb.df`](../api/biopandas.pdb/#pandaspdbdf) attribute should contain the following 4 DataFrame objects: ```python -from biopandas.pdb import PandasPDB -ppdb = PandasPDB() +from biopandas.pdb import PandasPdb +ppdb = PandasPdb() ppdb.read_pdb('./data/3eiy.pdb') ppdb.df.keys() ``` @@ -474,8 +474,8 @@ In the previous sections, we've seen how to load PDB structures into DataFrames, ```python -from biopandas.pdb import PandasPDB -ppdb = PandasPDB() +from biopandas.pdb import PandasPdb +ppdb = PandasPdb() ppdb.read_pdb('./data/3eiy.pdb.gz') ppdb.df['ATOM'].head() ``` @@ -874,8 +874,8 @@ Since we are using pandas under the hood, which in turns uses matplotlib under t ```python -from biopandas.pdb import PandasPDB -ppdb = PandasPDB().read_pdb('./data/3eiy.pdb.gz') +from biopandas.pdb import PandasPdb +ppdb = PandasPdb().read_pdb('./data/3eiy.pdb.gz') ``` @@ -942,11 +942,11 @@ we can compute the RMSD as follows: ```python -from biopandas.pdb import PandasPDB +from biopandas.pdb import PandasPdb -l_1 = PandasPDB().read_pdb('./data/lig_conf_1.pdb') -l_2 = PandasPDB().read_pdb('./data/lig_conf_2.pdb') -r = PandasPDB.rmsd(l_1.df['HETATM'], l_2.df['HETATM'], +l_1 = PandasPdb().read_pdb('./data/lig_conf_1.pdb') +l_2 = PandasPdb().read_pdb('./data/lig_conf_2.pdb') +r = PandasPdb.rmsd(l_1.df['HETATM'], l_2.df['HETATM'], s=None) # all atoms, including hydrogens print('RMSD: %.4f Angstrom' % r) ``` @@ -956,7 +956,7 @@ print('RMSD: 
%.4f Angstrom' % r) ```python -r = PandasPDB.rmsd(l_1.df['HETATM'], l_2.df['HETATM'], +r = PandasPdb.rmsd(l_1.df['HETATM'], l_2.df['HETATM'], s='carbon') # carbon atoms only print('RMSD: %.4f Angstrom' % r) ``` @@ -966,7 +966,7 @@ print('RMSD: %.4f Angstrom' % r) ```python -r = PandasPDB.rmsd(l_1.df['HETATM'], l_2.df['HETATM'], +r = PandasPdb.rmsd(l_1.df['HETATM'], l_2.df['HETATM'], s='heavy') # heavy atoms only print('RMSD: %.4f Angstrom' % r) ``` @@ -982,9 +982,9 @@ The hydrogen-free RMSD: ```python -p_1 = PandasPDB().read_pdb('./data/1t48_995.pdb') -p_2 = PandasPDB().read_pdb('./data/1t49_995.pdb') -r = PandasPDB.rmsd(p_1.df['ATOM'], p_2.df['ATOM'], s='heavy') +p_1 = PandasPdb().read_pdb('./data/1t48_995.pdb') +p_2 = PandasPdb().read_pdb('./data/1t49_995.pdb') +r = PandasPdb.rmsd(p_1.df['ATOM'], p_2.df['ATOM'], s='heavy') print('RMSD: %.4f Angstrom' % r) ``` @@ -995,9 +995,9 @@ Or the RMSD between the main chains only: ```python -p_1 = PandasPDB().read_pdb('./data/1t48_995.pdb') -p_2 = PandasPDB().read_pdb('./data/1t49_995.pdb') -r = PandasPDB.rmsd(p_1.df['ATOM'], p_2.df['ATOM'], s='main chain') +p_1 = PandasPdb().read_pdb('./data/1t48_995.pdb') +p_2 = PandasPdb().read_pdb('./data/1t49_995.pdb') +r = PandasPdb.rmsd(p_1.df['ATOM'], p_2.df['ATOM'], s='main chain') print('RMSD: %.4f Angstrom' % r) ``` @@ -1016,12 +1016,12 @@ Let's say we loaded a PDB structure, removed it from it's hydrogens: ```python -from biopandas.pdb import PandasPDB -ppdb = PandasPDB().read_pdb('./data/3eiy.pdb.gz') +from biopandas.pdb import PandasPdb +ppdb = PandasPdb().read_pdb('./data/3eiy.pdb.gz') ppdb.df['ATOM'] = ppdb.df['ATOM'][ppdb.df['ATOM']['element_symbol'] != 'H'] ``` -We can save the file using the [`PandasPDB.to_pdb`](../api/biopandas.pdb#pandaspdbto_pdb) method: +We can save the file using the [`PandasPdb.to_pdb`](../api/biopandas.pdb#pandaspdbto_pdb) method: ```python