Skip to content

Commit

Permalink
Merge 199b569 into 8f8fdeb
Browse files Browse the repository at this point in the history
  • Loading branch information
mommermi committed Sep 13, 2018
2 parents 8f8fdeb + 199b569 commit 2c6e0c5
Show file tree
Hide file tree
Showing 7 changed files with 183 additions and 51 deletions.
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -16,6 +16,7 @@ htmlcov
.coverage
MANIFEST
.ipynb_checkpoints
.pytest_cache*

# Sphinx
docs/api
Expand Down
46 changes: 33 additions & 13 deletions docs/sbpy/data.rst
Expand Up @@ -284,6 +284,31 @@ Writing object data to a file
By default, the data are written in ASCII format, but other formats
are available, too (cf. `~astropy.table.Table.write`).

Alternative field names
^^^^^^^^^^^^^^^^^^^^^^^

It is common practice to use a set of different names for the same
property. For instance, the orbital inclination can be referred to as
``'i'``, ``'inc'``, or ``'incl'`` - it's a matter of personal
taste. `~sbpy.data.DataClass` accounts for this fact and is able to
provide a number of alternative field or property names, as suggested
above.

As an example, if your `~sbpy.data.Orbit` object has a column named
``'incl'`` but you try to get column ``'i'``, the object will
internally check if ``'i'`` is a legitimate alternative field name for
``'incl'``. The corresponding column is then returned. If you try to
get a field name that is not connected to any existing field name, a
``KeyError`` will be raised.

The definition of alternative field names is done in the file
``sbpy/data/__init__.py``, using the dictionary ``namealts``. This
dictionary is automatically tested for potential naming conflicts,
i.e., different properties that share the same alternative field
names.



How to use Ephem
----------------

Expand Down Expand Up @@ -460,19 +485,14 @@ asteroid and comet identifiers:
>>> print(Names.parse_comet('73P-C/Schwassmann Wachmann 3 C ')) # doctest: +SKIP
{'type': 'P', 'number': 73, 'fragment': 'C', 'name': 'Schwassmann Wachmann 3 C'}

Note that these examples are somewhat idealized. Consider the
following query:

>>> print(Names.parse_comet('12893 Mommert (1998 QS55)')) # doctest: +SKIP
{'name': 'Mommert ', 'desig': '1998 QS55'}

Although this target identifier clearly denotes an asteroid, the
routine finds a comet name and a comet designation. The reason for
this is that some comets are discovered as asteroids and hence obtain
asteroid-like designations that stick to them; similarly, comet names
cannot be easily distinguished from asteroid names, unless one knows
all comet and asteroid names. Hence, some caution is advised when
using these routines - identification might not be unambiguous.
In order to be able to distinguish between asteroid and comet
identifiers, `sbpy` follows the MPC guideline in that it requires
comet identifiers to include the comet type, either in combination
with a number (e.g., ``'259P'``), a name (e.g., ``'P/Halley'``), or
both (e.g., ``'2P/Encke'``). For instance, the identifier ``'Halley'``
would be identified as an asteroid, as it lacks a comet type
identifier. Hence, some caution is advised when using these routines -
identification might not be unambiguous.



Expand Down
49 changes: 44 additions & 5 deletions sbpy/data/__init__.py
Expand Up @@ -7,8 +7,47 @@
:author: Michael Mommert (mommermiscience@gmail.com)
"""

from .core import *
from .ephem import *
from .orbit import *
from .phys import *
from .names import *

class Conf:
    """Shared configuration for the ``sbpy.data`` classes.

    Attributes are plain class-level dictionaries:

    * ``namealts`` maps a default field name to the list of accepted
      alternative names for the same property.
    * ``altnames`` is the reverse lookup, mapping each alternative
      name to its default field name; it is derived from ``namealts``.
    * ``oorb_timeScales``, ``oorb_elemType`` and ``oorb_orbit_fields``
      hold the definitions used for pyoorb in Orbits.
    """

    # property name alternatives for Orbit, Ephem, Phys
    # default name: [list of alternative names]
    namealts = {'i': ['inc', 'incl'],
                'epoch': ['datetime_jd'],
                # Omega is the longitude of the ascending node ...
                'Omega': ['longnode'],
                # ... and w is the argument of periapsis
                'w': ['argper'],
                }

    # reverse namealts for dict of alternative names pointing to
    # default names; built as a comprehension so that no loop
    # variables are left behind as class attributes
    altnames = {alt: default
                for default, alts in namealts.items()
                for alt in alts}

    # definitions for use of pyoorb in Orbits
    oorb_timeScales = {'UTC': 1, 'UT1': 2, 'TT': 3, 'TAI': 4}
    oorb_elemType = {'CART': 1, 'COM': 2, 'KEP': 3, 'DEL': 4, 'EQX': 5}
    oorb_orbit_fields = {'COM': ['id', 'q', 'e', 'incl', 'Omega',
                                 'w', 'Tp_jd', 'orbtype', 'epoch',
                                 'epoch_scale', 'H', 'G'],
                         'KEP': ['id', 'a', 'e', 'incl', 'Omega', 'w', 'M',
                                 'orbtype', 'epoch', 'epoch_scale', 'H',
                                 'G'],
                         'CART': ['id', 'x', 'y', 'z', 'vx', 'vy', 'vz',
                                  'orbtype', 'epoch', 'epoch_scale', 'H',
                                  'G']}


conf = Conf()

from .core import (DataClass, mpc_observations, sb_search,
image_search, pds_ferret)
from .ephem import Ephem
from .orbit import Orbit
from .phys import Phys
from .names import Names

__all__ = ['DataClass', 'Ephem', 'Orbit', 'Phys', 'Names', 'conf', 'Conf',
'mpc_observations', 'sb_search', 'image_search',
'pds_ferret']
56 changes: 43 additions & 13 deletions sbpy/data/core.py
Expand Up @@ -12,6 +12,8 @@
from astropy.table import QTable, Column, vstack
import astropy.units as u

from . import conf

__all__ = ['DataClass', 'mpc_observations', 'sb_search', 'image_search',
'pds_ferret']

Expand Down Expand Up @@ -227,7 +229,7 @@ def from_file(cls, filename, **kwargs):
Examples
--------
>>> from sbpy.data import DataClass
>>> dat = Dataclass.from_file('data.txt', format='ascii') # doctest: +SKIP
>>> dat = DataClass.from_file('data.txt', format='ascii') # doctest: +SKIP
"""

data = QTable.read(filename, **kwargs)
Expand Down Expand Up @@ -277,13 +279,14 @@ def to_file(self, filename, format='ascii', **kwargs):
self._table.write(filename, format=format, **kwargs)

def __getattr__(self, field):
"""Get attribute from ``self._table` (columns, rows) or ``self``,
if the former does not exist."""
"""Get attribute from ``self._table` (columns, rows); checks
for and may use alternative field names."""

if field in self._table.columns:
return self._table[field]
if field == '_table':
return self._table
else:
raise AttributeError("field '{:s}' does not exist".format(field))
field = self._translate_columns(field)[0]
return self._table[field]

def __setattr__(self, field, value):
"""Modify attribute in ``self._table``, if it already exists there,
Expand All @@ -300,7 +303,11 @@ def __setattr__(self, field, value):
super().__setattr__(field, value)

def __getitem__(self, ident):
"""Return column or row from data table (``self._table``)."""
"""Return column or row from data table (``self._table``); checks
for and may use alternative field names."""

if isinstance(ident, str):
ident = self._translate_columns(ident)[0]
return self._table[ident]

@property
Expand Down Expand Up @@ -423,12 +430,35 @@ def add_column(self, data, name):
self._table.add_column(Column(data, name=name))
return len(self.column_names)

def _check_columns(self, colnames):
"""Checks whether all of the elements in ``colnames`` exist as
column names in the data table. If ``self.column_names`` is longer
than ``colnames``, this does not force ``False``."""

return all([col in self.column_names for col in colnames])
def _translate_columns(self, target_colnames):
    """Translate target_colnames to the corresponding column names
    present in this object's table.

    Parameters
    ----------
    target_colnames : str or list or ndarray
        Requested column name(s); a single name is treated as a
        one-element list.

    Returns
    -------
    list
        Exactly one existing column name per requested name, in the
        same order as ``target_colnames``.

    Raises
    ------
    KeyError
        If a requested name is neither a column of this object nor
        connected, through ``conf.namealts`` / ``conf.altnames``, to
        a column that exists in this object.
    """
    if not isinstance(target_colnames, (list, ndarray)):
        target_colnames = [target_colnames]

    translated_colnames = []
    for colname in target_colnames:
        if colname in self.column_names:
            # colname already in self._table
            translated_colnames.append(colname)
            continue

        if colname in conf.namealts:
            # colname is a default name: any of its alternatives
            # may be the one actually present
            candidates = list(conf.namealts[colname])
        elif colname in conf.altnames:
            # colname is an alternative name: try the default name
            # first, then the sibling alternatives
            default = conf.altnames[colname]
            candidates = [default] + [
                alt for alt in conf.namealts.get(default, [])
                if alt != colname]
        else:
            candidates = []

        # use the first equivalent name that exists, so that every
        # requested name yields exactly one translated name
        for candidate in candidates:
            if candidate in self.column_names:
                translated_colnames.append(candidate)
                break
        else:
            raise KeyError('column \"{:s}\" not available'.format(
                colname))

    return translated_colnames


def mpc_observations(targetid):
Expand Down
31 changes: 21 additions & 10 deletions sbpy/data/names.py
Expand Up @@ -119,7 +119,8 @@ def parse_comet(s):
"""Parse a string as if it were a comet name.
Considers IAU-formatted permanent and new-style
designations. Note that letter case is important.
designations. Note that comet types (P, D, C etc) are required
and letter case is important.
Parameters
----------
Expand Down Expand Up @@ -169,6 +170,8 @@ def parse_comet(s):
+------------------------------+------+----+-----+----------+------------------------+
|3D/Biela | 3 | D | | |Biela |
+------------------------------+------+----+-----+----------+------------------------+
|P/Encke | | P | | |Encke |
+------------------------------+------+----+-----+----------+------------------------+
|9P/Tempel 1 | 9 | P | | |Tempel 1 |
+------------------------------+------+----+-----+----------+------------------------+
|73P/Schwassmann Wachmann 3 C | 73 | P | | |Schwassmann Wachmann 3 C|
Expand All @@ -193,6 +196,7 @@ def parse_comet(s):
+------------------------------+------+----+-----+----------+------------------------+
|C/2015 V2 (Johnson) | | C | | 2015 V2 |Johnson |
+------------------------------+------+----+-----+----------+------------------------+
"""

import re
Expand Down Expand Up @@ -257,7 +261,7 @@ def parse_comet(s):
if len(el[5]) > 1:
r['name'] = el[5]

if len(r) == 0:
if len(r) == 0 or 'type' not in r:
raise TargetNameParseError(('{} does not appear to be a '
'comet name').format(s))
else:
Expand Down Expand Up @@ -344,7 +348,7 @@ def parse_asteroid(s):
pkd = ('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
'abcdefghifklmnopqrstuvwxyz')

pat = ('((1[8-9][0-9]{2}[ _][A-Z]{2}[0-9]{0,3}|'
pat = ('(([1A][8-9][0-9]{2}[ _][A-Z]{2}[0-9]{0,3}|'
'20[0-9]{2}[ _][A-Z]{2}[0-9]{0,3})'
# designation [0,1]
'|([1-9][0-9]{3}[ _](P-L|T-[1-3])))'
Expand All @@ -355,8 +359,11 @@ def parse_asteroid(s):
# packed desig [4]
'|(^[A-Za-z][0-9]{4}| [A-Za-z][0-9]{4})'
# packed number [5]
'|([A-Z]{3,} |[A-Z]{3,}$|[A-Z][A-Z]*[a-z][a-z]*[^0-9]*'
'[ -]?[A-Z]?[a-z]*[^0-9]*)'
'|([A-Z]{3,} |[A-Z]{3,}$' # capitalized acronymns
'|van de [A-Z][a-z]*[ ^ 0-9]*[-]?[A-Z]?[a-z]*[^0-9] *'
'|de [A-Z][a-z]*[ ^ 0-9]*[-]?[A-Z]?[a-z]*[^0-9] *'
"|['`]?[A-Z][A-Z]*['`]?[a-z][a-z]*['`]?[^0-9]*"
"[ -]?[A-Z]?[a-z]*[^0-9]*)"
# name [6]
'|([1-9][0-9]*(\b|$| |_))'
# number [7,8]
Expand Down Expand Up @@ -391,7 +398,10 @@ def parse_asteroid(s):
for el in m:
# designation
if len(el[0]) > 0:
r['desig'] = el[0]
if el[0][0] == 'A':
r['desig'] = '1'+el[0][1:]
else:
r['desig'] = el[0]
# packed designation (unpack here)
elif len(el[4]) > 0:
ident = el[4]
Expand Down Expand Up @@ -458,7 +468,7 @@ def asteroid_or_comet(s):
Returns
-------
target_type : str
The target identification: ``'comet'``, ``'asteroid'``, or
The target identification: ``'comet'``, ``'asteroid'``, or
``None``.
Notes
Expand All @@ -470,7 +480,10 @@ def asteroid_or_comet(s):
the name is ambiguous, ``None`` will be
returned. ``'asteroid'`` will be returned if the number of
found asteroid identifier elements is larger than the number
of found comet identifier elements and vice versa.
of found comet identifier elements and vice versa. Note that
for any identifier that does not contain a comet type (P, D, C
etc.), it is highly likely that the object gets identified as an
asteroid.
Examples
--------
Expand All @@ -479,8 +492,6 @@ def asteroid_or_comet(s):
comet
>>> print(Names.asteroid_or_comet('(1) Ceres'))
asteroid
>>> print(Names.asteroid_or_comet('Fred'))
None
"""

Expand Down
39 changes: 31 additions & 8 deletions sbpy/data/tests/test_dataclass.py
Expand Up @@ -5,7 +5,7 @@
from numpy import array
import astropy.units as u
from astropy.table import QTable
from ..core import DataClass
from ..core import DataClass, conf


def data_path(filename):
Expand All @@ -27,7 +27,7 @@ def test_get_set():

data['a'][:] = [0, 0, 0]

with pytest.raises(AttributeError):
with pytest.raises(KeyError):
data.d

with pytest.raises(KeyError):
Expand Down Expand Up @@ -174,15 +174,38 @@ def test_add():
assert tab.add_rows(tab) == 20


def test_check_columns():
"""test function that checks the existing of a number of column names
provided"""
def test_alternative_name_uniqueness():
    """test the uniqueness of alternative field names"""

    from copy import deepcopy

    from ..core import conf

    def assert_unique(namealts):
        """Assert that no name occurs twice among the default names
        and all of their alternative names."""
        names = list(namealts.keys())
        for alts in namealts.values():
            names.extend(alts)
        assert len(names) == len(set(names))

    assert_unique(conf.namealts)

    # provoke a naming conflict on a copy, so that the shared
    # configuration is left untouched for all other tests
    conflicting = deepcopy(conf.namealts)
    conflicting['epoch'] = ['i']
    with pytest.raises(AssertionError):
        assert_unique(conflicting)


def test_translate_columns():
"""test function that translates column names"""

conf.namealts = {'z': ['a']}

tab = DataClass.from_dict(
[OrderedDict((('a', 1*u.m), ('b', 4*u.m/u.s), ('c', 'a'))),
OrderedDict((('a', 2*u.m), ('b', 5*u.m/u.s), ('c', 'b'))),
OrderedDict((('a', 3*u.m), ('b', 6*u.m/u.s), ('c', 'c')))])

assert tab._check_columns(['a', 'b', 'c'])
assert tab._check_columns(['a', 'b'])
assert not tab._check_columns(['a', 'b', 'f'])
assert tab._translate_columns(['a', 'b', 'c']) == ['a', 'b', 'c']
assert tab._translate_columns(['z', 'b', 'c']) == ['a', 'b', 'c']

with pytest.raises(KeyError):
tab._translate_columns(['x'])

0 comments on commit 2c6e0c5

Please sign in to comment.