Add docs
Martin Durant committed Sep 18, 2018
1 parent c8e935b commit fcb8a26
Showing 3 changed files with 122 additions and 52 deletions.
70 changes: 70 additions & 0 deletions docs/source/quickstart.rst
@@ -1,2 +1,72 @@
Quickstart
==========

``intake-astro`` provides quick and easy access to tabular or array data stored
in the astronomical FITS_ binary format.

.. _FITS: https://fits.gsfc.nasa.gov/fits_documentation.html

Although the plugin uses astropy_ under the hood, it provides extra facilities for remote
files and partitioned access.

.. _astropy: http://docs.astropy.org/

Installation
------------

To use this plugin for `intake`_, install with the following command::

    conda install -c intake intake-astro

.. _intake: https://github.com/ContinuumIO/intake

Usage
-----

Ad-hoc
~~~~~~

After installation, the functions ``intake.open_fits_array`` and ``intake.open_fits_table``
will become available. They can be used to load data from local or remote locations:

.. code-block:: python

    import intake
    source = intake.open_fits_array('/data/fits/set*.fits', ext=1)
    darr = source.to_dask()  # for parallel access
    arr = source.read()      # to read into memory
    wcs = source.wcs         # WCS will be set from first file, if possible

In this case, "parallel access" will mean one partition per input file, but partitioning
within files is also possible (only recommended for uncompressed input).
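
Partitioning within a single file is controlled by the ``chunks`` argument.
A minimal sketch, assuming an uncompressed local file ``set0.fits`` whose
extension 1 holds a two-dimensional image (the file name is made up for
illustration; ``chunks`` must specify every axis):

.. code-block:: python

    import intake

    # one partition per 1000x1000 block within the file
    source = intake.open_fits_array('/data/fits/set0.fits', ext=1,
                                    chunks=(1000, 1000))
    darr = source.to_dask()  # dask array with the requested chunking

``intake.open_fits_table`` offers the analogous ``chunksize`` argument,
giving the number of rows per partition.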

Creating Catalog Entries
~~~~~~~~~~~~~~~~~~~~~~~~

Catalog entries must specify ``driver:`` with one of the two plugins
available here, ``fits_table`` or ``fits_array``. The data source specs take the
same parameters as the equivalent open functions. In the following example, the files
happen to be stored on Amazon S3 and are accessed anonymously.

.. code-block:: yaml

    sources:
      some_astro_arr:
        driver: fits_array
        args:
          url: s3://mybucket/fits/*.fits
          ext: 0
          storage_options:
            anon: true
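
An equivalent entry for the table plugin might look like the following
sketch (the entry name and file path are made up; ``chunksize`` is the
optional rows-per-partition setting described above):

.. code-block:: yaml

    sources:
      some_astro_table:
        driver: fits_table
        args:
          url: s3://mybucket/fits/catalog.fits
          ext: 1
          chunksize: 10000
          storage_options:
            anon: true
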
Using a Catalog
~~~~~~~~~~~~~~~

Assuming the existence of catalogs with blocks such as that above, the data-sets can be
accessed with the usual intake pattern, i.e., the methods ``discover()``, ``read()``, etc.

As with other array-type plugins, the input to ``read_partition()`` for the ``fits_array``
plugin is generally a tuple of ints.
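
A minimal sketch of that workflow (assuming the catalog above has been saved
as ``catalog.yml``; the file name is made up, and the exact partition index
passed to ``read_partition()`` depends on how the particular source is chunked):

.. code-block:: python

    import intake

    cat = intake.open_catalog('catalog.yml')
    source = cat.some_astro_arr()
    source.discover()    # read the first header and set the schema
    arr = source.read()  # whole array in memory
    # fits_array partitions are addressed by a tuple of ints; the length
    # of the tuple depends on the chunking of the source
    part = source.read_partition((0, 0))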
53 changes: 27 additions & 26 deletions intake_astro/array.py
@@ -5,39 +5,40 @@


class FITSArraySource(DataSource):
"""
Read one of more local or remote FITS files using Intake
At initialisation (when something calls ``._get_schema()``), the
header of the first file will be read and a delayed array constructed.
The properties ``header, dtype, shape, wcs`` will be populated from
that header, and no check is made to ensure that all files are
compatible.
Parameters
----------
url: str or list of str
Location of the data file(s). May include glob characters; may
include protocol specifiers.
ext: int or str or tuple
Extension to probe. By default, is primary extension. Can either be
an integer referring to sequence number, or an extension name. If a
tuple like ('SCI', 2), get the second extension named 'SCI'.
chunks: None or tuple of int
size of blocks to use within each file; must specify all axes,
if using. If None, each file is one partition. Do not use chunks
for compressed data, and only use contiguous chunks for remote
data.
storage_options: dics
Parameters to pass on to storage backend
"""

    name = 'fits_array'
    container = 'ndarray'
    version = __version__
    partition_access = True

    def __init__(self, url, ext=0, chunks=None, storage_options=None,
                 metadata=None):
"""
Read one of more local or remote FITS files using Intake
At initialisation (when something calls ``._get_schema()``), the
header of the first file will be read and a delayed array constructed.
The properties ``header, dtype, shape, wcs`` will be populated from
that header, and no check is made to ensure that all files are
compatible.
Parameters
----------
url: str or list of str
Location of the data file(s). May include glob characters; may
include protocol specifiers.
ext: int or str or tuple
Extension to probe. By default, is primary extension. Can either be
an integer referring to sequence number, or an extension name. If a
tuple like ('SCI', 2), get the second extension named 'SCI'.
chunks: None or tuple of int
size of blocks to use within each file; must specify all axes,
if using. If None, each file is one partition. Do not use chunks
for compressed data, and only use contiguous chunks for remote
data.
storage_options: dics
Parameters to pass on to storage backend
"""
        # TODO: implement case where set of extensions within a file is an axis
        super(FITSArraySource, self).__init__(metadata)
        self.url = url
51 changes: 25 additions & 26 deletions intake_astro/table.py
@@ -9,6 +9,31 @@ class FITSTableSource(DataSource):
    """
    For one or more FITS files, which can be local or remote, with support
    for partitioning within files.

    Parameters
    ----------
    url: str or list of str
        Files to load. Can include protocol specifiers and/or glob
        characters.
    ext: str or int
        Extension to load. Normally 0 or 1.
    chunksize: int or None
        For partitioning within files, use this many rows per partition.
        This is very inefficient for compressed files, and for remote
        files, will require at least touching each file to discover the
        number of rows, before even starting to read the data. Cannot be
        used with FITS tables with a "heap", i.e., containing variable-
        length arrays.
    storage_options: dict or None
        Additional keyword arguments to pass to the storage back-end.
    metadata:
        Arbitrary information to associate with this source.

    After reading the schema, the source will have attributes:
    ``header`` - the full FITS header of one of the files as a dict,
    ``dtype`` - a numpy-like list of field/dtype string pairs,
    ``shape`` - where the number of rows will only be known if using
    partitioning or for a single file input.
    """
    name = 'fits_table'
    container = 'dataframe'
@@ -17,32 +42,6 @@ class FITSTableSource(DataSource):

    def __init__(self, url, ext=0, chunksize=None, storage_options=None,
                 metadata=None):
"""
Parameters
----------
url: str or list of str
files to load. Can include protocol specifiers and/or glob
characters
ext: str or int
Extension to load. Normally 0 or 1.
chunksize: int or None
For partitioning within files, use this many rows per partition.
This is very inefficient for compressed files, and for remote
files, will require at least touching each file to discover the
number of rows, before even starting to read the data. Cannot be
used with FITS tables with a "heap", i.e., containing variable-
length arrays.
storage_options: dict or None
Additional keyword arguments to pass to the storage back-end.
metadata:
Arbitrary information to associate with this source.
After reading the schema, the source will have attributes:
``header`` - the full FITS header of one of the files as a dict,
``dtype`` - a numpy-like list of field/dtype string pairs,
``shape`` - where the number of rows will only be known if using
partitioning or for a single file input.
"""
        super(FITSTableSource, self).__init__(metadata)
        self.url = url
        self.ext = ext
