Blosc · ARF1 · May 6, 2015 · May 6, 2015 · May 6, 2015 · May 6, 2015
diff --git a/bcolz/ctable.py b/bcolz/ctable.py
@@ -17,6 +17,7 @@
 import os
 import shutil
 from .py2help import _inttypes, _strtypes, imap, xrange
+import weakref
 
 _inttypes += (np.integer,)
 islice = itertools.islice
@@ -222,8 +223,36 @@ def __init__(self, columns=None, names=None, **kwargs):
         # Attach the attrs to this object
         self.attrs = attrs.attrs(self.rootdir, self.mode, _new=_new)
 
-        # Cache a structured array of len 1 for ctable[int] acceleration
-        self._arr1 = np.empty(shape=(1,), dtype=self.dtype)
+        # Initialise output structure cache
+        self._outstruc_update_cache()
+
+    def __new__(cls, *args, **kwargs):
+        """Create the instance and record a weak reference to it.
+
+        The class-level ``_instances`` list is what allows
+        ``_update_outstruc_processor`` to rebuild the output structure
+        cache of every table that is still alive.  Only weak references
+        are stored, so the list itself never keeps a table alive.
+        """
+        try:
+            registry = cls._instances
+        except AttributeError:
+            # First instance ever created: start the registry.
+            registry = cls._instances = []
+        obj = object.__new__(cls)
+        registry.append(weakref.ref(obj))
+        return obj
+
+    @classmethod
+    def _update_outstruc_processor(cls, processor):
+        """Install `processor` as the output structure implementation.
+
+        The `allocate`, `update_cache`, `fromindices` and `fromboolarr`
+        attributes of `processor` are bound on `bcolz.ctable` as the
+        corresponding `_outstruc_*` attributes.  Afterwards the output
+        structure cache of every live instance is rebuilt and dead weak
+        references are dropped from the instance registry.
+
+        Raises
+        ------
+        AttributeError
+            If `processor` lacks one of the four attributes above.
+        AssertionError
+            If `processor` does not define `__setitem__`.
+
+        In both cases no class attribute has been modified yet.
+        """
+        # Validate the complete interface before touching any class
+        # attribute, so an incomplete processor cannot be half-installed.
+        allocate = processor.allocate
+        update_cache = processor.update_cache
+        fromindices = processor.fromindices
+        fromboolarr = processor.fromboolarr
+        if not hasattr(processor, '__setitem__'):
+            # Raised explicitly: an `assert` statement is stripped under -O.
+            raise AssertionError(
+                "output structure processor must define __setitem__")
+
+        bcolz.ctable._outstruc_allocate = allocate
+        bcolz.ctable._outstruc_update_cache = update_cache
+        bcolz.ctable._outstruc_fromindices = fromindices
+        bcolz.ctable._outstruc_fromboolarr = fromboolarr
+
+        if not hasattr(cls, '_instances'):
+            return
+
+        live_refs = []
+        for ref in cls._instances:
+            # Dereference once and hold the strong reference while the
+            # cache is rebuilt.
+            instance = ref()
+            if instance is not None:
+                instance._outstruc_update_cache()
+                live_refs.append(ref)
+        cls._instances = live_refs
+
 
     def create_ctable(self, columns, names, **kwargs):
         """Create a ctable anew."""
@@ -487,8 +516,9 @@ def addcol(self, newcol, name=None, pos=None, move=False, **kwargs):
 
         # Insert the column
         self.cols.insert(name, pos, newcol)
-        # Update _arr1
-        self._arr1 = np.empty(shape=(1,), dtype=self.dtype)
+        # Update output structure cache
+        self._outstruc_update_cache()
+
 
         if self.auto_flush:
             self.flush()
@@ -540,8 +570,9 @@ def delcol(self, name=None, pos=None, keep=False):
         if not keep:
             col.purge()
 
-        # Update _arr1
-        self._arr1 = np.empty(shape=(1,), dtype=self.dtype)
+        # Update output structure cache
+        self._outstruc_update_cache()
+
 
         if self.auto_flush:
             self.flush()
@@ -1013,11 +1044,7 @@ def _where(self, boolarr, colnames=None):
 
         if colnames is None:
             colnames = self.names
-        cols = [self.cols[name][boolarr] for name in colnames]
-        dtype = np.dtype([(name, self.cols[name].dtype) for name in colnames])
-        result = np.rec.fromarrays(cols, dtype=dtype).view(np.ndarray)
-
-        return result
+        return self._outstruc_fromboolarr(boolarr, colnames).ra
 
     def __getitem__(self, key):
         """Returns values based on `key`.
@@ -1043,10 +1070,10 @@ def __getitem__(self, key):
         # First, check for integer
         if isinstance(key, _inttypes):
             # Get a copy of the len-1 array
-            ra = self._arr1.copy()
+            result = self._outstruc_allocate(1)
             # Fill it
-            ra[0] = tuple([self.cols[name][key] for name in self.names])
-            return ra[0]
+            result[0] = [self.cols[name][key] for name in self.names]
+            return result.ra
         # Slices
         elif type(key) == slice:
             (start, stop, step) = key.start, key.stop, key.step
@@ -1060,7 +1087,7 @@ def __getitem__(self, key):
         # List of integers (case of fancy indexing), or list of column names
         elif type(key) is list:
             if len(key) == 0:
-                return np.empty(0, self.dtype)
+                return self._outstruc_allocate(0, self.dtype).ra
             strlist = [type(v) for v in key] == [str for v in key]
             # Range of column names
             if strlist:
@@ -1072,15 +1099,14 @@ def __getitem__(self, key):
             except:
                 raise IndexError(
                     "key cannot be converted to an array of indices")
-            return np.fromiter((self[i] for i in key),
-                               dtype=self.dtype, count=len(key))
+            return self._outstruc_fromindices(key).ra
         # A boolean array (case of fancy indexing)
         elif hasattr(key, "dtype"):
             if key.dtype.type == np.bool_:
                 return self._where(key)
             elif np.issubsctype(key, np.int_):
                 # An integer array
-                return np.array([self[i] for i in key], dtype=self.dtype)
+                return self._outstruc_fromindices(key).ra
             else:
                 raise IndexError(
                     "arrays used as indices must be integer (or boolean)")
@@ -1105,12 +1131,12 @@ def __getitem__(self, key):
         (start, stop, step) = slice(start, stop, step).indices(self.len)
         # Build a numpy container
         n = utils.get_len_of_range(start, stop, step)
-        ra = np.empty(shape=(n,), dtype=self.dtype)
+        result = self._outstruc_allocate(n, self.dtype)
         # Fill it
         for name in self.names:
-            ra[name][:] = self.cols[name][start:stop:step]
+            result[name] = self.cols[name][start:stop:step]
 
-        return ra
+        return result.ra
 
     def __setitem__(self, key, value):
         """Sets values based on `key`.
@@ -1231,7 +1257,22 @@ def _get_stats(self):
         return (nbytes, cbytes, cratio)
 
     def __str__(self):
-        return array2string(self)
+        """Return the string form of the table, always formatted via numpy.
+
+        When a custom output structure is configured, the numpy
+        implementation is swapped in on this instance for the duration of
+        the call and the configured one is restored afterwards, also when
+        formatting raises.
+        """
+        if self._outstruc_allocate.__func__ == OutputStructure_numpy.allocate:
+            return array2string(self)
+
+        # if a custom output structure is configured, use numpy for
+        # bcolz string representation for consistent output formatting
+        OutputStructure_numpy.update_cache(self)
+
+        def tmp_allocate(*args, **kwargs):
+            return OutputStructure_numpy.allocate(self, *args, **kwargs)
+        # Instance attribute shadowing the class-level allocator.
+        self._outstruc_allocate = tmp_allocate
+        try:
+            return array2string(self)
+        finally:
+            # Remove the instance-level override and rebuild the cache
+            # with the configured implementation.
+            del self._outstruc_allocate
+            self._outstruc_update_cache()
 
     def __repr__(self):
         nbytes, cbytes, cratio = self._get_stats()
@@ -1247,6 +1288,45 @@ def __repr__(self):
         return fullrepr
 
 
+class OutputStructure_numpy(object):
+    """Default output structure: results are numpy structured arrays.
+
+    The static methods take the table explicitly as `ctable_`.  Every
+    factory method returns an instance whose `ra` attribute holds the
+    numpy array.
+    """
+    __slots__ = ['ra']
+
+    @staticmethod
+    def update_cache(ctable_):
+        """Rebuild the cached length-1 array that `allocate` copies."""
+        ctable_._outstruc_cache = np.empty(shape=(1,), dtype=ctable_.dtype)
+
+    @staticmethod
+    def allocate(ctable_, size, dtype=None):
+        """Return a wrapper around an uninitialised array of `size` rows.
+
+        For ``size == 1`` a copy of the cached length-1 array is used and
+        `dtype` is not consulted.  Otherwise `dtype` defaults to the dtype
+        of `ctable_`.
+        """
+        result = object.__new__(OutputStructure_numpy)
+        if size == 1:
+            result.ra = ctable_._outstruc_cache.copy()
+        else:
+            if dtype is None:
+                # np.empty(size, None) would silently produce float64.
+                dtype = ctable_.dtype
+            result.ra = np.empty(size, dtype)
+        return result
+
+    @staticmethod
+    def fromindices(ctable_, iter):
+        """Return a wrapper holding the rows of `ctable_` at the indices
+        in `iter`, which must support `len()`."""
+        result = object.__new__(OutputStructure_numpy)
+        result.ra = np.fromiter((ctable_[i] for i in iter),
+                                dtype=ctable_.dtype, count=len(iter))
+        return result
+
+    @staticmethod
+    def fromboolarr(ctable_, boolarr, colnames):
+        """Return a wrapper holding the columns `colnames` of `ctable_`,
+        each indexed with `boolarr`."""
+        result = object.__new__(OutputStructure_numpy)
+
+        dtype = np.dtype([(name, ctable_.cols[name].dtype) for name in colnames])
+        cols = [ctable_.cols[name][boolarr] for name in colnames]
+        result.ra = np.rec.fromarrays(cols, dtype=dtype).view(np.ndarray)
+        return result
+
+    def __setitem__(self, key, value):
+        """Assign `value` to a row (integer `key`) or, for any other key
+        such as a column name, in place through a full slice."""
+        # _inttypes also covers numpy integers (and `long` on Python 2),
+        # which a bare `int` check would send down the slice branch.
+        if isinstance(key, _inttypes):
+            self.ra[key] = tuple(value)
+        else:
+            self.ra[key][:] = value
+
 # Local Variables:
 # mode: python
 # tab-width: 4

diff --git a/bcolz/defaults.py b/bcolz/defaults.py
@@ -12,6 +12,7 @@
 from __future__ import absolute_import
 
 import bcolz
+from bcolz.ctable import OutputStructure_numpy
 
 
 class Defaults(object):
@@ -70,6 +71,22 @@ def eval_out_flavor(self, value):
         self.check_choices('eval_out_flavor', value)
         self.__eval_out_flavor = value
 
+    @property
+    def ctable_out_implementation(self):
+        """The output structure implementation recorded by the setter."""
+        return self.__ctable_out_implementation
+
+    @ctable_out_implementation.setter
+    def ctable_out_implementation(self, value):
+        # `None` selects the default numpy implementation.
+        if value is None:
+            value = OutputStructure_numpy
+        try:
+            bcolz.ctable._update_outstruc_processor(value)
+        except (AttributeError, AssertionError):
+            # Fall back to the default and record it as well, so the
+            # getter reports the implementation that is really installed.
+            bcolz.ctable._update_outstruc_processor(OutputStructure_numpy)
+            self.__ctable_out_implementation = OutputStructure_numpy
+            raise NotImplementedError(
+                'The output structure implementation is incomplete')
+        self.__ctable_out_implementation = value
+
     @property
     def cparams(self):
         return self.__cparams
@@ -90,6 +107,12 @@ def cparams(self, value):
 'numpy'.  Default is 'carray'.
 """
 
+defaults.ctable_out_implementation = None
+"""
+The implementation of the output structure abstraction layer used to
+build the objects returned by `ctable.__getitem__()`.  `None` selects
+the default numpy implementation, `OutputStructure_numpy`.
+"""
+
 defaults.eval_vm = "numexpr" if bcolz.numexpr_here else "python"
 """
 The virtual machine to be used in computations (via `eval`).  It can