## Example of a subclass of Container for typed Arrays 

In [1]:
import cat4py as cat
import numpy as np
import os
import time

In [2]:
class WriteIterArray(cat.WriteIter):
    """Write iterator that hands out each block as a typed NumPy array.

    Every item produced is ``(array, info)`` instead of the ``(buffer, info)``
    pair produced by ``cat.WriteIter``; the array is built over that buffer
    using the dtype of the container being written and ``info.shape``.
    """

    def __init__(self, arr):
        # Kept on the instance because __next__ needs the container's dtype.
        self.arr = arr
        # Name *this* class in super(): super(cat.WriteIter, self) would start
        # the lookup after cat.WriteIter and skip its own initializer.
        super(WriteIterArray, self).__init__(arr)

    def __next__(self):
        """Return the next ``(array, info)`` pair."""
        buff, info = super(WriteIterArray, self).__next__()
        # frombuffer wraps the buffer rather than copying it, so the array
        # stays backed by the block the parent iterator handed out.
        block = np.frombuffer(buff, dtype=self.arr.dtype).reshape(info.shape)
        return block, info
    
    
class ReadIterArray(cat.ReadIter):
    """Read iterator that yields each block as a typed NumPy array.

    Every item produced is ``(array, info)`` instead of the ``(buffer, info)``
    pair produced by ``cat.ReadIter``.
    """

    def __init__(self, arr, blockshape):
        # Kept on the instance because __next__ needs the container's dtype.
        self.arr = arr
        # super() must name this class; the previous super(cat.WriteIter, self)
        # named a class that is not a visible base of this one, and super()
        # raises TypeError for such a class.
        super(ReadIterArray, self).__init__(arr, blockshape)

    def __next__(self):
        """Return the next ``(array, info)`` pair."""
        buff, info = super(ReadIterArray, self).__next__()
        block = np.frombuffer(buff, dtype=self.arr.dtype).reshape(info.shape)
        return block, info


class Array(cat.Container):
    """A ``cat.Container`` whose items carry a NumPy dtype.

    The dtype determines the container item size and, when the container is
    partitioned (``pshape`` given), is recorded in a ``"numpy"`` metalayer so
    that it can be recovered when the container is reopened.
    """

    def __init__(self, dtype, pshape=None, filename=None, **kwargs):
        """Create the container.

        Parameters
        ----------
        dtype : anything accepted by ``np.dtype``
            Element type of the array.
        pshape, filename, **kwargs
            Forwarded to ``cat.Container``. ``itemsize`` is always set from
            ``dtype``; a ``"numpy"`` entry is added to ``metalayers`` when
            ``pshape`` is given.
        """
        self.dtype = np.dtype(dtype)
        kwargs["itemsize"] = self.dtype.itemsize
        if pshape is not None:
            # NOTE(review): metalayers are only attached to partitioned
            # containers, presumably because plain buffers cannot hold
            # them -- confirm against cat4py.
            # Build a new dict so caller-supplied metalayers are kept and the
            # caller's own dict is not mutated.
            metalayers = dict(kwargs.get("metalayers") or {})
            metalayers["numpy"] = {"dtype": str(self.dtype)}
            kwargs["metalayers"] = metalayers
        self.kwargs = kwargs
        # Name this class in super() so cat.Container's own initializer runs
        # instead of being skipped.
        super(Array, self).__init__(pshape=pshape, filename=filename, **kwargs)

    def __getitem__(self, key):
        """Return the selected region as a NumPy array.

        ``key`` may be an int, a slice, or a sequence of them. An integer
        index selects a length-1 extent along its dimension (the dimension is
        kept, not dropped). Dimensions missing from ``key`` are treated as
        full slices when computing the shape of the result.
        """
        buff = super(Array, self).__getitem__(key)

        # Normalise the key to a list with one entry per dimension. A bare
        # int/slice is wrapped whatever the rank; as before, any non-sequence
        # key on a 1-d container is wrapped too.
        is_scalar_key = isinstance(key, (slice, int, np.integer))
        if is_scalar_key or (self.ndim == 1 and not isinstance(key, (tuple, list))):
            items = [key]
        else:
            items = list(key)
        items.extend([slice(None)] * (self.ndim - len(items)))

        # Integer i becomes the slice i:i+1.
        bounds = [it if isinstance(it, slice) else slice(it, it + 1, None)
                  for it in items]

        # NOTE(review): negative bounds and steps are not resolved here; the
        # shape is simply stop - start per dimension.
        start = [s.start if s.start is not None else 0 for s in bounds]
        stop = [s.stop if s.stop is not None else dim
                for s, dim in zip(bounds, self.shape)]
        shape = [hi - lo for lo, hi in zip(start, stop)]
        return np.frombuffer(buff, dtype=self.dtype).reshape(shape)

    def iter_write(self):
        """Return an iterator of ``(array, info)`` blocks to be filled in."""
        return WriteIterArray(self)

    def iter_read(self, blockshape):
        """Return an iterator of ``(array, info)`` blocks of ``blockshape``.

        Mirrors ``iter_write``: the typed iterator converts every buffer to a
        NumPy array. (Previously the iterator itself was unpacked into
        ``buff, info``, which cannot work for an iterator over blocks.)
        """
        return ReadIterArray(self, blockshape)

    def copy(self, pshape=None, filename=None, **kwargs):
        """Copy the container and return the copy as an ``Array``.

        The object returned by ``cat.Container.copy`` is re-tagged as an
        ``Array`` and given this array's dtype.
        """
        arr = super(Array, self).copy(pshape=pshape, filename=filename, **kwargs)
        arr.__class__ = Array
        arr.dtype = self.dtype
        return arr

    def __array__(self, dtype=None, copy=None):
        """NumPy conversion hook.

        ``dtype`` and ``copy`` are accepted so that calls such as
        ``np.asarray(arr, dtype=...)`` work. When ``dtype`` is given the
        result is converted to it; ``copy`` is accepted for protocol
        compatibility and otherwise ignored.
        """
        # NOTE(review): to_numpy presumably materialises a fresh array on
        # every call, which is why `copy` is not acted upon -- confirm.
        out = super(Array, self).to_numpy(self.dtype)
        if dtype is None:
            return out
        return np.asarray(out, dtype=dtype)

def empty_array(dtype, shape, pshape=None, filename=None, **kwargs):
    """Build an ``Array`` of the given dtype and set its shape.

    ``pshape``, ``filename`` and any extra keyword arguments are passed
    through to ``Array``; ``shape`` is applied afterwards with
    ``updateshape``.
    """
    container = Array(dtype, pshape=pshape, filename=filename, **kwargs)
    container.updateshape(shape)
    return container

def from_file(filename):
    """Open the container stored in ``filename`` and return it as an ``Array``.

    The dtype is read back from the ``b"dtype"`` entry of the container's
    ``"numpy"`` metalayer.
    """
    container = cat.from_file(filename)
    # Re-tag the generic container so the Array methods apply to it.
    container.__class__ = Array
    numpy_meta = container.get_metalayer("numpy")
    container.dtype = np.dtype(numpy_meta[b"dtype"])
    return container

### Example: creating a typed Array

In [3]:
# File names used by the small example: the array itself and its copy.
metalayer_small_fname = "metalayer_small.cat4npy"
metalayer_small_copy_fname = "metalayer_small_copy.cat4npy"

# Delete leftovers from an earlier run before the files are written again.
if os.path.exists(metalayer_small_fname):
    os.remove(metalayer_small_fname)
if os.path.exists(metalayer_small_copy_fname):
    os.remove(metalayer_small_copy_fname)

In [4]:
# Create a 32x32 int32 array stored in 16x16 partitions, backed by a file.
a = empty_array(np.int32, shape=(32, 32), pshape=(16, 16), filename=metalayer_small_fname)

In [5]:
# Fill the array block by block: every block handed out by the write iterator
# is overwritten with ones of the block's own shape.
for block, info in a.iter_write():
    block[:] = np.ones((info.shape), dtype=np.int32)

In [6]:
# Copy `a` into a second file; the copy is an Array with the same dtype.
b = a.copy(pshape=(12, 3), filename=metalayer_small_copy_fname)  # copy with a different partition shape

In [7]:
# Multiply the two Arrays with NumPy. They are passed in directly; NumPy
# presumably converts them through Array.__array__ -- confirm.
np.matmul(a, b)

array([[32, 32, 32, ..., 32, 32, 32],
       [32, 32, 32, ..., 32, 32, 32],
       [32, 32, 32, ..., 32, 32, 32],
       ...,
       [32, 32, 32, ..., 32, 32, 32],
       [32, 32, 32, ..., 32, 32, 32],
       [32, 32, 32, ..., 32, 32, 32]], dtype=int32)

### A quick comparison with .npy format

In [8]:
# Common stem for the two files compared below (.npy and .cat4npy).
metalayer_big_fname = "metalayer_big"

# Delete leftovers from an earlier run before the files are written again.
if os.path.exists(metalayer_big_fname + ".npy"):
    os.remove(metalayer_big_fname + ".npy")
if os.path.exists(metalayer_big_fname + ".cat4npy"):
    os.remove(metalayer_big_fname + ".cat4npy")

In [9]:
# Create a 2048x2048 float64 array partitioned in 2-row strips and stored on
# disk, compressed with zstd at level 5 behind two filters.
# NOTE(review): filters_meta=[10, 0] presumably gives TRUNC_PREC its number of
# precision bits to keep (10) and SHUFFLE no parameter -- confirm.
cat_array = empty_array(np.float64, shape=(2048, 2048), pshape=(2, 2048), filename=metalayer_big_fname +".cat4npy",
                        cname="zstd", clevel=5, filters=[cat.TRUNC_PREC, cat.SHUFFLE], filters_meta=[10, 0])

In [10]:
# Fill the array block by block with standard-normal random values shaped
# like each block.
for block, info in cat_array.iter_write():
    block[:] = np.random.randn(info.size).reshape(info.shape)

In [11]:
# Convert the whole container to a NumPy array and save it as .npy so both
# formats hold the same data for the comparison below.
numpy_array = cat_array.__array__()
np.save(metalayer_big_fname + ".npy", numpy_array)

In [12]:
# Time how long it takes to load the .npy file and slice one row out of it.
# perf_counter() is used rather than time() because it is a monotonic,
# high-resolution clock intended for measuring short intervals.
t1 = time.perf_counter()
new_numpy_array = np.load(metalayer_big_fname + ".npy")
numpy_slice = new_numpy_array[2:3, :]
t2 = time.perf_counter()
t_numpy = t2 - t1
print(t_numpy)

0.026963233947753906


In [13]:
# Time how long it takes to open the .cat4npy file and slice the same row.
# perf_counter() is used rather than time() because it is a monotonic,
# high-resolution clock intended for measuring short intervals.
t1 = time.perf_counter()
new_cat_array = from_file(metalayer_big_fname + ".cat4npy")
cat_slice = new_cat_array[2:3, :]
t2 = time.perf_counter()
t_cat = t2 - t1
print(t_cat)

0.0008440017700195312


In [14]:
# Ratio of the two timings: how many times longer the .npy path took.
print(t_numpy / t_cat)

31.946892655367233


In [15]:
# Check that both formats returned the same row, within the default tolerance
# of assert_almost_equal.
np.testing.assert_almost_equal(cat_slice, numpy_slice)

In [16]:
# IPython shell escape: list the generated files with human-readable sizes.
!ls -lh *.*npy

-rw-r--r--  1 faltet  staff   8.2M Sep 16 14:32 metalayer_big.cat4npy
-rw-r--r--  1 faltet  staff    32M Sep 16 14:32 metalayer_big.npy
-rw-r--r--  1 faltet  staff   773B Sep 16 14:32 metalayer_small.cat4npy
-rw-r--r--  1 faltet  staff   2.3K Sep 16 14:32 metalayer_small_copy.cat4npy
