# Caterva - PyData NYC 2019

In [1]:
import os
from time import perf_counter, time

import cat4py as cat
import numpy as np

## NPArray

In [2]:
shape = (3000, 3000)
chunkshape = (1000, 1200)
blockshape = (300, 25)
dtype = 'f8'

a = np.linspace(0, 1, np.prod(shape), dtype=dtype).reshape(shape)

### Different ways to create a Caterva NPArray

In [3]:
# Build a Caterva array directly from the in-memory NumPy array.
b1 = cat.from_numpy(
    a,
    chunkshape=chunkshape,
    blockshape=blockshape,
)

In [4]:
# Create an empty Caterva array, then fill it through the write iterator.
b2 = cat.empty(
    shape,
    dtype=dtype,
    chunkshape=chunkshape,
    blockshape=blockshape,
)
for block, info in b2.iter_write():
    # `info.slice` picks the region of `a` that is copied into this block.
    block[:] = a[info.slice]

In [5]:
# Build the array from the raw bytes of `a`, supplying shape and dtype explicitly.
b3 = cat.from_buffer(
    a.tobytes(),
    shape,
    dtype=dtype,
    chunkshape=chunkshape,
    blockshape=blockshape,
)

### Read iterator over Caterva NPArray

In [6]:
# Each block yielded by the read iterator must match the same region of `a`.
for block, info in b3.iter_read():
    np.testing.assert_allclose(actual=block, desired=a[info.slice])

### Getting a slice from a Caterva NPArray

In [7]:
# Index with ordinary slice syntax (rows 3-39, columns 200-499); the result is displayed as a NumPy array.
b2[3:40, 200:500]

array([[0.00102222, 0.00102233, 0.00102244, ..., 0.00105522, 0.00105533,
        0.00105544],
       [0.00135556, 0.00135567, 0.00135578, ..., 0.00138856, 0.00138867,
        0.00138878],
       [0.00168889, 0.001689  , 0.00168911, ..., 0.00172189, 0.001722  ,
        0.00172211],
       ...,
       [0.01235556, 0.01235567, 0.01235578, ..., 0.01238856, 0.01238867,
        0.01238878],
       [0.01268889, 0.012689  , 0.01268911, ..., 0.01272189, 0.012722  ,
        0.01272211],
       [0.01302222, 0.01302233, 0.01302245, ..., 0.01305522, 0.01305533,
        0.01305545]])

### Serialize Caterva NPArray

In [8]:
# Baseline container: built without `enforceframe`.
c1 = cat.from_numpy(a, chunkshape=chunkshape, blockshape=blockshape)

# Time only the `to_sframe()` call. `perf_counter()` is monotonic and uses the
# highest-resolution clock available, so a millisecond-scale interval is not
# rounded down to 0 or skewed by wall-clock adjustments, as `time()` can be.
t0 = perf_counter()
sframe = c1.to_sframe()
t1 = perf_counter()
tnsf = t1 - t0  # elapsed seconds

print(f"No serialized format: {tnsf:.4f}")

No serialized format: 0.0474


In [9]:
# Same data, but built with `enforceframe=True`.
# NOTE(review): presumably this keeps the container in its serialized (frame)
# form from the start, which would explain the faster `to_sframe()` - confirm
# against the cat4py documentation.
c2 = cat.from_numpy(a, chunkshape=chunkshape, blockshape=blockshape, enforceframe=True)

# Time only the `to_sframe()` call. `perf_counter()` is monotonic and uses the
# highest-resolution clock available; with a coarse `time()` clock this short
# interval could read as 0 and break the speed-up division in the next cell.
t0 = perf_counter()
sframe = c2.to_sframe()
t1 = perf_counter()
tsf = t1 - t0  # elapsed seconds

print(f"Serialized format: {tsf:.4f}")

Serialized format: 0.0017


In [10]:
# Ratio of the two `to_sframe()` timings: without vs. with `enforceframe=True`.
speedup = tnsf / tsf
print(f"Speed-up: {speedup:.4f}")

Speed-up: 27.9118


### Persistence

In [11]:
# Remove any file left over from a previous run of this cell.
demo_file = "caterva-demo.cat"
if os.path.exists(demo_file):
    os.remove(demo_file)

# Create the array with a backing file, then open that same file again.
d1 = cat.from_numpy(a, chunkshape=chunkshape, blockshape=blockshape, filename=demo_file)
d2 = cat.from_file(demo_file)