# Caterva - PyData NYC 2019

In [1]:
import os
from time import perf_counter, time

import cat4py as cat
import numpy as np

## NPArray

In [2]:
# Geometry shared by every array built in this notebook: the full array shape
# plus the chunk/block shapes handed to the cat4py constructors below.
shape = (5000, 5000)
chunkshape = (500, 500)
blockshape = (50, 50)
dtype = 'f8'

# Reference NumPy data: one evenly spaced value in [0, 1] per element,
# generated flat and then viewed with the 2-D `shape`.
a = np.linspace(0, 1, num=np.prod(shape), dtype=dtype)
a = a.reshape(shape)

### Different ways to create a Caterva NPArray

In [3]:
# Route 1: hand the whole NumPy array `a` to cat.from_numpy in a single call.
b1 = cat.from_numpy(
    a,
    chunkshape=chunkshape,
    blockshape=blockshape,
)

In [4]:
# Route 2: create an empty container, then fill it piece by piece through
# the write iterator.
b2 = cat.empty(
    shape,
    dtype=dtype,
    chunkshape=chunkshape,
    blockshape=blockshape,
)
for part, part_info in b2.iter_write():
    # part_info.slice indexes the region of `a` that belongs in this piece.
    part[:] = a[part_info.slice]

In [5]:
# Route 3: build from the raw bytes of `a`; shape and dtype must be passed
# explicitly because a bytes object carries neither.
b3 = cat.from_buffer(
    bytes(a),
    shape,
    dtype=dtype,
    chunkshape=chunkshape,
    blockshape=blockshape,
)

### Read iterator over Caterva NPArray

In [6]:
# Whichever way it was constructed, each array must agree with `a` on every
# piece produced by the read iterator.
for caterva_array in (b1, b2, b3):
    for block, info in caterva_array.iter_read():
        # info.slice selects the region of `a` corresponding to `block`.
        np.testing.assert_allclose(block, a[info.slice])

### Getting a slice from a Caterva NPArray

In [7]:
# NumPy-style slicing: rows 3..39 and columns 200..499 of b2.
# Left as the cell's last expression so the result is displayed.
b2[3:40, 200:500]

array([[0.000608  , 0.00060804, 0.00060808, ..., 0.00061988, 0.00061992,
        0.00061996],
       [0.000808  , 0.00080804, 0.00080808, ..., 0.00081988, 0.00081992,
        0.00081996],
       [0.001008  , 0.00100804, 0.00100808, ..., 0.00101988, 0.00101992,
        0.00101996],
       ...,
       [0.007408  , 0.00740804, 0.00740808, ..., 0.00741988, 0.00741992,
        0.00741996],
       [0.007608  , 0.00760804, 0.00760808, ..., 0.00761988, 0.00761992,
        0.00761996],
       [0.007808  , 0.00780804, 0.00780808, ..., 0.00781988, 0.00781992,
        0.00781996]])

### Serialize Caterva NPArray

In [8]:
# Baseline: array created without `enforceframe`.
c1 = cat.from_numpy(a, chunkshape=chunkshape, blockshape=blockshape)

# Time only the to_sframe() call. perf_counter() is a monotonic,
# high-resolution clock meant for short intervals; time() is wall-clock time
# and can be too coarse (or jump) for a measurement this small.
t0 = perf_counter()
sframe = c1.to_sframe()
t1 = perf_counter()
tnsf = t1 - t0  # elapsed seconds, reused by the speed-up cell

print(f"No serialized format: {tnsf:.4f}")

No serialized format: 0.0871


In [9]:
# Same data, but created with enforceframe=True.
# NOTE(review): presumably this stores the array already in frame form, which
# would explain the much faster to_sframe() in the recorded output; confirm
# against the cat4py documentation.
c2 = cat.from_numpy(a, chunkshape=chunkshape, blockshape=blockshape, enforceframe=True)

# Time only the to_sframe() call. perf_counter() is a monotonic,
# high-resolution clock; with a coarse wall clock a millisecond-scale interval
# could read as 0.0 and break the later tnsf / tsf division.
t0 = perf_counter()
sframe = c2.to_sframe()
t1 = perf_counter()
tsf = t1 - t0  # elapsed seconds, reused by the speed-up cell

print(f"Serialized format: {tsf:.4f}")

Serialized format: 0.0058


In [10]:
# Ratio of the two to_sframe() timings: tnsf (array built without
# enforceframe) over tsf (array built with enforceframe=True).
speedup = tnsf / tsf
print(f"Speed-up: {speedup:.4f}")

Speed-up: 15.1256


### Persistency

In [11]:
demo_file = "caterva-demo.cat"

# Remove any file left by a previous run so the cell can be re-executed.
# NOTE(review): presumably from_numpy will not overwrite an existing file;
# confirm against the cat4py documentation.
if os.path.exists(demo_file):
    os.remove(demo_file)

# Create the array with a backing filename, then open that same file again
# as a second array object.
d1 = cat.from_numpy(
    a,
    chunkshape=chunkshape,
    blockshape=blockshape,
    enforceframe=True,
    filename=demo_file,
)
d2 = cat.from_file(demo_file)