In [1]:
import numpy as np

In [10]:
N = 10000
SEED = 0  # fixed seed so the generated data is identical on every Restart & Run All
rng = np.random.default_rng(SEED)

# data_A: N standard-normal samples stored as float32.
# data_B: N integers drawn from [0, 10000), stored as uint16.
data_A = rng.standard_normal(N, dtype=np.float32)
data_B = rng.integers(0, 10000, size=N, dtype=np.uint16)

# Option 1

## Step 1: Creation of a new custom type

In [18]:
# Structured dtype: each record holds a float32 'measurement' followed by a
# uint16 'batch_id' (packed, no padding between the fields).
my_custom_type = np.dtype({
    'names': ['measurement', 'batch_id'],
    'formats': [np.float32, np.uint16],
})

## Step 2: Allocation of an array of this type

In [19]:
# Uninitialised 1-D array of N records; the contents are garbage until filled.
full_data = np.empty(shape=(N,), dtype=my_custom_type)

## Step 3: Fill it

In [23]:
# Copy each source array into the field of the same name.
for field_name, column in (('measurement', data_A), ('batch_id', data_B)):
    full_data[field_name] = column

In [25]:
# Indexing by position returns one whole record: (measurement, batch_id).
full_data[0]

(2.3258538, 6224)

In [26]:
# Indexing by field name returns that field for every record.
full_data['measurement']

array([ 2.3258538 , -1.549708  ,  0.6967527 , ...,  0.52565753,
       -0.70380616,  0.6447657 ], dtype=float32)

## Step 4: Store it

In [34]:
# Three ways to persist the structured array.

# 1. np.save: writes a .npy file whose header records dtype and shape.
#    NumPy appends the '.npy' suffix, so this creates /tmp/custom_dtype_1.npy.
np.save('/tmp/custom_dtype_1', full_data)

# 2. Raw memmap: the file contains only the record bytes, with no header, so
#    dtype and shape must be supplied again by whoever reads it back.
mmap1 = np.memmap('/tmp/custom_dtype.dat', shape=full_data.shape, dtype=full_data.dtype, mode='w+')
mmap1[:] = full_data
mmap1.flush()  # make sure the bytes are on disk before the file is reopened

# 3. open_memmap: a memory-mapped .npy file (header + data), filled field by field.
mmap2 = np.lib.format.open_memmap('/tmp/custom_dtype_2.npy', shape=full_data.shape, dtype=full_data.dtype, mode='w+')
mmap2['measurement'] = data_A
mmap2['batch_id'] = data_B
mmap2.flush()  # same reason: the next step reads this file through np.load

## Step 5: Load it

In [40]:
# .npy files carry dtype and shape in their header, so no hints are needed.
loaded_1, loaded_2 = [
    np.load(npy_path)
    for npy_path in ('/tmp/custom_dtype_1.npy', '/tmp/custom_dtype_2.npy')
]

# The same .npy file, memory-mapped instead of read into memory ('r+' is the default mode).
loaded_3 = np.lib.format.open_memmap('/tmp/custom_dtype_1.npy', mode='r+')

# The raw .dat file opened with np.memmap's defaults spelled out:
# no dtype/shape information is available, so it is mapped as plain uint8 bytes.
hoho = np.memmap('/tmp/custom_dtype.dat', dtype=np.uint8, mode='r+')

In [41]:
# Opened without dtype/shape, the raw file comes back as a flat uint8 byte array.
hoho

memmap([202, 218,  20, ...,  63,  63,  26], dtype=uint8)

In [43]:
# Same two fields as my_custom_type but in the opposite order: 'batch_id'
# (uint16) first, then 'measurement' (float32). The record size is unchanged.
my_wrong_type = np.dtype({
    'names': ['batch_id', 'measurement'],
    'formats': [np.uint16, np.float32],
})

In [45]:
# Map the raw file as N records of my_wrong_type ('r+' is np.memmap's default mode).
# The raw file has no header, so NumPy cannot detect a mismatched dtype.
hoho2 = np.memmap('/tmp/custom_dtype.dat', dtype=my_wrong_type, mode='r+', shape=(N,))

In [47]:
# First record of the raw file as decoded through my_wrong_type.
hoho2[0]

(56010, 2.6915721e-24)

In [48]:
# The first record as held in memory, for comparison with the decoded one.
full_data[0]

(2.3258538, 6224)

# Option 2

### Alternative 1: A single `.npz` archive

In [53]:
# Write: one named entry per array; np.savez appends the '.npz' suffix.
np.savez('/tmp/custom_dtype_savez', **{'measurement': data_A, 'batch_id': data_B})

# Read: np.load on an .npz file gives an archive indexed by those entry names.
archive = np.load('/tmp/custom_dtype_savez.npz')
archive['measurement']

array([ 2.3258538 , -1.549708  ,  0.6967527 , ...,  0.52565753,
       -0.70380616,  0.6447657 ], dtype=float32)

In [57]:
# Write: same layout as np.savez, but the entries are compressed.
np.savez_compressed('/tmp/custom_dtype_savez_2', **{'measurement': data_A, 'batch_id': data_B})

# Read: loading is identical to the uncompressed archive.
archive = np.load('/tmp/custom_dtype_savez_2.npz')
archive['batch_id']

array([6224, 5433, 2903, ..., 6813, 1378, 6719], dtype=uint16)

### Alternative 2: Separate files

If the two arrays are reasonably large, it is actually fine to store each one in its own file.

### Alternative 3: Concatenation

In [63]:
# Pack both arrays back-to-back into one raw byte file:
#   bytes [0, 4*N)   -> data_A (float32, 4 bytes per element)
#   bytes [4*N, 6*N) -> data_B (uint16, 2 bytes per element)
memmap3 = np.memmap('/tmp/probably_dont_do_this', dtype=np.uint8, shape=(6 * N,), mode='w+')
memmap3[:4 * N] = data_A.view(np.uint8)
memmap3[4 * N:] = data_B.view(np.uint8)
memmap3.flush()  # make sure the bytes are on disk before reading the file back

# The file is headerless raw bytes, not a .npy file, so np.load cannot read it
# (and no '.npy' file is ever created above). Read the bytes back directly.
raw_loaded = np.fromfile('/tmp/probably_dont_do_this', dtype=np.uint8)

# Reinterpret each byte range with the dtype it was written with.
loaded_A = raw_loaded[:4 * N].view(np.float32)
loaded_B = raw_loaded[4 * N:].view(np.uint16)

loaded_B

array([6224, 5433, 2903, ..., 6813, 1378, 6719], dtype=uint16)