# Structured Arrays

In [1]:
import numpy as np

## Constructing

In [7]:
# Four people, written one record per row and then transposed into the
# three parallel column lists (name, age, weight) used by later cells.
name, age, weight = map(list, zip(
    ('Alice', 25, 55.0),
    ('Bob', 45, 85.5),
    ('Cathy', 37, 68.0),
    ('Doug', 19, 61.5),
))

In [2]:
# Baseline for comparison: a plain 1-D array with a single scalar dtype.
x = np.zeros((4,), dtype=int)

In [3]:
x

array([0, 0, 0, 0])

In [4]:
# Compound dtype given as a dict: each entry of 'names' is paired
# positionally with the entry of 'formats' at the same index.
types = {
    'names': ('name', 'age', 'weight'),
    'formats': ('U10', 'i4', 'f8'),
}
# Four records; every field starts out zero-initialised ('' / 0 / 0.0).
data = np.zeros(4, dtype=types)

In [5]:
data

array([('', 0, 0.), ('', 0, 0.), ('', 0, 0.), ('', 0, 0.)],
      dtype=[('name', '<U10'), ('age', '<i4'), ('weight', '<f8')])

In [8]:
# Fill each field (column) of the structured array from its matching list.
data['name'], data['age'], data['weight'] = name, age, weight

In [9]:
data

array([('Alice', 25, 55. ), ('Bob', 45, 85.5), ('Cathy', 37, 68. ),
       ('Doug', 19, 61.5)],
      dtype=[('name', '<U10'), ('age', '<i4'), ('weight', '<f8')])

## Querying

In [10]:
data['name']

array(['Alice', 'Bob', 'Cathy', 'Doug'], dtype='<U10')

In [11]:
data['name'][1]

'Bob'

In [12]:
data[1]

('Bob', 45, 85.5)

In [13]:
data[1]['name']

'Bob'

In [14]:
data[1][0]

'Bob'

In [15]:
data[-1]['name']

'Doug'

In [16]:
data['age'] < 30 

array([ True, False, False,  True])

In [17]:
# Names of everyone younger than 30: pick the field, then apply the boolean mask.
data['name'][data['age'] < 30]

array(['Alice', 'Doug'], dtype='<U10')

## Constructing (cont'd)

In [18]:
# Dictionary form of a compound dtype: parallel tuples of field names
# and format strings.
types = {
    'names': ('name', 'age', 'weight'),
    'formats': ('U10', 'i4', 'f8'),
}
# Bare last expression so the notebook displays the resulting dtype.
np.dtype(types)

dtype([('name', '<U10'), ('age', '<i4'), ('weight', '<f8')])

In [20]:
# Same dictionary form, but the formats are Python / NumPy type objects
# instead of format strings. In the recorded output `(np.str_, 10)` shows up
# as '<U10', `int` as '<i8' and `np.float32` as '<f4'.
# NOTE(review): the width picked for plain `int` is presumably
# platform-dependent - confirm before relying on '<i8'.
types = {
    'names': ('name', 'age', 'weight'),
    'formats': ((np.str_, 10), int, np.float32),
}
np.dtype(types)

dtype([('name', '<U10'), ('age', '<i8'), ('weight', '<f4')])

In [21]:
# List-of-tuples form: one (field name, format) pair per field.
# Note the name field is 'S10' here rather than the 'U10' used in earlier cells.
types = [('name', 'S10'), ('age', 'i4'), ('weight', 'f8')]
np.dtype(types)

dtype([('name', 'S10'), ('age', '<i4'), ('weight', '<f8')])

In [22]:
# Comma-separated format string: no field names are supplied, so the
# resulting dtype uses the default names 'f0', 'f1', 'f2'.
types = 'S10,i4,f8'
np.dtype(types)

dtype([('f0', 'S10'), ('f1', '<i4'), ('f2', '<f8')])

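The dtype strings shown above (such as `<i4` or `<U10`) follow a compact format. The first (optional) character is `<` or `>`, which means “little endian” or “big endian,” respectively, and specifies the byte order in which multi-byte values are stored. The next character specifies the type of data: characters, bytes, ints, floating points, and so on (for example `U` for Unicode strings, `S` for byte strings, `i` for integers, `f` for floats). The last character or characters give the size of the object in bytes (for `U`, the number of characters).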

## Advanced constructs
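**TODO:** confirm whether the explicit `np.dtype(...)` call below is actually necessary — `np.zeros` should also accept the list of field tuples directly through its `dtype=` argument.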

In [40]:
# Each record holds an integer id plus a 3x3 block of floats stored in the
# 'mat' field (the third tuple element gives the per-record sub-array shape).
types = np.dtype([
    ('id', 'i8'),
    ('mat', 'f8', (3, 3)),
])
# A single zero-initialised record of that type.
X = np.zeros((1,), dtype=types)

In [41]:
X

array([(0, [[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]])],
      dtype=[('id', '<i8'), ('mat', '<f8', (3, 3))])

In [42]:
X[0]

(0, [[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]])

In [43]:
X['mat'][0]

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [44]:
# Write row 1, column 2 of record 0's matrix using a single multi-axis index.
X['mat'][0, 1, 2] = 1

In [45]:
X['mat'][0]

array([[0., 0., 0.],
       [0., 0., 1.],
       [0., 0., 0.]])

In [46]:
# Write row 0, column 1 of record 0's matrix using a single multi-axis index.
X['mat'][0, 0, 1] = 3

In [47]:
X['mat'][0]

array([[0., 3., 0.],
       [0., 0., 1.],
       [0., 0., 0.]])

## RecordArrays
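A record array (`np.recarray`) lets you access fields as attributes (e.g. `data_rec.age`) in addition to dictionary-style keys (`data_rec['age']`). The convenience comes at a cost: field access on a record array is noticeably slower, as the timings at the end of this section show.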

In [48]:
data

array([('Alice', 25, 55. ), ('Bob', 45, 85.5), ('Cathy', 37, 68. ),
       ('Doug', 19, 61.5)],
      dtype=[('name', '<U10'), ('age', '<i4'), ('weight', '<f8')])

In [49]:
data['age']

array([25, 45, 37, 19], dtype=int32)

In [51]:
# Reinterpret the structured array as a record array so that its fields
# can also be read as attributes (e.g. data_rec.age).
data_rec = data.view(type=np.recarray)
data_rec

rec.array([('Alice', 25, 55. ), ('Bob', 45, 85.5), ('Cathy', 37, 68. ),
           ('Doug', 19, 61.5)],
          dtype=[('name', '<U10'), ('age', '<i4'), ('weight', '<f8')])

In [52]:
data_rec.age

array([25, 45, 37, 19], dtype=int32)

In [54]:
# Compare the cost of reading the 'age' field three ways:
# 1) key lookup on the plain structured array
%timeit data['age']
# 2) key lookup on the record-array view
%timeit data_rec['age']
# 3) attribute access on the record-array view
%timeit data_rec.age

248 ns ± 48.9 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
4.16 µs ± 342 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
5.32 µs ± 367 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
