In [1]:
import numpy as np

In [2]:
# Sample data: one (name, age, weight) record per person, unpacked into three
# parallel lists so that position i in every list refers to the same person.
_people = [
    ('Alice', 25, 55.0),
    ('Bob', 45, 85.5),
    ('Cathy', 37, 68.0),
    ('Doug', 19, 61.5),
]
name, age, weight = map(list, zip(*_people))

In [4]:
# Baseline for comparison: a plain NumPy array with a single dtype,
# here four integer zeros.
x = np.zeros(4, dtype=int)

In [5]:
# A structured array is created the same way, but with a compound dtype that
# gives every field a name and a format:
#   'U10' - Unicode string of at most 10 characters (longer values are truncated)
#   'i4'  - 4-byte (32-bit) integer
#   'f8'  - 8-byte (64-bit) float
# The container is sized from the input list rather than a hard-coded 4, so it
# always holds exactly one record per person.
data = np.zeros(len(name), dtype={'names':('name', 'age', 'weight'),
                                  'formats':('U10', 'i4', 'f8')})
print(data.dtype)

[('name', '<U10'), ('age', '<i4'), ('weight', '<f8')]


In [7]:
# The container currently holds only zero-initialised records; fill it one field
# at a time by copying each list into the field of the same name.
for field, values in zip(('name', 'age', 'weight'), (name, age, weight)):
    data[field] = values
print(data)

[('Alice', 25, 55. ) ('Bob', 45, 85.5) ('Cathy', 37, 68. )
 ('Doug', 19, 61.5)]


In [8]:
# Indexing by field name returns that field for every record (here: all names).
data['name']

array(['Alice', 'Bob', 'Cathy', 'Doug'], dtype='<U10')

In [9]:
# Indexing by position returns one whole record (here: the first row, all fields).
data[0]

np.void(('Alice', 25, 55.0), dtype=[('name', '<U10'), ('age', '<i4'), ('weight', '<f8')])

In [10]:
# Position and field name can be chained: take the last record, then its 'name' field.
data[-1]['name']

np.str_('Doug')

In [None]:
# Boolean masking works on structured arrays too: build a mask from one field,
# use it to select the matching records, then read another field from them.
# Here: the names of everyone whose age is under 30.
under_30 = data['age'] < 30
data[under_30]['name']

array(['Alice', 'Doug'], dtype='<U10')

1. Exploring Structured Array Creation


In [13]:
# A structured dtype can be specified in several ways. The first is the
# dictionary form used earlier: parallel 'names' and 'formats' sequences.
np.dtype({'names':('name', 'age', 'weight'),
           'formats':('U10', 'i4', 'f8')})

dtype([('name', '<U10'), ('age', '<i4'), ('weight', '<f8')])

In [14]:
# Formats may also be given as Python types or NumPy scalar types instead of
# string codes. NOTE: this spelling does not reproduce the earlier dtype exactly:
# `int` maps to NumPy's default integer ('<i8' in the output below; the width is
# platform-dependent), and np.float32 is a 4-byte float ('<f4'), not 'f8'.
np.dtype({'names':('name', 'age', 'weight'),
          'formats':((np.str_, 10), int, np.float32)})

dtype([('name', '<U10'), ('age', '<i8'), ('weight', '<f4')])

In [15]:
# A compound type can also be specified as a list of (name, format) tuples.
# Note that 'S10' is a 10-byte bytes string, not the Unicode 'U10' used earlier.
np.dtype([('name', 'S10'), ('age', 'i4'), ('weight', 'f8')])

dtype([('name', 'S10'), ('age', '<i4'), ('weight', '<f8')])

In [16]:
# If the field names do not matter, give only the formats as a comma-separated
# string; NumPy then assigns default names ('f0', 'f1', 'f2' in the output below).
np.dtype('S10, i4, f8')

dtype([('f0', 'S10'), ('f1', '<i4'), ('f2', '<f8')])

2. More Advanced Compound Types

In [18]:
# Custom compound dtype whose records carry two labelled parts:
#   'id'  - a 64-bit integer ('i8')
#   'mat' - a 3x3 matrix of 64-bit floats ('f8' with sub-array shape (3, 3))
# Each array element is therefore one record: an id number plus a 3x3 matrix.
tp = np.dtype([
    ('id', 'i8'),
    ('mat', 'f8', (3, 3)),
])



In [19]:
# One-element array of the structured type `tp`. np.zeros zero-fills the whole
# record, so id = 0 and mat = a 3x3 matrix of zeros.
X = np.zeros(1, dtype=tp)

In [20]:
# Show the single record: the id followed by its nested 3x3 matrix.
print(X[0])

(0, [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]])


In [21]:
# Select the 'mat' field first, then the first record: prints just the 3x3 matrix.
print(X['mat'][0])

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


3. Record Arrays: Structured Arrays with a Twist

In [22]:
# On a plain structured array, a field is reached with dictionary-style key access.
data['age']

array([25, 45, 37, 19], dtype=int32)

In [23]:
# Viewing the same array as an np.recarray lets fields be read as attributes
# (data_rec.age) instead of by key.
data_rec = data.view(np.recarray)
data_rec.age

array([25, 45, 37, 19], dtype=int32)

In [24]:
# The downside of record arrays is extra overhead on every field access,
# even when the key syntax (data_rec['age']) is used instead of attribute access.
# Time the three access paths for comparison:
%timeit data['age']
%timeit data_rec['age']
%timeit data_rec.age


169 ns ± 8 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)
2.34 μs ± 173 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
4.25 μs ± 119 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
