In [2]:
import numpy as np

In [6]:
# NumPy arrays are made up of a pointer to data, a data type (dtype), a shape, and strides.
# Strides are the number of bytes to step in each dimension to advance to the next element.
arr_1d_int = np.zeros((5), dtype=np.int64)

In [7]:
arr_1d_int.dtype

dtype('int64')

In [8]:
arr_1d_int.shape

(5,)

In [9]:
# A 64 bit integer takes up 8 bytes - so the stride is 8.
arr_1d_int.strides

(8,)

In [10]:
arr_2d_int32 = np.zeros((4, 5), dtype=np.int32)

In [11]:
arr_2d_int32.dtype

dtype('int32')

In [12]:
arr_2d_int32.shape

(4, 5)

In [13]:
# A 32 bit integer takes up 4 bytes.
# In the second dimension, a stride is 4 because the next element in that dimension is only 4 bytes away.
# In the first dimension, a stride is 20 because the entire second dimension (of length 5 * 4 bytes) is traversed
# to reach the next element.
arr_2d_int32.strides

(20, 4)

In [14]:
np.int64.mro()

[numpy.int64,
 numpy.signedinteger,
 numpy.integer,
 numpy.number,
 numpy.generic,
 object]

In [15]:
np.uint8.mro()

[numpy.uint8,
 numpy.unsignedinteger,
 numpy.integer,
 numpy.number,
 numpy.generic,
 object]

In [16]:
np.float64.mro()

[numpy.float64,
 numpy.floating,
 numpy.inexact,
 numpy.number,
 numpy.generic,
 float,
 object]

In [17]:
# np.string_ is a legacy alias of np.bytes_ (the alias was removed in NumPy 2.0); use the canonical name.
np.bytes_.mro()

[numpy.bytes_, bytes, numpy.character, numpy.flexible, numpy.generic, object]

In [18]:
np.object_.mro()

[numpy.object_, numpy.generic, object]

In [19]:
np.issubdtype(np.int64, np.number)

True

In [20]:
np.issubdtype(np.int64, np.signedinteger)

True

In [21]:
np.issubdtype(np.uint16, np.signedinteger)

False

In [22]:
arr = np.arange(16)

In [23]:
# Reshaping arrays in numpy follows one of two orderings - C and Fortran
# (named after the respective programming languages).  By default reshape() uses C ordering, which is row major.
# This means rows are filled out one at a time.
arr.reshape((4, 4))

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [24]:
# C ordering (row major) can be explicitly specified.
arr.reshape((4, 4), order='C')

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [25]:
# Fortran ordering (column major) fills out columns one at a time.
arr.reshape((4, 4), order='F')

array([[ 0,  4,  8, 12],
       [ 1,  5,  9, 13],
       [ 2,  6, 10, 14],
       [ 3,  7, 11, 15]])

In [36]:
arr = arr.reshape((4, 4))

In [37]:
arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [39]:
arr.flatten()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [40]:
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [41]:
arr.ravel('F')

array([ 0,  4,  8, 12,  1,  5,  9, 13,  2,  6, 10, 14,  3,  7, 11, 15])

In [33]:
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [48]:
arr1 = np.array([[1], [2]])
arr2 = np.array([[3], [4]])
np.concatenate((arr1, arr2))

array([[1],
       [2],
       [3],
       [4]])

In [49]:
np.concatenate((arr1, arr2), axis=0)

array([[1],
       [2],
       [3],
       [4]])

In [50]:
np.concatenate((arr1, arr2), axis=1)

array([[1, 3],
       [2, 4]])

In [52]:
# Equivalent to np.concatenate() on axis 0 (for arrays with at least two dimensions)
np.vstack((arr1, arr2))

array([[1],
       [2],
       [3],
       [4]])

In [53]:
# Equivalent to np.concatenate() on axis 1 (for arrays with at least two dimensions)
np.hstack((arr1, arr2))

array([[1, 3],
       [2, 4]])

In [55]:
# Split a 1-D array at the indices 2, 4 and 9.
# The resulting pieces are arr[:2], arr[2:4], arr[4:9] and arr[9:].
arr = np.arange(10)
np.split(arr, [2, 4, 9])

[array([0, 1]), array([2, 3]), array([4, 5, 6, 7, 8]), array([9])]

In [61]:
arr = np.arange(3).repeat(2)
arr

array([0, 0, 1, 1, 2, 2])

In [57]:
np.array([5]).repeat(5)

array([5, 5, 5, 5, 5])

In [59]:
np.full([5], 5)

array([5, 5, 5, 5, 5])

In [65]:
np.tile(arr, (2, 1))

array([[0, 0, 1, 1, 2, 2],
       [0, 0, 1, 1, 2, 2]])

In [67]:
np.tile(arr, (1, 2))

array([[0, 0, 1, 1, 2, 2, 0, 0, 1, 1, 2, 2]])

In [71]:
# Broadcasting on assignment: arr2[:, np.newaxis] turns the length-3 vector into a (3, 1) column,
# which is then stretched across the columns of the (3, 3) target so each row holds a single value.
arr1 = np.zeros((3, 3))
arr2 = np.array([1, 2, 3])
arr1[:] = arr2[:, np.newaxis]
arr1

array([[1., 1., 1.],
       [2., 2., 2.],
       [3., 3., 3.]])

In [72]:
np.add.reduce(np.arange(10))

45

In [74]:
# Logical AND chained with reduce() is equivalent to all()
arr = np.array([1, 1])
np.logical_and.reduce(arr == 1)

True

In [75]:
arr = np.arange(16).reshape((4, 4))
np.add.accumulate(arr, axis=0)

array([[ 0,  1,  2,  3],
       [ 4,  6,  8, 10],
       [12, 15, 18, 21],
       [24, 28, 32, 36]])

In [76]:
np.add.accumulate(arr, axis=1)

array([[ 0,  1,  3,  6],
       [ 4,  9, 15, 22],
       [ 8, 17, 27, 38],
       [12, 25, 39, 54]])

In [83]:
np.divide.outer(np.array([3, 6, 9]), np.array([1, 2, 3]))

array([[3. , 1.5, 1. ],
       [6. , 3. , 2. ],
       [9. , 4.5, 3. ]])

In [84]:
np.multiply.outer(np.array([3, 6, 9]), np.array([1, 2, 3]))

array([[ 3,  6,  9],
       [ 6, 12, 18],
       [ 9, 18, 27]])

In [85]:
# reduceat() reduces the slices between consecutive indices: [(0 + 1), (2 + 3), (4)]
np.add.reduceat(np.arange(5), [0, 2, 4])

array([1, 5, 4])

In [12]:
# Custom ufuncs can be created with frompyfunc().  Note that these functions take a performance hit
# compared to their numpy counterparts.  There is a way to speed up custom functions to numpy-like performance
# with the numba library.
def miles_to_kilometers(miles):
    """Convert a distance in miles to kilometers.

    :param miles: distance in miles (anything that supports multiplication by a float).
    :return: the distance multiplied by the conversion factor 1.609.
    """
    km_per_mile = 1.609
    return miles * km_per_mile

# Create a custom unary ufunc (takes a single argument) that converts miles to kilometers
mile2km = np.frompyfunc(miles_to_kilometers, 1, 1)

arr = np.array([1, 3, 5])
mile2km(arr)

array([1.609, 4.827, 8.045], dtype=object)

In [14]:
def comma_separated_strings(x, y):
    """Format two values as a single string separated by a comma and a space.

    :param x: value placed before the comma.
    :param y: value placed after the comma.
    :return: a string of the form '<x>, <y>'.
    """
    joined = f'{x}, {y}'
    return joined

# Create a custom binary ufunc (takes two arguments) that concatenates two strings with a comma
cs_str = np.frompyfunc(comma_separated_strings, 2, 1)

arr1 = np.array(['first', 'last'])
arr2 = np.array(['andy', 'jarombek'])

cs_str(arr1, arr2)

array(['first, andy', 'last, jarombek'], dtype=object)

In [15]:
# The arrays returned from a custom frompyfunc() function always have the type object
cs_str(arr1, arr2).dtype

dtype('O')

In [16]:
mile2km(arr).dtype

dtype('O')

In [18]:
# The type can be more specific with the help of the vectorize() function.
mile2km = np.vectorize(miles_to_kilometers, otypes=[np.float64])
mile2km(arr)

array([1.609, 4.827, 8.045])

In [19]:
mile2km(arr).dtype

dtype('float64')

In [23]:
# Request a unicode string output type instead of the default object dtype.
# np.unicode was only an alias of the builtin str; it was deprecated in NumPy 1.20 and later removed,
# so pass str directly.
cs_str = np.vectorize(comma_separated_strings, otypes=[str])
cs_str(arr1, arr2)

array(['first, andy', 'last, jarombek'], dtype='<U14')

In [24]:
cs_str(arr1, arr2).dtype

dtype('<U14')

In [25]:
# Unfortunately, these custom ufuncs take a major performance hit
%timeit mile2km(arr)

6.75 µs ± 107 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [26]:
%timeit arr * 1.609

1.04 µs ± 11.3 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [27]:
# More complex data types are possible in numpy arrays
metric_dtype = [('miles', np.int32), ('kilometers', np.float64)]
mi_km_arr = np.array([(1, 1.609), (2, 3.218)], dtype=metric_dtype)
mi_km_arr

array([(1, 1.609), (2, 3.218)],
      dtype=[('miles', '<i4'), ('kilometers', '<f8')])

In [28]:
mi_km_arr[0]['miles']

1

In [29]:
mi_km_arr[0]['kilometers']

1.609

In [30]:
mi_km_arr['kilometers']

array([1.609, 3.218])

In [33]:
# Sort a numpy array in place, similar to list.sort() on Python lists
arr = np.array([2, 3, 1])
arr.sort()
arr

array([1, 2, 3])

In [34]:
# Sort a numpy array, returning a new array instance
np.sort(np.array([2, 3, 1]))

array([1, 2, 3])

In [45]:
# With no axis argument, np.sort() on a 2-D array sorts along the last axis, so each row is sorted
# independently (the same result as axis=1).
arr = np.array([[9, 6, 3], [8, 5, 2], [7, 4, 1]])
np.sort(arr)

array([[3, 6, 9],
       [2, 5, 8],
       [1, 4, 7]])

In [46]:
np.sort(arr, axis=0)

array([[7, 4, 1],
       [8, 5, 2],
       [9, 6, 3]])

In [47]:
np.sort(arr, axis=1)

array([[3, 6, 9],
       [2, 5, 8],
       [1, 4, 7]])

In [64]:
# argsort() returns the indices that would sort the array rather than the sorted values;
# indexing the array with those indices yields the sorted array.
arr = np.array([6, 8, 3, 5, 1])
indexer = arr.argsort()
indexer

array([4, 2, 3, 0, 1])

In [65]:
arr[indexer]

array([1, 3, 5, 6, 8])

In [66]:
# You can also use different sorting algorithms (defaults to quick sort)
indexer = arr.argsort(kind='heapsort')
indexer

array([4, 2, 3, 0, 1])

In [67]:
# The result is the same
arr[indexer]

array([1, 3, 5, 6, 8])

In [70]:
arr = np.random.randn(1000)
%timeit arr[arr.argsort(kind='quicksort')]

27 µs ± 653 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [71]:
%timeit arr[arr.argsort(kind='mergesort')]

44.7 µs ± 947 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [72]:
%timeit arr[arr.argsort(kind='heapsort')]

53.8 µs ± 706 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [73]:
# Perform a binary search on a sorted array.
arr = np.array([1, 2, 4, 8, 16, 32, 64])
arr.searchsorted(16)

4

In [74]:
arr.searchsorted([2, 8, 32])

array([1, 3, 5])

In [76]:
mmap = np.memmap('sample_mmap', dtype='float64', mode='w+', shape=(2, 2))
mmap

memmap([[0., 0.],
        [0., 0.]])

In [77]:
mmap[0] = 1
mmap

memmap([[1., 1.],
        [0., 0.]])

In [80]:
# Write any pending changes to the backing file, then drop the reference to the memmap object.
mmap.flush()
del mmap

In [82]:
try:
    mmap
except NameError:
    print("mmap does not exist")

mmap does not exist


In [84]:
# Revive the memory map
mmap = np.memmap('sample_mmap', dtype='float64', shape=(2, 2))
mmap

memmap([[1., 1.],
        [0., 0.]])

In [85]:
arr = np.random.randn(10)
arr.flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : True
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

In [86]:
arr = np.arange(4).reshape((2, 2))
arr.flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : False
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

In [89]:
arr = np.ones((2, 2), order='F')
arr.flags

  C_CONTIGUOUS : False
  F_CONTIGUOUS : True
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

In [93]:
# Time a sum along axis 1 of a C-ordered (row-major) array.
# NOTE: the timed statement also includes the np.ones() allocation, not just the sum.
%timeit np.ones((100, 100), order='C').sum(1)

15.3 µs ± 339 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [94]:
# Time a sum along axis 1 of a Fortran-ordered (column-major) array.
# NOTE: the timed statement also includes the np.ones() allocation, not just the sum.
%timeit np.ones((100, 100), order='F').sum(1)

15.7 µs ± 292 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
