# NumPy

In [1]:
import random
measurements = [random.randint(150, 200) for _ in range(1_000_000)]
# Preview only the first few values: displaying the complete
# one-million-element list floods the notebook with output.
measurements[:10]

[166, 187, 165, 178, 188, 185, 196, 165, 199, 150]

In [2]:
import random 
measurements = [random.randint(150, 200) for _ in range(1_000_000)]

def calculate_mean(measurements):
    """Return the arithmetic mean of ``measurements`` using a plain Python loop.

    The values are deliberately summed one at a time: this function serves as
    the pure-Python baseline that is timed against ``np.mean``.

    Parameters
    ----------
    measurements : sized iterable of numbers
        Values to average; must support ``len()``.

    Returns
    -------
    The sum of all values divided by their count (a float for integer input).

    Raises
    ------
    ZeroDivisionError
        If ``measurements`` is empty.
    """
    accumulator = 0
    for value in measurements:
        accumulator += value

    return accumulator / len(measurements)

%timeit calculate_mean(measurements)

37 ms ± 7.54 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


This is rather slow since Python has to rebind a new variable in every loop iteration and then has to check whether the + operation is supported between the accumulator and the current measurement. This prevents it from trying to add together objects that can't be added, but in this case we are pretty sure that we are only dealing with integers. If we could tell the interpreter that we are only adding integers, we could skip all that type checking and speed up the operation. For this purpose, NumPy was invented.

To use NumPy we have to import it. The import is usually aliased as np so we have to type less later on. Aliasing things is only recommended if it is well established in the community of the respective package.

In [3]:
import numpy as np

In [4]:
measurements_array = np.array(measurements)
measurements_array

array([152, 195, 159, ..., 180, 194, 166])

In [5]:
type(measurements_array)

numpy.ndarray

In [6]:
measurements_array[0]

152

In [7]:
measurements_array[0:5]

array([152, 195, 159, 157, 187])

In [8]:
measurements_array.dtype

dtype('int32')

In [9]:
%timeit np.mean(measurements_array)

841 µs ± 5.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


Roughly a 40x speedup in comparison to the pure Python implementation (37 ms vs. 841 µs)! After convincing ourselves that NumPy is useful, we take a more in-depth look at the NumPy array.

## Anatomy of arrays


### dtype 

.dtype gives information about the data type. Arrays can contain bools, ints, unsigned ints, floats or complex numbers of various byte sizes. They can also store strings or Python objects, but that has very few use cases.

In [10]:
values = [0, 1, 2, 3, 4]
int_arr = np.array(values, dtype='int')
int_arr, int_arr.dtype

(array([0, 1, 2, 3, 4]), dtype('int32'))

In [11]:
bool_arr = np.array(values, dtype='bool')
bool_arr, bool_arr.dtype

(array([False,  True,  True,  True,  True]), dtype('bool'))

In [12]:
values = [0, 1, 2.5, 3, 4]
float_arr = np.array(values)
float_arr, float_arr.dtype

(array([0. , 1. , 2.5, 3. , 4. ]), dtype('float64'))

In [13]:
int_arr[1] = 2.5
int_arr, int_arr.dtype

(array([0, 2, 2, 3, 4]), dtype('int32'))

In [14]:
values = [0, 1, 2, 3, 4]
# Use an unsigned 8-bit dtype (value range 0..255) so that the array matches
# its name and the overflow demonstration in the next cell wraps around.
uint_arr = np.array(values, dtype='uint8')
uint_arr, uint_arr.dtype

(array([0, 1, 2, 3, 4], dtype=uint8), dtype('uint8'))

In [15]:
# 1 + 255 = 256 does not fit into uint8, so the stored value wraps around to 0
# (NumPy may additionally emit a RuntimeWarning about the overflow).
uint_arr[1] += 255
uint_arr

array([0, 0, 2, 3, 4], dtype=uint8)

In [16]:
type(measurements_array[0]) == type(183)

False

### shape and ndim 

In [17]:
values = [0, 1, 2, 3, 4]
one_dim_arr = np.array(values)
one_dim_arr

array([0, 1, 2, 3, 4])

In [18]:
one_dim_arr.shape

(5,)

In [19]:
one_dim_arr.ndim

1

In [20]:
values = [[0, 1, 2, 3, 4]] * 3
two_dim_arr = np.array(values)
two_dim_arr

array([[0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4]])

In [21]:
two_dim_arr.shape

(3, 5)

In [22]:
two_dim_arr.ndim

2

In [23]:
two_dim_arr[1,1] = 10

In [24]:
two_dim_arr

array([[ 0,  1,  2,  3,  4],
       [ 0, 10,  2,  3,  4],
       [ 0,  1,  2,  3,  4]])

In [25]:
values = [[[0, 1, 2, 3, 4]] * 3] * 6
three_dim_arr = np.array(values)
three_dim_arr

array([[[0, 1, 2, 3, 4],
        [0, 1, 2, 3, 4],
        [0, 1, 2, 3, 4]],

       [[0, 1, 2, 3, 4],
        [0, 1, 2, 3, 4],
        [0, 1, 2, 3, 4]],

       [[0, 1, 2, 3, 4],
        [0, 1, 2, 3, 4],
        [0, 1, 2, 3, 4]],

       [[0, 1, 2, 3, 4],
        [0, 1, 2, 3, 4],
        [0, 1, 2, 3, 4]],

       [[0, 1, 2, 3, 4],
        [0, 1, 2, 3, 4],
        [0, 1, 2, 3, 4]],

       [[0, 1, 2, 3, 4],
        [0, 1, 2, 3, 4],
        [0, 1, 2, 3, 4]]])

In [26]:
three_dim_arr.shape

(6, 3, 5)

In [27]:
three_dim_arr.ndim

3

### Other attributes
 

In [28]:
two_dim_arr

array([[ 0,  1,  2,  3,  4],
       [ 0, 10,  2,  3,  4],
       [ 0,  1,  2,  3,  4]])

In [29]:
two_dim_arr.T

array([[ 0,  0,  0],
       [ 1, 10,  1],
       [ 2,  2,  2],
       [ 3,  3,  3],
       [ 4,  4,  4]])

In [30]:
print(dir(two_dim_arr))

['T', '__abs__', '__add__', '__and__', '__array__', '__array_finalize__', '__array_function__', '__array_interface__', '__array_prepare__', '__array_priority__', '__array_struct__', '__array_ufunc__', '__array_wrap__', '__bool__', '__class__', '__complex__', '__contains__', '__copy__', '__deepcopy__', '__delattr__', '__delitem__', '__dir__', '__divmod__', '__doc__', '__eq__', '__float__', '__floordiv__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__iadd__', '__iand__', '__ifloordiv__', '__ilshift__', '__imatmul__', '__imod__', '__imul__', '__index__', '__init__', '__init_subclass__', '__int__', '__invert__', '__ior__', '__ipow__', '__irshift__', '__isub__', '__iter__', '__itruediv__', '__ixor__', '__le__', '__len__', '__lshift__', '__lt__', '__matmul__', '__mod__', '__mul__', '__ne__', '__neg__', '__new__', '__or__', '__pos__', '__pow__', '__radd__', '__rand__', '__rdivmod__', '__reduce__', '__reduce_ex__', '__repr__', '__rfloordiv__', '__rlshift_

## Creating arrays

The equivalent of range.

In [31]:
np.arange(9)

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [32]:
np.arange(2, 14, 2)

array([ 2,  4,  6,  8, 10, 12])

In [33]:
np.arange(start=2, stop=14, step=2)

array([ 2,  4,  6,  8, 10, 12])

In [34]:
np.arange(stop=14, start=2, step=2)

array([ 2,  4,  6,  8, 10, 12])

Creating an array with a certain number of values in a certain interval.

In [35]:
np.linspace(-5, 5, 10)

array([-5.        , -3.88888889, -2.77777778, -1.66666667, -0.55555556,
        0.55555556,  1.66666667,  2.77777778,  3.88888889,  5.        ])

In [36]:
np.linspace(start=-5, stop=5, num=10)

array([-5.        , -3.88888889, -2.77777778, -1.66666667, -0.55555556,
        0.55555556,  1.66666667,  2.77777778,  3.88888889,  5.        ])

An array containing zeros. The default dtype is float.

In [37]:
np.zeros(5)

array([0., 0., 0., 0., 0.])

In [38]:
np.zeros((2, 3, 2))

array([[[0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.]]])

In [39]:
np.ones(shape=(2, 3, 2))

array([[[1., 1.],
        [1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.],
        [1., 1.]]])

In [40]:
# np.empty only allocates the array and does not initialise its contents, so
# the values displayed below are arbitrary leftovers from memory (here they
# happen to be ones), not a guaranteed result.
np.empty(shape=(2, 3, 2))

array([[[1., 1.],
        [1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.],
        [1., 1.]]])

In [41]:
np.full(shape=(2, 3, 2), fill_value=42)

array([[[42, 42],
        [42, 42],
        [42, 42]],

       [[42, 42],
        [42, 42],
        [42, 42]]])

In [42]:
np.empty(shape=(2, 3, 2))

array([[[1., 1.],
        [1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.],
        [1., 1.]]])

### Reshape 

In [43]:
a = np.arange(2, 14)
a, a.shape

(array([ 2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13]), (12,))

In [44]:
b = a.reshape(3, 4)
b

array([[ 2,  3,  4,  5],
       [ 6,  7,  8,  9],
       [10, 11, 12, 13]])

In [45]:
a.reshape(-1, 2)

array([[ 2,  3],
       [ 4,  5],
       [ 6,  7],
       [ 8,  9],
       [10, 11],
       [12, 13]])

Passing -1 as the size of one dimension lets NumPy infer that size automatically from the total number of elements and the sizes of the other dimensions.

### Comparing Arrays 

In [46]:
epsilon = 0.000000000001
a = np.zeros((3,3))
b = np.zeros((3,3))
a[0,0] += 0.5*epsilon
a == b

array([[False,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

In [47]:
(a == b).all()

False

Comparing arrays with (a == b).all() has pitfalls:

- If either a or b is empty and the other one contains a single element, this will return True (the comparison a == b returns an empty array, for which the all-operator returns True).

- If a and b don't have the same shape and aren't broadcastable, then this approach will raise an error.

Instead, use NumPy's provided functions!

In [48]:
np.array_equal(a, b)

False

In [49]:
np.allclose(a, b)

True

In [50]:
np.isclose(a, b)

array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

## Masking 

Logical arrays, i.e. arrays containing boolean values, can be used to index other arrays. These logical arrays are then called masks. This is especially useful to index based on logical conditions.

In [51]:
arr = np.arange(1, 6)
arr

array([1, 2, 3, 4, 5])

In [52]:
mask = np.array([True, False, True, False, True])
mask

array([ True, False,  True, False,  True])

Using the mask for indexing returns an array with only elements at positions where mask is True.

In [53]:
arr[mask]

array([1, 3, 5])

In [54]:
arr < 10

array([ True,  True,  True,  True,  True])

In [55]:
arr[arr < 10]

array([1, 2, 3, 4, 5])

In [56]:
arr[mask] = 10
arr

array([10,  2, 10,  4, 10])

# Mathematical operations

## Standard arithmetic 

In [57]:
arr = np.arange(9)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [58]:
arr * 3

array([ 0,  3,  6,  9, 12, 15, 18, 21, 24])

In [59]:
arr + (arr ** 2)

array([ 0,  2,  6, 12, 20, 30, 42, 56, 72])

In [60]:
arr - arr

array([0, 0, 0, 0, 0, 0, 0, 0, 0])

In [61]:
arr / arr

  """Entry point for launching an IPython kernel.


array([nan,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.])

## Some standard functions 

In [62]:
np.sin(arr)

array([ 0.        ,  0.84147098,  0.90929743,  0.14112001, -0.7568025 ,
       -0.95892427, -0.2794155 ,  0.6569866 ,  0.98935825])

In [63]:
np.sign(arr)

array([0, 1, 1, 1, 1, 1, 1, 1, 1])

In [64]:
np.sign(arr).dtype

dtype('int32')

In [65]:
np.log(arr)

  """Entry point for launching an IPython kernel.


array([      -inf, 0.        , 0.69314718, 1.09861229, 1.38629436,
       1.60943791, 1.79175947, 1.94591015, 2.07944154])

## Broadcasting 

What happens if you try to add arrays of different shapes? Numpy will try to expand the arrays according to three rules and try to make their shapes match, so the operation can be applied elementwise.

1. Rule: If the arrays have different numbers of dimensions, the shape of the array with fewer dimensions is padded with ones on its left side.
Example: (5 x 3) + (3) → (5 x 3) + (1 x 3)
2. Rule: If the number of dimensions matches, but the size of a dimension does not, dimensions of size 1 are expanded to match the other array.
Example: (5 x 3) + (1 x 3) → (5 x 3) + (5 x 3)
3. Rule: If the shapes of the arrays still differ after applying Rules 1 and 2, a broadcasting error is raised.

<img src="broadcasting.png"/>

In [66]:
a = np.arange(15).reshape((5, 3))
a

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [67]:
b = np.arange(3)
b

array([0, 1, 2])

In [68]:
a + b

array([[ 0,  2,  4],
       [ 3,  5,  7],
       [ 6,  8, 10],
       [ 9, 11, 13],
       [12, 14, 16]])

## Aggregation functions

Aggregation functions are functions that reduce the dimensionality of an array. They provide an axis argument to specify which dimension to reduce.

In [69]:
np.random.seed(1)
two_dim_arr = np.random.randint(0, high=20, size=(4, 4))
two_dim_arr

array([[ 5, 11, 12,  8],
       [ 9, 11,  5, 15],
       [ 0, 16,  1, 12],
       [ 7, 13,  6, 18]])

In [70]:
np.min(two_dim_arr)

0

The optional axis argument allows us to specify, which dimension should be aggregated. You can think of it as the operation being applied to all entries that are obtained by keeping the indices in all dimensions fixed except for the axis dimension. Let's look at the result of the minimum operation with axis=0:

In [71]:
np.min(two_dim_arr, axis=0)

array([ 0, 11,  1,  8])

In [72]:
np.random.seed(1)
three_dim_arr = np.random.randint(0, high=20, size=(4, 4, 4))
three_dim_arr

array([[[ 5, 11, 12,  8],
        [ 9, 11,  5, 15],
        [ 0, 16,  1, 12],
        [ 7, 13,  6, 18]],

       [[ 5, 18, 11, 10],
        [14, 18,  4,  9],
        [17,  0, 13,  9],
        [ 9,  7,  1,  0]],

       [[17,  8, 13, 19],
        [15, 10,  8,  7],
        [ 3,  6, 17,  3],
        [ 4, 17, 11, 12]],

       [[16, 13, 19,  9],
        [18, 15,  0,  4],
        [15,  2,  7,  8],
        [ 9,  3,  7,  4]]])

In [73]:
np.min(three_dim_arr, axis=0)

array([[ 5,  8, 11,  8],
       [ 9, 10,  0,  4],
       [ 0,  0,  1,  3],
       [ 4,  3,  1,  0]])

In [74]:
for i in range(4):
    print(three_dim_arr[i, 0, 0])

5
5
17
16


In [75]:
a = np.array([[[2,4],[6,9]],[[3,1],[7,8]],[[4,5],[9, 0]]])

In [76]:
a, a.shape

(array([[[2, 4],
         [6, 9]],
 
        [[3, 1],
         [7, 8]],
 
        [[4, 5],
         [9, 0]]]), (3, 2, 2))

In [77]:
np.min(a)

0

In [78]:
np.min(a, axis=0)

array([[2, 1],
       [6, 0]])

# Advanced indexing 

## Multidimensional indexing 

In [79]:
two_dim_list = [
    [ 0,  1,  2],
    [ 3,  4,  5],
    [ 6,  7,  8],
    [ 9, 10, 11],
    [12, 13, 14]
]

two_dim_list[2][1]

7

In [80]:
inner_list = two_dim_list[2]
inner_list[1]

7

In [81]:
two_dim_arr = np.array(two_dim_list)
two_dim_arr[2,1]

7

In [82]:
large_two_dim_arr = np.arange(81).reshape(9, 9)
large_two_dim_arr

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8],
       [ 9, 10, 11, 12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23, 24, 25, 26],
       [27, 28, 29, 30, 31, 32, 33, 34, 35],
       [36, 37, 38, 39, 40, 41, 42, 43, 44],
       [45, 46, 47, 48, 49, 50, 51, 52, 53],
       [54, 55, 56, 57, 58, 59, 60, 61, 62],
       [63, 64, 65, 66, 67, 68, 69, 70, 71],
       [72, 73, 74, 75, 76, 77, 78, 79, 80]])

In [83]:
large_two_dim_arr[:,1]

array([ 1, 10, 19, 28, 37, 46, 55, 64, 73])

In [84]:
large_two_dim_arr[:, 1:3]

array([[ 1,  2],
       [10, 11],
       [19, 20],
       [28, 29],
       [37, 38],
       [46, 47],
       [55, 56],
       [64, 65],
       [73, 74]])

In [85]:
large_two_dim_arr[:,2:7:2]

array([[ 2,  4,  6],
       [11, 13, 15],
       [20, 22, 24],
       [29, 31, 33],
       [38, 40, 42],
       [47, 49, 51],
       [56, 58, 60],
       [65, 67, 69],
       [74, 76, 78]])

In [86]:
arr_slice = large_two_dim_arr[:,1]
arr_slice[:] = 0
large_two_dim_arr

array([[ 0,  0,  2,  3,  4,  5,  6,  7,  8],
       [ 9,  0, 11, 12, 13, 14, 15, 16, 17],
       [18,  0, 20, 21, 22, 23, 24, 25, 26],
       [27,  0, 29, 30, 31, 32, 33, 34, 35],
       [36,  0, 38, 39, 40, 41, 42, 43, 44],
       [45,  0, 47, 48, 49, 50, 51, 52, 53],
       [54,  0, 56, 57, 58, 59, 60, 61, 62],
       [63,  0, 65, 66, 67, 68, 69, 70, 71],
       [72,  0, 74, 75, 76, 77, 78, 79, 80]])

In [87]:
large_two_dim_arr[:,2] =0
large_two_dim_arr

array([[ 0,  0,  0,  3,  4,  5,  6,  7,  8],
       [ 9,  0,  0, 12, 13, 14, 15, 16, 17],
       [18,  0,  0, 21, 22, 23, 24, 25, 26],
       [27,  0,  0, 30, 31, 32, 33, 34, 35],
       [36,  0,  0, 39, 40, 41, 42, 43, 44],
       [45,  0,  0, 48, 49, 50, 51, 52, 53],
       [54,  0,  0, 57, 58, 59, 60, 61, 62],
       [63,  0,  0, 66, 67, 68, 69, 70, 71],
       [72,  0,  0, 75, 76, 77, 78, 79, 80]])

In [88]:
l2 = np.copy(large_two_dim_arr)
l2[:, 6] =0
large_two_dim_arr

array([[ 0,  0,  0,  3,  4,  5,  6,  7,  8],
       [ 9,  0,  0, 12, 13, 14, 15, 16, 17],
       [18,  0,  0, 21, 22, 23, 24, 25, 26],
       [27,  0,  0, 30, 31, 32, 33, 34, 35],
       [36,  0,  0, 39, 40, 41, 42, 43, 44],
       [45,  0,  0, 48, 49, 50, 51, 52, 53],
       [54,  0,  0, 57, 58, 59, 60, 61, 62],
       [63,  0,  0, 66, 67, 68, 69, 70, 71],
       [72,  0,  0, 75, 76, 77, 78, 79, 80]])

In [89]:
l2

array([[ 0,  0,  0,  3,  4,  5,  0,  7,  8],
       [ 9,  0,  0, 12, 13, 14,  0, 16, 17],
       [18,  0,  0, 21, 22, 23,  0, 25, 26],
       [27,  0,  0, 30, 31, 32,  0, 34, 35],
       [36,  0,  0, 39, 40, 41,  0, 43, 44],
       [45,  0,  0, 48, 49, 50,  0, 52, 53],
       [54,  0,  0, 57, 58, 59,  0, 61, 62],
       [63,  0,  0, 66, 67, 68,  0, 70, 71],
       [72,  0,  0, 75, 76, 77,  0, 79, 80]])

In [90]:
print(...)

Ellipsis


In [91]:
four_dim_arr = np.stack((np.ones((3, 3, 3)), 
                         np.ones((3, 3, 3)) * 2, 
                         np.ones((3, 3, 3)) * 3, 
                         np.ones((3, 3, 3)) * 4))
four_dim_arr

array([[[[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]],

        [[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]],

        [[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]]],


       [[[2., 2., 2.],
         [2., 2., 2.],
         [2., 2., 2.]],

        [[2., 2., 2.],
         [2., 2., 2.],
         [2., 2., 2.]],

        [[2., 2., 2.],
         [2., 2., 2.],
         [2., 2., 2.]]],


       [[[3., 3., 3.],
         [3., 3., 3.],
         [3., 3., 3.]],

        [[3., 3., 3.],
         [3., 3., 3.],
         [3., 3., 3.]],

        [[3., 3., 3.],
         [3., 3., 3.],
         [3., 3., 3.]]],


       [[[4., 4., 4.],
         [4., 4., 4.],
         [4., 4., 4.]],

        [[4., 4., 4.],
         [4., 4., 4.],
         [4., 4., 4.]],

        [[4., 4., 4.],
         [4., 4., 4.],
         [4., 4., 4.]]]])

In [92]:
four_dim_arr.shape

(4, 3, 3, 3)

In [93]:
four_dim_arr.ndim

4

In [94]:
four_dim_arr[3, :, :, :]

array([[[4., 4., 4.],
        [4., 4., 4.],
        [4., 4., 4.]],

       [[4., 4., 4.],
        [4., 4., 4.],
        [4., 4., 4.]],

       [[4., 4., 4.],
        [4., 4., 4.],
        [4., 4., 4.]]])

In [95]:
four_dim_arr[1,...,1]

array([[2., 2., 2.],
       [2., 2., 2.],
       [2., 2., 2.]])

In [96]:
four_dim_arr[..., 1]

array([[[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]],

       [[2., 2., 2.],
        [2., 2., 2.],
        [2., 2., 2.]],

       [[3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.]],

       [[4., 4., 4.],
        [4., 4., 4.],
        [4., 4., 4.]]])

## Fancy indexing 

In [97]:
arr = np.arange(9) + 10
arr

array([10, 11, 12, 13, 14, 15, 16, 17, 18])

In [98]:
indices = np.array([1, 4, 5])
arr[indices]

array([11, 14, 15])

In [99]:
indices = np.array([[1, 4],
                    [5, 7]])
arr[indices]

array([[11, 14],
       [15, 17]])

In [100]:
two_dim_arr

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [101]:
x_indices = np.array([3, 4])
y_indices = np.array([1, 2])
two_dim_arr[x_indices, y_indices]

array([10, 14])

In [102]:
arr = np.arange(9).reshape(3,3)
arr

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

You can use indexing to reorder the rows (or columns) of an array:

In [103]:
arr[[2, 1, 0], :]

array([[6, 7, 8],
       [3, 4, 5],
       [0, 1, 2]])

In [104]:
arr[::-1]

array([[6, 7, 8],
       [3, 4, 5],
       [0, 1, 2]])

# Advanced Masking 

In [105]:
a = np.arange(9)
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [106]:
a % 3 == 0

array([ True, False, False,  True, False, False,  True, False, False])

In [107]:
a[a % 3 == 0] = 0

In [108]:
a

array([0, 1, 2, 0, 4, 5, 0, 7, 8])

In [109]:
a[np.array([0, 3, 6])] = 10
a

array([10,  1,  2, 10,  4,  5, 10,  7,  8])

## Using np.where 

Assigning through a mask changes the original array in place, whereas sometimes the original array should remain unchanged. np.where returns the indices of an array at which the given condition is True.

In [110]:
a = np.arange(9).reshape(3, 3)
a[a % 3 == 0] = 0
a

array([[0, 1, 2],
       [0, 4, 5],
       [0, 7, 8]])

In [111]:
a = np.arange(9).reshape(3, 3)
# np.where(condition) returns one index array per dimension (rows, columns)
# for the positions where the condition holds; `a` itself is left untouched.
divisible_by_three = np.where(a % 3 == 0)
divisible_by_three

(array([0, 1, 2], dtype=int64), array([0, 0, 0], dtype=int64))

# Extending arrays

## Adding new dimensions with np.newaxis 

Instead of np.newaxis, None can be used.

In [114]:
one_dim_arr = np.arange(5)
one_dim_arr, one_dim_arr.shape

(array([0, 1, 2, 3, 4]), (5,))

In [115]:
two_dim_arr = one_dim_arr[np.newaxis, :]
two_dim_arr, two_dim_arr.shape

(array([[0, 1, 2, 3, 4]]), (1, 5))

In [116]:
# Two new axes are inserted here (np.newaxis and None are interchangeable),
# so the result is three-dimensional with shape (5, 1, 1).
three_dim_arr = one_dim_arr[:, np.newaxis, None]
three_dim_arr, three_dim_arr.shape

(array([[[0]],
 
        [[1]],
 
        [[2]],
 
        [[3]],
 
        [[4]]]), (5, 1, 1))

In [117]:
one_dim_arr[:, None]

array([[0],
       [1],
       [2],
       [3],
       [4]])

In [118]:
one_dim_arr.shape

(5,)

## Removing dimensions 

arr.squeeze() removes dimensions of size 1:

In [119]:
one_dim_arr = np.arange(5)
two_dim_arr = one_dim_arr[np.newaxis, :]
two_dim_arr, two_dim_arr.shape

(array([[0, 1, 2, 3, 4]]), (1, 5))

In [120]:
two_dim_arr.squeeze(), two_dim_arr.squeeze().shape

(array([0, 1, 2, 3, 4]), (5,))

In [121]:
a = np.arange(5).reshape(1, -1, 1, 1)
a

array([[[[0]],

        [[1]],

        [[2]],

        [[3]],

        [[4]]]])

In [122]:
a.squeeze()

array([0, 1, 2, 3, 4])

## Combining arrays 

There are many ways to combine existing arrays, like np.append, np.concatenate and np.stack. However, these operations always require the whole array to be copied. Therefore, it often makes more sense to allocate an array of the size you need later upfront and then just fill the respective parts.

In [123]:
np.arange(10), np.arange(10)[::-1]

(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0]))

In [124]:
np.concatenate((np.arange(10),np.arange(10)[::-1]))

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

A quick and easy way to combine scalars and arrays is using np.r_, with the demanded arrays, lists, or numbers in square brackets:

In [125]:
np.r_[2, 2, 2, np.arange(10), np.arange(10)[::-1], [0, 1, 2]]

array([2, 2, 2, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 8, 7, 6, 5, 4, 3, 2, 1,
       0, 0, 1, 2])

np.append uses concatenation internally:

In [126]:
np.append(np.arange(10), np.arange(10))

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

For higher-dimensional arrays, other functions are useful:

In [127]:
np.stack((np.arange(10), np.arange(10)))

array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
       [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])

There are also the functions np.vstack (row-wise-stacking) and np.hstack (column-wise-stacking):

* hstack is equivalent to concatenation along the second axis, except for 1-D arrays where it concatenates along the first axis
* vstack is equivalent to concatenation along the first axis after 1-D arrays of shape (N,) have been reshaped to (1,N).

In [128]:
two_dim_arr

array([[0, 1, 2, 3, 4]])

In [129]:
two_dim_arr = np.arange(16).reshape(4, -1)
two_dim_arr_2 = np.arange(16).reshape(4, -1) + 16
print(two_dim_arr)
print(two_dim_arr_2)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]
[[16 17 18 19]
 [20 21 22 23]
 [24 25 26 27]
 [28 29 30 31]]


In [132]:
np.hstack((two_dim_arr, two_dim_arr_2))

array([[ 0,  1,  2,  3, 16, 17, 18, 19],
       [ 4,  5,  6,  7, 20, 21, 22, 23],
       [ 8,  9, 10, 11, 24, 25, 26, 27],
       [12, 13, 14, 15, 28, 29, 30, 31]])

In [133]:
np.vstack((two_dim_arr, two_dim_arr_2))

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

## random.seed 

Setting a random seed makes the random number generator produce the same sequence of numbers every time. This is very useful for testing and reproducibility, but re-seeding with a fixed value (as in the loop below) takes the randomness out of the results, so it should not be used in final code that needs genuinely varying random numbers:

In [137]:
for _ in range(5):
    np.random.seed(0)
    print(np.random.random(5))

[0.5488135  0.71518937 0.60276338 0.54488318 0.4236548 ]
[0.5488135  0.71518937 0.60276338 0.54488318 0.4236548 ]
[0.5488135  0.71518937 0.60276338 0.54488318 0.4236548 ]
[0.5488135  0.71518937 0.60276338 0.54488318 0.4236548 ]
[0.5488135  0.71518937 0.60276338 0.54488318 0.4236548 ]


## Shuffling arrays 

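np.random.shuffle shuffles an array along its first axis. That means a one-dimensional array is completely shuffled, whereas for multidimensional arrays, only the order of the sub-arrays (e.g. the rows of a 2D array) changes while their contents stay the same.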

In [139]:
a = np.arange(10)
np.random.shuffle(a)
a

array([4, 7, 6, 2, 8, 9, 1, 0, 3, 5])

In [140]:
a = np.arange(9).reshape(3, 3)
np.random.shuffle(a)
a

array([[0, 1, 2],
       [6, 7, 8],
       [3, 4, 5]])

To shuffle the array completely, you can flatten it and afterwards reshape it to its original shape:

In [141]:
a = np.arange(9).reshape(3, 3)
a

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [142]:
b = a.flatten()
np.random.shuffle(b)
a = b.reshape(a.shape)
a

array([[1, 7, 4],
       [8, 6, 2],
       [0, 3, 5]])

Note that np.random.shuffle shuffles the array in-place. To return a shuffled copy instead, use np.random.permutation:

In [143]:
a = np.arange(9).reshape(3, 3)
a

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [145]:
np.random.permutation(a.flatten()).reshape(a.shape)

array([[8, 2, 0],
       [1, 7, 3],
       [4, 5, 6]])

In [146]:
a

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

If you are dealing with different arrays you want to shuffle while keeping them matched to each other, 
it is more useful to shuffle the indices instead:

In [147]:
a = np.arange(9) + 1
b = a**2
np.vstack((a, b))

array([[ 1,  2,  3,  4,  5,  6,  7,  8,  9],
       [ 1,  4,  9, 16, 25, 36, 49, 64, 81]])

In [148]:
order = np.random.permutation(a.shape[0])
np.vstack((a[order], b[order]))

array([[ 1,  3,  8,  5,  6,  2,  7,  4,  9],
       [ 1,  9, 64, 25, 36,  4, 49, 16, 81]])

## np.random.choice 

np.random.choice draws a random sample from a given 1D array:

In [151]:
np.random.seed(1)
a = np.arange(10)
np.random.choice(a, size=5)

array([5, 8, 9, 5, 0])

In [152]:
np.random.choice(a, size=5, replace=False)

array([6, 3, 9, 2, 8])

You can also specify the probabilities with which to take certain elements. To generate an array where roughly a quarter of the elements are True, you can use:

In [154]:
np.random.choice(np.r_[True, False], size=(5,5), p=[0.25, 0.75])

array([[False, False, False,  True, False],
       [False, False, False, False, False],
       [False, False,  True, False, False],
       [False, False, False,  True, False],
       [False, False, False,  True, False]])