# NumPy

In [1]:
import numpy as np

## Regular arrays

For each of the following, create the corresponding NumPy array.

* [0, 0, 0, ..., 0] (length 10)

In [2]:
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [3]:
[0 for _ in range(10)]

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

In [4]:
[0]+[0]

[0, 0]

In [5]:
[0]*10

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

* A five row, seven column matrix containing all 4s.

In [6]:
np.zeros(5,7)

TypeError: Cannot interpret '7' as a data type

In [7]:
help(np.zeros)

Help on built-in function zeros in module numpy:

zeros(...)
    zeros(shape, dtype=float, order='C', *, like=None)
    
    Return a new array of given shape and type, filled with zeros.
    
    Parameters
    ----------
    shape : int or tuple of ints
        Shape of the new array, e.g., ``(2, 3)`` or ``2``.
    dtype : data-type, optional
        The desired data-type for the array, e.g., `numpy.int8`.  Default is
        `numpy.float64`.
    order : {'C', 'F'}, optional, default: 'C'
        Whether to store multi-dimensional data in row-major
        (C-style) or column-major (Fortran-style) order in
        memory.
    like : array_like, optional
        Reference object to allow the creation of arrays which are not
        NumPy arrays. If an array-like passed in as ``like`` supports
        the ``__array_function__`` protocol, the result will be defined
        by it. In this case, it ensures the creation of an array object
        compatible with that passed in via this arg

In [8]:
np.zeros((5,7))

array([[0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.]])

In [9]:
np.zeros((5,7))+4

array([[4., 4., 4., 4., 4., 4., 4.],
       [4., 4., 4., 4., 4., 4., 4.],
       [4., 4., 4., 4., 4., 4., 4.],
       [4., 4., 4., 4., 4., 4., 4.],
       [4., 4., 4., 4., 4., 4., 4.]])

In [10]:
np.ones((5,7))

array([[1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1.]])

In [11]:
np.ones((5,7))*4

array([[4., 4., 4., 4., 4., 4., 4.],
       [4., 4., 4., 4., 4., 4., 4.],
       [4., 4., 4., 4., 4., 4., 4.],
       [4., 4., 4., 4., 4., 4., 4.],
       [4., 4., 4., 4., 4., 4., 4.]])

In [12]:
np.ones((5,7), int)*4

array([[4, 4, 4, 4, 4, 4, 4],
       [4, 4, 4, 4, 4, 4, 4],
       [4, 4, 4, 4, 4, 4, 4],
       [4, 4, 4, 4, 4, 4, 4],
       [4, 4, 4, 4, 4, 4, 4]])

* [0, ..., 100] (length 5, evenly spaced).

In [13]:
np.linspace(0,100,5)

array([  0.,  25.,  50.,  75., 100.])

* The 3x3 matrix $\begin{pmatrix} 0 & 0 & 0 \\ 1 & 1 & 1 \\ 2 & 2 & 2 \end{pmatrix}$

In [14]:
arr = np.zeros((3,3))
for i in range(3):
    arr[i] = i

In [15]:
arr

array([[0., 0., 0.],
       [1., 1., 1.],
       [2., 2., 2.]])

* The 2x5 matrix $
\begin{pmatrix} 2 & 5 & 8 & 11 & 14 \\ 17 & 20 & 23 & 26 & 29 \end{pmatrix}
$

In [16]:
arr = np.zeros((2,5))
for i in range(2):
    for j in range(5):
        arr[i,j] = 2+3*j
        print(arr)

[[2. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
[[2. 5. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
[[2. 5. 8. 0. 0.]
 [0. 0. 0. 0. 0.]]
[[ 2.  5.  8. 11.  0.]
 [ 0.  0.  0.  0.  0.]]
[[ 2.  5.  8. 11. 14.]
 [ 0.  0.  0.  0.  0.]]
[[ 2.  5.  8. 11. 14.]
 [ 2.  0.  0.  0.  0.]]
[[ 2.  5.  8. 11. 14.]
 [ 2.  5.  0.  0.  0.]]
[[ 2.  5.  8. 11. 14.]
 [ 2.  5.  8.  0.  0.]]
[[ 2.  5.  8. 11. 14.]
 [ 2.  5.  8. 11.  0.]]
[[ 2.  5.  8. 11. 14.]
 [ 2.  5.  8. 11. 14.]]


In [17]:
# Fill a 2-row array with the arithmetic progression 2, 5, 8, ... laid out row by row.
cols = 5  # number of entries in each row
step = 3  # difference between consecutive entries
arr = np.zeros((2,cols))
for i in range(2):
    for j in range(cols):
        # 2 is the first entry; moving one column to the right adds `step`,
        # and moving down one row skips a whole row of `cols` entries,
        # i.e. adds cols*step.
        arr[i,j] = 2+step*j+cols*step*i
print(arr)

[[ 2.  5.  8. 11. 14.]
 [17. 20. 23. 26. 29.]]


In [18]:
np.arange(2,29,3)

array([ 2,  5,  8, 11, 14, 17, 20, 23, 26])

In [19]:
np.arange(2,30,3)

array([ 2,  5,  8, 11, 14, 17, 20, 23, 26, 29])

In [20]:
np.arange(2,30,3).reshape((2,5))

array([[ 2,  5,  8, 11, 14],
       [17, 20, 23, 26, 29]])

## Random numbers

* Make a length 10 NumPy array of random integers between 0 (inclusive) and 40 (exclusive).

In [2]:
np.random.randint(0, 40, size=10)

array([30, 35, 37,  3,  0, 18, 12, 17, 38, 16])

In [3]:
help(np.random.randint)

Help on built-in function randint:

randint(...) method of numpy.random.mtrand.RandomState instance
    randint(low, high=None, size=None, dtype=int)
    
    Return random integers from `low` (inclusive) to `high` (exclusive).
    
    Return random integers from the "discrete uniform" distribution of
    the specified dtype in the "half-open" interval [`low`, `high`). If
    `high` is None (the default), then results are from [0, `low`).
    
    .. note::
        New code should use the ``integers`` method of a ``default_rng()``
        instance instead; please see the :ref:`random-quick-start`.
    
    Parameters
    ----------
    low : int or array-like of ints
        Lowest (signed) integers to be drawn from the distribution (unless
        ``high=None``, in which case this parameter is one above the
        *highest* such integer).
    high : int or array-like of ints, optional
        If provided, one above the largest (signed) integer to be drawn
        from the distributi

In [5]:
rng = np.random.default_rng()

In [6]:
type(rng)

numpy.random._generator.Generator

In [8]:
arr = rng.integers(0, 40, size=10)

In [9]:
arr

array([10, 36, 39, 25, 23, 30,  5, 20, 25, 12])

* Choose 6 of those numbers (with replacement) and put them into a NumPy array.

In [10]:
rng.choice(arr, size=6)

array([23, 30, 23, 30, 30, 39])

* Make a three row, five column NumPy array of random real numbers between -1 and 4.

In [11]:
rng.random(-1, 4, size=(3,5))

TypeError: random() got multiple values for keyword argument 'size'

In [12]:
help(rng.random)

Help on built-in function random:

random(...) method of numpy.random._generator.Generator instance
    random(size=None, dtype=np.float64, out=None)
    
    Return random floats in the half-open interval [0.0, 1.0).
    
    Results are from the "continuous uniform" distribution over the
    stated interval.  To sample :math:`Unif[a, b), b > a` multiply
    the output of `random` by `(b-a)` and add `a`::
    
      (b - a) * random() + a
    
    Parameters
    ----------
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.
    dtype : dtype, optional
        Desired dtype of the result, only `float64` and `float32` are supported.
        Byteorder must be native. The default value is np.float64.
    out : ndarray, optional
        Alternative output array in which to place the result. If size is not None,
        it

In [15]:
arr = rng.random((3,5))
arr

array([[0.14373169, 0.04151085, 0.85160258, 0.5625753 , 0.77434184],
       [0.09323801, 0.63691547, 0.83783101, 0.60707909, 0.69066845],
       [0.65647018, 0.45466071, 0.77161217, 0.53020513, 0.71808796]])

In [16]:
arr*5

array([[0.71865844, 0.20755425, 4.25801288, 2.81287651, 3.87170919],
       [0.46619004, 3.18457733, 4.18915507, 3.03539543, 3.45334223],
       [3.28235092, 2.27330357, 3.85806084, 2.65102563, 3.59043979]])

In [17]:
arr*5-1

array([[-0.28134156, -0.79244575,  3.25801288,  1.81287651,  2.87170919],
       [-0.53380996,  2.18457733,  3.18915507,  2.03539543,  2.45334223],
       [ 2.28235092,  1.27330357,  2.85806084,  1.65102563,  2.59043979]])

In [18]:
5*rng.random(size=(3,5)) - 1

array([[-0.01973026,  1.61155096,  2.9866675 ,  1.71899874, -0.01933733],
       [ 1.36928355,  2.32786313, -0.00448403, -0.82777187,  0.89141526],
       [ 3.13028253, -0.38060258,  3.05444416,  0.42173001,  1.3847107 ]])

* Make a length 10 NumPy array of random numbers following a normal distribution with mean 2 and standard deviation 0.1.

In [19]:
help(rng.normal)

Help on built-in function normal:

normal(...) method of numpy.random._generator.Generator instance
    normal(loc=0.0, scale=1.0, size=None)
    
    Draw random samples from a normal (Gaussian) distribution.
    
    The probability density function of the normal distribution, first
    derived by De Moivre and 200 years later by both Gauss and Laplace
    independently [2]_, is often called the bell curve because of
    its characteristic shape (see the example below).
    
    The normal distributions occurs often in nature.  For example, it
    describes the commonly occurring distribution of samples influenced
    by a large number of tiny, random disturbances, each with its own
    unique distribution [2]_.
    
    Parameters
    ----------
    loc : float or array_like of floats
        Mean ("centre") of the distribution.
    scale : float or array_like of floats
        Standard deviation (spread or "width") of the distribution. Must be
        non-negative.
    size : int o

In [20]:
rng.normal(2, 0.1, size=10)

array([1.97074003, 2.10991763, 1.98318881, 2.00287628, 2.10707454,
       2.02365598, 1.89343959, 1.97254061, 1.97359416, 1.96238154])

In [21]:
rng.normal(2, 0.1, size=10).shape

(10,)

## Changing rows and columns

In [2]:
arr = np.zeros((4,4))
for i in range(4):
    arr[i] = i

In [3]:
arr

array([[0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [3., 3., 3., 3.]])

In [4]:
arr = np.zeros((4,4))

In [6]:
arr

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [5]:
arr[2]

array([0., 0., 0., 0.])

In [7]:
arr[2] = 13

In [8]:
arr

array([[ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.],
       [13., 13., 13., 13.],
       [ 0.,  0.,  0.,  0.]])

In [9]:
arr[:,2] = -4

In [10]:
arr

array([[ 0.,  0., -4.,  0.],
       [ 0.,  0., -4.,  0.],
       [13., 13., -4., 13.],
       [ 0.,  0., -4.,  0.]])

In [11]:
arr[1,:] = 1

In [12]:
arr

array([[ 0.,  0., -4.,  0.],
       [ 1.,  1.,  1.,  1.],
       [13., 13., -4., 13.],
       [ 0.,  0., -4.,  0.]])

In [13]:
v = arr[:, 2]

In [14]:
v

array([-4.,  1., -4., -4.])

In [15]:
v.shape

(4,)

In [16]:
arr[3] = [2,10]

ValueError: could not broadcast input array from shape (2,) into shape (4,)

In [17]:
arr[:] = [1,3,4,7]

In [18]:
arr

array([[1., 3., 4., 7.],
       [1., 3., 4., 7.],
       [1., 3., 4., 7.],
       [1., 3., 4., 7.]])

In [21]:
w = np.array([1,3,4,7])

In [19]:
arr.shape

(4, 4)

In [22]:
w.shape

(4,)

In [23]:
arr = np.zeros((3,3))
for i in range(3):
    arr[i] = i

In [24]:
arr

array([[0., 0., 0.],
       [1., 1., 1.],
       [2., 2., 2.]])

In [27]:
arr = np.zeros((3,3))
arr[:] = np.arange(3)
arr.T

array([[0., 0., 0.],
       [1., 1., 1.],
       [2., 2., 2.]])

In [28]:
arr

array([[0., 1., 2.],
       [0., 1., 2.],
       [0., 1., 2.]])

In [29]:
arr = np.zeros((3,3))
arr[:] = np.arange(3)
arr = arr.T

In [30]:
arr

array([[0., 0., 0.],
       [1., 1., 1.],
       [2., 2., 2.]])

In [31]:
arr = np.zeros((3,3))
arr[:] = np.arange(3).reshape((3,1))
arr

array([[0., 0., 0.],
       [1., 1., 1.],
       [2., 2., 2.]])

In [32]:
x = np.arange(3).reshape((3,1))

In [33]:
x

array([[0],
       [1],
       [2]])

In [34]:
x.shape

(3, 1)

In [35]:
arr.shape

(3, 3)

In [36]:
y = np.arange(3).reshape((2,1))

ValueError: cannot reshape array of size 3 into shape (2,1)

In [37]:
z = np.arange(3).reshape((-1,1))

In [38]:
z

array([[0],
       [1],
       [2]])

In [39]:
z.shape

(3, 1)

In [40]:
arr

array([[0., 0., 0.],
       [1., 1., 1.],
       [2., 2., 2.]])

In [41]:
arr.reshape(-1)

array([0., 0., 0., 1., 1., 1., 2., 2., 2.])

## Timing a computation with a NumPy array vs with a list

In [2]:
rng = np.random.default_rng()

In [3]:
arr = rng.integers(1,10,size=10**6)

In [4]:
mylist = list(arr)

In [5]:
arr[:5]

array([3, 7, 8, 2, 1])

In [6]:
1.5/arr[:5]

array([0.5       , 0.21428571, 0.1875    , 0.75      , 1.5       ])

In [7]:
1.5/mylist[:5]

TypeError: unsupported operand type(s) for /: 'float' and 'list'

In [8]:
newlist = []
for x in mylist[:5]:
    newlist.append(1.5/x)

In [9]:
newlist

[0.5, 0.21428571428571427, 0.1875, 0.75, 1.5]

In [10]:
[1.5/x for x in mylist[:5]]

[0.5, 0.21428571428571427, 0.1875, 0.75, 1.5]

In [11]:
%%timeit
1.5/arr

1.12 ms ± 38.9 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [12]:
%%timeit
newlist = []
for x in mylist:
    newlist.append(1.5/x)

1.66 s ± 35.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [13]:
%%timeit
[1.5/x for x in mylist]

1.62 s ± 27.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Counting using NumPy

In [2]:
rng = np.random.default_rng()

In [3]:
n = 20

* Make a length `n` NumPy array of random integers between -50 and 50 (inclusive).  Also make a list with the same numbers.

In [4]:
arr = rng.integers(-50, 51, size=n)

In [5]:
mylist = list(arr)

* How many of the integers are strictly less than 10?

In [6]:
count = 0
for x in mylist:
    if x < 10:
        count += 1

In [7]:
count

13

In [8]:
len([x for x in mylist if x < 10])

13

In [9]:
arr

array([-20,   7,  32, -46,  -2, -16, -12, -39, -22, -38,  17, -20,  49,
        27, -36,   0,  49,   3,  41,  39])

In [10]:
arr < 10

array([ True,  True, False,  True,  True,  True,  True,  True,  True,
        True, False,  True, False, False,  True,  True, False,  True,
       False, False])

In [11]:
np.count_nonzero(arr < 10)

13

* At what indices do these integers less than 10 occur?

In [12]:
np.nonzero(arr < 10)

(array([ 0,  1,  3,  4,  5,  6,  7,  8,  9, 11, 14, 15, 17]),)

* Make a new array containing only these integers.

In [13]:
newlist = []
for x in mylist:
    if x < 10:
        newlist.append(x)

In [14]:
newlist

[-20, 7, -46, -2, -16, -12, -39, -22, -38, -20, -36, 0, 3]

In [15]:
[x for x in mylist if x < 10]

[-20, 7, -46, -2, -16, -12, -39, -22, -38, -20, -36, 0, 3]

In [17]:
arr

array([-20,   7,  32, -46,  -2, -16, -12, -39, -22, -38,  17, -20,  49,
        27, -36,   0,  49,   3,  41,  39])

In [16]:
arr < 10

array([ True,  True, False,  True,  True,  True,  True,  True,  True,
        True, False,  True, False, False,  True,  True, False,  True,
       False, False])

In [18]:
arr[arr < 10]

array([-20,   7, -46,  -2, -16, -12, -39, -22, -38, -20, -36,   0,   3])

* If you pick a random integer from such an array, what's the probability it is less than 10?

In [19]:
n = 10**7

In [20]:
arr = rng.integers(-50, 51, size=n)
mylist = list(arr)

In [21]:
count = 0
for x in mylist:
    if x < 10:
        count += 1
count/n

0.5942684

In [22]:
%%timeit
count = 0
for x in mylist:
    if x < 10:
        count += 1
count/n

930 ms ± 134 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [23]:
%%timeit
len([x for x in mylist if x < 10])

717 ms ± 69.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [24]:
%%timeit
np.count_nonzero(arr < 10)

5.13 ms ± 235 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [25]:
arr = rng.integers(-50, 51, size=n)

In [27]:
np.count_nonzero(arr < 10)/n

0.5939019

In [28]:
60/101

0.594059405940594

## Logic in Python and NumPy

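General rule of thumb:
* In base Python, use `and`, `or`, `not`; these work on single True/False values.
* In NumPy (or pandas), use `&`, `|`, `~` (tilde); these act elementwise on Boolean arrays. Wrap each comparison in parentheses, e.g. `(arr > -10) & (arr < 10)`, because `&` and `|` bind more tightly than the comparison operators.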

In [2]:
rng = np.random.default_rng()
n = 20

arr = rng.integers(-50, 51, size=n)
mylist = list(arr)

In [3]:
mylist

[-20,
 17,
 -11,
 29,
 -4,
 -50,
 40,
 -36,
 23,
 24,
 -38,
 37,
 6,
 -32,
 -35,
 2,
 16,
 -32,
 -46,
 -47]

* Find all the entries in `arr` and in `mylist` that are strictly between -10 and 10.

In [4]:
[x for x in mylist if (x > -10) and (x < 10)]

[-4, 6, 2]

In [6]:
[x for x in mylist if not ((x <= -10) or (x >= 10))]

[-4, 6, 2]

In [7]:
arr

array([-20,  17, -11,  29,  -4, -50,  40, -36,  23,  24, -38,  37,   6,
       -32, -35,   2,  16, -32, -46, -47])

In [8]:
arr > -10

array([False,  True, False,  True,  True, False,  True, False,  True,
        True, False,  True,  True, False, False,  True,  True, False,
       False, False])

In [9]:
arr < 10

array([ True, False,  True, False,  True,  True, False,  True, False,
       False,  True, False,  True,  True,  True,  True, False,  True,
        True,  True])

In [10]:
(arr > -10) and (arr < 10)

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [11]:
(arr > -10) & (arr < 10)

array([False, False, False, False,  True, False, False, False, False,
       False, False, False,  True, False, False,  True, False, False,
       False, False])

In [12]:
arr[(arr > -10) & (arr < 10)]

array([-4,  6,  2])

In [13]:
arr[(~(arr <= -10)) & (~(arr >= 10))]

array([-4,  6,  2])

In [14]:
arr[~((arr <= -10) | (arr >= 10))]

array([-4,  6,  2])

## The `axis` keyword argument

* If you roll four distinct 6-sided dice, what is the probability that the biggest value is 5?

In [2]:
rng = np.random.default_rng()

In [3]:
rng.integers(1, 7, size=4)

array([4, 3, 5, 1])

In [4]:
exps = 10  # number of simulated experiments (each experiment rolls four dice)
s = 0  # how many experiments so far had a largest roll of exactly 5

for i in range(exps):
    # integers(1, 7, size=4) draws four values from 1..6; the upper bound 7 is excluded.
    if np.max(rng.integers(1, 7, size=4)) == 5:
        s += 1

# Fraction of experiments whose maximum was 5: an estimate of the probability.
s/exps

0.3

In [5]:
%%time

exps = 10**6
s = 0

for i in range(exps):
    if np.max(rng.integers(1, 7, size=4)) == 5:
        s += 1

s/exps

CPU times: user 13.7 s, sys: 164 ms, total: 13.9 s
Wall time: 14.3 s


0.284671

In [6]:
exps = 10
rng.integers(1, 7, size=(exps,4))

array([[1, 2, 3, 5],
       [5, 6, 1, 4],
       [2, 3, 4, 3],
       [5, 5, 5, 4],
       [4, 5, 4, 3],
       [3, 3, 5, 1],
       [6, 4, 2, 5],
       [3, 2, 2, 4],
       [3, 1, 5, 6],
       [2, 3, 4, 2]])

In [7]:
exps = 10
arr = rng.integers(1, 7, size=(exps,4))
print(arr)
np.max(arr)

[[1 4 5 1]
 [6 5 4 4]
 [2 1 3 3]
 [6 6 1 6]
 [6 6 6 6]
 [1 4 2 2]
 [4 1 3 3]
 [5 1 4 5]
 [3 5 3 5]
 [3 4 6 1]]


6

In [8]:
help(np.apply_along_axis)

Help on function apply_along_axis in module numpy:

apply_along_axis(func1d, axis, arr, *args, **kwargs)
    Apply a function to 1-D slices along the given axis.
    
    Execute `func1d(a, *args, **kwargs)` where `func1d` operates on 1-D arrays
    and `a` is a 1-D slice of `arr` along `axis`.
    
    This is equivalent to (but faster than) the following use of `ndindex` and
    `s_`, which sets each of ``ii``, ``jj``, and ``kk`` to a tuple of indices::
    
        Ni, Nk = a.shape[:axis], a.shape[axis+1:]
        for ii in ndindex(Ni):
            for kk in ndindex(Nk):
                f = func1d(arr[ii + s_[:,] + kk])
                Nj = f.shape
                for jj in ndindex(Nj):
                    out[ii + jj + kk] = f[jj]
    
    Equivalently, eliminating the inner loop, this can be expressed as::
    
        Ni, Nk = a.shape[:axis], a.shape[axis+1:]
        for ii in ndindex(Ni):
            for kk in ndindex(Nk):
                out[ii + s_[...,] + kk] = func1d(arr[ii 

In [9]:
arr

array([[1, 4, 5, 1],
       [6, 5, 4, 4],
       [2, 1, 3, 3],
       [6, 6, 1, 6],
       [6, 6, 6, 6],
       [1, 4, 2, 2],
       [4, 1, 3, 3],
       [5, 1, 4, 5],
       [3, 5, 3, 5],
       [3, 4, 6, 1]])

In [10]:
np.apply_along_axis(np.max, axis=1, arr=arr)

array([5, 6, 3, 6, 6, 4, 4, 5, 5, 6])

In [11]:
np.apply_along_axis(np.max, axis=0, arr=arr)

array([6, 6, 6, 6])

In [14]:
exps = 10**6

arr = rng.integers(1, 7, size=(exps,4))
(np.apply_along_axis(np.max, axis=1, arr=arr) == 5).mean()

0.284891

In [15]:
%%time

exps = 10**6

arr = rng.integers(1, 7, size=(exps,4))
(np.apply_along_axis(np.max, axis=1, arr=arr) == 5).mean()

CPU times: user 5.27 s, sys: 82.5 ms, total: 5.36 s
Wall time: 5.5 s


0.284142

In [17]:
exps = 10

arr = rng.integers(1, 7, size=(exps,4))
print(arr)
arr.max()

[[6 6 5 6]
 [5 1 5 5]
 [4 5 4 5]
 [1 3 2 3]
 [2 4 5 6]
 [1 1 1 5]
 [1 2 1 3]
 [2 5 6 1]
 [5 6 1 1]
 [2 1 2 6]]


6

In [18]:
help(arr.max)

Help on built-in function max:

max(...) method of numpy.ndarray instance
    a.max(axis=None, out=None, keepdims=False, initial=<no value>, where=True)
    
    Return the maximum along a given axis.
    
    Refer to `numpy.amax` for full documentation.
    
    See Also
    --------
    numpy.amax : equivalent function



In [19]:
exps = 10

arr = rng.integers(1, 7, size=(exps,4))
print(arr)
arr.max(axis=1)

[[5 3 6 4]
 [6 3 1 1]
 [6 6 1 5]
 [1 6 6 2]
 [5 6 4 6]
 [3 1 5 2]
 [1 3 5 1]
 [6 2 4 2]
 [6 1 1 3]
 [2 6 1 5]]


array([6, 6, 6, 6, 6, 5, 5, 6, 6, 6])

In [20]:
exps = 10

arr = rng.integers(1, 7, size=(exps,4))
(arr.max(axis=1) == 5)

array([ True, False, False, False, False, False,  True,  True, False,
        True])

In [21]:
exps = 10

arr = rng.integers(1, 7, size=(exps,4))
(arr.max(axis=1) == 5).mean(axis=1)

AxisError: axis 1 is out of bounds for array of dimension 1

In [22]:
exps = 10

arr = rng.integers(1, 7, size=(exps,4))
(arr.max(axis=1) == 5).mean(axis=0)

0.4

In [24]:
exps = 10**6

arr = rng.integers(1, 7, size=(exps,4))
(arr.max(axis=1) == 5).mean()

0.285484

In [25]:
%%time
exps = 10**6

arr = rng.integers(1, 7, size=(exps,4))
(arr.max(axis=1) == 5).mean()

CPU times: user 46.5 ms, sys: 5.92 ms, total: 52.4 ms
Wall time: 51.3 ms


0.284911

## An exact probability

The same question as last time, but now we will compute the exact probability.

* If you roll four distinct 6-sided dice, what is the probability that the biggest value is 5?

In [2]:
6**4

1296

In [3]:
# Enumerate every possible outcome of rolling four dice as a tuple (i, j, k, l).
# Each entry runs over 1..6 (range(1,7) stops before 7), and the last
# entry changes fastest because it belongs to the innermost loop.
all_dice = []
for i in range(1,7):
    for j in range(1,7):
        for k in range(1,7):
            for l in range(1,7):
                all_dice.append((i,j,k,l))

In [4]:
len(all_dice)

1296

In [5]:
all_dice[:10]

[(1, 1, 1, 1),
 (1, 1, 1, 2),
 (1, 1, 1, 3),
 (1, 1, 1, 4),
 (1, 1, 1, 5),
 (1, 1, 1, 6),
 (1, 1, 2, 1),
 (1, 1, 2, 2),
 (1, 1, 2, 3),
 (1, 1, 2, 4)]

In [6]:
from itertools import product

In [7]:
help(product)

Help on class product in module itertools:

class product(builtins.object)
 |  product(*iterables, repeat=1) --> product object
 |  
 |  Cartesian product of input iterables.  Equivalent to nested for-loops.
 |  
 |  For example, product(A, B) returns the same as:  ((x,y) for x in A for y in B).
 |  The leftmost iterators are in the outermost for-loop, so the output tuples
 |  cycle in a manner similar to an odometer (with the rightmost element changing
 |  on every iteration).
 |  
 |  To compute the product of an iterable with itself, specify the number
 |  of repetitions with the optional repeat keyword argument. For example,
 |  product(A, repeat=4) means the same as product(A, A, A, A).
 |  
 |  product('ab', range(3)) --> ('a',0) ('a',1) ('a',2) ('b',0) ('b',1) ('b',2)
 |  product((0,1), (0,1), (0,1)) --> (0,0,0) (0,0,1) (0,1,0) (0,1,1) (1,0,0) ...
 |  
 |  Methods defined here:
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  __iter__(self, /

In [8]:
product(range(1,7), repeat=2)

<itertools.product at 0x112cd4900>

In [9]:
list(product(range(1,7), repeat=2))

[(1, 1),
 (1, 2),
 (1, 3),
 (1, 4),
 (1, 5),
 (1, 6),
 (2, 1),
 (2, 2),
 (2, 3),
 (2, 4),
 (2, 5),
 (2, 6),
 (3, 1),
 (3, 2),
 (3, 3),
 (3, 4),
 (3, 5),
 (3, 6),
 (4, 1),
 (4, 2),
 (4, 3),
 (4, 4),
 (4, 5),
 (4, 6),
 (5, 1),
 (5, 2),
 (5, 3),
 (5, 4),
 (5, 5),
 (5, 6),
 (6, 1),
 (6, 2),
 (6, 3),
 (6, 4),
 (6, 5),
 (6, 6)]

In [10]:
list(product(["a","b","c"], [5, 6]))

[('a', 5), ('a', 6), ('b', 5), ('b', 6), ('c', 5), ('c', 6)]

In [12]:
list(product(["a","b","c"], repeat=3))

[('a', 'a', 'a'),
 ('a', 'a', 'b'),
 ('a', 'a', 'c'),
 ('a', 'b', 'a'),
 ('a', 'b', 'b'),
 ('a', 'b', 'c'),
 ('a', 'c', 'a'),
 ('a', 'c', 'b'),
 ('a', 'c', 'c'),
 ('b', 'a', 'a'),
 ('b', 'a', 'b'),
 ('b', 'a', 'c'),
 ('b', 'b', 'a'),
 ('b', 'b', 'b'),
 ('b', 'b', 'c'),
 ('b', 'c', 'a'),
 ('b', 'c', 'b'),
 ('b', 'c', 'c'),
 ('c', 'a', 'a'),
 ('c', 'a', 'b'),
 ('c', 'a', 'c'),
 ('c', 'b', 'a'),
 ('c', 'b', 'b'),
 ('c', 'b', 'c'),
 ('c', 'c', 'a'),
 ('c', 'c', 'b'),
 ('c', 'c', 'c')]

In [13]:
list(product(["a","b","c"], ["a","b","c"], ["a","b","c"]))

[('a', 'a', 'a'),
 ('a', 'a', 'b'),
 ('a', 'a', 'c'),
 ('a', 'b', 'a'),
 ('a', 'b', 'b'),
 ('a', 'b', 'c'),
 ('a', 'c', 'a'),
 ('a', 'c', 'b'),
 ('a', 'c', 'c'),
 ('b', 'a', 'a'),
 ('b', 'a', 'b'),
 ('b', 'a', 'c'),
 ('b', 'b', 'a'),
 ('b', 'b', 'b'),
 ('b', 'b', 'c'),
 ('b', 'c', 'a'),
 ('b', 'c', 'b'),
 ('b', 'c', 'c'),
 ('c', 'a', 'a'),
 ('c', 'a', 'b'),
 ('c', 'a', 'c'),
 ('c', 'b', 'a'),
 ('c', 'b', 'b'),
 ('c', 'b', 'c'),
 ('c', 'c', 'a'),
 ('c', 'c', 'b'),
 ('c', 'c', 'c')]

In [14]:
temp_list = list(product(range(1,7), repeat=4))

In [15]:
len(temp_list)

1296

In [16]:
arr = np.array(temp_list)

In [17]:
arr.shape

(1296, 4)

In [19]:
(arr.max(axis=1) == 5).sum()/arr.shape[0]

0.2847222222222222

In [20]:
(arr.max(axis=1) == 5).mean()

0.2847222222222222

In [21]:
(5/6)**4-(4/6)**4

0.2847222222222223

## `any` and `all`

Make a 100 row, 4 column NumPy array `arr` of random real numbers between 0 and 1.

In [2]:
rng = np.random.default_rng()

In [4]:
arr = rng.random(size=(100,4))

* Find the sub-array of `arr` containing all rows in which at least one number is bigger than 0.9.

In [6]:
arr[:5]

array([[0.02679582, 0.39964722, 0.38608334, 0.9557704 ],
       [0.62903247, 0.17981028, 0.48643802, 0.16059557],
       [0.49612872, 0.34270062, 0.00862007, 0.77873829],
       [0.55550408, 0.37485681, 0.20417908, 0.82782464],
       [0.51663663, 0.85479878, 0.48625127, 0.88475327]])

In [7]:
arr[:5] > 0.9

array([[False, False, False,  True],
       [False, False, False, False],
       [False, False, False, False],
       [False, False, False, False],
       [False, False, False, False]])

In [8]:
(arr[:5] > 0.9).any(axis=1)

array([ True, False, False, False, False])

In [9]:
(arr[:5] > 0.9).any(axis=1).sum()

1

In [10]:
arr[:5][(arr[:5] > 0.9).any(axis=1)]

array([[0.02679582, 0.39964722, 0.38608334, 0.9557704 ]])

In [11]:
arr[(arr > 0.9).any(axis=1)]

array([[0.02679582, 0.39964722, 0.38608334, 0.9557704 ],
       [0.89128322, 0.97335335, 0.1821295 , 0.52423414],
       [0.4629412 , 0.71844233, 0.91362938, 0.04770196],
       [0.61030793, 0.15565872, 0.15616296, 0.9572168 ],
       [0.45523301, 0.85912337, 0.34660356, 0.91882733],
       [0.14055923, 0.86443333, 0.95413845, 0.94380163],
       [0.07850217, 0.96370185, 0.21801727, 0.22118187],
       [0.10730651, 0.56976149, 0.95367251, 0.7782287 ],
       [0.95705318, 0.11012904, 0.1380347 , 0.67057015],
       [0.95930433, 0.06501316, 0.67658178, 0.20159313],
       [0.2457492 , 0.67240127, 0.94358665, 0.90993476],
       [0.37975889, 0.92498652, 0.99327127, 0.7283782 ],
       [0.37093209, 0.45237422, 0.91160131, 0.96612184],
       [0.52254037, 0.19969189, 0.94795525, 0.15420271],
       [0.90339743, 0.59821255, 0.62045249, 0.54939576],
       [0.48895101, 0.52105202, 0.96297772, 0.45230437],
       [0.35246537, 0.32415082, 0.86599985, 0.98355888],
       [0.32443071, 0.94130683,

* Find the sub-array of `arr` containing all rows in which no numbers are between 0.4 and 0.6.

In [12]:
((arr < 0.4) | (arr > 0.6)).all(axis=1)

array([ True, False, False, False, False, False, False, False, False,
       False, False,  True, False,  True,  True, False,  True,  True,
       False, False, False,  True,  True,  True,  True,  True,  True,
        True,  True, False, False, False, False, False, False, False,
       False, False,  True, False, False,  True, False,  True,  True,
       False, False, False, False, False,  True,  True,  True, False,
        True,  True, False,  True, False, False, False, False, False,
        True,  True,  True, False, False, False,  True, False, False,
        True, False,  True,  True, False, False, False, False, False,
        True, False, False, False, False, False, False,  True, False,
        True, False, False, False,  True,  True, False, False,  True,
       False])

In [13]:
len(((arr < 0.4) | (arr > 0.6)).all(axis=1))

100

In [14]:
arr[((arr < 0.4) | (arr > 0.6)).all(axis=1)]

array([[0.02679582, 0.39964722, 0.38608334, 0.9557704 ],
       [0.61030793, 0.15565872, 0.15616296, 0.9572168 ],
       [0.64463239, 0.62136197, 0.83285561, 0.69102624],
       [0.19806438, 0.28789025, 0.16912325, 0.71827727],
       [0.14055923, 0.86443333, 0.95413845, 0.94380163],
       [0.07850217, 0.96370185, 0.21801727, 0.22118187],
       [0.95705318, 0.11012904, 0.1380347 , 0.67057015],
       [0.6703015 , 0.81869015, 0.31666259, 0.67068789],
       [0.74644917, 0.06370154, 0.34252816, 0.06391344],
       [0.06099061, 0.08149432, 0.29021873, 0.85693158],
       [0.80698662, 0.31211537, 0.68117698, 0.78836747],
       [0.95930433, 0.06501316, 0.67658178, 0.20159313],
       [0.2457492 , 0.67240127, 0.94358665, 0.90993476],
       [0.37975889, 0.92498652, 0.99327127, 0.7283782 ],
       [0.35246537, 0.32415082, 0.86599985, 0.98355888],
       [0.32443071, 0.94130683, 0.86154932, 0.95131523],
       [0.75210216, 0.38678934, 0.19343926, 0.65172332],
       [0.75785393, 0.97896958,

## From binary to decimal, version 1

$$
\begin{pmatrix} 1 & 0 & 1 & 0 \\ 0 & 1 & 0 & 0 \\ 1 & 1 & 1 & 1 \end{pmatrix} \mapsto \begin{pmatrix} 10 \\ 4 \\ 15 \end{pmatrix}
$$

* Write a function `to_bin` which takes in an m-by-n NumPy array of 0s and 1s, and as output returns a length m NumPy array of the corresponding integers, where we think of each row as representing the binary digits of an integer.

In [2]:
rng = np.random.default_rng()

In [3]:
arr = rng.integers(2, size=(10,4))

In [4]:
m,n = arr.shape

In [5]:
m

10

In [6]:
n

4

In [7]:
np.arange(n-1, -1, -1)

array([3, 2, 1, 0])

In [8]:
2**np.arange(n-1, -1, -1)

array([8, 4, 2, 1])

In [12]:
(arr*(2**np.arange(n-1, -1, -1))).sum(axis=1)

array([12,  7, 11, 15,  4,  1,  0, 12, 14,  9])

In [13]:
def to_bin(arr):
    """Convert each row of binary digits in `arr` to the integer it represents.

    Parameters
    ----------
    arr : 2-D NumPy array
        Expected to contain only 0s and 1s (not checked here).  Each row is
        read as the binary digits of one integer, most significant digit first.

    Returns
    -------
    1-D NumPy array with one entry per row of `arr`.
    """
    num_rows, num_digits = arr.shape
    # Place values, most significant first: 2**(n-1), ..., 4, 2, 1.
    place_values = 2 ** np.arange(num_digits)[::-1]
    # Broadcasting multiplies every row by the place values; summing along
    # each row then gives the value of that row.
    weighted = arr * place_values
    return weighted.sum(axis=1)

In [14]:
to_bin(arr)

array([12,  7, 11, 15,  4,  1,  0, 12, 14,  9])

In [15]:
to_bin(np.array([[1,0],[1,1]]))

array([2, 3])

## From binary to decimal, version 2

$$
\begin{pmatrix} 1 & 0 & 1 & 0 \\ 0 & 1 & 0 & 0 \\ 1 & 1 & 1 & 1 \end{pmatrix} \mapsto \begin{pmatrix} 10 \\ 4 \\ 15 \end{pmatrix}
$$

* Write a new function, `to_bin2`, which takes in an m-by-n NumPy array of 0s and 1s, and as output returns a length m NumPy array of the corresponding integers, where we think of each row as representing the binary digits of an integer.

In [2]:
rng = np.random.default_rng()
arr = rng.integers(2, size=(10,4))

In [3]:
arr

array([[0, 1, 1, 1],
       [0, 0, 1, 0],
       [1, 0, 0, 1],
       [1, 0, 1, 1],
       [0, 1, 1, 1],
       [1, 0, 0, 1],
       [0, 1, 0, 1],
       [0, 0, 1, 0],
       [1, 1, 0, 0],
       [0, 0, 1, 0]])

In [4]:
int("0111")

111

In [5]:
int("0111", 2)

7

In [6]:
int("0111", 10)

111

In [7]:
z = np.array([0,1,1,1])

In [8]:
z = arr[0]

In [9]:
z

array([0, 1, 1, 1])

In [10]:
[x for x in z]

[0, 1, 1, 1]

In [11]:
[str(x) for x in z]

['0', '1', '1', '1']

In [12]:
sum([str(x) for x in z])

TypeError: unsupported operand type(s) for +: 'int' and 'str'

In [13]:
''.join([str(x) for x in z])

'0111'

In [14]:
def helper(z):
    """Concatenate the entries of `z` into one string, e.g. [1, 0, 1, 1] -> '1011'."""
    digit_strings = map(str, z)
    return ''.join(digit_strings)

In [15]:
helper(arr[3])

'1011'

In [16]:
arr[3]

array([1, 0, 1, 1])

In [17]:
arr

array([[0, 1, 1, 1],
       [0, 0, 1, 0],
       [1, 0, 0, 1],
       [1, 0, 1, 1],
       [0, 1, 1, 1],
       [1, 0, 0, 1],
       [0, 1, 0, 1],
       [0, 0, 1, 0],
       [1, 1, 0, 0],
       [0, 0, 1, 0]])

In [18]:
def helper2(z):
    """Read the entries of `z` as binary digits and return the corresponding integer.

    The digits are joined into a string such as '1011', which is then parsed
    in base 2 (so '1011' gives 11).
    """
    bit_string = ''.join(map(str, z))
    return int(bit_string, base=2)

In [19]:
helper2(arr[3])

11

In [20]:
def to_bin2(A):
    """Convert each row of `A` to an integer by applying `helper2` row by row.

    `axis=1` makes `np.apply_along_axis` hand each row of `A` to `helper2`
    as a 1-D array; the per-row results are collected into a NumPy array.
    """
    row_values = np.apply_along_axis(func1d=helper2, axis=1, arr=A)
    return row_values

In [21]:
to_bin2(A)

NameError: name 'A' is not defined

In [22]:
arr

array([[0, 1, 1, 1],
       [0, 0, 1, 0],
       [1, 0, 0, 1],
       [1, 0, 1, 1],
       [0, 1, 1, 1],
       [1, 0, 0, 1],
       [0, 1, 0, 1],
       [0, 0, 1, 0],
       [1, 1, 0, 0],
       [0, 0, 1, 0]])

In [23]:
to_bin2(arr)

array([ 7,  2,  9, 11,  7,  9,  5,  2, 12,  2])

## Raising errors

* Edit the `to_bin` function so that it raises an error if the input is not a NumPy array and it also raises an error if an entry is not 0 or 1.

In [2]:
rng = np.random.default_rng()
arr = rng.integers(2, size=(10,4))

In [4]:
arr

array([[1, 1, 1, 1],
       [0, 0, 1, 1],
       [1, 0, 1, 1],
       [1, 1, 1, 0],
       [0, 0, 1, 1],
       [0, 1, 0, 1],
       [1, 1, 0, 1],
       [1, 1, 1, 1],
       [1, 1, 0, 0],
       [0, 0, 1, 1]])

In [5]:
type(arr)

numpy.ndarray

In [6]:
isinstance(arr, np.ndarray)

True

In [7]:
isinstance(arr, list)

False

In [8]:
def to_bin(arr):
    """Convert each row of binary digits in `arr` to the integer it represents.

    Parameters
    ----------
    arr : numpy.ndarray
        2-D array; each row is read as the binary digits of one integer,
        most significant digit first.

    Returns
    -------
    1-D NumPy array with one entry per row of `arr`.

    Raises
    ------
    TypeError
        If `arr` is not a NumPy array.
    """
    if isinstance(arr, np.ndarray):
        _, num_digits = arr.shape
        # Place values, most significant first: 2**(n-1), ..., 4, 2, 1.
        place_values = 2 ** np.arange(num_digits)[::-1]
        return np.sum(arr * place_values, axis=1)
    raise TypeError("Input should be a NumPy array")

In [9]:
to_bin(arr)

array([15,  3, 11, 14,  3,  5, 13, 15, 12,  3])

In [10]:
to_bin(list(arr))

TypeError: Input should be a NumPy array

In [11]:
arr == 0

array([[False, False, False, False],
       [ True,  True, False, False],
       [False,  True, False, False],
       [False, False, False,  True],
       [ True,  True, False, False],
       [ True, False,  True, False],
       [False, False,  True, False],
       [False, False, False, False],
       [False, False,  True,  True],
       [ True,  True, False, False]])

In [12]:
arr == 1

array([[ True,  True,  True,  True],
       [False, False,  True,  True],
       [ True, False,  True,  True],
       [ True,  True,  True, False],
       [False, False,  True,  True],
       [False,  True, False,  True],
       [ True,  True, False,  True],
       [ True,  True,  True,  True],
       [ True,  True, False, False],
       [False, False,  True,  True]])

In [13]:
(arr == 0) | (arr == 1)

array([[ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True]])

In [14]:
((arr == 0) | (arr == 1)).all()

True

In [15]:
def to_bin(arr):
    """Convert rows of binary digits to the integers they represent.

    Parameters
    ----------
    arr : numpy.ndarray
        A 2-D (m-by-n) array whose entries are all 0 or 1.  Each row is read
        as the binary digits of one integer, most significant digit first.

    Returns
    -------
    numpy.ndarray
        A length-m array; entry i is the integer encoded by row i of `arr`.

    Raises
    ------
    TypeError
        If `arr` is not a NumPy array.
    ValueError
        If some entry is not 0 or 1, or if `arr` is not 2-dimensional.
    """
    if not isinstance(arr, np.ndarray):
        raise TypeError("Input should be a NumPy array")
    if not ((arr == 0) | (arr == 1)).all():
        raise ValueError("All entries should be 0 or 1")
    if arr.ndim != 2:
        # Without this check the shape unpacking below fails with an opaque
        # "not enough/too many values to unpack" message.
        raise ValueError("Input should be a 2-dimensional array")
    _, n = arr.shape
    # Place values, most significant first: 2**(n-1), ..., 4, 2, 1.
    place_values = 2 ** np.arange(n-1, -1, -1)
    # Broadcasting scales every row by the place values; the row sums are the integers.
    return (arr * place_values).sum(axis=1)

In [16]:
to_bin(arr)

array([15,  3, 11, 14,  3,  5, 13, 15, 12,  3])

In [17]:
arr2 = arr

In [18]:
arr2[5,2] = 6

In [19]:
arr2

array([[1, 1, 1, 1],
       [0, 0, 1, 1],
       [1, 0, 1, 1],
       [1, 1, 1, 0],
       [0, 0, 1, 1],
       [0, 1, 6, 1],
       [1, 1, 0, 1],
       [1, 1, 1, 1],
       [1, 1, 0, 0],
       [0, 0, 1, 1]])

In [20]:
to_bin(arr2)

ValueError: All entries should be 0 or 1

In [21]:
to_bin(arr)

ValueError: All entries should be 0 or 1

In [22]:
arr

array([[1, 1, 1, 1],
       [0, 0, 1, 1],
       [1, 0, 1, 1],
       [1, 1, 1, 0],
       [0, 0, 1, 1],
       [0, 1, 6, 1],
       [1, 1, 0, 1],
       [1, 1, 1, 1],
       [1, 1, 0, 0],
       [0, 0, 1, 1]])

In [23]:
arr3 = arr.copy()

In [24]:
arr3

array([[1, 1, 1, 1],
       [0, 0, 1, 1],
       [1, 0, 1, 1],
       [1, 1, 1, 0],
       [0, 0, 1, 1],
       [0, 1, 6, 1],
       [1, 1, 0, 1],
       [1, 1, 1, 1],
       [1, 1, 0, 0],
       [0, 0, 1, 1]])

In [25]:
arr3[6,::2] = 99

In [26]:
arr3

array([[ 1,  1,  1,  1],
       [ 0,  0,  1,  1],
       [ 1,  0,  1,  1],
       [ 1,  1,  1,  0],
       [ 0,  0,  1,  1],
       [ 0,  1,  6,  1],
       [99,  1, 99,  1],
       [ 1,  1,  1,  1],
       [ 1,  1,  0,  0],
       [ 0,  0,  1,  1]])

In [27]:
arr

array([[1, 1, 1, 1],
       [0, 0, 1, 1],
       [1, 0, 1, 1],
       [1, 1, 1, 0],
       [0, 0, 1, 1],
       [0, 1, 6, 1],
       [1, 1, 0, 1],
       [1, 1, 1, 1],
       [1, 1, 0, 0],
       [0, 0, 1, 1]])