# Using numpy

### Acknowledgments & Credits

This lesson is adapted largely from the excellent curriculum materials by Cliburn Chan (2021) at https://github.com/cliburn/bios-823-2021/ under the MIT License.

In [1]:
import numpy as np

##  NDArray

- shape
- dtype

In [2]:
# Integers 0..11 laid out as a 3-row by 4-column array.
x = np.arange(0, 12).reshape((3, 4))

In [3]:
x

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [4]:
x.shape

(3, 4)

In [5]:
x.dtype

dtype('int64')

## Transpose creates a *view*, not a *copy*

In [6]:
x

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [7]:
xt = x.T

In [8]:
xt

array([[ 0,  4,  8],
       [ 1,  5,  9],
       [ 2,  6, 10],
       [ 3,  7, 11]])

In [9]:
x.strides

(32, 8)

In [10]:
xt.strides

(8, 32)

In [11]:
# Stash the current value so a later cell can restore it.
t = x[1,1]
# Write through `x`; the outputs that follow show the same element
# changing in `xt`, i.e. the transpose shares data with `x`.
x[1,1] = 0

In [12]:
x

array([[ 0,  1,  2,  3],
       [ 4,  0,  6,  7],
       [ 8,  9, 10, 11]])

In [13]:
xt

array([[ 0,  4,  8],
       [ 1,  0,  9],
       [ 2,  6, 10],
       [ 3,  7, 11]])

In [14]:
x[1,1] = t

In [15]:
x

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [16]:
xt

array([[ 0,  4,  8],
       [ 1,  5,  9],
       [ 2,  6, 10],
       [ 3,  7, 11]])

## Indexing and slices also create views

- Views and copies

In [17]:
x[0]

array([0, 1, 2, 3])

In [18]:
x[0, :]

array([0, 1, 2, 3])

In [19]:
x[:, 1:3]

array([[ 1,  2],
       [ 5,  6],
       [ 9, 10]])

In [20]:
# A full slice does not copy: the cells below modify `y` and `x` shows
# the same change.
y = x[:]

In [21]:
y

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [22]:
# Overwrite the second row in place. The displayed result still holds
# integers, and `x` shows the same row afterwards.
y[1] = np.ones(4)

In [23]:
y

array([[ 0,  1,  2,  3],
       [ 1,  1,  1,  1],
       [ 8,  9, 10, 11]])

In [24]:
x

array([[ 0,  1,  2,  3],
       [ 1,  1,  1,  1],
       [ 8,  9, 10, 11]])

## Making copies

In [25]:
# An explicit copy: the later assignment to `xc` leaves `x` unchanged
# (compare the outputs of the following cells).
xc = x.copy()

In [26]:
xc

array([[ 0,  1,  2,  3],
       [ 1,  1,  1,  1],
       [ 8,  9, 10, 11]])

In [27]:
xc[1,:] = 0

In [28]:
xc

array([[ 0,  1,  2,  3],
       [ 0,  0,  0,  0],
       [ 8,  9, 10, 11]])

In [29]:
x

array([[ 0,  1,  2,  3],
       [ 1,  1,  1,  1],
       [ 8,  9, 10, 11]])

## Matrix multiplication

- Row vectors, column vectors and 1d arrays
- Changing shape - reshape, newaxis, ravel, squeeze, keepdims

In [30]:
# A plain 1-D array of the integers 0..4; its shape has a single entry.
x1 = np.arange(0, 5)
x1.shape

(5,)

In [31]:
# Turn the 1-D array into a single column; -1 asks NumPy to work out
# that dimension's length (5 here) from the data.
x2 = x1.reshape(-1,1)
x2.shape

(5, 1)

In [32]:
# `x1` is 1-D (shape (5,)), so `.T` changes nothing and `@` computes the
# inner product, giving a scalar.
x1 @ x1.T

30

In [33]:
# `x2` is a (5, 1) column, so (5, 1) @ (1, 5) yields the 5x5 outer product.
x2 @ x2.T

array([[ 0,  0,  0,  0,  0],
       [ 0,  1,  2,  3,  4],
       [ 0,  2,  4,  6,  8],
       [ 0,  3,  6,  9, 12],
       [ 0,  4,  8, 12, 16]])

## Conditional replacement with where

In [55]:
x

array([[ 0,  1,  2,  3],
       [ 1,  1,  1,  1],
       [ 8,  9, 10, 11]])

In [56]:
# Element-wise choice: 0 where the entry of `x` is even, 1 where it is odd.
np.where(x % 2 == 0, 0, 1)

array([[0, 1, 0, 1],
       [1, 1, 1, 1],
       [0, 1, 0, 1]])

## Array creating functions

In [57]:
np.zeros((2,3))

array([[0., 0., 0.],
       [0., 0., 0.]])

In [58]:
np.ones((2,3))

array([[1., 1., 1.],
       [1., 1., 1.]])

In [59]:
# Build a (2, 3) array whose entry at (i, j) is i*3 + j; note that the
# displayed result is floating point.
np.fromfunction(lambda i, j: i*3+j, (2, 3))

array([[0., 1., 2.],
       [3., 4., 5.]])

## Reductions (margins)

In [60]:
x

array([[ 0,  1,  2,  3],
       [ 1,  1,  1,  1],
       [ 8,  9, 10, 11]])

In [61]:
x.sum()

48

In [62]:
x.sum(axis=0)

array([ 9, 11, 13, 15])

In [63]:
x.sum(axis=1)

array([ 6,  4, 38])

## Broadcasting

In [64]:
x.shape

(3, 4)

In [65]:
x.sum(axis=0).shape

(4,)

In [66]:
# `x` is (3, 4) and the column sums are (4,): the sums line up with the
# last axis, so every column is divided by its own total.
x / x.sum(axis=0)

array([[0.        , 0.09090909, 0.15384615, 0.2       ],
       [0.11111111, 0.09090909, 0.07692308, 0.06666667],
       [0.88888889, 0.81818182, 0.76923077, 0.73333333]])

In [67]:
x.sum(axis=1).shape

(3,)

In [68]:
x.sum(axis=1, keepdims=True).shape

(3, 1)

In [69]:
# keepdims=True leaves the row sums with shape (3, 1), which broadcasts
# across the 4 columns so every row is divided by its own total.
x / x.sum(axis=1, keepdims=True)

array([[0.        , 0.16666667, 0.33333333, 0.5       ],
       [0.25      , 0.25      , 0.25      , 0.25      ],
       [0.21052632, 0.23684211, 0.26315789, 0.28947368]])

In [70]:
x / x.sum(axis=1)[:, None]

array([[0.        , 0.16666667, 0.33333333, 0.5       ],
       [0.25      , 0.25      , 0.25      , 0.25      ],
       [0.21052632, 0.23684211, 0.26315789, 0.28947368]])

In [71]:
x / x.sum(axis=1)[:, np.newaxis]

array([[0.        , 0.16666667, 0.33333333, 0.5       ],
       [0.25      , 0.25      , 0.25      , 0.25      ],
       [0.21052632, 0.23684211, 0.26315789, 0.28947368]])

## Playing with shape

In [72]:
x.shape

(3, 4)

In [73]:
# Indexing with None inserts a length-1 axis at that position:
# last, middle and first respectively (see the shapes printed below).
# Note that `x1` and `x2` are rebound here and no longer refer to the
# arrays from the matrix-multiplication section.
x1 = x[:,:,None]
x2 = x[:,None,:]
x3 = x[None,:,:]

In [74]:
x1.shape

(3, 4, 1)

In [75]:
x2.shape

(3, 1, 4)

In [76]:
x3.shape

(1, 3, 4)

In [77]:
# The same three shapes produced with np.expand_dims; `axis` gives the
# position of the new length-1 axis (negative values count from the end).
y1 = np.expand_dims(x, axis=-1)
y2 = np.expand_dims(x, axis=-2)
y3 = np.expand_dims(x, 0)

In [78]:
y1.shape

(3, 4, 1)

In [79]:
y2.shape

(3, 1, 4)

In [80]:
y3.shape

(1, 3, 4)

In [81]:
np.all(x1 == y1), np.all(x2 == y2), np.all(x3 == y3)

(True, True, True)

In [82]:
x

array([[ 0,  1,  2,  3],
       [ 1,  1,  1,  1],
       [ 8,  9, 10, 11]])

In [83]:
x1

array([[[ 0],
        [ 1],
        [ 2],
        [ 3]],

       [[ 1],
        [ 1],
        [ 1],
        [ 1]],

       [[ 8],
        [ 9],
        [10],
        [11]]])

In [84]:
x2

array([[[ 0,  1,  2,  3]],

       [[ 1,  1,  1,  1]],

       [[ 8,  9, 10, 11]]])

In [85]:
x3

array([[[ 0,  1,  2,  3],
        [ 1,  1,  1,  1],
        [ 8,  9, 10, 11]]])

In [86]:
x1.squeeze()

array([[ 0,  1,  2,  3],
       [ 1,  1,  1,  1],
       [ 8,  9, 10, 11]])

In [87]:
x2.squeeze()

array([[ 0,  1,  2,  3],
       [ 1,  1,  1,  1],
       [ 8,  9, 10, 11]])

In [88]:
x3.squeeze()

array([[ 0,  1,  2,  3],
       [ 1,  1,  1,  1],
       [ 8,  9, 10, 11]])

## Universal functions (ufunc)

In [89]:
np.sqrt(x)

array([[0.        , 1.        , 1.41421356, 1.73205081],
       [1.        , 1.        , 1.        , 1.        ],
       [2.82842712, 3.        , 3.16227766, 3.31662479]])

## Einstein summation notation

In [90]:
# Two small integer matrices whose rows all have length 2.
a = np.array([
    [1, 2],
    [3, 4],
])
b = np.array([
    [3, 4],
    [5, 6],
    [7, 8],
])

In [91]:
a

array([[1, 2],
       [3, 4]])

In [92]:
b

array([[3, 4],
       [5, 6],
       [7, 8]])

In [93]:
# Reference implementation: m[i, j] is the dot product of row i of `a`
# with row j of `b`, filled in one entry at a time.
m = np.zeros((a.shape[0], b.shape[0]))
for i in range(m.shape[0]):
    for j in range(m.shape[1]):
        m[i, j] = a[i] @ b[j]
m

array([[11., 17., 23.],
       [25., 39., 53.]])

In [94]:
# 'in,jn -> ij': for each row i of `a` and row j of `b`, multiply along
# the shared index n and sum it away.
np.einsum('in,jn -> ij', a, b)

array([[11, 17, 23],
       [25, 39, 53]])

## Random module (cf. SciPy)

In [95]:
# No seed is set in the visible cells, so this draw and the other random
# draws in this section will differ from the outputs shown on every run.
np.random.poisson(3, (2,3))

array([[2, 6, 5],
       [3, 4, 4]])

In [96]:
np.random.normal(0, 1, (2,3))

array([[-1.45242649,  1.62559815,  1.68639982],
       [-1.12724166, -0.95792063,  0.23236303]])

In [97]:
np.random.permutation(10)

array([1, 4, 0, 9, 8, 5, 2, 3, 6, 7])

In [98]:
np.random.choice(list('abc'), (4,5))

array([['a', 'c', 'a', 'b', 'a'],
       ['a', 'b', 'a', 'c', 'b'],
       ['c', 'c', 'c', 'a', 'a'],
       ['c', 'c', 'b', 'c', 'c']], dtype='<U1')

## Linear algebra submodule (cf. SciPy)

In [99]:
x

array([[ 0,  1,  2,  3],
       [ 1,  1,  1,  1],
       [ 8,  9, 10, 11]])

In [100]:
np.linalg.svd(x)

SVDResult(U=array([[ 0.16753774,  0.97815111, -0.12309149],
       [ 0.10163439, -0.14132757, -0.98473193],
       [ 0.98061285, -0.15246943,  0.12309149]]), S=array([1.95072080e+01, 1.86248100e+00, 3.86216418e-16]), Vh=array([[ 0.40736415,  0.46622191,  0.52507967,  0.58393743],
       [-0.73079029, -0.28746675,  0.15585679,  0.59918034],
       [ 0.50052571, -0.50157992, -0.4984173 ,  0.49947151],
       [-0.22242754,  0.66963989, -0.67199717,  0.22478482]]))

In [101]:
# Least-squares solve of x @ w = [0, 1, 2]. The result is a 4-tuple; the
# 2 shown in the output is consistent with the SVD above, where the third
# singular value is numerically zero.
np.linalg.lstsq(x, np.arange(3), rcond=None)

(array([ 0.21818182,  0.11818182,  0.01818182, -0.08181818]),
 array([], dtype=float64),
 2,
 array([1.95072080e+01, 1.86248100e+00, 7.04631659e-16]))

## Masked array

In [102]:
# Rebind `a` to the integers 0..19 arranged as 4 rows by 5 columns.
a = np.arange(0, 20).reshape(4, 5)
a

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [103]:
# Boolean array that is True at the even entries of `a`.
# NOTE: it is used below as a plain boolean index, so True means "keep".
# That is the opposite of the numpy.ma convention, where True in a
# MaskedArray's mask marks an entry as masked out.
mask = np.ma.make_mask(a % 2 == 0)
mask

array([[ True, False,  True, False,  True],
       [False,  True, False,  True, False],
       [ True, False,  True, False,  True],
       [False,  True, False,  True, False]])

In [104]:
# Replace the odd entries with NaN. `a` is rebound to the new array,
# which is floating point (see the output below).
a = np.where(a % 2 != 0, np.nan, a)

In [105]:
a

array([[ 0., nan,  2., nan,  4.],
       [nan,  6., nan,  8., nan],
       [10., nan, 12., nan, 14.],
       [nan, 16., nan, 18., nan]])

In [106]:
np.sum(a)

nan

In [107]:
# Boolean indexing keeps only the entries where `mask` is True (the even
# values), so the NaNs never reach the sum.
np.sum(a[mask])

90.0

## Memory mapping

When you are working with arrays that are too large to fit in memory, you can use `np.memmap` to map an array stored in a file on disk and read or write only the parts you need.

In [108]:
# Back a 10x10 float64 array with the file 'foo.dat' (relative path, so it
# lands in the current working directory). Per the numpy.memmap docs,
# mode='w+' creates the file or overwrites an existing one.
fp = np.memmap('foo.dat', dtype=np.float64, mode='w+', shape=(10,10))

In [109]:
# Assign through a full slice so the values are written into the mapped
# array itself rather than rebinding the name `fp`.
fp[:] = np.arange(100).reshape((10,10))

In [110]:
fp

memmap([[ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
        [10., 11., 12., 13., 14., 15., 16., 17., 18., 19.],
        [20., 21., 22., 23., 24., 25., 26., 27., 28., 29.],
        [30., 31., 32., 33., 34., 35., 36., 37., 38., 39.],
        [40., 41., 42., 43., 44., 45., 46., 47., 48., 49.],
        [50., 51., 52., 53., 54., 55., 56., 57., 58., 59.],
        [60., 61., 62., 63., 64., 65., 66., 67., 68., 69.],
        [70., 71., 72., 73., 74., 75., 76., 77., 78., 79.],
        [80., 81., 82., 83., 84., 85., 86., 87., 88., 89.],
        [90., 91., 92., 93., 94., 95., 96., 97., 98., 99.]])

In [111]:
# Push pending writes to 'foo.dat' explicitly before dropping the name.
# `del fp` only removes this one reference: cell In [110] displayed `fp`,
# and IPython caches displayed results (`Out[110]`, `_`), so the memmap
# object may stay alive and flush-on-deletion cannot be relied upon here.
fp.flush()
del fp

In [112]:
# Re-open the existing file. No `mode` is passed, so numpy.memmap's
# documented default ('r+', read and write on an existing file) applies.
fp1 = np.memmap('foo.dat', dtype=np.float64, shape=(10,10))

In [113]:
fp1[:5, :5]

memmap([[ 0.,  1.,  2.,  3.,  4.],
        [10., 11., 12., 13., 14.],
        [20., 21., 22., 23., 24.],
        [30., 31., 32., 33., 34.],
        [40., 41., 42., 43., 44.]])

In [114]:
# `offset` is in bytes: skip the first 75 float64 values (8 bytes each)
# and view the remaining 25 values as a 5x5 array.
fp2 = np.memmap('foo.dat', dtype=np.float64, offset=75*8, shape=(5,5))

In [115]:
fp2

memmap([[75., 76., 77., 78., 79.],
        [80., 81., 82., 83., 84.],
        [85., 86., 87., 88., 89.],
        [90., 91., 92., 93., 94.],
        [95., 96., 97., 98., 99.]])