In [None]:
# Configuration for the slideshow, DO NOT RUN if you aren't presenting this
import os
import sys

from traitlets.config.manager import BaseJSONConfigManager

# Locate the nbconfig directory of the environment this kernel runs in,
# instead of hard-coding one presenter's home directory.
path = os.path.join(sys.prefix, "etc", "jupyter", "nbconfig")
cm = BaseJSONConfigManager(config_dir=path)
# Persist the "livereveal" (RISE) setting into the nbconfig directory.
cm.update("livereveal", {
              "start_slideshow_at": "selected",
})

# Numpy

- Basis for most scientific computing libraries
- Lots of linear algebra and random numbers
- Sacrifices some of Python's flexibility for speed
- Claim: C-like speed with Python-like simplicity

## Installation
- Comes with scientific Python distributions such as Anaconda
- Simple for Unix/Mac: `pip install numpy`
- Can be troublesome for Windows: Download from http://www.lfd.uci.edu/%7Egohlke/pythonlibs/ instead

## Arrays

The basic "unit" in numpy is the array (think of a table). Arrays can be created from "array-like" Python objects such as lists or tuples:

In [None]:
import numpy as np

np.array([1,2,3,4,5,6])

Arrays can have more than one dimension:

In [None]:
np.array([[1,2,3],[4,5,6]])

In [None]:
a = np.array([[(1,), [2]], [[3], [4]]])

Unlike Python lists, you can't just put "anything" into arrays -- they have a *dtype*. A default is chosen depending on the elements:
- any element is a float: dtype float64
- all elements are integers: the platform's default integer dtype (int64 on most systems; int32 e.g. on Windows with NumPy < 2.0)
- dtype can be specified when creating the array

In [None]:
np.array([1,2.,3]).dtype

In [None]:
np.array([1,2,3], dtype="float32")

In [None]:
# an explicit integer dtype drops the fractional part: the result is [1, 2]
np.array([1.7, 2.3], dtype="int32")

There is also an *object* dtype -- with it, we can create e.g. *arrays of arrays* which is very different from a 2D array!

In [None]:
# Rows of different lengths cannot form a regular 2D array, so the object
# dtype has to be requested explicitly (recent NumPy versions raise a
# ValueError for ragged input otherwise). The result is a 1D array whose two
# elements are the original Python lists.
a = np.array([[1,2,3], [4,5]], dtype=object)
print(a)
print(a.dtype)

There are also numerous functions to create some "default" arrays (note the dtypes!!):

In [None]:
np.zeros((2, 5))

In [None]:
np.ones(7, dtype="int32")

In [None]:
np.arange(0, 18, step=0.5)

In [None]:
# this is preferred; with arange, stick to integer steps
np.linspace(0, 18, num=21)

In [None]:
# asarray only avoids a copy when its input already is an ndarray of a
# matching dtype; a nested list, as here, is still converted into a new array
a = np.asarray([[1,2,3],[4,5,6]])
# full_like keeps a's integer dtype, so the fill value 12.4 is stored as 12
np.full_like(a, fill_value=12.4)

In [None]:
np.full_like(a, fill_value=12.4, dtype="float32")

Don't mix up *full* and *fill*:

In [None]:
# np.full creates a NEW array of the given shape with every element set to fill_value
a = np.full((3, 2), fill_value=5)
print(a)
# ndarray.fill overwrites every element of an EXISTING array in place
a.fill(100)
print(a)

In [None]:
np.empty((11, 4))

## Array Attributes

We have already seen the `dtype` attribute. Here are some more important ones:

In [None]:
big_array = np.zeros((12, 9, 11, 24, 7))
big_array.ndim

In [None]:
big_array.shape

In [None]:
big_array.size

In [None]:
big_array.itemsize

The shape of an array can be modified after creation:

In [None]:
a = np.array([[1,2,3],[4,5,6]])
print(a.shape)
# reshape returns a new array object with the requested shape ...
a_r = a.reshape((6, 1))
print(a_r.shape)
# ... while a itself keeps its original shape (2, 3)
print(a.shape)

In [None]:
# unlike reshape, ndarray.resize changes the shape of a itself (in place)
a.resize((3, 2))
print(a.shape)
print(a)

In [None]:
print(a.ravel())
print(a.ravel(order="F"))

In [None]:
a.reshape((100,3))

In [None]:
a.reshape((-1, 3, 2, 1, 1, 1, 2, 1))

## Combining arrays

The following operations copy the array data every time they are called, which can lead to massive run time increases when they are used repeatedly (e.g. inside a loop). Try to use them only with list comprehensions (i.e. call them once on a complete list of arrays), or *preallocate* an array with the correct shape and then fill it instead.

In [None]:
a = np.array([[1,2,3],[4,5,6]])
b = np.array([[7,8,9]])
np.concatenate((a,b))

In [None]:
np.concatenate((a,b), axis=1)

In [None]:
c = np.array([[7,8,9], [10,11,12]])
# NOTE: this call raises an error -- np.stack expects ONE sequence of arrays as
# its first argument; the second positional argument is the axis, so c is
# interpreted as an axis here. Pass the arrays as a tuple: np.stack((a, c)).
np.stack(a,c)

In [None]:
np.stack((a,c), axis=0)

In [None]:
np.stack((a,c), axis=1)

In [None]:
np.stack((a,c), axis=2)

Don't use: `vstack, hstack, dstack`

In [None]:
np.append(a, [7,8,9])

In [None]:
np.append(a, [[7,8,9]], axis=0)

In [None]:
np.append(a, [[7],[8]], axis=1)

## More **FUN** Array Operations

In [None]:
np.tile(a, reps=[2,4])

In [None]:
np.repeat(np.arange(5), 3)

In [None]:
np.repeat(a, 2, axis=1)

In [None]:
np.flip(a, axis=1)

In [None]:
np.roll(a, shift=2, axis=1)

In [None]:
# every None in the index inserts a new axis of length 1: (20,) -> (20, 1, 1, 1, 1);
# repeating twice along axis 3 then gives shape (20, 1, 1, 2, 1)
why = np.arange(20)[:, None, None, None, None].repeat(2, axis=3)
why.shape

In [None]:
why.squeeze().shape

## Indexing

Indexing works a lot like Python lists, but is somewhat more complex.

In [None]:
a1 = np.arange(20)
# a2 is built from the ascending range, before a1 is rebound below
a2 = a1.reshape((4, 5))
# a negative step reverses the order, so a1 now counts down from 19 to 0
a1 = a1[::-1]
print(a1)
print(a2)

In [None]:
a1[0]

In [None]:
a2[0]

In [None]:
a2[1,3]

In [None]:
a2[:, 1:4]

In [None]:
a2[..., 0]

In [None]:
a1[0:1]

*Slicing* arrays as above returns *views* of the array, meaning that **no data is copied**!

In [None]:
orig = np.array([[1,2,3],[4,5,6]])
a_slice = orig[:, 1]
print(a_slice)

In [None]:
a_slice[0] = 10999
print(a_slice)
print(orig)

### Advanced Indexing

#### Index arrays

In [None]:
inds = [3, 3, 2, 4, 1]
a1[inds]

In [None]:
a2[[0, 3, 1, 1]]

In [None]:
a2[[0, 3, 1, 1], [2, 3, 1, 2]]

In [None]:
a2[[0, 3, 1, 1], 0]

Note the difference!!

In [None]:
print("Basic:")
print(a2[0, 1])
print("Advanced:")
print(a2[[0, 1]])

Be *very* careful with tuples:

In [None]:
a2[(0, 1)]  # basic indexing!

Super-advanced:

In [None]:
a2[[[0, 0,], [3, 3]], [[0, 4], [0, 4]]]

#### Boolean Indices

In [None]:
bools = a1 > 10
bools

In [None]:
a1[bools]

In [None]:
bools2 = a2 % 2 == 0
print(bools2)

In [None]:
a2[bools2]

In [None]:
a2[np.array([True, True, False])]

Using index arrays or boolean indices *copies* the data!

In [None]:
orig = np.array([[1,2,3],[4,5,6]])
a_slice = orig[[0, 1], [1, 1]]
print(a_slice)

In [None]:
a_slice[0] = 10999
print(a_slice)
print(orig)

However, we can still use index/boolean arrays for modifying arrays:

In [None]:
orig[[0, 1], [1, 1]] = 2000
print(orig)

In [None]:
orig[:, 1] = [500, -31]
print(orig)

In [None]:
orig[orig > 5] = 0
print(orig) 

## Array Math and Broadcasting

Most operations involving arrays are *element-wise*. This includes multiplication!

In [None]:
a = np.array([[1,2,3],[4,5,6]])
b = np.array([[6,5,4],[3,2,1]])
a + b

In [None]:
a * b

What happens in these cases?

In [None]:
a * 5

In [None]:
a + 5

Turns out arrays are automatically *broadcast* to compatible dimensions, if possible. Let's see what we can do with this.

In [None]:
a + np.array([1, 2])

In [None]:
a + np.array([1,2,3])

In [None]:
a + np.array([[1], [2]])

In [None]:
a = np.arange(1,6)
# all these result in the same:
# a column version of a with shape (5, 1)
b = a[:, None]
# np.newaxis is simply an alias for None
b = a[:, np.newaxis]
# shapes are tuples, so (5,) + (1,) == (5, 1)
b = a.reshape(a.shape + (1,))
print(a)
print(b)

In [None]:
a*b

In [None]:
a*b.reshape((1, 5))

## Common Array Operations

Many common mathematical functions are implemented already (again, generally applied element-wise).

In [None]:
a = np.array([[1,2,3],[4,5,6]])
b = np.array([[6,5,4],[3,2,1]])
np.exp(a)  # or sin, cos, ...

Matrix multiplication is not done with `*`; it requires us to call a function (or, equivalently for 2D arrays, to use the `@` operator).

In [None]:
np.dot(a, b)

In [None]:
b.T

In [None]:
np.dot(a, b.T)

In [None]:
inner_prod = np.dot([1,2,3], [4,5,6])
print(inner_prod)
print(type(inner_prod))
print(inner_prod.shape)

There are many summary operations for arrays:

In [None]:
a.max()

In [None]:
a.max(axis=0)

In [None]:
a.max(axis=1, keepdims=True)

Many functions can be called as array methods or as "regular" functions:

In [None]:
print(np.max(a))
print(np.reshape(a, (1, 6)))

Other important functions: min(), mean(), sum(), argmax()...

## Logic

We've seen that comparison operators such as > work fine element-wise. What about boolean logic?

In [None]:
a = np.array([True, True, False])
b = np.array([False, True, True])

In [None]:
a and b

Turns out Python tries to convert the arrays to scalar truth values, but it's unclear how this should be done.

In [None]:
np.logical_and(a, b)

## Comparing arrays

In [None]:
a = np.array([1,2,3])
b = np.array([1,2,3])
a == b

In [None]:
np.all(a == b)

In [None]:
a = np.array([0.3, 0.3], dtype="float64")
b = np.array([0.1, 0.1], dtype="float64")
# in binary floating point 3 * 0.1 evaluates to 0.30000000000000004, not 0.3 ...
b = 3*b
# ... so the exact element-wise comparison is False
np.all(a == b)

In [None]:
print(a)
print(b)

Never use "true" equality for floating point numbers!! This applies to all programming in all languages.

In [None]:
np.allclose(a, b)

## Random numbers

In [None]:
np.random.standard_normal((5, 3))

In [None]:
np.random.uniform(-2, 3, (2, 4))

In [None]:
np.random.seed(5)
np.random.uniform(-2, 3, (2,4))

In [None]:
# quick access to uniform [0, 1)
np.random.random((3,))

In [None]:
np.random.rand(3,1)

In [None]:
np.random.randint(2)

In [None]:
np.random.randint(-2, 3, (3, 5))

In [None]:
a = np.arange(20)
np.random.choice(a)

In [None]:
np.random.choice(20, size=10)

In [None]:
np.random.choice(a, size=10, replace=False)

In [None]:
np.random.choice([0, 100], size=20, replace=True, p=[0.9, 0.1])