# `numpy` Overview



In [None]:
!pip install --upgrade numpy



## Basics
### Import convention



In [None]:
import numpy as np



### Creating arrays from Python sequences



In [None]:
# Core data structure in numpy.
# A leading `?` is IPython help syntax (not plain Python): it shows the docstring of np.ndarray.
?np.ndarray



In [None]:
# Low-level constructor: allocates a 2x2 float64 array WITHOUT initializing its contents,
# so the displayed values are arbitrary.
# `np.float` (an alias of the builtin float) was removed in NumPy 1.24; use np.float64.
np.ndarray((2, 2), dtype=np.float64)



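This works, but calling the `np.ndarray` constructor directly is not very convenient: the contents of the new array are uninitialized. `np.array` is the usual way to build an array from existing data.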

In [None]:
# IPython help syntax: show the docstring of np.array, the usual array factory function.
?np.array



In [None]:
# Build a 2x3 array from a nested Python list; each inner list becomes one row.
arr = np.array([[7, 2, 3], [3, 9, 6]])



Each array has `shape`, `size` and `ndim`:

In [None]:
# shape: tuple with the length of every axis; size: total number of elements;
# ndim: number of axes (equal to len(arr.shape)).
print("Array shape is", arr.shape)
print("Array size is", arr.size)
print("Array has %i dimensions" % arr.ndim)



And `dtype`, `itemsize` and `nbytes`:

In [None]:
# dtype: element type; itemsize: bytes per element; nbytes: total bytes (size * itemsize).
print("Array dtype is", arr.dtype)
print("Each item in the array takes %i bytes" % arr.itemsize)
print("Array takes %i bytes" % arr.nbytes)



### Creating arrays of special shape and/or type



In [None]:
# 2x6 array of "zeros"; with dtype=bool every element is False.
zeros_array = np.zeros((2,6), dtype=bool)
zeros_array



In [None]:
# zeros_like copies the shape and dtype of its argument (here `arr`) and fills with 0.
zeros_like_array = np.zeros_like(arr)
zeros_like_array



In [None]:
# 3x9 array filled with 1.0, stored as 32-bit floats.
ones_array = np.ones((3,9), dtype=np.float32)
ones_array



In [None]:
# Shape is taken from zeros_array (2x6); the explicit dtype overrides its bool dtype.
ones_like_array = np.ones_like(zeros_array, dtype=np.float32)
ones_like_array



In [None]:
# arange(stop): integers 0, 1, ..., 9 (stop is excluded, like Python's range).
range_array = np.arange(10)
range_array



In [None]:
# arange(start, stop): integers -5, ..., 4 (stop excluded).
range_array = np.arange(-5, 5)
range_array



In [None]:
# arange(start, stop, step): 0, 2, 4.
range_array = np.arange(0, 5, 2)
range_array



In [None]:
# A negative step counts down: 10, 8, 6, 4, 2 (stop value 0 is excluded).
range_array = np.arange(10, 0, -2)
range_array



In [None]:
# arange also accepts float bounds and steps. NumPy's documentation recommends
# np.linspace for non-integer steps, because floating-point rounding can affect
# the number of elements produced.
range_float_array = np.arange(-0.82, 5.32, 0.25)
range_float_array



### Basic indexing of numpy arrays



In [None]:
# A single integer index returns one element (the second one; indexing is 0-based).
range_float_array[1]



In [None]:
# A slice always returns an array, here a 1-element array holding the first item.
range_float_array[:1]



In [None]:
# First five elements.
range_float_array[:5]



In [None]:
# Negative indices count from the end: three elements, from the 5th-last up to
# (but not including) the 2nd-last.
range_float_array[-5:-2]



In [None]:
# Show the 2x3 array again as a reference for the 2-d indexing examples below.
arr



In [None]:
# Slicing both axes keeps both: rows [0:1], columns [1:] -> shape (1, 2).
arr[:1, 1:]



In [None]:
# An integer index drops its axis: row 0, columns [1:] -> shape (2,).
arr[0, 1:]



In [None]:
# Row 0, every second column (columns 0 and 2).
arr[0, ::2]



### Boolean and fancy indexing



In [None]:
# 10 samples from the standard normal distribution.
# NOTE: no seed is set in the cells shown here, so the values differ between runs.
random_array = np.random.randn(10)
random_array



In [None]:
# Boolean indexing: the comparison yields a boolean mask; only elements where it is True are kept.
random_array[random_array>0]



In [None]:
# Combine masks element-wise with | (OR). The parentheses are required because
# | binds more tightly than the comparison operators.
random_array[(random_array>0) | (random_array<-1)]



In [None]:
# Element-wise AND of two masks: keep values strictly between 0 and 1.
random_array[(random_array>0) & (random_array<1)]



In [None]:
# np.where(condition) returns a tuple with one index array per dimension;
# the trailing comma unpacks the single array for this 1-d input.
ix0, = np.where(random_array>0)
# Indexing with an integer array (fancy indexing) selects the same elements as the mask.
random_array[ix0]



In [None]:
# Rebinds random_array to a new 3x4 array of standard-normal samples.
random_array = np.random.randn(3, 4)
random_array



In [None]:
# Fancy indexing pairs the index lists element-wise: this selects elements (0, 2) and (2, 3),
# giving shape (2,), not the 2x2 sub-block of rows {0, 2} and columns {2, 3}.
random_array[[0,2], [2, 3]]



### Changing array shape



In [None]:
# Re-create the 2x3 example array (same values as earlier) so this section stands on its own.
arr = np.array([[7, 2, 3], [3, 9, 6]])



In [None]:
# Same 6 elements laid out as a single row: shape (1, 6).
arr.reshape((1,6))



In [None]:
# -1 asks numpy to infer that dimension from the element count: result shape is (6, 1).
arr.reshape((6,-1))



In [None]:
# Reshaping may also change the number of dimensions: shape (6, 1, 1).
arr.reshape((6,1,1))



In [None]:
# Insert a new length-1 axis at position 1: (2, 3) -> (2, 1, 3).
np.expand_dims(arr, axis=1).shape



In [None]:
# Transpose: axes are reversed, (2, 3) -> (3, 2).
arr.T



In [None]:
# expand_dims gives shape (2, 3, 1); transposing with axes=(1, 2, 0) reorders it to (3, 1, 2).
np.transpose(np.expand_dims(arr, axis=-1), axes=(1,2,0)).shape



### Changing array type



In [None]:
# The comparison yields booleans; casting to int8 maps True/False to 1/0.
(arr>2).astype(np.int8)



In [None]:
# astype returns a new array with the requested dtype; `arr` itself is unchanged.
arr.astype(np.float32)



In [None]:
# Integers converted to complex numbers get a zero imaginary part.
arr.astype(np.complex128)



### Stacking arrays



In [None]:
# Two 1-d arrays of 10 random integers drawn from [0, 10) (the upper bound is exclusive).
arr_1 = np.random.randint(10, size=(10,))
arr_2 = np.random.randint(10, size=(10,))



In [None]:
# Display both arrays (as a tuple) before stacking them.
arr_1, arr_2



In [None]:
# vstack treats each 1-d array as a row: result shape (2, 10).
np.vstack([arr_1, arr_2])



In [None]:
# For 1-d inputs hstack simply concatenates: result shape (20,).
np.hstack([arr_1, arr_2])



In [None]:
# Turn each array into a (10, 1) column first, then hstack places them side by side: shape (10, 2).
np.hstack([np.expand_dims(arr_1, 1), np.expand_dims(arr_2, 1)])



### Universal functions

For a full list of universal functions, see the [ufunc reference](https://numpy.org/doc/stable/reference/ufuncs.html).



In [None]:
# Show arr_1 for reference in the examples below.
arr_1



In [None]:
# Show arr_2 for reference in the examples below.
arr_2



In [None]:
# Without an axis argument, sum() reduces over all elements and returns a scalar.
arr_1.sum()



In [None]:
# Sum along axis 1 (across columns) of the 2x3 `arr`: one total per row, shape (2,).
arr.sum(axis=1)



In [None]:
# keepdims=True keeps the reduced axis with length 1: shape (2, 1) instead of (2,).
arr.sum(axis=1, keepdims=True)



In [None]:
# Mean along axis 0 (down the rows): one value per column, returned as floats.
arr.mean(axis=0)



In [None]:
# Add arr_2 to arr_1 only at positions where arr_2 < 6.
# An explicit `out` buffer is supplied because, with the default out=None, positions
# where the mask is False are left uninitialized and would show arbitrary values.
# Starting from a copy of arr_1 means unselected positions keep their arr_1 value,
# and arr_1 itself is not modified.
np.add(arr_1, arr_2, where=(arr_2 < 6), out=arr_1.copy())



In [None]:
# Same conditional add, but the result is written into arr_2 itself: positions where
# arr_2 >= 6 keep their old value, the others become arr_1 + arr_2.
# NOTE: this mutates arr_2 in place, so re-running this cell changes the result and
# any earlier display of arr_2 is now stale.
np.add(arr_1, arr_2, where=(arr_2<6), out=arr_2)



### Broadcasting



In [None]:
# Random integer test arrays in [0, 10) for the broadcasting examples:
# a (10, 3) matrix, a length-3 vector and a length-10 vector.
arr_2d = np.random.randint(10, size=(10, 3))
arr_1d_1 = np.random.randint(10, size=(3, ))
arr_1d_2 = np.random.randint(10, size=(10, ))



In [None]:
# Show the (10, 3) matrix.
arr_2d



In [None]:
# Show the length-3 vector.
arr_1d_1



In [None]:
# Show the length-10 vector.
arr_1d_2



In [None]:
# (10, 3) + (3,): the vector is treated as shape (1, 3) and added to every row.
arr_2d + arr_1d_1



In [None]:
# (10, 3) + (10,) is NOT broadcast-compatible: shapes are aligned from the last axis,
# so the trailing dimensions 3 and 10 clash. The resulting ValueError is caught and
# printed so that the demonstration does not abort a "Run All" of the notebook.
try:
    arr_2d + arr_1d_2
except ValueError as err:
    print("Broadcasting failed:", err)



In [None]:
# Reshaping the length-10 vector to (10, 1) makes it compatible: it is added to every column.
arr_2d + np.expand_dims(arr_1d_2, axis=1)



In [None]:
# A (7, 10, 3) array of random integers in [0, 10) for the 3-d broadcasting examples.
arr_3d = np.random.randint(10, size=(7, 10, 3))



In [None]:
# (7, 10, 3) + (3,): the vector is broadcast along the last axis.
arr_3d + arr_1d_1



In [None]:
# (7, 10, 3) + (10, 1): the (10, 1) operand is treated as (1, 10, 1) and broadcast
# over the first and last axes.
arr_3d + np.expand_dims(arr_1d_2, axis=1)



Broadcasting rules:
    
- All input arrays with ndim smaller than the input array of largest ndim, have 1’s prepended to their shapes.
- The size in each dimension of the output shape is the maximum of all the input sizes in that dimension.
- An input can be used in the calculation if its size in a particular dimension either matches the output size in that dimension, or has value exactly 1.
- If an input has a dimension size of 1 in its shape, the first data entry in that dimension will be used for all calculations along that dimension. In other words, the stepping machinery of the ufunc will simply not step along that dimension (the stride will be 0 for that dimension).

## General rules of thumb
### Avoid loops



In [None]:
def square_loop(a):
    """Square the entries of ``a`` one by one using an explicit Python loop.

    Deliberately unvectorized: it serves as the slow baseline that is timed
    against ``np.square``. ``a`` is expected to be a 1d array.

    Returns a new array with the same shape and dtype as ``a``.
    """
    squared = np.zeros_like(a)
    length = a.shape[0]
    position = 0
    while position < length:
        element = a[position]
        squared[position] = element * element
        position += 1
    return squared



In [None]:
# 100,000 random integers in [0, 100) as benchmark input.
large_arr = np.random.randint(100, size=(100000,))



In [None]:
# Time the Python-loop version: -n 10 executes the statement 10 times per run, -r 3 repeats the run 3 times.
%timeit -n 10 -r 3 square_loop(large_arr)



In [None]:
# Same timing settings for numpy's built-in implementation, for a direct comparison.
%timeit -n 10 -r 3 np.square(large_arr)  # numpy's implementation is vectorized, no loops inside!



### Use broadcasting when possible



In [None]:
def add_loop(a, b):
    """Add the 1d array ``b`` to each row of the 2d array ``a``, row by row.

    Deliberately loops over rows in Python: it serves as the slow baseline
    that is timed against broadcasting with ``np.add``.

    Returns a new array with the same shape and dtype as ``a``.
    """
    summed = np.zeros_like(a)
    n_rows = a.shape[0]
    for row in range(n_rows):
        current = a[row]
        summed[row] = current + b
    return summed



In [None]:
# Benchmark inputs: a 1000x1000 matrix and a length-1000 vector of random integers in [0, 100).
# NOTE: this rebinds large_arr, which was a 1-d array in the previous section.
large_arr = np.random.randint(100, size=(1000,1000))
large_b = np.random.randint(100, size=(1000,))



In [None]:
# Time the row-by-row Python loop (10 executions per run, 3 runs).
%timeit -n 10 -r 3 add_loop(large_arr, large_b)



In [None]:
# Same operation in a single broadcasting call: large_b is added to every row of large_arr.
%timeit -n 10 -r 3 np.add(large_arr, large_b)

