# NumPy

In [None]:
import numpy as np

In [None]:
np.__version__

In [None]:
np?

See the documentation, the book's NumPy chapter, and the tutorials at http://www.numpy.org

## compare list and numpy array

In [None]:
# A homogeneous list holding the Python ints 0..9999; the comparison subject
# for the NumPy array built from it further down.
example_list_int = list(range(10000))
# The same numbers, each converted to its string representation.
example_str = [str(c) for c in example_list_int]
# A single list may mix element types freely: int, bool, str, float,
# a str written as a Unicode escape, and a nested list.
example_different_types = [3, True, 'a', 1.2, '\U0001F914', [1,2,3]]

In [None]:
print(type(example_list_int))
print(type(example_str))
print(type(example_different_types))
print(type(example_list_int[0]))
print(type(example_str[0]))
[type(item) for item in example_different_types]

In [None]:
example_array = np.array(example_list_int)

In [None]:
type(example_array)

### memory consumption

In [None]:
import sys

sys.getsizeof(example_list_int)

In [None]:
sys.getsizeof(example_array)

In [None]:
example_array.dtype

In [None]:
smaller_array = np.array(range(10000), dtype='int32')

In [None]:
sys.getsizeof(smaller_array)

In [None]:
float_array = np.array(range(10000), dtype='float32')

In [None]:
sys.getsizeof(float_array)

In [None]:
example_arange = np.arange(10000)
example_arange

In [None]:
sys.getsizeof(example_arange)

In [None]:
example_arange.itemsize

In [None]:
a16 = np.array(example_arange, dtype='int16')
a16.itemsize

In [None]:
a16.nbytes

We can save some space, depending on the data type, but only by a small factor. What about time?

### time

In [None]:
def compute_squares_list(l):
    """Return a new list holding the square of every element of ``l``.

    Deliberately written as an explicit index-based Python loop so that its
    running time can be compared with faster alternatives.

    Parameters
    ----------
    l : iterable of numbers
        Values to square. The input is not modified. It is traversed exactly
        once, so one-shot iterators and generators are handled correctly.

    Returns
    -------
    list
        The squares, in input order.
    """
    # Materialise the input exactly once; this is also the result buffer.
    squares = list(l)
    # Iterate over the copy rather than over ``l``: a generator would already
    # be exhausted by ``list(l)`` and the loop would silently do nothing.
    # Overwriting elements while iterating is safe because the length of the
    # list never changes.
    for index, value in enumerate(squares):
        squares[index] = value ** 2
    return squares

In [None]:
r = 10000000
%time squares_list = compute_squares_list(list(range(r)))

In [None]:
%time squares_list_comprehension = [a ** 2 for a in range(r)]

In [None]:
def compute_squares_array(a):
    """Square every element of ``a`` with an explicit Python loop.

    This is intentionally the slow, element-by-element way of filling a NumPy
    array; it exists to be timed against the vectorised ``a ** 2``.

    Parameters
    ----------
    a : numpy.ndarray or sequence of numbers
        One-dimensional input values. The input is not modified.

    Returns
    -------
    numpy.ndarray
        Array of length ``len(a)`` with the squared values. For ndarray input
        the result keeps the dtype of ``a``; for any other sequence it is
        float64, the default of ``np.empty``.
    """
    # Each ``value ** 2`` below is computed in the element's own dtype, so a
    # float64 buffer could only lose information (integers above 2**53 get
    # rounded, complex values cannot be stored). Keep the input dtype instead.
    result_dtype = a.dtype if isinstance(a, np.ndarray) else float
    # np.empty leaves the buffer uninitialised; every slot is written below.
    squares = np.empty(len(a), dtype=result_dtype)
    for index, value in enumerate(a):
        squares[index] = value ** 2
    return squares

In [None]:
%time squares_array = compute_squares_array(np.arange(r))

However, there are fast numpy operations! Details below (see universal functions)

In [None]:
%time squares_numpy = np.arange(r) ** 2

## create arrays and matrices

Multidimensional array (matrix): each inner list is one row; the outer list stacks these rows, so the position within an inner list is the column index.

In [None]:
np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]])

In [None]:
np.array([range(i, i + 3) for i in range(0, 10, 3)])

How do we create this without a list or a for-loop?

#### Let's start with special forms...

In [None]:
np.empty(10)

In [None]:
np.zeros(10, dtype=int)

In [None]:
np.zeros(10, dtype=bool)

In [None]:
np.ones((3, 5))

In [None]:
np.ones((3, 5), dtype=np.int16)

In [None]:
np.full(10, 4)

In [None]:
np.full((20,10), 'spam')

In [None]:
np.eye(3)

In [None]:
np.ones((2, 3, 4), dtype='int')

#### ranges, equidistant sequences, ...

In [None]:
np.arange(20)

In [None]:
np.arange(3, 20, 4)

In [None]:
np.linspace(0, 1, 5)

#### random arrays

In [None]:
np.random.random((4))

`random`: uniform distribution on $[0,1)$; `randint(a, b, size)`: uniform integers in $[a,b-1]$; `normal(a, b, size)`: normal distribution with mean $a$ and standard deviation $b$.

In [None]:
np.random.randint(-1, 2, (3,4))

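Note: These are pseudo-random numbers. Select a seed, e.g. `seed(0)`, for reproducibility. Calling `seed()` without an argument reseeds from fresh entropy provided by the operating system (or from the clock as a fallback), so the results differ between runs.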

In [None]:
np.random.seed()

In [None]:
np.random.seed(0)

In [None]:
np.random.random(4)

#### reshaping

In [None]:
a = np.arange(12)
a

In [None]:
a = a.reshape(4, 3)
a

#### access numpy arrays by indexing, slicing, ... just like lists

What are a.shape, a.size, a.ndim, a[2, 1], a[-1, -1], a[:2, 1:3], a[3], a[3, :], a[:, 1]?

In [None]:
a.shape

In [None]:
a.size

In [None]:
a.ndim

In [None]:
np.ones((2, 3, 4)).ndim

In [None]:
a[2, 1]

In [None]:
a[:2, 1:3]

In [None]:
a[3]

In [None]:
a[3, :]

In [None]:
a[:, 1]

Note that in contrast to lists, for np arrays slicing creates views, not copies!

In [None]:
a_slice = a[3, :]
a_slice

In [None]:
a_slice[2] = 0
a_slice

In [None]:
a

In [None]:
a_slice is a[3, :]

In [None]:
id(a[3, :])

In [None]:
id(a_slice)

In [None]:
a_slice_copy = a[3, :].copy()

In [None]:
a_slice_copy is a[3, :]

In [None]:
a_slice_copy[2] = -1
a_slice_copy

In [None]:
a

In [None]:
a.reshape(a.size, 1)

In [None]:
a[np.newaxis, :]

#### joining and splitting

In [None]:
a = np.arange(1, 4)
b = np.arange(4, 7)
a, b

In [None]:
np.concatenate([a, b])

In [None]:
np.concatenate([b, a, a])

In [None]:
a = a[:, np.newaxis]
b = b[:, np.newaxis]
a, b

In [None]:
np.concatenate([a, b], axis=0)

In [None]:
np.concatenate([a, b], axis=1)

In [None]:
np.vstack([a, b])

In [None]:
np.hstack([b, a])

In [None]:
a = np.arange(10)

In [None]:
np.split(a, [2, 5])

In [None]:
b = np.arange(12).reshape((4, 3))
b

In [None]:
first, second, third = np.hsplit(b, [1, 2])
print(second)

In [None]:
upper, lower = np.vsplit(b, [2])
print(upper)

In [None]:
np.dsplit(b[np.newaxis], [1])

## Universal Functions - computation on numpy arrays

Being an interpreted language makes Python very flexible, but also quite slow, e.g. in for loops. Vectorised operations are implemented by universal functions (ufuncs) - they are much more efficient, especially for repeated calculations.

### array arithmetic functions: unary and binary ufuncs

What do +, -, *, /, //, **, % and unary - do?

In [None]:
a = np.arange(4)

In [None]:
b = a + 5

In [None]:
-b

In [None]:
a + b

In [None]:
np.add(a, b)

further element-wise functions: abs, sin, cos, tan, ..., log, log2, log10, exp, exp2, power, expm1, log1p

In [None]:
np.empty(5)

In [None]:
a / 5

In [None]:
c = np.linspace(0, np.pi, 5)
c

In [None]:
np.sin(c) ** 2 + np.cos(c) ** 2

Note: floating-point results are only accurate up to machine precision.

In [None]:
np.log2(c[1:])

In [None]:
np.log(c[1:]) / np.log(2)

further special functions: from scipy import special

### Aggregation

In [None]:
a = np.arange(5)
sum(a)

In [None]:
np.add.reduce(a)

In [None]:
# Time three ways of summing ten million random floats.
# NOTE: this rebinds `r`, which earlier held the integer element count used by
# the squaring benchmarks (`range(r)`, `np.arange(r)`); re-running those cells
# after this one will fail because `r` is now an array.
r = np.random.random(10000000)
# Python's built-in sum applied to the array
%time sum(r)
# the reduce method of the np.add ufunc
%time np.add.reduce(r)
# NumPy's sum function
%time np.sum(r)

In [None]:
min(a)

In [None]:
max(a)

Caution: these functions are really fast in Python, but their running time still grows linearly with the array size.

In [None]:
a.sum()

In [None]:
a = np.concatenate([a * 2] * 5).reshape(5, 5)

In [None]:
a

In [None]:
type(a)

In [None]:
a = np.arange(5)
np.concatenate([a, a, a, a, a])

In [None]:
np.vstack([a] * 5)

In [None]:
b = np.vstack([a, a, a, a, a])

In [None]:
b.sum()

In [None]:
b.sum(axis = 0)

In [None]:
b.sum(axis = 1)

## Statistics

In [None]:
x = np.random.random(50)
x

In [None]:
x.mean()

In [None]:
x.std()

In [None]:
x.var()

In [None]:
np.median(x)

In [None]:
np.percentile(x, 50)

In [None]:
np.percentile(x, 0)

In [None]:
np.min(x, 0)

In [None]:
np.percentile(x, 5)

Boolean masks for computations on a subset of the data

In [None]:
x > 0.5

In [None]:
x[x > 0.5]

In [None]:
x[x > 0.5].mean()

In [None]:
x[(x < 0.1) | (x > 0.9)]

In [None]:
x[(x < 0.1) | (x > 0.9)].size