# Introduction to NumPy

Tamás Gál (tamas.gal@fau.de)

The latest version of this notebook is available at [https://github.com/Asterics2020-Obelics](https://github.com/Asterics2020-Obelics/School2019/tree/master/numpy)

In [1]:
import numpy as np
import sys

print("Python version: {0}\n"
      "NumPy version: {1}"
      .format(sys.version, np.__version__))

Python version: 3.7.2 (default, Jan 10 2019, 10:02:28) 
[GCC 8.2.1 20181127]
NumPy version: 1.16.2


In [2]:
def describe(np_obj):
    """Print a short, line-by-line summary of a NumPy object.

    The report lists the Python type of ``np_obj`` followed by its
    ``size``, ``ndim``, ``shape`` and ``dtype`` attributes.

    Parameters
    ----------
    np_obj : object
        Any object exposing ``size``, ``ndim``, ``shape`` and ``dtype``
        attributes (e.g. a ``numpy.ndarray``).

    Returns
    -------
    None
        The summary is written to standard output.
    """
    report_lines = [
        "object type: {0}".format(type(np_obj)),
        "size: {o.size}".format(o=np_obj),
        "ndim: {o.ndim}".format(o=np_obj),
        "shape: {o.shape}".format(o=np_obj),
        "dtype: {o.dtype}".format(o=np_obj),
    ]
    print("\n".join(report_lines))

In [3]:
from IPython.core.magic import register_line_magic

@register_line_magic
def shorterr(line):
    """Evaluate ``line`` and print only a one-line error if it raises.

    Registered as the ``%shorterr`` line magic. The text after the magic
    is evaluated as a Python expression with ``eval``. On success the
    value is returned; if an ``Exception`` is raised, the exception class
    name and message are printed in bold red (ANSI escape codes) and
    nothing is returned.
    """
    try:
        return eval(line)
    except Exception as exc:
        # Condense the traceback into a single highlighted line.
        message = "\x1b[31m\x1b[1m{e.__class__.__name__}: {e}\x1b[0m".format(e=exc)
        print(message)
    
del shorterr

## The basic data structure in NumPy: `ndarray`

In [4]:
a = np.array([1, 2, 3, 4, 5, 6])
a

array([1, 2, 3, 4, 5, 6])

In [5]:
type(a)

numpy.ndarray

### Array properties

In [6]:
a.size  # number of elements

6

In [7]:
a.ndim

1

In [8]:
a.shape

(6,)

In [9]:
a.dtype

dtype('int64')

### Multi-Dimensional Arrays

In [10]:
b = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])
b

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10]])

In [11]:
describe(b)

object type: <class 'numpy.ndarray'>
size: 10
ndim: 2
shape: (2, 5)
dtype: int64


### Array Methods

In [12]:
a.min(), a.max(), a.mean(), a.sum()

(1, 6, 3.5, 21)

In [13]:
b

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10]])

In [14]:
b.sum()

55

In [15]:
b.sum(axis=0)

array([ 7,  9, 11, 13, 15])

In [16]:
b.sum(axis=1)

array([15, 40])

## Operations with Arrays

In [17]:
a

array([1, 2, 3, 4, 5, 6])

In [18]:
a - 42

array([-41, -40, -39, -38, -37, -36])

In [19]:
a * 42 / np.pi

array([13.36901522, 26.73803044, 40.10704566, 53.47606088, 66.8450761 ,
       80.21409132])

In [20]:
a**np.e, np.e**a

(array([  1.        ,   6.58088599,  19.81299075,  43.30806043,
         79.43235917, 130.38703324]),
 array([  2.71828183,   7.3890561 ,  20.08553692,  54.59815003,
        148.4131591 , 403.42879349]))

In [21]:
a * a  # element-wise

array([ 1,  4,  9, 16, 25, 36])

In [22]:
a @ a  # use np.dot(a, a) if you are using < Python 3.5

91

In [23]:
a

array([1, 2, 3, 4, 5, 6])

In [24]:
a < 3

array([ True,  True, False, False, False, False])

In [25]:
a == 4

array([False, False, False,  True, False, False])

In [26]:
(a > 3) & (a < 5)  # bitwise AND

array([False, False, False,  True, False, False])

In [27]:
a < np.array([2, 3, 5, 2, 1, 5])

array([ True,  True,  True, False, False, False])

In [28]:
np.sum(a > 2)

4

## Basic Indexing and Slicing

In [29]:
a[0]  # indexing starts at 0

1

In [30]:
a[-1]  # -1 refers to the last element

6

In [31]:
a[2:6:3]  # just like in Python: [start:end:step]

array([3, 6])

In [32]:
a[::-1]  # reversing an array

array([6, 5, 4, 3, 2, 1])

In [33]:
b[::-1]  # reverses axis 0

array([[ 6,  7,  8,  9, 10],
       [ 1,  2,  3,  4,  5]])

### Indexing and Slicing in Multiple Dimensions

In [34]:
b

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10]])

In [35]:
b[0, 2]

3

In [36]:
b[0, 1:4]

array([2, 3, 4])

In [37]:
b[:, 1:4]  # the `:` selects the whole axis

array([[2, 3, 4],
       [7, 8, 9]])

In [38]:
b[:, 2:5:2]

array([[ 3,  5],
       [ 8, 10]])

In [39]:
b[::-1, ::-1]  # reverses both axes

array([[10,  9,  8,  7,  6],
       [ 5,  4,  3,  2,  1]])

### Advanced Indexing

In [40]:
d = np.array([4, 3, 2, 5, 4, 5, 4, 4])

In [41]:
mask = np.array([True, False, False, True, False, False, True, True])
mask

array([ True, False, False,  True, False, False,  True,  True])

In [42]:
d[mask]

array([4, 5, 4, 4])

In [43]:
d[[1, 3, 1, 6]]

array([3, 5, 3, 4])

#### Be careful with boolean indexing: the mask has to be a boolean array or a list of booleans. A list of `0`s and `1`s is interpreted as integer indices instead.

In [44]:
d

array([4, 3, 2, 5, 4, 5, 4, 4])

In [45]:
d[[False, True, False, False, True, False, False, True]]

array([3, 4, 4])

In [46]:
d[[0, 1, 0, 0, 1, 0, 0, 1]]  # although we know that True==1 and False==0

array([4, 3, 4, 4, 3, 4, 4, 3])

In [47]:
d[np.array([0, 1, 0, 0, 1, 0, 0, 1], dtype=bool)] 

array([3, 4, 4])

## The `dtype`

In [48]:
np.dtype

numpy.dtype

In [49]:
a, a.dtype

(array([1, 2, 3, 4, 5, 6]), dtype('int64'))

In [50]:
e = a * 42 / np.pi  # NumPy will choose the "right" `dtype` automatically
e, e.dtype

(array([13.36901522, 26.73803044, 40.10704566, 53.47606088, 66.8450761 ,
        80.21409132]), dtype('float64'))

### Some Basic `dtype`s

In [51]:
np.dtype('f')

dtype('float32')

In [52]:
np.dtype('f8')

dtype('float64')

In [53]:
np.dtype('i')

dtype('int32')

In [54]:
np.dtype('i2')

dtype('int16')

In [55]:
np.dtype('c16')

dtype('complex128')

In [56]:
np.dtype('S8')  # String with a fixed length of 8

dtype('S8')

### Properties of `dtype`s

In [57]:
dt = np.dtype('>i4')

In [58]:
dt.byteorder  # endianness: '>' means big-endian

'>'

In [59]:
dt.itemsize

4

In [60]:
dt.name

'int32'

### Structured `dtypes`

In [61]:
dt = np.dtype([('x', 'f8'), ('y', 'f8'), ('E', 'i4')])

In [62]:
dt.itemsize

20

In [63]:
dt['x']

dtype('float64')

In [64]:
np.dtype("i4, (3,4)f8, c8")  # three fields, second field has shape (3, 4)

dtype([('f0', '<i4'), ('f1', '<f8', (3, 4)), ('f2', '<c8')])

### Using `dtype`s

In [65]:
np.array([1, 2, 3], dtype='c8')

array([1.+0.j, 2.+0.j, 3.+0.j], dtype=complex64)

In [66]:
dt = np.dtype([('x', 'f8'), ('y', 'f8'), ('E', 'i4')])

In [67]:
f = np.array([(1, 2, 3), (4, 5, 6), (7, 8, 9)], dtype=dt)
f

array([(1., 2., 3), (4., 5., 6), (7., 8., 9)],
      dtype=[('x', '<f8'), ('y', '<f8'), ('E', '<i4')])

In [68]:
f['x']

array([1., 4., 7.])

In [69]:
f['E']

array([3, 6, 9], dtype=int32)

In [70]:
f[2]['y']

8.0

## Helper Functions to Create Arrays

In [71]:
np.arange(7)

array([0, 1, 2, 3, 4, 5, 6])

In [72]:
np.ones(10)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [73]:
np.zeros(5)

array([0., 0., 0., 0., 0.])

In [74]:
np.zeros((2, 4))

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [75]:
np.empty(20)

array([6.94317128e-310, 6.94317128e-310, 1.12465777e-312, 9.76118064e-313,
       1.14587773e-312, 1.14587773e-312, 1.01855798e-312, 6.94295803e-310,
       0.00000000e+000, 1.74405173e-321, 6.94317128e-310, 6.94317128e-310,
       1.58101007e-322, 9.88131292e-324, 6.94311300e-310, 1.50690022e-321,
       6.94317128e-310, 6.94317128e-310, 9.88131292e-324, 6.94311302e-310])

In [76]:
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [77]:
np.linspace(1, 2, 10)

array([1.        , 1.11111111, 1.22222222, 1.33333333, 1.44444444,
       1.55555556, 1.66666667, 1.77777778, 1.88888889, 2.        ])

In [78]:
np.ones_like(b)

array([[1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

In [79]:
np.ones(10, dtype='i2')

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int16)

### Random numbers

In [80]:
np.random.randint(1, 10, (2, 20))

array([[1, 9, 6, 9, 8, 4, 4, 9, 7, 8, 4, 5, 1, 2, 6, 9, 9, 7, 9, 3],
       [1, 6, 4, 5, 2, 9, 9, 1, 3, 1, 6, 1, 8, 2, 4, 5, 7, 7, 4, 8]])

In [81]:
np.random.random((3, 4))

array([[0.31914911, 0.73054886, 0.0054907 , 0.67161179],
       [0.13733134, 0.77304415, 0.76704027, 0.51065177],
       [0.43307331, 0.67111914, 0.73950112, 0.4934674 ]])

In [82]:
np.random.uniform(0, 5, 10)

array([1.73923588, 0.22299789, 0.44587293, 2.49749704, 4.53945784,
       2.40514761, 3.1716149 , 4.95584304, 1.80844867, 1.10598608])

## Broadcasting

In [83]:
g = np.array([1, 2, 3, 4])
h = np.array([5, 6, 7, 8])
g * h  # if the shapes match, operations are usually done element-by-element

array([ 5, 12, 21, 32])

In [84]:
g * 23  # as we have already seen, the rule relaxes when the shapes meet certain constraints

array([23, 46, 69, 92])

### Broadcasting rules
- NumPy compares the shapes element-wise, starting with the trailing dimension
- two dimensions are compatible if they are equal or one of them is __1__
- raises a `ValueError: operands could not be broadcast together` if the shapes are incompatible
- the size of a successfully broadcasted array is the maximum size along each dimension of the input arrays

### Operation on two arrays with different shapes
```
A      (4d array):  5 x 1 x 4 x 1
B      (3d array):      7 x 1 x 5
Result (4d array):  5 x 7 x 4 x 5
```

In [85]:
arr_1 = np.array([[1, 2, 3], [4, 5, 6]])
arr_2 = np.array([[1], [2]])

print('arr_1 shape:', arr_1.shape)
print('arr_2 shape:', arr_2.shape)

arr_3 = arr_1 + arr_2
print('arr_3 shape:', arr_3.shape)

arr_3

arr_1 shape: (2, 3)
arr_2 shape: (2, 1)
arr_3 shape: (2, 3)


array([[2, 3, 4],
       [6, 7, 8]])

In [86]:
i = np.arange(20).reshape(4, 5)
i

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [87]:
describe(i)

object type: <class 'numpy.ndarray'>
size: 20
ndim: 2
shape: (4, 5)
dtype: int64


In [88]:
i * np.array([0, 1, 2, 4, 5])

array([[ 0,  1,  4, 12, 20],
       [ 0,  6, 14, 32, 45],
       [ 0, 11, 24, 52, 70],
       [ 0, 16, 34, 72, 95]])

In [89]:
j = np.array([0, 10, 20, 30])
k = np.array([7, 8, 9])

In [90]:
%shorterr j+k

[31m[1mValueError: operands could not be broadcast together with shapes (4,) (3,) [0m


In [91]:
j[:, np.newaxis]  # inserts a new axis, making it two dimensional

array([[ 0],
       [10],
       [20],
       [30]])

In [92]:
j[:, np.newaxis] + k

array([[ 7,  8,  9],
       [17, 18, 19],
       [27, 28, 29],
       [37, 38, 39]])

## Universal Functions (`ufunc`)

#### A `ufunc` is a "vectorized" wrapper for a function that takes a fixed number of scalar inputs and produces a fixed number of scalar outputs.

NumPy provides a bunch of `ufunc`s:
- Math operations (`add()`, `subtract()`, `square()`, `log10()`, ...)
- Trigonometric functions (`sin()`, `cos()`, `tan()`, `deg2rad()`, ...)
- Bit-twiddling functions (`bitwise_and()`, `right_shift()`, ...)
- Comparison functions (`greater()`, `less_equal()`, `fmax()`, ...)
- Floating functions (`isnan()`, `isinf()`, `floor()`, ...)
    
They are all instances of `np.ufunc`

In [93]:
type(np.cos)  # every ufunc is an instance of np.ufunc

numpy.ufunc

### Create your own `ufunc` with `np.frompyfunc(func, nin, nout)`

In [94]:
m = np.random.randint(0, 100, 17)
m

array([78, 80, 95, 20,  3, 10, 57, 23, 78, 95, 43,  2, 63, 98, 61, 81,  0])

In [95]:
def step_23(x):
    """Step function with its threshold at 23.

    Returns 1 when ``x`` is strictly greater than 23 and 0 otherwise.
    ``x`` is used directly in an ``if`` test, so it must have an
    unambiguous truth value after comparison (i.e. be a scalar).
    """
    if x > 23:
        return 1
    return 0

In [96]:
%shorterr step_23(m)

[31m[1mValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()[0m


In [97]:
ustep_23 = np.frompyfunc(step_23, 1, 1)

In [98]:
ustep_23(m)

array([1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0], dtype=object)

## Views and Copies

In [99]:
n = np.arange(10)
n

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [100]:
o = n         # `o` will point to `n`
o[2] = 99
n             # changing `o` has changed `n`

array([ 0,  1, 99,  3,  4,  5,  6,  7,  8,  9])

In [101]:
p = n[5]      # single element access returns a copy
p

5

In [102]:
p = 9999
o             # o is not affected when `p` is changed

array([ 0,  1, 99,  3,  4,  5,  6,  7,  8,  9])

### Slices return (memory) views

In [103]:
q = o[2:4]    # slices return (memory) views
q

array([99,  3])

In [104]:
o[1] = 99  # `o` is just another name for `n`, so this change shows up in `n`
n

array([ 0, 99, 99,  3,  4,  5,  6,  7,  8,  9])

In [105]:
o[3:6] = [101, 102, 103]   # changing multiple elements at once
o

array([  0,  99,  99, 101, 102, 103,   6,   7,   8,   9])

## Acknowledgements
![](images/eu_asterics.png)

This tutorial was supported by the H2020-Astronomy ESFRI and Research Infrastructure Cluster (Grant Agreement number: 653477).