In [None]:
# The following is to know when this notebook has been run and with which python version.
import time, sys
print(time.ctime())
print(sys.version.split('|')[0])

# B Numpy

This is part of the Python lecture given by Christophe Morisset at IA-UNAM.

### Import numpy first

In [None]:
# First import the numpy library (it must be installed on your computer ;-) )
# As it will be widely used, it is better to give it a nickname, or alias. Traditionally, it's "np":
import numpy as np

In [None]:
print(np.__version__)

### Tutorials

http://nbviewer.ipython.org/github/jrjohansson/scientific-python-lectures/blob/master/Lecture-2-Numpy.ipynb AND
http://nbviewer.ipython.org/gist/rpmuller/5920182 AND http://www.astro.washington.edu/users/vanderplas/Astr599/notebooks/11_EfficientNumpy

### The ARRAY class

#### Create an array

In [None]:
# A numpy array (the basic class) is easily created from a Python list
l = [1,2,3,4,5,6]
print(l)
# Build the array from the list defined above (np.array copies the values by default)
a = np.array(l)
print(a)
print(type(a))
# Works with tuples also:
b = np.array((1,2,3))
print(b)

Numpy arrays are stored efficiently in memory and operations on them run in compiled code, which makes them much faster than Python lists:

In [None]:
L = range(1000)
%timeit L2 = [i**2 for i in L] # Notice the use of timeit, a magic function (starts with %)
A = np.arange(1000)
%timeit A2 = A**2

In [None]:
L = [1, 2, 3, 4]
a = np.array(L)
print(a.dtype)
print(a)

In [None]:
L = [1,2,3,4.]
a = np.array(L)
print(a.dtype)
print(a)

In [None]:
L = [1,2,3,4.,'a']
a = np.array(L)
print(L) # Different types can coexist in a Python list
print(a.dtype)
print(a) # NOT in a numpy array: all the elements are converted to one common type, here string.

Once the type of an array is defined, one can only insert values of a type that can be converted to the type of the array.

In [None]:
a = np.array([1,2,3,4,5,6]) 
print(a)
a[4] = 2.56 # will be transformed to int(2.56), i.e. truncated to 2
print(a)
a[3] = '20' # will be transformed to int('20')
print(a)

In [None]:
# '3.2' cannot be converted to an integer, so this assignment fails.
# The error is the point of this cell: catch and display it so that the notebook still runs with "Run All".
try:
    a[2] = '3.2'
except ValueError as err:
    print('ValueError:', err)

In [None]:
# 'tralala' cannot be converted to an integer either.
# The error is the point of this cell: catch and display it so that the notebook still runs with "Run All".
try:
    a[2] = 'tralala'
except ValueError as err:
    print('ValueError:', err)

#### 1D, 2D, 3D, ...

In [None]:
a = np.array([1,2,3,4,5,6])
b = np.array([[1,2],[1,4]])
c = np.array([[[1], [2]], [[3], [4]]])
print(a.shape, b.shape, c.shape)
print(a[0]) # no error

In [None]:
print(len(a), len(b), len(c)) # size of the first dimension

In [None]:
b.size

In [None]:
print(a.ndim, b.ndim, c.ndim) 

In [None]:
a = np.array([1,2,3,4,5,6])
print('mean: {0}, max: {1}, shape: {2}'.format(a.mean(), a.max(), a.shape))

mean and max are methods (functions) of the array class, so they need ()s. shape is an attribute (like a variable).

In [None]:
print(a.mean) # this is printing information about the function, NOT the result of the function!

In [None]:
mm = a.mean # We assign the method itself to mm. We can then call it directly, but the ()s are still needed:
print(mm())

In [None]:
print(b)
print(b.mean()) # mean over the whole array
print(b.mean(axis=0)) # mean along the first axis: one value per column
print(b.mean(1)) # mean along the second axis: one value per row
print(np.mean(b)) # function form of b.mean()

#### Creating arrays from scratch

In [None]:
print(np.arange(10))

In [None]:
print(np.linspace(0, 1, 10)) # start, stop (included), number of points
print('--------------------------------')
print(np.linspace(0, 1, 11)) # start, stop (included), number of points
print('--------------------------------')
print(np.linspace(0, 1, 10, endpoint=False)) # Not including the stop point

In [None]:
print(np.logspace(0, 2, 10)) # from 10**start to 10**stop, with 10 values

In [None]:
print(np.zeros(2)) # Filled with 0.0
print('--------------------------------')
print(np.zeros((2,3))) # a 2D array, also filled with 0.0
print('--------------------------------')
print(np.ones_like(a)) # This is very useful: using an already created array (or list or tuple) as a template for the shape of the new one.
print('--------------------------------')
print(np.zeros_like(a, dtype=float)+3) # One can set a value to fill the array when creating it. Or later...
print('--------------------------------')
print(np.ones_like([1,2,3]))

In [None]:
b = a.reshape((3,2)) # This does NOT change the shape of a
print(a)
print('-------------')
print(b)

In [None]:
print(b.ravel())
print(b.reshape(b.size))

In [None]:
# create 2 2D arrays (coordinates matrices), one describing how x varies, the other for y.
x, y = np.mgrid[0:5, 0:10] # This is not a function!!! notice the []
print(x)
print('------------------------------------')
print(y)

In [None]:
# coordinates matrices using user-defined x- and y-vectors
x, y = np.meshgrid([1,2,4,7], [0.1, 0.2, 0.3])
print(x)
print('--------------------------------')
print(y)

In [None]:
x, y = np.meshgrid([1,2,4,7], [0.1, 0.2, 0.3], indexing='ij') # the other order...
print(x)
print('-----------------------------------------')
print(y)

#### WARNING arrays share memory

In [None]:
b = a.reshape((3,2))
print(a.shape, b.shape)

In [None]:
b[1,1] = 100 # modify a value in the array
print(b)

In [None]:
print(a) # !!! a and b are sharing the same place in the memory, they are pointing to the same values. 

In [None]:
b[1,1], a[3] # same value

In [None]:
a is b # a and b are different

In [None]:
print(b[1,1] == a[3])
print(b[1,1] is a[3]) # Even if the values are the same, the "is" does not tell it.

In [None]:
c = a.reshape((2,3)).copy() # This is the solution.

In [None]:
print(a)
print('---------------')
print(c)

In [None]:
c[0,0] = 8888
print(a)
print('---------------')
print(c)

### Random

In [None]:
ran_uniform = np.random.rand(5) # between 0 and 1
ran_normal = np.random.randn(5) # Gaussian mean 0 variance 1
print(ran_uniform)
print('-----------------------------')
print(ran_normal)
print('-----------------------------')
ran_normal_2D = np.random.randn(5,5) # Gaussian mean 0 variance 1
print(ran_normal_2D)

In [None]:
np.random.seed(1)
print(np.random.rand(5))
np.random.seed(1)
print(np.random.rand(5))

### Timing on 2D array

In [None]:
N = 100
A = np.random.rand(N, N)
B = np.zeros_like(A)

In [None]:
%%timeit
for i in range(N):
    for j in range(N):
        B[i,j] = A[i,j]

In [None]:
%%timeit
B = A # much faster! It does NOT copy: B is just another name for the same array

In [None]:
%%timeit 
B = (A.copy()) # Takes more time

In [None]:
%%timeit
for i in range(N):
    for j in range(N):
        B[i,j] = A[i,j]**2 

In [None]:
%%timeit
B = A**2 # much faster! The result is a new array

In [None]:
%timeit  B = (A.copy())**2 # Takes a little bit more time

### Slicing

In [None]:
a = np.arange(10)
print(a)
print(a[1:8:3])

In [None]:
print(a[:7])

In [None]:
print(a[4:])

In [None]:
print(a[::2])
print(a[::2][2])

In [None]:
# Revert the array:
print(a[::-1])

#### Assignment

In [None]:
a[5:] = 999
print(a)

In [None]:
a[5:] = a[4::-1]
print(a)

In [None]:
print(a)
b = a[:, np.newaxis] # create a new empty dimension
print(b)
print(a.shape, b.shape)
c = a[np.newaxis, :]
print(c, c.shape)

In [None]:
b*c # Outer product (NOT the vector cross product): (10,1) * (1,10) -> (10,10), see below (broadcasting)

#### Using an array

In [None]:
print(a)
a[[2,4,6]] = -999
print(a)

In [None]:
# a = 1 would turn a to be 1, but if we want to assign 1 to every value in a one must do:
a[:] = 1
print(a)

### Using masks

In [None]:
a = np.random.randint(0, 100, 20) # min (included), max (excluded), N
print(a)

In [None]:
a < 50

In [None]:
mask = (a < 50)

In [None]:
mask.sum()

In [None]:
a[mask]

In [None]:
b = a.copy() # do NOT use b = a
b[mask] = 50 # 
print(a)
print(b)

In [None]:
b = a.copy()
b[b <= 50] = 0 # shortest way. It does not matter if no element passes the test
print(b)

In [None]:
print(a[mask])
print(a[~mask]) # complementary

In [None]:
mask

In [None]:
mask = np.zeros_like(a, dtype=bool)
print(mask)

In [None]:
mask[[2,3,4]] = True

In [None]:
mask

In [None]:
a[mask]

In [None]:
a[mask].sum()

#### combining masks

In [None]:
print(a)
mask_low = a > 30
mask_high = a < 70
print('-------------------------------------')
print(a[mask_low & mask_high]) # both conditions are filled
print('-------------------------------------')
print(a[~mask_low | ~mask_high]) # complementary, using the | for OR

#### the where function

In [None]:
tt = np.where(a > 30)
print(a)
print(tt) # tt is a tuple of arrays, one for each dimension of the condition, 
# containing the indices where the condition is filled in that dimension.

In [None]:
(a > 30).nonzero() # "where" called with only a condition is the same as condition.nonzero().

In [None]:
# the indices where the condition is filled are in the first element of the tuple

In [None]:
tt[0]

In [None]:
# faster once you know that the condition is 1D
tt = np.where(a > 30)[0]

In [None]:
tt # the array containing the indices where the condition is filled

In [None]:
a[tt] # the values where the condition is filled

In [None]:
# The where function can take 3 arguments. 
b = np.where(a < 50, np.nan, a)
print(a)
print(b)
print(np.isfinite(b))

In [None]:
b = np.where(a < 50, True, False)
print(a)
print(b)

In [None]:
b = np.where(a < 50, 0, 100)
print(a)
print(b)

### Some operations with arrays

In [None]:
a 

In [None]:
a + 1

In [None]:
a**2 + 3*a**3

In [None]:
# look for the integers i such that i**2 + (i+1)**2 = (i+2)**2
i = np.arange(30)
b = i**2 + (i+1)**2

In [None]:
c = (i+2)**2

In [None]:
print(b)
print(c)

In [None]:
b == c

In [None]:
i[b==c]

In [None]:
i[b==c][0] # the result is an array. To obtain the first value (here the only one), use [0]

Numpy provides almost any mathematical operation: logarithms, trigonometric functions, etc.

In [None]:
a = np.arange(18)
print(a)
print(np.log10(a))

In [None]:
for aa in a:
    print('{0:2} {1:4.2f} {2:5.2f} {3:8.2e}'.format(aa, np.log10(aa), np.sin(aa), np.exp(aa)))

sum

In [None]:
print(a.sum())
# Closed form of 0 + 1 + ... + 17. Integer division keeps the result an int, like a.sum()
print(17*18//2)

In [None]:
a = np.random.rand(2, 4, 3)
print(a.shape)
print(a.size)

2 planes, 4 rows, 3 columns

A small comment on the order of the elements in arrays in Python: there are two ways arrays can be stored, row-major or column-major. This has a direct impact on the way one has to loop over the arrays. IDL is like Fortran (column-major) and Python is like C (row-major). It means that in Python, as you move linearly through the memory of an array, the last (rightmost) dimension changes the fastest, while in IDL the first (leftmost) dimension changes the fastest. Consequence on the loop order in Python:

In [None]:
for plane in a:
    for row in plane:
        for col in row:
            print(col)
            print('-----')

In [None]:
print(a[0,1,2]) # a[p, r, c]

In [None]:
a.sum()

In [None]:
a.sum(0) # from 3D to 2D. Generate an "image" of the sum, i.e. the "projection" on the x-axis of the 3D array

In [None]:
a.sum(0).shape

In [None]:
a.sum(0).sum(0) # from 3D to 1D. From the image, make the sum in each row.

In [None]:
a.min(0)

In [None]:
a.ravel()

In [None]:
i_min = a.argmin() # return the index of where the minimum is. It uses the 1D (flattened) index.
print(i_min)
b = np.array([10,2,3,4,5,2])
b.argmin() # only the first occurrence is returned

In [None]:
a.ravel().shape # 1D 

In [None]:
a.ravel()[i_min] # Check where the minimum is.

In [None]:
# Convert the flat index i_min into (plane, row, column) indices.
# The strides are derived from a.shape instead of being hard-coded for a (2, 4, 3) array.
n_rows, n_cols = a.shape[1:]
z = i_min // (n_rows * n_cols)
y = (i_min - n_rows * n_cols * z) // n_cols
x = i_min - n_rows * n_cols * z - n_cols * y
print(z, y, x)
print(a[z, y, x])

In [None]:
def decompose_ravel(arr, i):
    """Convert a flat (raveled) index into a tuple of per-axis indices.

    Hand-written equivalent of ``np.unravel_index(i, arr.shape)`` for a
    row-major (C-ordered) array, kept explicit for teaching purposes.

    Parameters
    ----------
    arr : object exposing a ``shape`` tuple (e.g. a numpy array)
        The array whose shape is used to decompose the index.
    i : int
        Index into ``arr.ravel()``. Negative values count from the end.

    Returns
    -------
    tuple of int
        One index per dimension, such that ``arr[result]`` is the same
        element as ``arr.ravel()[i]``.

    Raises
    ------
    IndexError
        If ``i`` is outside ``[-arr.size, arr.size)``.
    """
    shape = arr.shape
    # Use plain Python integers: np.prod(()) would return a float and
    # fixed-width numpy integers could overflow for very large shapes.
    size = 1
    for dim in shape:
        size *= dim
    remainder = int(i)
    if not -size <= remainder < size:
        raise IndexError('index {0} is out of bounds for size {1}'.format(i, size))
    res = []
    stride = size
    for dim in shape:
        # Number of elements spanned by one step along the current axis.
        stride //= dim
        n, remainder = divmod(remainder, stride)
        res.append(n)
    return tuple(res)

In [None]:
res = decompose_ravel(a, i_min)
print(a.min())
print(res)
print(a[res])

In [None]:
a.min(0).min(0)

In [None]:
print(a[:,0,0])
a[:,0,0].min()

In [None]:
a.mean(0)

In [None]:
np.median(a, 1)

In [None]:
a.std()

In [None]:
np.percentile(a, 25)

In [None]:
print(a[0:4,0])
print(np.cumsum(a[0:100,0])) # axis is a keyword. If absent, cumsum is applied on the ravel(), i.e. on the flattened 1D array

In [None]:
b = np.arange(1000).reshape(10,10,10)

In [None]:
b.shape

In [None]:
b[4,:,:] # hundreds digits = 4

In [None]:
b[:,2,:] # tens digit = 2

In [None]:
b[:,:,7] # unity digit = 7

In [None]:
b.min(0) # elements with the smallest value for the hundreds digit

In [None]:
b.min(2) # smallest value for the unity digit

In [None]:
b.min(2).shape

In [None]:
np.median(b)

In [None]:
np.median(b, axis=0)

In [None]:
x = 2 * np.random.rand(100,100,100) - 1.
print(np.min(x), np.max(x))

In [None]:
y = 2 * np.random.rand(100,100,100) - 1.
z = 2 * np.random.rand(100,100,100) - 1.

In [None]:
r = np.sqrt(x**2 + y**2 + z**2)
print(np.min(r), np.max(r))
print(np.sqrt(3))

In [None]:
print(np.mean(r))
print(r.mean())

In [None]:
np.median(r)

### Broadcasting

http://arxiv.org/pdf/1102.1523.pdf

    If the two arrays differ in their number of dimensions, the shape of the array with fewer dimensions is padded with ones on its leading (left) side.
    If the shape of the two arrays does not match in any dimension, the array with shape equal to 1 in that dimension is stretched to match the other shape.
    If in any dimension the sizes disagree and neither is equal to 1, an error is raised.

In [None]:
x1 = np.array((1,2,3,4,5))
y1 = np.array((1,2,3,4,5))
z1 = np.array((1,2,3,4,5))
r1 = x1 * y1 * z1
print(r1.shape)

In [None]:
x = np.array((1,2,3,4,5)).reshape(5,1,1)

In [None]:
x

In [None]:
x.shape

In [None]:
x.ndim

In [None]:
y = np.array((1,2,3,4,5)).reshape(1,5,1)
z = np.array((1,2,3,4,5)).reshape(1,1,5)
print(y)
print(z)

In [None]:
r = x * y * z

In [None]:
print(r.shape)

In [None]:
r

In [None]:
a = np.ones((10,10))
b = np.arange(10).reshape(10,1)
print(a)
print(b)
print(b.shape)

In [None]:
a * b

In [None]:
a * b.reshape(1,10)

### Structured arrays and RecArrays

See here: http://docs.scipy.org/doc/numpy/user/basics.rec.html

A structured array in numpy is an array of records. Each record can contain one or more items which can be of different types.

In [None]:
a = np.array([(1.5, 2), (3.0, 4)]) # Classical numpy array
print(a)

In [None]:
astru = np.array([(1.5, 2), (3.0, 4)], dtype=[('x', float), ('y', int)]) # array with named and typed columns
astru

In [None]:
print(astru['x'])
print(astru['y'])

In [None]:
arec = astru.view(np.recarray)
print(type(a), type(astru), type(arec))
print('----------------------------------')
print(a)
print(astru)
print(arec)
print('----------------------------------')
print(a.size, astru.size, arec.size) # not even the same size
print('----------------------------------')
print(a.dtype, astru.dtype, arec.dtype) # the dtypes tell us that astru and arec have column names and types
print('----------------------------------')
print(a[1,1], astru[1][1], arec[1][1]) # one is 2D, the others are 1D collections of records
print('----------------------------------')
print(astru['y']) # access by name (a little like dictionaries)
print('----------------------------------')
print(arec.x) # a recarray also gives access to the fields as attributes

In [None]:
%timeit astru2 = np.append(astru, np.array([(5.0, 6)], dtype=astru.dtype)) # Copied all the data, may be slow

In [None]:
%timeit astru3 = np.concatenate((astru, np.array([(5.0, 6)], dtype=astru.dtype))) # A little bit faster

In [None]:
%timeit arec2 = np.append(arec, np.array([(5.0, 6)], dtype=astru.dtype).view(np.recarray)) # Copied all the data, may be slow

In [None]:
%timeit arec3 = np.concatenate((arec, np.array([(5.0, 6)], dtype=astru.dtype).view(np.recarray))) # A little bit faster

In [None]:
arec4 = np.rec.fromrecords([(456,'dbe',1.2),(2,'de',1.3)],names='col1,col2,col3') # direct from data.
print(arec4)
print(type(arec4))
print(arec4.col1[1])
print(arec4[1].col1)

In [None]:
# Small star catalogue: one record per star, with the fields (name, ra, dec, mag).
# The field names are written without stray spaces, so the result does not
# depend on numpy stripping whitespace from the names string.
arec4 = np.rec.fromrecords([('etoile_15', 30.015, -0.752, 10.722), 
                            ('etoile_11', 31.163, -9.109, 10.761),
                            ('etoile_16', 39.789, -7.716, 11.071), 
                            ('etoile_14', 35.110, 6.785, 11.176), 
                            ('etoile_31', 33.530, 9.306, 11.823), 
                            ('etoile_04', 33.480, 5.568, 11.978)
                            ],
                           names='name,ra,dec,mag')

In [None]:
mask = arec4.mag > 11.
print(arec4[mask])
print('-------------------------')
for star in arec4[mask]:
    print('name: {0} ra = {1} dec = {2} magnitude = {3}'.format(star.name, star.ra, star.dec, star.mag))
print('-------------------------')
for star in arec4[mask]:
    print('name: {0[name]} ra = {0[ra]} dec = {0[dec]} magnitude = {0[mag]}'.format(star)) # use only one argument in format, indexed by field name

### NaN and other IEEE 754 special values

In [None]:
a = np.array([-3, -2., -1., 0., 1., 2.])
b = 1./a
print(b)

In [None]:
print(a.sum())
print(b.sum()) # inf and NaN are absorbing elements: they propagate to the result of the sum

In [None]:
mask = np.isfinite(b)
print(mask)
print(b[mask].sum())

In [None]:
for elem in b:
    print(np.isinf(elem))

In [None]:
a = np.array([-2, -1, 1., 2, 3])
b = np.log10(a)
mask = np.isfinite(b)
print(a)
print(b)
print(mask)
print(a.mean())
print(b.mean())
print(b[mask].mean())
print(np.nanmean(b))

### Roundish values of floats

In [None]:
import math
res = []
for i in range(100): 
    res.append(math.log(2 ** i, 2)) # The second argument is the base of the log. The result should be i.
print(res)
# We can see that sometimes the value of log2(2**i) is NOT i.

In [None]:
res2 = []
for i in range(100): 
    res2.append(float(round(math.log(2**i, 2))) == math.log(2 ** i, 2))
print(res2)
# Same check, comparing each value with its rounded version. This should always be True;
# an entry is False wherever the log is not exact.

In [None]:
res = []
for i in range(100): 
    res.append(np.log2(2.**i)) # np.log2 is the base-2 logarithm. The result should be i.
print(res)

res_np = []
for i in range(100): 
    res_np.append(float(round(np.log2(2.**i))) == np.log2(2.**i))
print(res_np)
# No problems with the numpy log2 function.

In case of doubt, one can use the isclose function from numpy:

In [None]:
res_np2 = []
for i in range(100): 
    res_np2.append(np.isclose(float(round(math.log(2 ** i, 2))), math.log(2 ** i, 2)))
print(res_np2)
# isclose compares the two values within a tolerance instead of testing strict equality.

In [None]:
np.isclose?