In [1]:
# Record when this notebook was executed and with which Python version.
import sys
import time

print(time.ctime())
# Keep only the text before the first '|' of the version string
# (presumably to drop build details some distributions append -- confirm).
print(sys.version.partition('|')[0])

Thu May 17 15:04:43 2018
3.6.1 


# B Numpy

This is part of the Python lecture given by Christophe Morisset at IA-UNAM.

### Import numpy first

In [2]:
# First import the numpy library (it must be installed on your computer ;-) )
# As it will be widely used, it is better to give it a short alias. Traditionally, it's "np":
import numpy as np

In [3]:
print(np.__version__)

1.12.1


### Tutorials

http://nbviewer.ipython.org/github/jrjohansson/scientific-python-lectures/blob/master/Lecture-2-Numpy.ipynb AND
http://nbviewer.ipython.org/gist/rpmuller/5920182 AND http://www.astro.washington.edu/users/vanderplas/Astr599/notebooks/11_EfficientNumpy

### The ARRAY class

#### Create an array

In [4]:
# A numpy array (the basic class) is easily created from a Python list.
l = [1,2,3,4,5,6]
print(l)
# Build the array from the list defined above rather than repeating the literal,
# so the cell really shows the list -> array conversion it describes.
a = np.array(l)
print(a)
print(type(a))

[1, 2, 3, 4, 5, 6]
[1 2 3 4 5 6]
<class 'numpy.ndarray'>


In [5]:
# A list containing only integers yields an integer array
# (int64 in the recorded output; the default integer width may depend on the platform).
L = [1, 2, 3, 4]
a = np.array(L)
print(a.dtype)
print(a)

int64
[1 2 3 4]


In [6]:
# A single float in the list (the trailing 4.) is enough to turn every element
# into a float: the array gets one common dtype (float64 in the recorded output).
L = [1,2,3,4.]
a = np.array(L)
print(a.dtype)
print(a)

float64
[ 1.  2.  3.  4.]


#### 1D, 2D, 3D, ...

In [7]:
# Arrays with 1, 2 and 3 dimensions, built from increasingly nested lists.
a = np.array([1,2,3,4,5,6])
b = np.array([[1,2],[1,4]])
c = np.array([[[1], [2]], [[3], [4]]])
print(a.shape, b.shape, c.shape)
print(a[0]) # a single index on the 1D array returns its first element

(6,) (2, 2) (2, 2, 1)
1


In [8]:
print(len(a), len(b), len(c)) # size of the first dimension

6 2 2


In [9]:
b.size

4

In [10]:
print(a.ndim, b.ndim, c.ndim) 

1 2 3


In [11]:
a = np.array([1,2,3,4,5,6])
# mean() and max() are method calls (hence the parentheses); shape is a plain attribute.
summary = (a.mean(), a.max(), a.shape)
print('mean: {0}, max: {1}, shape: {2}'.format(*summary))

mean: 3.5, max: 6, shape: (6,)


mean and max are methods (functions) of the array class, so they need ()s. shape is an attribute (like a variable).

In [12]:
print(a.mean) # this is printing information about the function, NOT the result of the function!

<built-in method mean of numpy.ndarray object at 0x10cc41030>


#### Creating arrays from scratch

In [13]:
print(np.arange(10))

[0 1 2 3 4 5 6 7 8 9]


In [14]:
print(np.logspace(0, 2, 11)) # from 10**start to 10**stop, with 11 values (both ends included)

[   1.            1.58489319    2.51188643    3.98107171    6.30957344
   10.           15.84893192   25.11886432   39.81071706   63.09573445
  100.        ]


In [15]:
a.ravel?

[0;31mDocstring:[0m
a.ravel([order])

Return a flattened array.

Refer to `numpy.ravel` for full documentation.

See Also
--------
numpy.ravel : equivalent function

ndarray.flat : a flat iterator on the array.
[0;31mType:[0m      builtin_function_or_method


In [16]:
# reshape returns an array with the requested shape; a itself keeps its shape (6,).
b = a.reshape((3,2)) # This does NOT change the shape of a
print(a)
print('-------------')
print(b)

[1 2 3 4 5 6]
-------------
[[1 2]
 [3 4]
 [5 6]]


#### WARNING arrays share memory

In [17]:
b = a.reshape((3,2))
print(a.shape, b.shape)

(6,) (3, 2)


In [18]:
b[1,1] = 100 # modify a value in the array
print(b)

[[  1   2]
 [  3 100]
 [  5   6]]


In [19]:
print(a) # !!! a and b are sharing the same place in the memory, they are pointing to the same values. 

[  1   2   3 100   5   6]


In [20]:
c = a.reshape((2,3)).copy() # This is the solution: copy() gives c its own data, independent of a

#### Using an array

In [21]:
a = np.array([1,2,3,4,5,6,7,8,9])
print(a)
# A list of positions used as an index assigns to all of those elements at once.
flagged_positions = [2,4,6]
a[flagged_positions] = -999
print(a)

[1 2 3 4 5 6 7 8 9]
[   1    2 -999    4 -999    6 -999    8    9]


### Using masks

In [22]:
# 20 random integers with 0 <= value < 100. No seed is set, so the values
# change on every run and will differ from the output recorded here.
a = np.random.randint(0, 100, 20) # low (inclusive), high (exclusive), N
print(a)

[15 35 18 37 31  7 51 75 72 15 53 56 10 99 18 37 96 38 56 91]


In [23]:
a < 50

array([ True,  True,  True,  True,  True,  True, False, False, False,
        True, False, False,  True, False,  True,  True, False,  True,
       False, False], dtype=bool)

In [24]:
mask = (a < 50)

In [25]:
print(mask)

[ True  True  True  True  True  True False False False  True False False
  True False  True  True False  True False False]


In [26]:
mask.sum()

11

In [27]:
a[mask]

array([15, 35, 18, 37, 31,  7, 15, 10, 18, 37, 38])

In [28]:
b = a.copy() # do NOT use b = a: both names would then refer to the same array
b[mask] = 50 # assign 50 wherever mask is True; a is left unchanged
print(a)
print(b)

[15 35 18 37 31  7 51 75 72 15 53 56 10 99 18 37 96 38 56 91]
[50 50 50 50 50 50 51 75 72 50 53 56 50 99 50 50 96 50 56 91]


In [29]:
b = a.copy()
b[b <= 50] = 0 # shortest way: build the mask inline. It does not matter if no element passes the test
print(b)

[ 0  0  0  0  0  0 51 75 72  0 53 56  0 99  0  0 96  0 56 91]


In [30]:
print(a[mask])
print(a[~mask]) # complementary

[15 35 18 37 31  7 15 10 18 37 38]
[51 75 72 53 56 99 96 56 91]


In [31]:
mask

array([ True,  True,  True,  True,  True,  True, False, False, False,
        True, False, False,  True, False,  True,  True, False,  True,
       False, False], dtype=bool)

In [32]:
mask = np.zeros_like(a, dtype=bool)
print(mask)

[False False False False False False False False False False False False
 False False False False False False False False]


In [33]:
mask[[2,3,4]] = True

In [34]:
mask

array([False, False,  True,  True,  True, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False], dtype=bool)

In [35]:
a[mask]

array([18, 37, 31])

In [36]:
a[mask].sum()

86

#### combining masks

In [37]:
print(a)
mask_low = a > 30 # True where the value is above the lower bound
mask_high = a < 70 # True where the value is below the upper bound
print('-------------------------------------')
print(a[mask_low & mask_high]) # both conditions are fulfilled (element-wise AND)
print('-------------------------------------')
print(a[~mask_low | ~mask_high]) # complementary, using the | for OR

[15 35 18 37 31  7 51 75 72 15 53 56 10 99 18 37 96 38 56 91]
-------------------------------------
[35 37 31 51 53 56 37 38 56]
-------------------------------------
[15 18  7 75 72 15 10 99 18 96 91]


### Some operations with arrays

In [38]:
a

array([15, 35, 18, 37, 31,  7, 51, 75, 72, 15, 53, 56, 10, 99, 18, 37, 96,
       38, 56, 91])

In [39]:
a + 1

array([ 16,  36,  19,  38,  32,   8,  52,  76,  73,  16,  54,  57,  11,
       100,  19,  38,  97,  39,  57,  92])

In [40]:
a**2 + 3*a**3

array([  10350,  129850,   17820,  153328,   90334,    1078,  400554,
       1271250, 1124928,   10350,  449440,  529984,    3100, 2920698,
         17820,  153328, 2663424,  166060,  529984, 2268994])

NumPy supports almost any mathematical operation: logarithms, trigonometric functions, etc.

In [41]:
a = np.arange(18)
print(a)
# log10(0) gives -inf (first element of the output); NumPy typically also emits
# a divide-by-zero RuntimeWarning for it.
print(np.log10(a))

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17]
[       -inf  0.          0.30103     0.47712125  0.60205999  0.69897
  0.77815125  0.84509804  0.90308999  0.95424251  1.          1.04139269
  1.07918125  1.11394335  1.14612804  1.17609126  1.20411998  1.23044892]


  This is separate from the ipykernel package so we can avoid doing imports until


In [42]:
# Print one row per element: the value, its log10, its sine and its exponential.
row_template = '{0:2} {1:4.2f} {2:5.2f} {3:8.2e}'
for aa in a:
    columns = (aa, np.log10(aa), np.sin(aa), np.exp(aa))
    print(row_template.format(*columns))

 0 -inf  0.00 1.00e+00
 1 0.00  0.84 2.72e+00
 2 0.30  0.91 7.39e+00
 3 0.48  0.14 2.01e+01
 4 0.60 -0.76 5.46e+01
 5 0.70 -0.96 1.48e+02
 6 0.78 -0.28 4.03e+02
 7 0.85  0.66 1.10e+03
 8 0.90  0.99 2.98e+03
 9 0.95  0.41 8.10e+03
10 1.00 -0.54 2.20e+04
11 1.04 -1.00 5.99e+04
12 1.08 -0.54 1.63e+05
13 1.11  0.42 4.42e+05
14 1.15  0.99 1.20e+06
15 1.18  0.65 3.27e+06
16 1.20 -0.29 8.89e+06
17 1.23 -0.96 2.42e+07


  This is separate from the ipykernel package so we can avoid doing imports until


sum

In [43]:
print(a.sum())
print(17*18/2) # closed form for 0 + 1 + ... + 17; "/" is true division, hence the float 153.0

153
153.0


A small comment on the order of the elements in arrays in Python: there are two ways arrays can be stored, row-major or column-major. This has a direct impact on the way one has to loop over the arrays. IDL is like Fortran (column-major) and Python is like C (row-major). It means that in Python, as you move linearly through the memory of an array, the second dimension (rightmost) changes the fastest, while in IDL the first (leftmost) dimension changes the fastest. Consequence for the loop order in Python: the innermost loop should run over the last (rightmost) index.

### Broadcasting

http://arxiv.org/pdf/1102.1523.pdf

    If the two arrays differ in their number of dimensions, the shape of the array with fewer dimensions is padded with ones on its leading (left) side.
    If the shape of the two arrays does not match in any dimension, the array with shape equal to 1 in that dimension is stretched to match the other shape.
    If in any dimension the sizes disagree and neither is equal to 1, an error is raised.

In [44]:
# Three independent 1D arrays holding the same values; their product is
# element-wise, so the result keeps the 1D shape.
samples = (1,2,3,4,5)
x1 = np.array(samples)
y1 = np.array(samples)
z1 = np.array(samples)
r1 = x1 * y1 * z1
print(r1.shape)

(5,)


In [45]:
x = np.array((1,2,3,4,5)).reshape(5,1,1)

In [46]:
x

array([[[1]],

       [[2]],

       [[3]],

       [[4]],

       [[5]]])

In [47]:
x.shape

(5, 1, 1)

In [48]:
x.ndim

3

In [49]:
# Same five values laid out along the second axis (y) and along the third axis (z).
# Each array is built from its own np.array call, so y and z do not share data.
axis_values = (1,2,3,4,5)
y = np.array(axis_values).reshape(1,5,1)
z = np.array(axis_values).reshape(1,1,5)
print(y)
print(z)

[[[1]
  [2]
  [3]
  [4]
  [5]]]
[[[1 2 3 4 5]]]


In [50]:
# x, y and z have shapes (5,1,1), (1,5,1) and (1,1,5); broadcasting stretches each
# length-1 axis, so the sum, and hence r, has shape (5,5,5).
r = np.sqrt(x**2 + y**2 + z**2)

In [51]:
print(r.shape)

(5, 5, 5)


In [52]:
r

array([[[ 1.73205081,  2.44948974,  3.31662479,  4.24264069,  5.19615242],
        [ 2.44948974,  3.        ,  3.74165739,  4.58257569,  5.47722558],
        [ 3.31662479,  3.74165739,  4.35889894,  5.09901951,  5.91607978],
        [ 4.24264069,  4.58257569,  5.09901951,  5.74456265,  6.4807407 ],
        [ 5.19615242,  5.47722558,  5.91607978,  6.4807407 ,  7.14142843]],

       [[ 2.44948974,  3.        ,  3.74165739,  4.58257569,  5.47722558],
        [ 3.        ,  3.46410162,  4.12310563,  4.89897949,  5.74456265],
        [ 3.74165739,  4.12310563,  4.69041576,  5.38516481,  6.164414  ],
        [ 4.58257569,  4.89897949,  5.38516481,  6.        ,  6.70820393],
        [ 5.47722558,  5.74456265,  6.164414  ,  6.70820393,  7.34846923]],

       [[ 3.31662479,  3.74165739,  4.35889894,  5.09901951,  5.91607978],
        [ 3.74165739,  4.12310563,  4.69041576,  5.38516481,  6.164414  ],
        [ 4.35889894,  4.69041576,  5.19615242,  5.83095189,  6.55743852],
        [ 5.09901951,

In [53]:
# A square array of ones and a column vector (shape (n_side, 1)) holding 0..n_side-1.
n_side = 10
a = np.ones((n_side, n_side))
b = np.arange(n_side).reshape(n_side, 1)
print(a)
print(b)
print(b.shape)

[[ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]]
[[0]
 [1]
 [2]
 [3]
 [4]
 [5]
 [6]
 [7]
 [8]
 [9]]
(10, 1)


In [54]:
# b has shape (10,1): it is stretched along the columns, so row i of a is multiplied by i.
a * b

array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
       [ 2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.],
       [ 3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.],
       [ 4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.],
       [ 5.,  5.,  5.,  5.,  5.,  5.,  5.,  5.,  5.,  5.],
       [ 6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.],
       [ 7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.],
       [ 8.,  8.,  8.,  8.,  8.,  8.,  8.,  8.,  8.,  8.],
       [ 9.,  9.,  9.,  9.,  9.,  9.,  9.,  9.,  9.,  9.]])

In [55]:
# Reshaped to (1,10), b is stretched along the rows, so column j of a is multiplied by j.
a * b.reshape(1,10)

array([[ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
       [ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
       [ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
       [ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
       [ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
       [ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
       [ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
       [ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
       [ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
       [ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.]])