# Numpy: Numerical Python package 

- [NumPy](https://www.numpy.org) is perhaps the most important package for numerical computing in Python
- the n-dimensional array (ndarray) in NumPy is used as a basic object in most Python packages for data exchange
  - we will look at its methods and semantics starting today through examples
- some of the most important features of NumPy
  - ndarray: multidimensional array for fast and efficient array-oriented operations and arithmetics
  - mathematical functions for fast operations on arrays without using explicit loops and iterations
  - tools for I/O to and from disk
  - Linear algebra
  - random number generation
  - API to connect NumPy with C and C++ libraries

In [2]:
import numpy as np
arr2 = np.array( [ [ 1,2,3], [4,5,6], [7,8,9]  ] )
print(arr2)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [4]:
arr3 = np.random.rand(2,3)
print(arr3)
arr4 = np.random.rand(3,2,2)
print(arr4)

[[0.304784   0.93979483 0.2542286 ]
 [0.76602038 0.43025323 0.4085313 ]]
[[[0.29860346 0.95651447]
  [0.37258984 0.4552999 ]]

 [[0.24350547 0.13509193]
  [0.49155392 0.38578942]]

 [[0.7880916  0.45511436]
  [0.30604773 0.91195589]]]


## creating ndarrays
As seen above, all NumPy functions and classes use ndarray as return type. You only need to specify the dimensions and the size of the array to be created

### uniform random numbers

In [5]:
np.random.rand(10,3)

array([[0.97549457, 0.1639616 , 0.50201603],
       [0.37115064, 0.24499829, 0.09214256],
       [0.30305486, 0.3797549 , 0.91212967],
       [0.20007884, 0.88173664, 0.59489763],
       [0.67946798, 0.20247669, 0.49636172],
       [0.93762847, 0.4055339 , 0.22762076],
       [0.17108054, 0.6376423 , 0.21320185],
       [0.26277791, 0.39586452, 0.47239605],
       [0.46229786, 0.43319688, 0.68888316],
       [0.69067113, 0.82423568, 0.75528689]])

In [6]:
np.random.rand(2,3,4)

array([[[0.98955119, 0.71102764, 0.22247109, 0.40522903],
        [0.18950161, 0.29896845, 0.86138293, 0.44831475],
        [0.25994727, 0.25628765, 0.40980197, 0.71673798]],

       [[0.99679829, 0.9188075 , 0.41340605, 0.8466334 ],
        [0.88080262, 0.35562626, 0.67185563, 0.2215613 ],
        [0.1990677 , 0.76488829, 0.06176701, 0.90072234]]])

### array of zeros

In [9]:
v = np.zeros(3)
print(v)
print(np.zeros( (2,4)) )

[0. 0. 0.]
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]]


In [10]:
A = np.zeros((3,4) )
print(A)

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]


### arrays of 1

In [12]:
w = np.ones(4)
print(w)
print(np.ones( (2,4)) )

[1. 1. 1. 1.]
[[1. 1. 1. 1.]
 [1. 1. 1. 1.]]


you can specify the shape of the array with a tuple

In [14]:
B = np.ones ( (3,4))
print(B)

val = 3.6*B
print(val)

print(val/2)

[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]
[[3.6 3.6 3.6 3.6]
 [3.6 3.6 3.6 3.6]
 [3.6 3.6 3.6 3.6]]
[[1.8 1.8 1.8 1.8]
 [1.8 1.8 1.8 1.8]
 [1.8 1.8 1.8 1.8]]


Rather than multiplying ones by a scalar, you can directly create an array filled with a given value

In [15]:
z = np.full( 4, fill_value=3.12)
print(z)

[3.12 3.12 3.12 3.12]


use a tuple to specify the shape of the array to be filled

In [16]:
C = np.full( (2,3), fill_value=-4.3)
print(C)

[[-4.3 -4.3 -4.3]
 [-4.3 -4.3 -4.3]]


### Identity array

In [17]:
data = np.identity(7)
print(data)

[[1. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 1.]]


operations with arrays

In [18]:
print( 2.3*data )

[[2.3 0.  0.  0.  0.  0.  0. ]
 [0.  2.3 0.  0.  0.  0.  0. ]
 [0.  0.  2.3 0.  0.  0.  0. ]
 [0.  0.  0.  2.3 0.  0.  0. ]
 [0.  0.  0.  0.  2.3 0.  0. ]
 [0.  0.  0.  0.  0.  2.3 0. ]
 [0.  0.  0.  0.  0.  0.  2.3]]


In [19]:
d = data - C

ValueError: operands could not be broadcast together with shapes (7,7) (2,3) 

## Shape of arrays
each ndarray is characterized by its 
- shape
- size 
- type of data

In [20]:
data = np.array([ [-1., 2.3], [2.3, 4.5], [-8.4, 1.9] ])
print("data\n",data)
print("type: ",type(data))
print("dimension:\t", data.ndim)
print("shape: ", data.shape)
print("data type: ",data.dtype)


data
 [[-1.   2.3]
 [ 2.3  4.5]
 [-8.4  1.9]]
type:  <class 'numpy.ndarray'>
dimension:	 2
shape:  (3, 2)
data type:  float64


change type of data

In [21]:
data_int = np.array([ [-1., 2.3], [2.3, 4.5], [-8.4, 1.9] ], dtype=np.int64)
print(data_int)

[[-1  2]
 [ 2  4]
 [-8  1]]


## Reshaping arrays

In [22]:
data = np.arange(1,101)
data

array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
        14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
        27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
        40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,
        53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,
        66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,
        79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,
        92,  93,  94,  95,  96,  97,  98,  99, 100])

In [23]:
mat1 = data.reshape(25,4)
print(mat1)
print(data)

[[  1   2   3   4]
 [  5   6   7   8]
 [  9  10  11  12]
 [ 13  14  15  16]
 [ 17  18  19  20]
 [ 21  22  23  24]
 [ 25  26  27  28]
 [ 29  30  31  32]
 [ 33  34  35  36]
 [ 37  38  39  40]
 [ 41  42  43  44]
 [ 45  46  47  48]
 [ 49  50  51  52]
 [ 53  54  55  56]
 [ 57  58  59  60]
 [ 61  62  63  64]
 [ 65  66  67  68]
 [ 69  70  71  72]
 [ 73  74  75  76]
 [ 77  78  79  80]
 [ 81  82  83  84]
 [ 85  86  87  88]
 [ 89  90  91  92]
 [ 93  94  95  96]
 [ 97  98  99 100]]
[  1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
  91  92  93  94  95  96  97  98  99 100]


In [24]:
mat2 = mat1.reshape(10,10)
print(mat2)

[[  1   2   3   4   5   6   7   8   9  10]
 [ 11  12  13  14  15  16  17  18  19  20]
 [ 21  22  23  24  25  26  27  28  29  30]
 [ 31  32  33  34  35  36  37  38  39  40]
 [ 41  42  43  44  45  46  47  48  49  50]
 [ 51  52  53  54  55  56  57  58  59  60]
 [ 61  62  63  64  65  66  67  68  69  70]
 [ 71  72  73  74  75  76  77  78  79  80]
 [ 81  82  83  84  85  86  87  88  89  90]
 [ 91  92  93  94  95  96  97  98  99 100]]


__Note that the original data array has not been modified__

In [25]:
data

array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
        14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
        27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
        40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,
        53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,
        66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,
        79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,
        92,  93,  94,  95,  96,  97,  98,  99, 100])

In [26]:
data.reshape(9,8)

ValueError: cannot reshape array of size 100 into shape (9,8)

You can create an array with the same shape of an existing array

In [27]:
np.empty_like(data)

array([-6917529027641081856, -6917529027641081856,           4551540932,
                 4551553456,           4551553475,                   11,
                          0,                    8,  6999220688590471168,
        2314950437861683826,  2318283021748284465,  3756037411779584032,
        2314898815152300076,  3186050583738725920,  2314885582760714272,
        2318280895739472953,  3616707298307682592,  7952279640408596524,
        2314885530818453536,  3185506325482976305,  3539864681373048864,
        2318289691832495159,  3472873585209061664,  2314898798274420780,
        3184944475041182258,  3611922275377487904,  6641743139379883061,
        2314885530818453614,  4049334337512354336,  2314898832634159148,
        3184382624599388211,  3683979869381926944,  2318285302375918643,
        3905500624413078304,  2314898824061001772,  3186634424413075507,
        2314885530818473564,  3756037463386365984,  2318283107647630385,
        3761666911313802272,  2314898815487844396, 

In [30]:
data2 = np.ones_like(data)
print(data2)

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


In [31]:
np.zeros_like(data)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [32]:
np.full_like(data, -4.5)

array([-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
       -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
       -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
       -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
       -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
       -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4])

Since the array used as a template holds integers, the new array is also made of integers, and the fill value `-4.5` is truncated to `-4`. If you need a float array, then you have to specify the dtype explicitly:

In [33]:
np.full_like(data, fill_value=-4.5,dtype=np.float64)

array([-4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5,
       -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5,
       -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5,
       -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5,
       -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5,
       -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5,
       -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5,
       -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5,
       -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5,
       -4.5])

In [34]:
data_2 = 1.*np.full_like(data, -4.5)
print(data_2)

[-4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4.
 -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4.
 -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4.
 -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4.
 -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4. -4.
 -4. -4. -4. -4. -4. -4. -4. -4. -4. -4.]


## Operations with ndarray
you can use an ndarray for the basic mathematical operations typically used with scalars.



### Arithmetics with arrays

In [35]:
data = np.arange(1,17).reshape(4,4)
print(data)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]
 [13 14 15 16]]


In [36]:
data2 = np.ones_like(data)
print(data2)

[[1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]]


In [37]:
data + data2

array([[ 2,  3,  4,  5],
       [ 6,  7,  8,  9],
       [10, 11, 12, 13],
       [14, 15, 16, 17]])

In [38]:
data - data2

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

Division by an array applies the division to each element. It is not the same as inverting an array (matrix inversion)

In [39]:
1./data

array([[1.        , 0.5       , 0.33333333, 0.25      ],
       [0.2       , 0.16666667, 0.14285714, 0.125     ],
       [0.11111111, 0.1       , 0.09090909, 0.08333333],
       [0.07692308, 0.07142857, 0.06666667, 0.0625    ]])

In [40]:
10 / data

array([[10.        ,  5.        ,  3.33333333,  2.5       ],
       [ 2.        ,  1.66666667,  1.42857143,  1.25      ],
       [ 1.11111111,  1.        ,  0.90909091,  0.83333333],
       [ 0.76923077,  0.71428571,  0.66666667,  0.625     ]])

Adding a scalar to an array, adds the same value to all cells

In [41]:
3+data

array([[ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19]])

In [43]:
data_5 = 3.67+ np.sin(data)/data
print(data_5)

[[4.51147098 4.12464871 3.71704    3.48079938]
 [3.47821515 3.62343075 3.76385523 3.79366978]
 [3.71579094 3.61559789 3.5790918  3.62528559]
 [3.70232054 3.74075767 3.71335252 3.65200604]]


### functions
you have to use the NumPy functions so __np.sin__ instead of __math.sin__

In [46]:
datasq = data**2

In [47]:
print(data)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]
 [13 14 15 16]]


Note how applying the function __does not__ modify data. Instead a new array is returned. This is the same behavior as with scalars in other languages and also in Python.

In [48]:
data_sqr = data**2
print(data_sqr)

[[  1   4   9  16]
 [ 25  36  49  64]
 [ 81 100 121 144]
 [169 196 225 256]]


In [49]:

data_2 = np.log(data_sqr) + np.sin( data*np.pi)

In [50]:
data_3 = np.sqrt( data_sqr )
print(data_3)

[[ 1.  2.  3.  4.]
 [ 5.  6.  7.  8.]
 [ 9. 10. 11. 12.]
 [13. 14. 15. 16.]]


## Indexing and slicing

In [51]:
data= np.arange(20).reshape(4,5)
data

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [52]:
data[2:3,2:3].shape

(1, 1)

In [53]:
data[2:4,2:4].shape

(2, 2)

In [54]:
data[1:,:]

array([[ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [55]:
data[:,4]

array([ 4,  9, 14, 19])

In [56]:
data[:2,3:7]

array([[3, 4],
       [8, 9]])

**Slicing an ndarray returns a view, i.e. a reference to the original array, unless you explicitly make a copy. Note that this differs from lists, where a slice creates a new list.**

In [57]:
x = data[:2,1:3]
print(x)

[[1 2]
 [6 7]]


In [58]:
x[0]

array([1, 2])

In [59]:
x[0][1]

2

In [60]:
x[0][1] = -5.55
print(x)
print(data)

[[ 1 -5]
 [ 6  7]]
[[ 0  1 -5  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]]


So you have modified not just x, but also the original data array!

In [61]:
data= np.arange(16).reshape(4,4)
print(data)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]


If this is not what you want, then you have to create a new copy by calling the constructor

In [62]:
x = np.array( data[0])
print(x)

[0 1 2 3]


In [63]:
x[1:3]

array([1, 2])

In [64]:
x[1:3] = [ -2, -4]
print(x)

[ 0 -2 -4  3]


Although you have modified `x`, `data` has not changed

In [65]:
print(data)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]


## converting and casting
When using `arange()` the array's type is integer.

In [66]:
data= np.arange(42).reshape(6,7)
data

array([[ 0,  1,  2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11, 12, 13],
       [14, 15, 16, 17, 18, 19, 20],
       [21, 22, 23, 24, 25, 26, 27],
       [28, 29, 30, 31, 32, 33, 34],
       [35, 36, 37, 38, 39, 40, 41]])

In [67]:
print(data[2])

[14 15 16 17 18 19 20]


Sometimes you might want to change its type for floating point calculations

In [68]:
data[2][0] = -np.pi
data

array([[ 0,  1,  2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11, 12, 13],
       [-3, 15, 16, 17, 18, 19, 20],
       [21, 22, 23, 24, 25, 26, 27],
       [28, 29, 30, 31, 32, 33, 34],
       [35, 36, 37, 38, 39, 40, 41]])

In [69]:
data_float = data.astype( np.float64)
print(data_float)

[[ 0.  1.  2.  3.  4.  5.  6.]
 [ 7.  8.  9. 10. 11. 12. 13.]
 [-3. 15. 16. 17. 18. 19. 20.]
 [21. 22. 23. 24. 25. 26. 27.]
 [28. 29. 30. 31. 32. 33. 34.]
 [35. 36. 37. 38. 39. 40. 41.]]


In [70]:
data_float[4][5] = -1.24
print(data_float)

[[ 0.    1.    2.    3.    4.    5.    6.  ]
 [ 7.    8.    9.   10.   11.   12.   13.  ]
 [-3.   15.   16.   17.   18.   19.   20.  ]
 [21.   22.   23.   24.   25.   26.   27.  ]
 [28.   29.   30.   31.   32.   -1.24 34.  ]
 [35.   36.   37.   38.   39.   40.   41.  ]]


Note that `astype()` returned a copy of the array and our original array `data` has not been modified

In [71]:
print(data)

[[ 0  1  2  3  4  5  6]
 [ 7  8  9 10 11 12 13]
 [-3 15 16 17 18 19 20]
 [21 22 23 24 25 26 27]
 [28 29 30 31 32 33 34]
 [35 36 37 38 39 40 41]]


## Boolean arrays
As with all operations, logical operations are also vectorised

In [72]:
data = np.random.normal(0,1., 25).reshape(5,5)
print(data)

[[-0.92845247  0.02885038 -0.879231   -1.32864601  0.56638329]
 [-0.69086505  0.02274382 -0.39059623  1.11422538 -1.08223168]
 [-0.2074057  -0.67003773 -0.71377656 -0.25095377 -1.65647244]
 [-0.8732477  -0.22917091  0.60234993 -1.42812683  0.26149021]
 [-0.01534348 -0.60069221  0.30501907  1.31019941  0.53475529]]


check if the elements satisfy a condition

In [73]:
print(data > 0)

[[False  True False False  True]
 [False  True False  True False]
 [False False False False False]
 [False False  True False  True]
 [False False  True  True  True]]


print only positive values

In [74]:
posdata = data[data>0]
print( posdata )

[0.02885038 0.56638329 0.02274382 1.11422538 0.60234993 0.26149021
 0.30501907 1.31019941 0.53475529]


however note that now it does not have the original shape

In [75]:
print( type(posdata))
print( "shape: ", np.shape(posdata) )

<class 'numpy.ndarray'>
shape:  (9,)


In [76]:
data = np.random.normal(0,1., 10)
print(data)
data > 0.

[-0.16249495 -1.36255382  0.5524905  -0.66583092  0.88481683  1.99566734
 -1.79186696 -0.42181492  2.36941191  0.50606759]


array([False, False,  True, False,  True,  True, False, False,  True,
        True])

Since booleans are converted automatically to 0 and 1, you can easily count them by using the `sum` function

In [77]:
(data >0).sum()

5

## slicing with booleans

Logical (boolean) arrays can be used to slice and index an array!

In this case we want the array with just the positive cells

In [78]:
pos_vals = data[ data> 0. ]
print(type(pos_vals), pos_vals.shape)

<class 'numpy.ndarray'> (5,)


In [79]:
pos_vals[0]

0.5524905027862763

In [80]:
pos_vals[0] = -1

In [81]:
print (pos_vals)

[-1.          0.88481683  1.99566734  2.36941191  0.50606759]


In [82]:
print(data)

[-0.16249495 -1.36255382  0.5524905  -0.66583092  0.88481683  1.99566734
 -1.79186696 -0.42181492  2.36941191  0.50606759]


Note how indexing with a boolean array creates a new array (a copy) and not a reference to the original array

## Example: computing tails of a  Gaussian 

In [86]:
mu =1.0
sig = 0.2
nsig = 3
nvals = 10000000
data = np.random.normal(mu,sig, nvals)
print( (abs(data-mu)>nsig*sig).sum()   )
tail = data[ abs(data-mu)>nsig*sig  ]
print(len(tail))
print("fraction of points beyond %.1f sigma: %.1f"%(nsig,100*len(tail)/nvals),"%")
print("fraction of points within %.1f sigma: %.1f"%(nsig,100*(nvals-len(tail))/nvals),"%")

26785
26785
fraction of points beyond 3.0 sigma: 0.3 %
fraction of points within 3.0 sigma: 99.7 %


## Using NumPy ndarray instead of Lists
We now solve the same problem of the projectile, but this time using a 2D array so that just one comprehension computes both x(t) and y(t).

When plotting you have to use slicing to specify that the 1st column are the x values and the 2nd column are the y values.

In [87]:
%matplotlib notebook
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import numpy as np
import time


# initial conditions
g = 9.8
h = 10.
theta = (30./180.)*np.pi
v0 = 30.
dt=0.01

#compute velocity components
v0x = v0*np.cos(theta)
v0y = v0*np.sin(theta)
print("v0_x: %.1f m/s \t v0_y: %.1f m/s"%(v0x,v0y))

x0 = 0
y0 = h

def x(t):
    """Return the horizontal position at time t.

    Uniform motion: starts at x0 and moves with the constant
    horizontal velocity component v0x (both defined above in this cell).
    """
    horizontal_shift = v0x*t
    return x0+horizontal_shift

def y(t):
    """Return the height at time t.

    Starts at y0 with vertical velocity component v0y and is
    decelerated by g (all defined above in this cell).
    """
    rise = v0y*t
    drop = 0.5*g*t*t
    return y0+rise-drop


dt = 0.01
# generate list of times for sampling
times = np.arange(0., 1000., dt)

#print first 10 elements
print(times[:10])

# use 2D array to do one comprehension
pos = np.array([ [x(t),y(t)] for t in times if y(t)>=0. ])
print("shape of pos array: ",pos.shape)
# create a figure object
fig = plt.figure()

# add subplot (just 1) and set x and y limits based on data
# ax is the object containing objects to be plotted
# pos[:,0] is the column of x values, pos[:,1] the column of y values
ax = fig.add_subplot(111, autoscale_on=False, xlim=(-0.1, pos[:,0].max()*1.2), ylim=(-0.1, pos[:,1].max()*1.2) )
ax.grid()
ax.set_xlabel('x(t) [m]')
ax.set_ylabel("y(t) [m]")
# theta is stored in radians: convert it to degrees because the title shows the degree symbol
# backslashes of the TeX commands are doubled so that they are not read as string escapes
plt.title("trajectory of a projectile with $v_0$: %.1f m/s\t $\\Theta_0$: %.1f$^\\circ$"%(v0,np.degrees(theta)))

# plot slices for ndarray
line = ax.plot(pos[:,0], pos[:,1],  lw=2, color='red')
plt.show()


xi = list(pos[:,0])
yi = list(pos[:,1])
print("max height: %.2f at x = %.2f"%(max(yi),xi[yi.index(max(yi))]))


v0_x: 26.0 m/s 	 v0_y: 15.0 m/s
[0.   0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09]
shape of pos array:  (363, 2)


<IPython.core.display.Javascript object>

max height: 21.48 at x = 39.75


## Using function with multiple return value
we now get rid of x(t) and y(t) and replace it with just one function pos(t) returning 2 values

We use ndarray everywhere instead of the list type. However, note that
- to print the position of the maximum, using slices can cause some headache and confusion for whoever reads the code
  - you can create lists xi and yi to make the code more readable
- a slice does not have the same methods as a list. For example, you cannot call `index()` on a slice, so we create a list on the fly from the column of y values: `list(column).index(max(column))`, where `column` is the slice holding the y values

In [88]:
%matplotlib notebook
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import numpy as np
import time


# initial conditions
g = 9.8
h = 10.
theta = (30./180.)*np.pi
v0 = 30.
dt=0.01

#compute velocity components
v0x = v0*np.cos(theta)
v0y = v0*np.sin(theta)
print("v0_x: %.1f m/s \t v0_y: %.1f m/s"%(v0x,v0y))

x0 = 0
y0 = h

def pos(t):
    """Return the projectile position at time t as a tuple (x, y).

    Uses the initial conditions x0, y0, v0x, v0y and g defined above in this cell.
    """
    return x0+v0x*t, y0+v0y*t-0.5*g*t*t


# generate list of times for sampling
times = np.arange(0., 1000., dt)

#print first 10 elements
print(times[:10])


# use 2D array to do one comprehension
# - the array is called traj (not pos) so that the function pos() is not overwritten
#   and this cell can be executed again
# - the generator evaluates pos(t) only once per time; only points with y >= 0 are kept
points = (pos(t) for t in times)
traj = np.array([ p for p in points if p[1]>=0. ])

print("shape of pos: ",traj.shape)

# you can create list for xi and yi
#xi = list(traj[:,0])
#yi = list(traj[:,1])
#print("max height: %.2f at x = %.2f"%(max(yi),xi[yi.index(max(yi))]))

# or you can simply use the slicing again. In this case it can be a bit confusing if not familiar
# also note that index() is a method for a list not for slices.
heights = list(traj[:,1])
print("max height: %.2f at x = %.2f"%(max(heights),traj[ heights.index(max(heights)),0 ] ) )



# create a figure object
fig = plt.figure()

# add subplot (just 1) and set x and y limits based on data
# ax is the object containing objects to be plotted
ax = fig.add_subplot(111, autoscale_on=False, xlim=(-0.1, traj[:,0].max()*1.2), ylim=(-0.1, traj[:,1].max()*1.2) )
ax.grid()
ax.set_xlabel('x(t) [m]')
ax.set_ylabel("y(t) [m]")
# theta is stored in radians: convert it to degrees because the title shows the degree symbol
# backslashes of the TeX commands are doubled so that they are not read as string escapes
plt.title("trajectory of a projectile with $v_0$: %.1f m/s\t $\\Theta_0$: %.1f$^\\circ$"%(v0,np.degrees(theta)))

# plot slices for ndarray
line = ax.plot(traj[:,0], traj[:,1],  lw=2, color='red')

plt.show()

v0_x: 26.0 m/s 	 v0_y: 15.0 m/s
[0.   0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09]
shape of pos:  (363, 2)
max height: 21.48 at x = 39.75


<IPython.core.display.Javascript object>

# Random walks with ndarrays

In this example we use ndarray to solve the classical problem of random walk

The typical C like solution is

In [89]:
%matplotlib notebook
import numpy as np
import matplotlib.pyplot as plt

nstep = 100

# the walk starts at the origin; position stores the location after every step
x = 0
position = [x]


for _ in range(nstep):
    # fair coin: step left (-1) or right (+1) with equal probability
    step = -1 if np.random.uniform(0.,1.) < 0.5 else 1
    x += step
    position.append(x)


# position holds nstep+1 values (the start plus one value per step): plot all of them,
# otherwise the location after the last step is missing from the figure
plt.plot(position)
plt.grid()
plt.xlabel('step')
plt.ylabel('position')


<IPython.core.display.Javascript object>

Text(0, 0.5, 'position')

## Random walk with arrays
We now note that the position of the walk is the cumulative sum of the random steps. We can use this to solve the problem using arrays only

In [90]:

import numpy as np
import matplotlib.pyplot as plt

nsteps = 10000

 - first we draw the random numbers for all `nsteps` at once, think of it as a coin

In [99]:
draws = np.random.randint(0,2, size=nsteps)
print(draws)
print ( (draws==1).sum()/nsteps )
print( (draws<1).sum()/nsteps )

[1 1 1 ... 1 1 1]
0.497
0.503


Based on the drawn coins, we decide the steps, if positive or negative

In [100]:
steps = np.where(draws>0, 1, -1)
print(steps)

[1 1 1 ... 1 1 1]


- then we compute the cumulative sum of the random numbers

In [101]:
walk = steps.cumsum()
print(walk)
print(steps.sum())

[  1   2   3 ... -62 -61 -60]
-60


The cumulative sum of an array gives, at each position, the sum of all values up to and including that position.

In [102]:
%matplotlib notebook
plt.plot(walk)
plt.grid()
plt.xlabel('step')
plt.ylabel('position')



<IPython.core.display.Javascript object>

Text(0, 0.5, 'position')


finding information about the walk and doing analysis is now trivial.

The maximum position is

In [103]:
print(walk.max())
print(walk.argmax())
print(len(walk))

18
285
10000


`argmax()` provides the first index where the `max()` value has occurred. Similarly for the minimum

In [104]:
print(walk.min())
print(walk.argmin())
print(len(walk))

-126
6323
10000


But we want to find the location (positive or negative) with the largest distance. This can be easily done with vector operations

In [105]:
print(np.abs(walk))
print(np.abs(walk).max())

[ 1  2  3 ... 62 61 60]
126


In [106]:
print(np.abs(walk).argmax())

6323


We can also easily find the time at which we cross a certain position. For example we want to find when the position is back to origin. This is done using the boolean arrays

In [107]:
np.abs(walk) == 0

array([False, False, False, ..., False, False, False])

In [108]:
(np.abs(walk)==0).max()

True

In a boolean array `max()` corresponds to the `True` value. So using `argmax()` we can find the first time the particle is back at the origin (if the walk never returns, `argmax()` gives 0, so check with `any()` first)

In [109]:
(np.abs(walk)==0).argmax()

11

## Simulating many random walks at once

We now want to simulate `nexp` random walks and study statistics about number of crossings, maximum distance, etc. Instead of using nested loops, we can simply use a 2D array to keep track of the steps for `nexp` experiments.

In fact the only place we change the code, is the extraction of random numbers to decide direction of the random walk.

In [110]:
import numpy as np
import matplotlib.pyplot as plt

nsteps = 1000
nexp = 1000

draws = np.random.randint(0,2, size=(nexp,nsteps) )
print(draws.shape)

steps = np.where(draws>0, 1, -1)
print(steps.shape)


walks = steps.cumsum(1)
print(walks.shape)

(1000, 1000)
(1000, 1000)
(1000, 1000)


By calling `cumsum(1)` we take the cumulative sum along axis 1, i.e. along the columns of each row. Recall that each row represents an experiment, and the columns are the draws (the steps) of that experiment. So by cumulatively summing the steps along the columns, we obtain the random walk for each experiment.

In [111]:
walks

array([[ -1,  -2,  -3, ..., -18, -17, -16],
       [ -1,  -2,  -3, ..., -36, -35, -34],
       [ -1,  -2,  -3, ...,   2,   3,   2],
       ...,
       [ -1,   0,  -1, ..., -26, -25, -24],
       [  1,   2,   3, ...,  30,  29,  28],
       [  1,   2,   3, ...,  22,  23,  22]])

The maximum distance ever reached in all experiments is 

In [112]:
np.abs(walks).max()

104

We can also find out the maximum distance for each experiment. This is done by using the [`numpy.amax`](https://docs.scipy.org/doc/numpy-1.15.0/reference/generated/numpy.amax.html) function. We want to find the maximum along the columns (axis = 1) for each experiment (row)

In [114]:
max_experiment = np.amax( np.abs(walks),1   )
print(max_experiment.shape)
print(max_experiment)

(1000,)
[ 46  67  37  31  43  56  59  62  65  32  59  63  69  21  69  50  32  37
  16  22  83  30  22  34  36  31  24  26  27  33  44  42  63  55  20  33
  36  37  73  26  32  45  36  31  69  27  21  59  58  47  12  37  33  40
  19  35  56  55  31  72  62  30  18  39  25  41  27  29  25  42  27  80
  74  34  56  28  67  92  46  32  35  23  31  51  25  63  50  39  22  29
  70  54  35  28  38  38  36  52  33  33  60  48  43  50  31  27  37  62
  57  38  28  38  36  54  35  53  24  32  25  49  31  22  34  29  33  47
  55  26  49  48  36  38  28  18  52  20  30  32  23  36  41  31  33  43
  36  46  26  36  47  62  37  35  52  44  32  40  26  58  29  33  49  47
  22  34  41  33  27  68  26  37  25  60  28  59  39  70  37  50  38  39
  34  47  65  25  22  56  44  37  42  34  66  74  44  53  82  34  39  54
  37  50  32  28  43  86  55  23  30  34  29  46  80  43  58  31  26  47
  42  41  30  24  18  20  25  62  57  56  80  23  48  17  20  53  46  43
  30  29  17  35  27  26  24  42  39  36  2

For sanity check we see that the maximum distance across experiments is

In [120]:
print("max ever: ",max_experiment.max()," in experiment : ", max_experiment.argmax())

max ever:  104  in experiment :  410


which occurred in this experiment

In [121]:
max_experiment.argmax()

410

Similarly the smallest maximum distance ever reached is

In [122]:
max_experiment.min()

12

Finally we make a histogram of the maximum distance ever reached. We use
- `numpy.amax` function to compute the max for each experiment
- `set` to create unique list of distances reached
- `list` and `count` to compute frequency for each max distance
- a dictionary to store the frequency for each max distance


In [123]:
# frequency of each maximum distance: {distance: number of experiments}
# the array is converted to a list only once instead of once per distinct distance
max_list = list(max_experiment)
max_dict = { i:max_list.count(i)   for i in set(max_list)  }
    
%matplotlib notebook
import matplotlib.pyplot as plt
plt.bar( list(max_dict.keys()), list(max_dict.values()), color='blue' ) 
plt.grid()
plt.xlabel('maximum distance')
plt.ylabel('experiments')

<IPython.core.display.Javascript object>

Text(0, 0.5, 'experiments')

### Computing crossing of a position

With boolean arrays we can easily check if the walk ever goes back to the origin or any given position

In [124]:
hits_home = (np.abs(walks)==0).any(1)


Similarly we can check in how many experiments a certain distance has been reached

In [125]:
hits_x = (np.abs(walks)==30).any(1)
print(hits_x.sum())

681


Plot a given experiment

In [127]:
%matplotlib notebook
plt.plot(walks[np.random.randint(0,nexp),:])
plt.grid()
plt.xlabel('step')
plt.ylabel('position')

<IPython.core.display.Javascript object>

Text(0, 0.5, 'position')

Plot the experiment with the smallest maximum distance

In [128]:
%matplotlib notebook
plt.plot(walks[max_experiment.argmin(),:])
plt.grid()
plt.xlabel('step')
plt.ylabel('position')

<IPython.core.display.Javascript object>

Text(0, 0.5, 'position')

Plot the experiment with the largest maximum distance

In [129]:
%matplotlib notebook
plt.plot(walks[max_experiment.argmax(),:])
plt.grid()
plt.xlabel('step')
plt.ylabel('position')

<IPython.core.display.Javascript object>

Text(0, 0.5, 'position')

### Exercise
- use animation to show the experiments in sequence
- simulate the random walk in 2D and 3D and compute the fraction of experiments going back to the origin or reaching an arbitrary distance from the origin

# I/O with files 

Today we cover some practical aspects of Python that make it a great language for scripting. While the same is possible with bash and other shells, the OOP aspect of Python and its high level semantics make it very easy for a beginner to write their first scripts.

- Input/Output with files
  - with statement
  - parsing lines
  - lines as lists
  - splitting lines into lists
  
- json format for storage
  - example of json usage
  - storing python objects and reading them back
  - storing custom python objects
   [custom serialize](https://realpython.com/python-json/)
  
- functions with variable number of arguments
  - example of printf


# File handling

Basic I/O with files is almost identical to C, at first.

- You need to open a file object on disk before writing information into it
- Opening a file can fail
  - location does not exist
  - no write privilege for the location

By default a file is opened in **read** mode

In [1]:
fname = '/tmp/data.txt'
f = open(fname)

You must specify the **write** mode to store data in a file in output

In [134]:
fname = '/tmp/data.txt'

f = open(fname,mode='w')

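Opening a file for writing fails with a `PermissionError` if the user does not have write permission for the target location (for example the root directory `/`).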

Possible modes are:
```
========= ===============================================================
Character Meaning
--------- ---------------------------------------------------------------
'r'       open for reading (default)
'w'       open for writing, truncating the file first
'x'       create a new file and open it for writing
'a'       open for writing, appending to the end of the file if it exists
'b'       binary mode
't'       text mode (default)
'+'       open a disk file for updating (reading and writing)
========= ===============================================================
```
So by default a text file is opened in read mode.

We now store some values in a file to simulate data.

It is important to close the file to make sure all data are flushed from memory to disk and the file handle closed properly

In [137]:
import os 

fname = 'data.txt'
# 'w' is passed positionally as the mode: open for writing, truncating first
f = open(fname,'w')

f.write('first file in python\n')

# close explicitly so buffered data is flushed to disk and the handle released
f.close()

# list the current directory; the new file should appear in it
os.listdir()

['lec20.ipynb', '.ipynb_checkpoints', 'data.txt']

## getting rid of `close()`

To make it less C-like and more Pythonic, we can get rid of the explicit `close()` by using the `with` statement

In [140]:
fname = 'data2.txt'
# the file is closed automatically when the indented with-block ends
with open(fname,'w') as ofile:
  ofile.write('first file in python\n')

# list the current directory; the new file should appear in it
os.listdir()

['lec20.ipynb', '.ipynb_checkpoints', 'data2.txt', 'data.txt']

`with` makes sure that `ofile` is an open file handle inside the `with` block. Once the block ends you can no longer use the handle, because `close()` has been called automatically

## Storing lists and multiple values

You can use the C-style output to format and store elements of a list

In [141]:
import random

# number of simulated events; each event becomes one line of the file
nevents = 3

fname = 'data1.txt'
with open(fname,'w') as f:
    for i in range(nevents):
        # ten uniform random numbers stand in for the data of one event
        measurements = [ random.random() for j in range(10) ]
        for val in measurements:
            # printf-style formatting: 3 decimals, every value followed by a tab
            f.write("%.3f\t"%val)
        # terminate the row of this event
        f.write('\n')


A more Pythonic style is to use the `writelines()` method together with a generator expression

In [142]:
import random

# number of simulated events; each event becomes one line of the file
nevents = 3

fname = 'data2.txt'
with open(fname,'w') as f:
    for i in range(nevents):
        measurements = [ random.random() for j in range(10) ]
        # writelines() accepts any iterable of strings; the generator
        # expression formats the values one by one
        f.writelines("%.3f\t"%val for val in measurements)
        # no newline is written by the call above, so end the row explicitly
        f.write('\n')

which can be further reduced

In [143]:
import random

# number of simulated events; each event becomes one line of the file
nevents = 3

fname = 'data2.txt'
with open(fname,'w') as f:
    for i in range(nevents):
        # generate and format the ten values of one event in a single statement
        f.writelines("%.3f\t"%val for val in [ random.random() for j in range(10) ] )
        # terminate the row of this event
        f.write('\n')


## Input from file

A file can be read into a single string and then split into lines and columns. 

In [147]:
fname = 'data2.txt'

# Read the whole file into one string. The with-block closes the handle
# as soon as the read is done (the original left the file open).
with open(fname) as f:
    file = f.read()
print(file)

# split() without arguments splits on any run of whitespace, so both the
# tabs between values and the newlines between rows act as separators
vals = file.split()
print(vals)
print(type(vals))

0.412	0.772	0.112	0.171	0.405	0.243	0.861	0.022	0.780	0.437	
0.354	0.520	0.522	0.084	0.210	0.814	0.211	0.246	0.088	0.971	
0.820	0.370	0.696	0.526	0.085	0.971	0.485	0.449	0.141	0.415	

['0.412', '0.772', '0.112', '0.171', '0.405', '0.243', '0.861', '0.022', '0.780', '0.437', '0.354', '0.520', '0.522', '0.084', '0.210', '0.814', '0.211', '0.246', '0.088', '0.971', '0.820', '0.370', '0.696', '0.526', '0.085', '0.971', '0.485', '0.449', '0.141', '0.415']
<class 'list'>


You could also read the file as a list of lines, each line ending with a newline `\n`

In [148]:
fname = 'data2.txt'
# Iterating over a file object yields one line at a time (newline included).
# The with-block guarantees the file is closed once the list has been built.
with open(fname) as infile:
    lines = [line for line in infile]
print(lines)

['0.412\t0.772\t0.112\t0.171\t0.405\t0.243\t0.861\t0.022\t0.780\t0.437\t\n', '0.354\t0.520\t0.522\t0.084\t0.210\t0.814\t0.211\t0.246\t0.088\t0.971\t\n', '0.820\t0.370\t0.696\t0.526\t0.085\t0.971\t0.485\t0.449\t0.141\t0.415\t\n']


However, note that the trailing `\t` and `\n` are part of each string being read in! Fixing this is easy

In [149]:
fname = 'data2.txt'
# strip() removes leading and trailing whitespace, i.e. the final tab and
# newline of every row. The with-block closes the file after reading.
with open(fname) as infile:
    lines = [line.strip() for line in infile]
print(lines)

['0.412\t0.772\t0.112\t0.171\t0.405\t0.243\t0.861\t0.022\t0.780\t0.437', '0.354\t0.520\t0.522\t0.084\t0.210\t0.814\t0.211\t0.246\t0.088\t0.971', '0.820\t0.370\t0.696\t0.526\t0.085\t0.971\t0.485\t0.449\t0.141\t0.415']


This has removed the trailing `\t` and `\n` from each line. We now split each line using `\t` as the separator

In [150]:
fname = 'data2.txt'
# Read and clean the rows; the with-block closes the file after reading.
with open(fname) as infile:
    lines = [line.strip() for line in infile]
# One list of column strings per row
data = [ line.split('\t') for line in lines ]
print(data)
# Slicing works on rows: everything from the third row onwards
print(data[2:])

[['0.412', '0.772', '0.112', '0.171', '0.405', '0.243', '0.861', '0.022', '0.780', '0.437'], ['0.354', '0.520', '0.522', '0.084', '0.210', '0.814', '0.211', '0.246', '0.088', '0.971'], ['0.820', '0.370', '0.696', '0.526', '0.085', '0.971', '0.485', '0.449', '0.141', '0.415']]
[['0.820', '0.370', '0.696', '0.526', '0.085', '0.971', '0.485', '0.449', '0.141', '0.415']]


even more concisely

In [151]:
fname = 'data2.txt'
# Strip and split every row in a single comprehension; the with-block
# closes the file as soon as the data has been read.
with open(fname) as infile:
    data = [ line.strip().split('\t') for line in infile ]
print(data)

[['0.412', '0.772', '0.112', '0.171', '0.405', '0.243', '0.861', '0.022', '0.780', '0.437'], ['0.354', '0.520', '0.522', '0.084', '0.210', '0.814', '0.211', '0.246', '0.088', '0.971'], ['0.820', '0.370', '0.696', '0.526', '0.085', '0.971', '0.485', '0.449', '0.141', '0.415']]


### exercise
- change the separator and use `,` or `:` to store and read back data files

## Storing Lists, Dicts, and Tuples

As you have seen with the example above, there is no automatic writing of objects. So for a dictionary you need to take care of formatting the output file. 

In [152]:
import random

# Record whose keys name the columns; its values are overwritten for every row
datum = {'val': -1.1, 'err': 0.2}

fname = 'data4.txt'

with open(fname, 'w') as f:
    # Header row: every key followed by a tab
    f.write(''.join('%s\t' % key for key in datum) + '\n')
    for i in range(10):
        # Simulate one measurement and its error
        datum['val'] = random.uniform(-3., 3.)
        datum['err'] = random.normalvariate(0., 0.2)
        # Data row: every value with 3 decimals, followed by a tab
        f.write(''.join('%.3f\t' % value for value in datum.values()) + '\n')


### Exercise
- use a dictionary to store data for 3 keys of different type
- store 100 dictionary instances in file
- read back and populate dictionary objects from file

## Storing NumPy objects

NumPy provides built-in functions to easily store and read ndarrays in binary and text format  without iterating over each element

In [153]:
import numpy as np
import os

# 100 x 10 array of normally distributed random numbers
matrix = np.random.randn(100,10)

fname = 'npdata1'
# save() writes the binary .npy file, savetxt() a plain-text version
np.save(fname+'.npy', matrix)
np.savetxt(fname+'.txt', matrix)


Reading the file is also simple with `load()`

In [154]:
# read the array back from the binary .npy file
vals = np.load(fname+'.npy')

print(vals.shape)
# the slice keeps two dimensions: the first row is printed as a 1 x 10 array
print(vals[:1,])

(100, 10)
[[-2.03735386  1.26985234  1.11511253 -0.16664728  0.7090075   0.40143328
   0.33311204 -0.54883975 -0.23908751  0.14522125]]


The [Python Data Analysis Library (pandas)](http://pandas.pydata.org) provides even more efficient tools and data formats to handle data for analysis and their storage to file.

## Data storage with pickle and JSON 

With NumPy we saw the first example of using the binary format to easily store an array.

Previously we had only saved data in text files by iterating over elements of lists and dictionaries.

Python provides a built-in [pickle](https://docs.python.org/3/library/pickle.html) library for easy storage of lists and other built-in python objects in binary format. 

In [155]:
import random
import pickle
import os

datum = {'val':-1.1, 'err':0.2}

fname = 'pickle1.data'
# pickle writes bytes, hence the binary write mode 'wb'
with open(fname,'wb') as f:
    pickle.dump(datum,f)

# list the current directory; the new file should appear in it
os.listdir()

['npdata1.npy',
 'pickle1.data',
 'lec20.ipynb',
 '.ipynb_checkpoints',
 'data1.txt',
 'data2.txt',
 'npdata1.txt',
 'data.txt',
 'data4.txt']

Reading back is also easy

In [156]:

fname = 'pickle1.data'
# binary read mode 'rb', matching the 'wb' mode used when the file was written
with open(fname,'rb') as f:
    # NOTE: only unpickle files you trust; loading a pickle can run arbitrary code
    indata = pickle.load(f)

print(indata)


{'val': -1.1, 'err': 0.2}


## JSON 

However, a commonly used format for data storage that is cross-platform and cross-language is [JSON (JavaScript Object Notation)](https://www.json.org).

The JSON library in python allows you to convert python objects (including your custom classes) into JSON for storage.

Converting or encoding an object into JSON is commonly called **serialization**. Converting from JSON to python objects is referred to as **deserialization**. For more details and an introduction see this nice webpage on [working with JSON](https://realpython.com/python-json/). 

Here is an example of dictionary and list stored in JSON files.

There are two functions commonly used
- `dump()`: convert an object into JSON and write it to an open file
- `dumps()` (note the extra **s**): convert an object into a JSON string, without any file interaction

The two functions are identical except for the file interaction.

In [157]:
import json
import os

# Two sample objects: a small record and a list of integers
datum = {'val': -1.1, 'err': 0.2}
data = list(range(10))

# dumps() returns the JSON representation as a string
x = json.dumps(datum)
y = json.dumps(data)

print(x)
print(y)

# dump() writes the JSON representation directly to an open file;
# both objects are stored together as one two-element list
with open('data.json', 'w') as of:
    json.dump([datum, data], of)

os.listdir()

{"val": -1.1, "err": 0.2}
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


['npdata1.npy',
 'pickle1.data',
 'data.json',
 'lec20.ipynb',
 '.ipynb_checkpoints',
 'data1.txt',
 'data2.txt',
 'npdata1.txt',
 'data.txt',
 'data4.txt']

Now we read back or deserialize the data from file

In [158]:
# Deserialize the two-element list that was stored in data.json
with open('data.json') as infile:
    indata = json.load(infile)
print(indata)

# First element is the dictionary, second the list
datum, data = indata[0], indata[1]
print(datum, data)

[{'val': -1.1, 'err': 0.2}, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]]
{'val': -1.1, 'err': 0.2} [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


# Functions with arbitrary number of arguments

As you have seen, the `print()` function can take a variable number of arguments. The same behaviour can easily be defined for any custom function, for both positional and keyword arguments

## positional arguments

Additional positional arguments are collected by the special `*arg` parameter, which the function receives as a tuple

In [160]:
def myfunc(a, *arg):
    """Print the mandatory argument and any extra positional arguments.

    Parameters
    ----------
    a : object
        The required first positional argument.
    *arg : object
        Any further positional arguments; inside the function they are
        available as the tuple ``arg``.
    """
    print("positional arguments: %s %s"%(a,arg))
    if arg:
        for x in arg:
            # Wrap x in a 1-tuple: with a bare `% x`, an argument that is
            # itself a tuple would be unpacked by the % operator and fail.
            print('[%s]\t'%(x,))
        print('\n')

myfunc(1.1)
myfunc('ciao')
myfunc(-0.2, 0.3, 'ciao')
myfunc(-0.2, 0.3, 'ciao', 'hello', -2, 100)

positional arguments: 1.1 ()
positional arguments: ciao ()
positional arguments: -0.2 (0.3, 'ciao')
[0.3]	
[ciao]	


positional arguments: -0.2 (0.3, 'ciao', 'hello', -2, 100)
[0.3]	
[ciao]	
[hello]	
[-2]	
[100]	




## keyword arguments

For optional keyword arguments the `**karg` feature is used

In [161]:
def myf2(a,mu=0.0, sig=0.1, **karg):
    """Print the arguments and the names of any extra keyword arguments.

    Parameters
    ----------
    a : object
        The required positional argument.
    mu, sig : object, optional
        Named arguments with default values 0.0 and 0.1.
    **karg : object
        Any further keyword arguments; inside the function they are
        available as the dictionary ``karg``.
    """
    # (a,) instead of (a): a bare `% a` fails when a is itself a tuple
    print("a: %s"%(a,))
    print("keyword arguments: %s %s %s"%(mu,sig,karg))
    if karg:
        # iterating over a dictionary yields its keys
        for x in karg:
            print('[%s]\t'%(x,))

        print('\n')
myf2(0.1)
myf2(0.3, sig=0.5)
myf2(0.3, color='red')
myf2(0.3, color='red', mu=0.6)

a: 0.1
keyword arguments: 0.0 0.1 {}
a: 0.3
keyword arguments: 0.0 0.5 {}
a: 0.3
keyword arguments: 0.0 0.1 {'color': 'red'}
[color]	


a: 0.3
keyword arguments: 0.6 0.1 {'color': 'red'}
[color]	




The additional keyword arguments are stored as a dictionary.

In [None]:
def myf3(a,mu=0.0, sig=0.1, **karg):
    """Print the arguments and every extra keyword argument with its value.

    Parameters
    ----------
    a : object
        The required positional argument.
    mu, sig : object, optional
        Named arguments with default values 0.0 and 0.1.
    **karg : object
        Any further keyword arguments; inside the function they are
        available as the dictionary ``karg``.
    """
    # (a,) instead of (a): a bare `% a` fails when a is itself a tuple
    print("a: %s"%(a,))
    print("keyword arguments: %s %s %s"%(mu,sig,karg))
    if karg:
        # items() gives name and value together, avoiding a second lookup
        for key, value in karg.items():
            print('[%s = %s]\t'%(key, value))
        print('\n')
myf3(0.1)
myf3(0.3, color='red', mu=0.6)

You can also combine both positional and keyword arguments for the most generic function

In [162]:
def myf4(a,*arg, mu=0.0, sig=0.1, **karg):
    """Print positional and keyword arguments of a fully generic function.

    Parameters
    ----------
    a : object
        The required first positional argument.
    *arg : object
        Any further positional arguments, available as the tuple ``arg``.
    mu, sig : object, optional
        Arguments with default values 0.0 and 0.1. Because they follow
        ``*arg`` they can only be passed by keyword.
    **karg : object
        Any further keyword arguments, available as the dictionary ``karg``.
    """
    print("function called")
    print("positional a: %s %s"%(a,arg))
    if arg:
        for x in arg:
            # Wrap x in a 1-tuple: with a bare `% x`, an argument that is
            # itself a tuple would be unpacked by the % operator and fail.
            print('[%s]\t'%(x,))
        print('\n')
    print("keyword: %s %s %s"%(mu,sig,karg))
    if karg:
        # items() gives name and value together, avoiding a second lookup
        for key, value in karg.items():
            print('[%s = %s]\t'%(key, value))
        print('\n')
    print('\n')
myf4(-0.1)
myf4(-0.1,10.1)
myf4(-0.1,mu=10.1)

function called
positional a: -0.1 ()
keyword: 0.0 0.1 {}


function called
positional a: -0.1 (10.1,)
[10.1]	


keyword: 0.0 0.1 {}


function called
positional a: -0.1 ()
keyword: 10.1 0.1 {}




In [163]:
# the extra positionals end up in arg, the unknown keywords in karg, and mu overrides its default
myf4(0.3,'x','y', 0.9, color='red', mu=0.6, thick=1.1, fill='true')

function called
positional a: 0.3 ('x', 'y', 0.9)
[x]	
[y]	
[0.9]	


keyword: 0.6 0.1 {'color': 'red', 'thick': 1.1, 'fill': 'true'}
[color = red]	
[thick = 1.1]	
[fill = true]	






## Command line arguments for python programs

The sys module gives easy access to command line arguments as a list. An example is in [app1.py](examples/app1.py)

In [None]:
# %load examples/app1.py
import sys, os

# __file__ holds the path of the script being run
print("Running "+__file__)

# basename() drops the directory part and keeps only the file name
print("Running "+os.path.basename(__file__))


# sys.argv[0] is the script itself, so it is included in this count
print("program called with %d arguments"%len(sys.argv))

# print every command-line argument on its own line
for a in sys.argv:
    print(a)
