<img src="http://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

# Python for Finance (2nd ed.)

**Mastering Data-Driven Finance**

&copy; Dr. Yves J. Hilpisch | The Python Quants GmbH

<img src="http://hilpisch.com/images/py4fi_2nd_shadow.png" width="300px" align="left">

# Numerical Computing with NumPy

## Python Lists

In [77]:
v = [0.5, 0.75, 1.0, 1.5, 2.0]  

In [78]:
m = [v, v, v]  
m  

[[0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0]]

In [79]:
m[1]

[0.5, 0.75, 1.0, 1.5, 2.0]

In [80]:
m[1][0]

0.5

In [81]:
v1 = [0.5, 1.5]
v2 = [1, 2]
m = [v1, v2]
c = [m, m]  
c

[[[0.5, 1.5], [1, 2]], [[0.5, 1.5], [1, 2]]]

In [82]:
c[1][1][0]

1

In [83]:
v = [0.5, 0.75, 1.0, 1.5, 2.0]
m = [v, v, v]
m

[[0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0]]

In [98]:
v = [0.5, 0.75, 1.0, 1.5, 2.0]

vv =  [v]*3
vv

[[0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0]]

In [99]:
v[0] = 'Python'
vv

[['Python', 0.75, 1.0, 1.5, 2.0],
 ['Python', 0.75, 1.0, 1.5, 2.0],
 ['Python', 0.75, 1.0, 1.5, 2.0]]

In [100]:
from copy import deepcopy
v = [0.5, 0.75, 1.0, 1.5, 2.0]
print(deepcopy(v))
# Give every row its own deep copy of v. Writing `3 * [deepcopy(v)]` would
# repeat a reference to ONE copy three times: the rows would be independent
# of v but still alias each other.
m = [deepcopy(v) for _ in range(3)]
m



[0.5, 0.75, 1.0, 1.5, 2.0]


[[0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0]]

In [101]:
v[0] = 'Python'  
m  

[[0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0]]

### Python Array Class

In [102]:
v = [0.5, 0.75, 1.0, 1.5, 2.0]

In [103]:
import array

In [104]:
a = array.array('f', v)  
a

array('f', [0.5, 0.75, 1.0, 1.5, 2.0])

In [105]:
a.append(0.5)  
a

array('f', [0.5, 0.75, 1.0, 1.5, 2.0, 0.5])

In [106]:
a.extend([5.0, 6.75])  
a

array('f', [0.5, 0.75, 1.0, 1.5, 2.0, 0.5, 5.0, 6.75])

In [107]:
2 * a  

array('f', [0.5, 0.75, 1.0, 1.5, 2.0, 0.5, 5.0, 6.75, 0.5, 0.75, 1.0, 1.5, 2.0, 0.5, 5.0, 6.75])

In [108]:
# causes intentional error
# a.append('string')  

In [109]:
a.tolist()  

[0.5, 0.75, 1.0, 1.5, 2.0, 0.5, 5.0, 6.75]

In [19]:
f = open('array.apy', 'wb')  
a.tofile(f)  
f.close()  

In [20]:
with open('array.apy', 'wb') as f:  
    a.tofile(f)  

In [21]:
!ls -n arr*  

-rw-r--r--  1 501  20  32 Jul 27 10:48 array.apy


In [22]:
b = array.array('f')  

In [23]:
with open('array.apy', 'rb') as f:  
    b.fromfile(f, 5)  

In [24]:
b  

array('f', [0.5, 0.75, 1.0, 1.5, 2.0])

In [25]:
b = array.array('d')  

In [26]:
with open('array.apy', 'rb') as f:
    b.fromfile(f, 2)  

In [27]:
b  

array('d', [0.0004882813645963324, 0.12500002956949174])

## NumPy Arrays

### The Basics

In [110]:
import numpy as np  

In [114]:
# np.array() converts sequences (lists, tuples, nested sequences) into ndarrays.
a = np.array([0, 0.5, 1.0, 1.5, 2.0])

# A dict is NOT treated as a sequence: NumPy wraps the whole dict in a
# 0-dimensional array of dtype=object instead of converting its items.
# (The literal previously listed key 3 twice; only the last value, 'df',
# survives in a dict, so the duplicate entry has been dropped.)
b = np.array({3: 'df', 4: 65, 6: 4})
print(a, b)

[0.  0.5 1.  1.5 2. ] {3: 'df', 4: 65, 6: 4}


In [118]:
print(type(a), type(b))

<class 'numpy.ndarray'> <class 'numpy.ndarray'>


In [119]:
a = np.array(['a', 'b', 'c'])  
a

array(['a', 'b', 'c'], dtype='<U1')

In [123]:
a = np.arange(2, 20, 2)  
print(a)

#this is equivalent to
lst = []
for i in range(2,20,2):
    lst.append(i)

print(np.array(lst))

#this is equivalent to
lst2 = np.array([i for i in range(2, 20, 2)])
print(lst2)

[ 2  4  6  8 10 12 14 16 18]
[ 2  4  6  8 10 12 14 16 18]
[ 2  4  6  8 10 12 14 16 18]


In [125]:
# `np.float` was only an alias for the builtin `float`; it was deprecated in
# NumPy 1.20 and removed in 1.24. The builtin gives the same float64 dtype.
a = np.arange(8, dtype=float)

print(a)
np_array = np.arange(20, dtype=float)

print(np_array)

[0. 1. 2. 3. 4. 5. 6. 7.]
[ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15. 16. 17.
 18. 19.]


In [126]:
a[5:]  

array([5., 6., 7.])

In [127]:
a[:2]  

array([0., 1.])

In [128]:
a.sum()  

28.0

In [129]:
a.std()  

2.29128784747792

In [130]:
a.cumsum()  

array([ 0.,  1.,  3.,  6., 10., 15., 21., 28.])

In [131]:
l = [0., 0.5, 1.5, 3., 5.]
2 * l  

[0.0, 0.5, 1.5, 3.0, 5.0, 0.0, 0.5, 1.5, 3.0, 5.0]

In [132]:
a

array([0., 1., 2., 3., 4., 5., 6., 7.])

In [135]:
print(2 * a)  
np.concatenate([a]*3)

[ 0.  2.  4.  6.  8. 10. 12. 14.]


array([0., 1., 2., 3., 4., 5., 6., 7., 0., 1., 2., 3., 4., 5., 6., 7., 0.,
       1., 2., 3., 4., 5., 6., 7.])

In [136]:
a ** 2  

array([ 0.,  1.,  4.,  9., 16., 25., 36., 49.])

In [137]:
2 ** a  

array([  1.,   2.,   4.,   8.,  16.,  32.,  64., 128.])

In [138]:
a ** a  

array([1.00000e+00, 1.00000e+00, 4.00000e+00, 2.70000e+01, 2.56000e+02,
       3.12500e+03, 4.66560e+04, 8.23543e+05])

In [139]:
np.exp(a)  

array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03])

In [140]:
np.sqrt(a)  

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131])

In [141]:
%timeit xx = np.sqrt(2.5)  

995 ns ± 73.1 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [142]:
import math

%timeit xx = math.sqrt(2.5)  

160 ns ± 18.5 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [37]:
import math  

In [49]:
math.sqrt(2.5)  

1.5811388300841898

In [144]:
# causes intentional error
#math.sqrt(a)  

In [38]:
%timeit np.sqrt(2.5)  

1.27 µs ± 77.1 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [39]:
%timeit math.sqrt(2.5)  

238 ns ± 64.5 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


### Multiple Dimensions

In [145]:
b = np.array([a, a * 2])  
b

array([[ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.],
       [ 0.,  2.,  4.,  6.,  8., 10., 12., 14.]])

In [146]:
b[0]  

array([0., 1., 2., 3., 4., 5., 6., 7.])

In [147]:
b[0][2]

2.0

In [148]:
b[0, 2]  

2.0

In [149]:
b[:, 1]  

array([1., 2.])

In [151]:
x1 = b.sum()  
x2 = b[1].sum()
print(x1, x2)

84.0 56.0


In [152]:
b.sum(axis=0)  

array([ 0.,  3.,  6.,  9., 12., 15., 18., 21.])

In [153]:
b.sum(axis=1)  

array([28., 56.])

fill values to an array template

In [154]:
c = np.zeros((2, 3), dtype='i', order='C')  
c

array([[0, 0, 0],
       [0, 0, 0]], dtype=int32)

In [156]:
c = np.zeros((2, 3, 4))  
c

array([[[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]],

       [[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]]])

In [49]:
c = np.ones((2, 3, 4), dtype='i', order='C')  
c

array([[[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1]],

       [[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1]]], dtype=int32)

In [157]:
c = np.ones((2, 3, 4))
c

array([[[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]],

       [[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]]])

In [62]:
d = np.zeros_like(c, dtype='f16', order='C')  
d

array([[[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]],

       [[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]]], dtype=float128)

In [63]:
d = np.ones_like(c, dtype='f16', order='C')  
d

array([[[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]],

       [[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]]], dtype=float128)

In [64]:
e = np.empty((2, 3, 2))  
e

array([[[-1.28822975e-231, -1.28822975e-231],
        [ 3.45845952e-323,  0.00000000e+000],
        [ 0.00000000e+000,  0.00000000e+000]],

       [[-1.28822975e-231, -1.28822975e-231],
        [ 1.97626258e-323,  0.00000000e+000],
        [ 0.00000000e+000,  0.00000000e+000]]])

In [160]:
f = np.zeros_like(c)  
f[1][1]

array([0., 0., 0., 0.])

In [76]:
np.eye(5)  
np.eye(12)

array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])

In [200]:
g = np.linspace( [2, 244, 23],[4,3,222],20)  # generate 20 evenly spaced rows, starting at [2, 244, 23] and ending at [4, 3, 222] (each column is interpolated independently)
print(g)

[[  2.         244.          23.        ]
 [  2.10526316 231.31578947  33.47368421]
 [  2.21052632 218.63157895  43.94736842]
 [  2.31578947 205.94736842  54.42105263]
 [  2.42105263 193.26315789  64.89473684]
 [  2.52631579 180.57894737  75.36842105]
 [  2.63157895 167.89473684  85.84210526]
 [  2.73684211 155.21052632  96.31578947]
 [  2.84210526 142.52631579 106.78947368]
 [  2.94736842 129.84210526 117.26315789]
 [  3.05263158 117.15789474 127.73684211]
 [  3.15789474 104.47368421 138.21052632]
 [  3.26315789  91.78947368 148.68421053]
 [  3.36842105  79.10526316 159.15789474]
 [  3.47368421  66.42105263 169.63157895]
 [  3.57894737  53.73684211 180.10526316]
 [  3.68421053  41.05263158 190.57894737]
 [  3.78947368  28.36842105 201.05263158]
 [  3.89473684  15.68421053 211.52631579]
 [  4.           3.         222.        ]]


### Meta-Information

In [178]:
g.size  

60

In [180]:
g.itemsize  #the number of bytes to represent an element.

8

In [179]:
g.ndim  

2

In [188]:
g.shape  

(20, 3)

In [190]:
g.dtype  

dtype('float64')

In [191]:
g.nbytes  

480

### Reshaping, Resizing, Stacking, Flattening

In [202]:
g = np.arange(15)

In [203]:
g

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [215]:
print('{} {}'.format(type(g.shape), g.shape ))
np.ndim(g.shape)

<class 'tuple'> (15,)


1

In [218]:
np.shape(g) 


(15,)

In [219]:
g.reshape((3, 5))  

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [220]:
h = g.reshape((5, 3))  
h

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [221]:
h.T  

array([[ 0,  3,  6,  9, 12],
       [ 1,  4,  7, 10, 13],
       [ 2,  5,  8, 11, 14]])

In [222]:
h.transpose()  

array([[ 0,  3,  6,  9, 12],
       [ 1,  4,  7, 10, 13],
       [ 2,  5,  8, 11, 14]])

In [223]:
g

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [224]:
np.resize(g, (3, 1))  # resize into a new array of shape (3, 1): only the first three elements are kept, the remaining ones are dropped

array([[0],
       [1],
       [2]])

In [225]:
np.resize(g, (1, 5))  

array([[0, 1, 2, 3, 4]])

In [226]:
np.resize(g, (2, 5))  

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [227]:
n = np.resize(g, (5, 4))  # the target has more slots (20) than g has elements (15), so the data is repeated from the start to fill it
n

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14,  0],
       [ 1,  2,  3,  4]])

In [228]:
h

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [229]:
np.hstack((h, 2 * h))  

array([[ 0,  1,  2,  0,  2,  4],
       [ 3,  4,  5,  6,  8, 10],
       [ 6,  7,  8, 12, 14, 16],
       [ 9, 10, 11, 18, 20, 22],
       [12, 13, 14, 24, 26, 28]])

In [230]:
np.vstack((h, 0.5 * h))  

array([[ 0. ,  1. ,  2. ],
       [ 3. ,  4. ,  5. ],
       [ 6. ,  7. ,  8. ],
       [ 9. , 10. , 11. ],
       [12. , 13. , 14. ],
       [ 0. ,  0.5,  1. ],
       [ 1.5,  2. ,  2.5],
       [ 3. ,  3.5,  4. ],
       [ 4.5,  5. ,  5.5],
       [ 6. ,  6.5,  7. ]])

In [231]:
h

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [232]:
h.flatten()  

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [92]:
h.flatten(order='C')  
h.flatten()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [93]:
# Flatten in column-major (Fortran) order. The method must actually be called:
# a bare `h.flatten` only displays the bound method object.
h.flatten(order='F')

array([ 0,  3,  6,  9, 12,  1,  4,  7, 10, 13,  2,  5,  8, 11, 14])

In [239]:
for i in h.flat:  #this is equivalent to h.flatten()
    print(i, end=',')

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,

In [95]:
for i in h.ravel(order='C'):  #this is equivalent to h.flatten()
    print(i, end=',')

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,

In [96]:
for i in h.ravel(order='F'):  
    print(i, end=',')

0,3,6,9,12,1,4,7,10,13,2,5,8,11,14,

### Boolean Arrays

In [240]:
h

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [241]:
h > 8  

array([[False, False, False],
       [False, False, False],
       [False, False, False],
       [ True,  True,  True],
       [ True,  True,  True]])

In [248]:
xx  = h > 8
print(type(xx.astype(int)))
xx.astype(int)

<class 'numpy.ndarray'>


array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [1, 1, 1],
       [1, 1, 1]])

In [99]:
h <= 7  

array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True, False],
       [False, False, False],
       [False, False, False]])

In [100]:
h == 5  

array([[False, False, False],
       [False, False,  True],
       [False, False, False],
       [False, False, False],
       [False, False, False]])

In [246]:
(h == 5).astype(int)  

array([[0, 0, 0],
       [0, 0, 1],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]])

In [102]:
(h > 4) & (h <= 12)  

array([[False, False, False],
       [False, False,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True, False, False]])

In [103]:
h[h > 8]  

array([ 9, 10, 11, 12, 13, 14])

In [104]:
h[(h > 4) & (h <= 12)]  

array([ 5,  6,  7,  8,  9, 10, 11, 12])

In [105]:
h[(h < 4) | (h >= 12)]  

array([ 0,  1,  2,  3, 12, 13, 14])

In [250]:
h

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [249]:
np.where(h > 7, 1, 0)  

array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 1],
       [1, 1, 1],
       [1, 1, 1]])

In [251]:
np.where(h % 2 == 0, 'even', 'odd')  

array([['even', 'odd', 'even'],
       ['odd', 'even', 'odd'],
       ['even', 'odd', 'even'],
       ['odd', 'even', 'odd'],
       ['even', 'odd', 'even']], dtype='<U4')

In [252]:
np.where(h <= 7, h * 2, h / 2)  

array([[ 0. ,  2. ,  4. ],
       [ 6. ,  8. , 10. ],
       [12. , 14. ,  4. ],
       [ 4.5,  5. ,  5.5],
       [ 6. ,  6.5,  7. ]])

### Speed Comparison

In [267]:
import random
I = 100

In [268]:
%time mat = [[random.gauss(0, 1) for j in range(I)] \
             for i in range(I)]  

Wall time: 12 ms


In [270]:
x = [[random.gauss(0, 1) for j in range(I)] \
             for i in range(I)]  
x

450067,
  -0.1887457272100561,
  -0.9952286328221497,
  1.186261185646235,
  0.013512474561835653,
  -0.6462241414696211,
  -0.40484580597091135,
  -0.26021731248986574,
  0.9575172539033012,
  1.883586735842421,
  1.6294484293685683,
  0.14894070537316817,
  0.0525865529503906,
  0.16607736279869384,
  1.162477172389404,
  1.0096252734202331,
  -0.3897371529579834,
  0.02623371174569361,
  1.3964865200869432,
  -0.7533310998211108,
  -1.3487685278005785,
  -0.10399948714490509,
  -1.5385551740605967,
  0.17649063935837372,
  0.30830490290267626,
  -1.160818083459106,
  0.26701082498963,
  0.6581392482683764,
  -1.21807832882264,
  0.3383902965785359,
  -0.04685463917049732,
  -0.4058096995323263,
  0.5617139750649072,
  -0.6228095806304365,
  -0.05228757578637155,
  0.3943430612080341,
  -2.088853769967864,
  0.9064154103239848,
  -0.9591800696702701,
  0.36607852077468744,
  2.229944727480803,
  0.39561925367356715,
  -0.4050880972160935,
  0.14323176070069987,
  -0.5846566018098507,

In [284]:
# Tiny 3x3 illustration of the nested comprehension. A separate size name is
# used so the benchmark size `I` is not overwritten for the cells that follow.
n_small = 3

sample = [[random.gauss(0, 1) for j in range(n_small)]
          for i in range(n_small)]
sample

[[0.2285980928765402, -1.0118828838817524, -0.12739172904644078],
 [0.09404932967606248, -1.8116498636954337, -1.529092076910532],
 [-0.22909765101388077, -0.14791964528068605, 0.1166328734843664]]

In [285]:
mat[0][:3]  

[0.2971878126448497, -1.323097072782225, 0.3435870562624847]

In [286]:
%time sum([sum(l) for l in mat])  

Wall time: 0 ns


205.8840300324405

In [287]:
import sys
sum([sys.getsizeof(l) for l in mat])  

90400

In [114]:
%time mat = np.random.standard_normal((I, I))  

CPU times: user 1.16 s, sys: 172 ms, total: 1.33 s
Wall time: 1.33 s


In [115]:
%time mat.sum()  

CPU times: user 30.8 ms, sys: 1.37 ms, total: 32.1 ms
Wall time: 30.4 ms


-5107.888370721598

In [116]:
mat.nbytes  

200000000

In [117]:
sys.getsizeof(mat)  

200000112

### Structured Arrays, so that we no longer require all elements in a NumPy array to be of the same type - we only require all elements in the same column of the NumPy array to be of the same type.

In [288]:
dt = np.dtype([('Name', 'S10'), ('Age', 'i4'),
               ('Height', 'f'), ('Children/Pets', 'i4', 2)])  

In [289]:
dt  

dtype([('Name', 'S10'), ('Age', '<i4'), ('Height', '<f4'), ('Children/Pets', '<i4', (2,))])

In [290]:
dt = np.dtype({'names': ['Name', 'Age', 'Height', 'Children/Pets'],
             'formats':'O int float int,int'.split()})  

In [291]:
dt  

dtype([('Name', 'O'), ('Age', '<i4'), ('Height', '<f8'), ('Children/Pets', [('f0', '<i4'), ('f1', '<i4')])])

In [292]:
s = np.array([('Smith', 45, 1.83, (0, 1)),
              ('Jones', 53, 1.72, (2, 2))], dtype=dt)  

In [293]:
s  

array([('Smith', 45, 1.83, (0, 1)), ('Jones', 53, 1.72, (2, 2))],
      dtype=[('Name', 'O'), ('Age', '<i4'), ('Height', '<f8'), ('Children/Pets', [('f0', '<i4'), ('f1', '<i4')])])

In [294]:
type(s)  

numpy.ndarray

In [295]:
s['Name']  

array(['Smith', 'Jones'], dtype=object)

In [296]:
s['Height'].mean()  

1.775

In [297]:
s[0]  

('Smith', 45, 1.83, (0, 1))

In [298]:
s[1]['Age']  

53

## Vectorization of Code

In [299]:
np.random.seed(100)
r = np.arange(12).reshape((4, 3))  
s = np.arange(12).reshape((4, 3)) * 0.5  

In [300]:
r  

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [301]:
s  

array([[0. , 0.5, 1. ],
       [1.5, 2. , 2.5],
       [3. , 3.5, 4. ],
       [4.5, 5. , 5.5]])

In [302]:
r + s  

array([[ 0. ,  1.5,  3. ],
       [ 4.5,  6. ,  7.5],
       [ 9. , 10.5, 12. ],
       [13.5, 15. , 16.5]])

In [308]:
#np.concatenate([r,s])
np.hstack((r,s))

array([[ 0. ,  1. ,  2. ,  0. ,  0.5,  1. ],
       [ 3. ,  4. ,  5. ,  1.5,  2. ,  2.5],
       [ 6. ,  7. ,  8. ,  3. ,  3.5,  4. ],
       [ 9. , 10. , 11. ,  4.5,  5. ,  5.5]])

In [303]:
r + 3  

array([[ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [134]:
2 * r  

array([[ 0,  2,  4],
       [ 6,  8, 10],
       [12, 14, 16],
       [18, 20, 22]])

In [135]:
2 * r + 3  

array([[ 3,  5,  7],
       [ 9, 11, 13],
       [15, 17, 19],
       [21, 23, 25]])

In [309]:
r

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [310]:
r.shape

(4, 3)

In [311]:
s = np.arange(0, 12, 4)  
s  

array([0, 4, 8])

In [312]:
r + s  

array([[ 0,  5, 10],
       [ 3,  8, 13],
       [ 6, 11, 16],
       [ 9, 14, 19]])

In [313]:
s = np.arange(0, 12, 3)  
s  

array([0, 3, 6, 9])

In [314]:
# causes intentional error
# r + s  

In [315]:
r.transpose() + s  

array([[ 0,  6, 12, 18],
       [ 1,  7, 13, 19],
       [ 2,  8, 14, 20]])

In [316]:
sr = s.reshape(-1, 1)  
sr

array([[0],
       [3],
       [6],
       [9]])

In [317]:
sr.shape  

(4, 1)

In [318]:
r + s.reshape(-1, 1)  

array([[ 0,  1,  2],
       [ 6,  7,  8],
       [12, 13, 14],
       [18, 19, 20]])

In [319]:
def f(x):
    """Apply the linear map 3 * x + 5 to ``x``.

    Uses only ``*`` and ``+``, so ``x`` may be any object supporting those
    operators with numbers (a scalar, or an array for element-wise use).
    """
    scaled = 3 * x
    return scaled + 5

In [320]:
f(0.5)  

6.5

In [321]:
f(r)  

array([[ 5,  8, 11],
       [14, 17, 20],
       [23, 26, 29],
       [32, 35, 38]])

## Memory Layout

Cf. http://eli.thegreenplace.net/2015/memory-layout-of-multi-dimensional-arrays/

In [323]:
x = np.random.standard_normal((1000000, 5))  
x

array([[ 0.88856452,  0.51274549,  0.92123913,  0.79005258, -1.53495154],
       [ 0.84872973,  0.69921004, -0.53638366,  0.05992931, -0.87694289],
       [ 1.22574041, -0.00375774,  0.5185445 ,  0.27948334,  0.19083482],
       ...,
       [-0.31248747, -0.89688893, -0.7838734 ,  1.30445266, -0.36016444],
       [ 0.14502176, -0.69081802, -1.72543806,  1.46816233,  1.39259037],
       [ 0.86040368,  1.12537485, -0.90881135, -1.60113848, -0.61654879]])

In [324]:
y = 2 * x + 3  

In [326]:
C = np.array((x, y), order='C')  
C

array([[[ 8.88564522e-01,  5.12745494e-01,  9.21239132e-01,
          7.90052584e-01, -1.53495154e+00],
        [ 8.48729726e-01,  6.99210040e-01, -5.36383657e-01,
          5.99293084e-02, -8.76942888e-01],
        [ 1.22574041e+00, -3.75773762e-03,  5.18544500e-01,
          2.79483337e-01,  1.90834817e-01],
        ...,
        [-3.12487473e-01, -8.96888934e-01, -7.83873397e-01,
          1.30445266e+00, -3.60164441e-01],
        [ 1.45021765e-01, -6.90818025e-01, -1.72543806e+00,
          1.46816233e+00,  1.39259037e+00],
        [ 8.60403677e-01,  1.12537485e+00, -9.08811348e-01,
         -1.60113848e+00, -6.16548789e-01]],

       [[ 4.77712904e+00,  4.02549099e+00,  4.84247826e+00,
          4.58010517e+00, -6.99030746e-02],
        [ 4.69745945e+00,  4.39842008e+00,  1.92723269e+00,
          3.11985862e+00,  1.24611422e+00],
        [ 5.45148083e+00,  2.99248452e+00,  4.03708900e+00,
          3.55896667e+00,  3.38166963e+00],
        ...,
        [ 2.37502505e+00,  1.2062221

In [330]:
F = np.array((x, y), order='F')  

In [333]:
x = 0.0; y = 0.0  # rebind x and y to scalars so these names no longer reference the large (1000000, 5) arrays

In [334]:
C[:2].round(2)  

array([[[ 0.89,  0.51,  0.92,  0.79, -1.53],
        [ 0.85,  0.7 , -0.54,  0.06, -0.88],
        [ 1.23, -0.  ,  0.52,  0.28,  0.19],
        ...,
        [-0.31, -0.9 , -0.78,  1.3 , -0.36],
        [ 0.15, -0.69, -1.73,  1.47,  1.39],
        [ 0.86,  1.13, -0.91, -1.6 , -0.62]],

       [[ 4.78,  4.03,  4.84,  4.58, -0.07],
        [ 4.7 ,  4.4 ,  1.93,  3.12,  1.25],
        [ 5.45,  2.99,  4.04,  3.56,  3.38],
        ...,
        [ 2.38,  1.21,  1.43,  5.61,  2.28],
        [ 3.29,  1.62, -0.45,  5.94,  5.79],
        [ 4.72,  5.25,  1.18, -0.2 ,  1.77]]])

In [155]:
%timeit C.sum()  

4.39 ms ± 58.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [156]:
%timeit F.sum()  

4.27 ms ± 52.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [157]:
%timeit C.sum(axis=0)  

16.8 ms ± 2.99 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [158]:
%timeit C.sum(axis=1)  

33.2 ms ± 437 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [159]:
%timeit F.sum(axis=0)  

75.1 ms ± 2.69 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [160]:
%timeit F.sum(axis=1)  

77.9 ms ± 2.19 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [161]:
F = 0.0; C = 0.0  # rebind F and C to scalars so these names no longer reference the large arrays built from (x, y)

<img src="http://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

<a href="http://tpq.io" target="_blank">http://tpq.io</a> | <a href="http://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:training@tpq.io">training@tpq.io</a>