## An example of using NumPy and SciPy to solve problems with Python


First, we should import the NumPy library

In [8]:
import numpy as np

#### Creating numpy arrays
Let's create some 1D arrays

In [11]:
# We can create it using a list of elements
a = np.array([1, 2, 3, 4])

# Or using predefined functions
b = np.ones(4) + 1

Here are some of the properties of `a`

In [33]:
print(a.ndim)

print(a.shape)

print(len(a))

1
(4,)
4


Let's try with a 2D array.

In [44]:
c = np.ones((3, 3))
print(c.ndim)
print(c.shape)

2
(3, 3)


#### Indexing and Slicing

In [50]:
d = np.arange(10)
d

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [51]:
d[1]

1

In [52]:
d[0]

0

In [53]:
d[-1]

9

Remember, indices start at 0 and end at length - 1.

In [58]:
d[3:]

array([3, 4, 5, 6, 7, 8, 9])

In [59]:
d[:3]

array([0, 1, 2])

In [54]:
d[0::2]

array([0, 2, 4, 6, 8])

In [56]:
d[0:8:2]

array([0, 2, 4, 6])

In [57]:
d[::-1]

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

In [62]:
d > 5

array([False, False, False, False, False, False,  True,  True,  True,  True], dtype=bool)

In [60]:
d[d > 5]

array([6, 7, 8, 9])

In [61]:
d[d]

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

#### Operations on arrays

In [66]:
a

array([1, 2, 3, 4])

In [16]:
a + 1

array([2, 3, 4, 5])

In [17]:
2**a

array([ 2,  4,  8, 16], dtype=int32)

In [18]:
a - b

array([-1.,  0.,  1.,  2.])

In [19]:
a * b

array([ 2.,  4.,  6.,  8.])

In [63]:
j = np.arange(5)
print(j)
2**(j + 1) - j

[0 1 2 3 4]


array([ 2,  3,  6, 13, 28])

In [65]:
a.sum()

10

In [81]:
# Adding arrays of shapes (4,) and (2,) is expected to fail. The error is
# caught and printed so that "Restart & Run All" can continue past this cell.
try:
    a + np.array([1, 2])
except ValueError as exc:
    print("ValueError:", exc)

ValueError: operands could not be broadcast together with shapes (4,) (2,) 

This was a shape mismatch. When two arrays have different shapes, NumPy tries to broadcast them to a common shape; shapes (4,) and (2,) are not compatible, so the operation fails.

In [72]:
c = np.arange(10).reshape((2,5))
c

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [73]:
c.sum()

45

In [74]:
c.sum(axis=0)

array([ 5,  7,  9, 11, 13])

In [75]:
c.sum(axis=1)

array([10, 35])

In [76]:
c.T

array([[0, 5],
       [1, 6],
       [2, 7],
       [3, 8],
       [4, 9]])

In [79]:
c[:,np.newaxis,:]

array([[[0, 1, 2, 3, 4]],

       [[5, 6, 7, 8, 9]]])

##### What does the operator * (asterisk) do for 2D arrays?

In [39]:
# `c` was rebound to a 2x5 integer array earlier in the notebook. Rebuild the
# 3x3 matrix of ones here so that this cell and the dot() cell that follows
# give the displayed results when the notebook is run from top to bottom.
c = np.ones((3, 3))
c * c

array([[ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.]])

Remember that `*` multiplies element by element; we need to use `dot()` for matrix multiplication.

In [40]:
c.dot(c)

array([[ 3.,  3.,  3.],
       [ 3.,  3.,  3.],
       [ 3.,  3.,  3.]])

#### 1D array vs. vector

In [131]:
a = np.arange(3)
a

array([0, 1, 2])

In [136]:
a.T

array([0, 1, 2])

In [137]:
I = np.eye(3)
I

array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.]])

In [138]:
I.dot(a)

array([ 0.,  1.,  2.])

In [139]:
a.dot(I)

array([ 0.,  1.,  2.])

In [142]:
b = np.random.randn(3,1)
b

array([[-0.11030641],
       [ 0.87307364],
       [ 0.7007051 ]])

In [143]:
I.dot(b)

array([[-0.11030641],
       [ 0.87307364],
       [ 0.7007051 ]])

In [144]:
# A (3, 1) array cannot be matrix-multiplied by a (3, 3) array. The error is
# caught and printed so that "Restart & Run All" can continue past this cell.
try:
    b.dot(I)
except ValueError as exc:
    print("ValueError:", exc)

ValueError: shapes (3,1) and (3,3) not aligned: 1 (dim 1) != 3 (dim 0)

In [145]:
b.T.dot(I)

array([[-0.11030641,  0.87307364,  0.7007051 ]])

#### Broadcasting

In [82]:
f = np.tile(np.arange(0, 40, 10), (3, 1)).T
f

array([[ 0,  0,  0],
       [10, 10, 10],
       [20, 20, 20],
       [30, 30, 30]])

In [83]:
g = np.array([0, 1, 2])
f + g

array([[ 0,  1,  2],
       [10, 11, 12],
       [20, 21, 22],
       [30, 31, 32]])

The same result can be achieved with broadcasting alone, without `np.tile`:

In [84]:
f = np.arange(0, 40, 10)
f.shape

(4,)

In [85]:
f = f[:, np.newaxis]  # adds a new axis -> 2D array
f.shape

(4, 1)

In [86]:
f

array([[ 0],
       [10],
       [20],
       [30]])

In [87]:
f + g

array([[ 0,  1,  2],
       [10, 11, 12],
       [20, 21, 22],
       [30, 31, 32]])

#### Copies and Views

In [151]:
A = np.arange(6).reshape(2,3)
A

array([[0, 1, 2],
       [3, 4, 5]])

In [152]:
B = A.T
B

array([[0, 3],
       [1, 4],
       [2, 5]])

In [153]:
B[1,0] = 10
B

array([[ 0,  3],
       [10,  4],
       [ 2,  5]])

In [154]:
A

array([[ 0, 10,  2],
       [ 3,  4,  5]])

B is a view of A. They share the data.

In [155]:
c = A[0]
c

array([ 0, 10,  2])

In [156]:
c[0] = -1
A

array([[-1, 10,  2],
       [ 3,  4,  5]])

c is a view of row 0 of A.

In [120]:
def identity(x):
    """Return the very same object that was passed in; no copy is made."""
    same_object = x
    return same_object

A = np.arange(6).reshape(2,3)

In [121]:
B = identity(A)

In [122]:
B[1,1] = -1

In [123]:
A

array([[ 0,  1,  2],
       [ 3, -1,  5]])

This happens because B and A are two names for the same array object:

A reference to the array was passed to identity(), the parameter x (that same reference) was returned to the caller, and it was then bound to the name B. No copy of the data was made at any point.


In [126]:
def identity_copy(x):
    """Return an independent duplicate of ``x`` produced by its ``copy()`` method."""
    duplicate = x.copy()
    return duplicate

A = np.arange(6).reshape(2,3)

In [127]:
B = identity_copy(A)

In [130]:
B[1,1] = -1
B

array([[ 0,  1,  2],
       [ 3, -1,  5]])

In [129]:
A

array([[0, 1, 2],
       [3, 4, 5]])

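What is the difference between `A = A + 1` and `A += 1`? `A += 1` modifies the existing array in place, while `A = A + 1` builds a new array and rebinds the name `A` to it (compare the `id` values printed below).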

In [96]:
A = np.arange(6).reshape(2,3)
A

array([[0, 1, 2],
       [3, 4, 5]])

In [97]:
A+= 1
print(A)
print(id(A))

[[1 2 3]
 [4 5 6]]
625349276176


In [98]:
A = A + 1
print(A)
print(id(A))

[[2 3 4]
 [5 6 7]]
625349276016


## SciPy example

In [208]:
from scipy import linalg 
help (linalg.svd)

Help on function svd in module scipy.linalg.decomp_svd:

svd(a, full_matrices=True, compute_uv=True, overwrite_a=False, check_finite=True)
    Singular Value Decomposition.
    
    Factorizes the matrix a into two unitary matrices U and Vh, and
    a 1-D array s of singular values (real, non-negative) such that
    ``a == U*S*Vh``, where S is a suitably shaped matrix of zeros with
    main diagonal s.
    
    Parameters
    ----------
    a : (M, N) array_like
        Matrix to decompose.
    full_matrices : bool, optional
        If True, `U` and `Vh` are of shape ``(M,M)``, ``(N,N)``.
        If False, the shapes are ``(M,K)`` and ``(K,N)``, where
        ``K = min(M,N)``.
    compute_uv : bool, optional
        Whether to compute also `U` and `Vh` in addition to `s`.
        Default is True.
    overwrite_a : bool, optional
        Whether to overwrite `a`; may improve performance.
        Default is False.
    check_finite : bool, optional
        Whether to check that the input 

In [195]:
A = np.random.rand(5,3)
A

array([[ 0.25823086,  0.16917368,  0.49325439],
       [ 0.73497792,  0.54397349,  0.91018629],
       [ 0.4874843 ,  0.73017076,  0.27941487],
       [ 0.12536515,  0.59450122,  0.34390925],
       [ 0.22977848,  0.82709154,  0.35280893]])

In [212]:
# The third value returned by linalg.svd is what the help text above calls
# `Vh`, so the name V here holds Vh and is used as-is in the product below.
U, S, V = linalg.svd(A,full_matrices=False)
# Reconstruction error U * diag(S) * Vh - A; the help text states a == U*S*Vh,
# so the entries are expected to be zero up to floating-point round-off.
U.dot(np.diag(S)).dot(V) - A

array([[ -5.55111512e-17,  -1.11022302e-16,  -2.77555756e-16],
       [ -1.11022302e-16,   0.00000000e+00,  -2.22044605e-16],
       [  0.00000000e+00,  -3.33066907e-16,  -3.33066907e-16],
       [  5.55111512e-17,  -1.11022302e-16,  -1.11022302e-16],
       [  5.55111512e-17,  -2.22044605e-16,  -1.11022302e-16]])

Example 2:

In [213]:
from scipy import stats
A.mean(axis=0)

array([ 0.36716734,  0.57298214,  0.47591475])

In [216]:
A.std(axis=0)

array([ 0.21861928,  0.2252313 ,  0.22807241])

In [218]:
B = stats.zscore(A,axis=0)
B

array([[-0.4982931 , -1.79286119,  0.07602694],
       [ 1.68242516, -0.12879492,  1.90409499],
       [ 0.55034925,  0.69789866, -0.86156794],
       [-1.1060424 ,  0.09554215, -0.57878764],
       [-0.62843891,  1.12821531, -0.53976636]])

Let's check what it did:

In [221]:
B.mean(axis=0)

array([  2.22044605e-17,   0.00000000e+00,   4.44089210e-17])

In [222]:
B.std(axis=0)

array([ 1.,  1.,  1.])