# NumPy

The essential problem that NumPy solves is fast array processing. For example, suppose we want to create an array of 1 million random draws from a uniform distribution and compute the mean.

If we did this in pure Python it would be orders of magnitude slower than C or Fortran. This is because:

- Loops in Python over Python data types like lists carry significant overhead.
- C and Fortran code contains a lot of type information that can be used for optimization.
- Various optimizations can be carried out during compilation, when the compiler sees the instructions as a whole.


In [1]:
import numpy as np

In [2]:
np.

1

In [2]:
a = np.zeros(3)
a

array([ 0.,  0.,  0.])

In [3]:
type(a)

numpy.ndarray

In [4]:
type(a[0])

numpy.float64

In [6]:
a[0]

0.0

In [5]:
z = np.zeros(10)

In [6]:
z.shape

(10,)

In [9]:
z.shape = (5,2)
z

array([[ 0.,  0.],
       [ 0.,  0.],
       [ 0.,  0.],
       [ 0.,  0.],
       [ 0.,  0.]])

In [10]:
z = np.zeros(4)
z.shape = (2, 2)
z

array([[ 0.,  0.],
       [ 0.,  0.]])

In [11]:
z.ndim

2

In [12]:
z = np.zeros((2, 2))
z

array([[ 0.,  0.],
       [ 0.,  0.]])

In [13]:
z.ndim

2

In [14]:
z = np.empty(3)
z

array([ 0.,  0.,  0.])

In [15]:
z = np.linspace(2, 4, 5)   # 5 evenly spaced points from 2 to 4, endpoint included (start, stop, number of elements)
z

array([ 2. ,  2.5,  3. ,  3.5,  4. ])

In [None]:
# spacing between linspace samples: (stop - start) / (num - 1), e.g. (4 - 2) / (5 - 1) = 0.5

In [16]:
z = np.identity(2)
z

array([[ 1.,  0.],
       [ 0.,  1.]])

In [17]:
z = np.array([10, 20])                 # ndarray from Python list
z

array([10, 20])

In [18]:
z = np.array([[1, 2], [3, 4]])         # 2D array from a list of lists
z

array([[1, 2],
       [3, 4]])

In [19]:
na = np.linspace(10, 20, 2)
na is np.asarray(na)   # Does not copy NumPy arrays

True

## Array Indexing

In [20]:
z = np.linspace(1, 2, 5)

z

array([ 1.  ,  1.25,  1.5 ,  1.75,  2.  ])

In [21]:
z[0]

1.0

In [22]:
z[0:2]  # Two elements, starting at element 0

array([ 1.  ,  1.25])

In [23]:
z[-1]

2.0

In [24]:
z = np.array([[1, 2], [3, 4]])
z

array([[1, 2],
       [3, 4]])

In [26]:
z[0, 0]

1

In [27]:
z[0, 1]

2

In [28]:
z[0,:]    # 1st row

array([1, 2])

In [29]:
z[:,0]  # 1st column

array([1, 3])

In [30]:
z = np.linspace(2, 4, 5)
z

array([ 2. ,  2.5,  3. ,  3.5,  4. ])

In [31]:
indices = np.array((0, 2, 3))
indices

array([0, 2, 3])

In [125]:
z[indices]

array([ 2. ,  3. ,  3.5])

In [32]:
d = np.array([0, 1, 1, 0, 0], dtype=bool)    #int64, float64 
d

array([False,  True,  True, False, False], dtype=bool)

In [33]:
z[d]

array([ 2.5,  3. ])

In [34]:
z = np.empty(3)
z

array([ 0.,  0.,  0.])

In [35]:
z[:] = 42
z

array([ 42.,  42.,  42.])

In [36]:
M = np.array([[1, 2], [3, 4]], dtype=complex)

M

array([[ 1.+0.j,  2.+0.j],
       [ 3.+0.j,  4.+0.j]])

In [37]:
# create a range
# comparable to the C loop: for (i = 0; i < 10; i += 2) -- the stop value is excluded
x = np.arange(0, 10, 2) # arguments: start, stop, step

x

array([0, 2, 4, 6, 8])

In [38]:
x = np.arange(-1, 1, 0.1)

x

array([ -1.00000000e+00,  -9.00000000e-01,  -8.00000000e-01,
        -7.00000000e-01,  -6.00000000e-01,  -5.00000000e-01,
        -4.00000000e-01,  -3.00000000e-01,  -2.00000000e-01,
        -1.00000000e-01,  -2.22044605e-16,   1.00000000e-01,
         2.00000000e-01,   3.00000000e-01,   4.00000000e-01,
         5.00000000e-01,   6.00000000e-01,   7.00000000e-01,
         8.00000000e-01,   9.00000000e-01])

In [39]:
x, y = np.mgrid[0:5, 0:5]

In [40]:
x

array([[0, 0, 0, 0, 0],
       [1, 1, 1, 1, 1],
       [2, 2, 2, 2, 2],
       [3, 3, 3, 3, 3],
       [4, 4, 4, 4, 4]])

In [41]:
y

array([[0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4]])

## Array Methods

In [42]:
A = np.array((4, 3, 2, 1))
A

array([4, 3, 2, 1])

In [None]:
A.T

In [43]:
A.sort()              # Sorts A in place
A

array([1, 2, 3, 4])

In [44]:
A.sum()               # Sum

10

In [45]:
A.mean()              # Mean

2.5

In [46]:
A.max()               # Max

4

In [48]:
A.argmax()            # Returns the index of the maximal element

3

In [49]:
A.cumsum()            # Cumulative sum of the elements of A

array([ 1,  3,  6, 10], dtype=int32)

In [50]:
A.cumprod()           # Cumulative product of the elements of A

array([ 1,  2,  6, 24], dtype=int32)

In [51]:
A.var()               # Variance

1.25

In [52]:
A.std()               # Standard deviation


1.1180339887498949

In [53]:
A.shape = (2, 2)
A.T                   # Equivalent to A.transpose()

array([[1, 3],
       [2, 4]])

Another method worth knowing is searchsorted()

If `z` is a nondecreasing array, then `z.searchsorted(a)` returns the index of the first element of `z` that is `>= a`.

In [55]:
z = np.linspace(2, 4, 5)
z                                     # z[1]

array([ 2. ,  2.5,  3. ,  3.5,  4. ])

In [56]:
z.searchsorted(3.2)

3

In [58]:
z.searchsorted(3.5)

3

Many of the methods discussed above have equivalent functions in the NumPy namespace


In [59]:
a = np.array((4, 3, 2, 1))
a

array([4, 3, 2, 1])

In [60]:
np.sum(a)

10

In [61]:
np.mean(a)


2.5

### Operations on Arrays

In [62]:
a = np.array([1, 2, 3, 4])
b = np.array([5, 6, 7, 8])
a + b

array([ 6,  8, 10, 12])

In [63]:
a = np.array([1, 2, 3, 4])
b = np.array([5, 6, 7, 8,9])
a + b

ValueError: operands could not be broadcast together with shapes (4,) (5,) 

In [64]:
a = np.array([1, 2, 3, 4])
b = np.array([5, 6, 7, 8])
a * b

array([ 5, 12, 21, 32])

In [65]:
a + 10

array([11, 12, 13, 14])

In [66]:
a * 10

array([10, 20, 30, 40])

Two-dimensional arrays follow the same general rules.

In [67]:
A = np.ones((2, 2))
A

array([[ 1.,  1.],
       [ 1.,  1.]])

In [68]:
A = np.ones((2, 2))
B = np.ones((2, 2))
A + B

array([[ 2.,  2.],
       [ 2.,  2.]])

In [69]:
A + 10

array([[ 11.,  11.],
       [ 11.,  11.]])

In [70]:
A * B

array([[ 1.,  1.],
       [ 1.,  1.]])

In particular, `A * B` is not the matrix product; it is an element-wise product.

### Matrix Multiplication

In [71]:
import numpy as np

A = np.ones((2, 2))
B = np.ones((2, 2))
A @ B

array([[ 2.,  2.],
       [ 2.,  2.]])

In [72]:
#We can also use @ to take the inner product of two flat arrays
A = np.array((1, 2))
B = np.array((10, 20))
print(A)
print(B)

[1 2]
[10 20]


In [73]:
A @ B

50

In [74]:
A = np.array(((1, 2), (3, 4)))
A

array([[1, 2],
       [3, 4]])

In [75]:
A.ndim

2

In [76]:
A @ (0, 1)

array([2, 4])

In [78]:
# a diagonal matrix
np.diag([1,2,3])

array([[1, 0, 0],
       [0, 2, 0],
       [0, 0, 3]])

In [79]:
# diagonal with offset from the main diagonal
a = np.diag([1,2,3], k=1)
a

array([[0, 1, 0, 0],
       [0, 0, 2, 0],
       [0, 0, 0, 3],
       [0, 0, 0, 0]])

In [80]:
np.diag(a)

array([0, 0, 0, 0])

### Mutability and Copying Arrays

In [81]:
a = np.array([42, 44])
a

array([42, 44])

In [82]:
a[-1] = 0  # Change last element to 0
a

array([42,  0])

In [84]:
a = np.random.randn(50)
a

array([  2.91638581e-01,   2.42784345e-01,   2.33710646e+00,
        -6.96044207e-01,   4.60511362e-01,  -6.49202051e-01,
        -8.39553462e-02,   8.68987943e-01,   6.96767834e-02,
        -8.24149508e-01,   3.85390114e-01,  -3.43996345e-01,
         1.20540066e-01,  -1.36572275e+00,  -4.05463903e-04,
         6.69444507e-01,   9.93043264e-01,   3.38248317e-01,
        -8.09202218e-01,  -2.10620280e-01,   1.35700018e+00,
        -2.03065926e-01,   5.04881797e-02,   1.10618688e+00,
         5.46635850e-01,   8.13529136e-01,   1.23276488e-01,
         8.00796699e-02,   1.57495731e+00,   6.04325776e-01,
        -4.03571463e-01,   2.97363024e-01,  -2.86676768e-01,
        -1.15603449e-01,   1.56534739e+00,   9.11298344e-01,
        -1.11753740e+00,   8.94128575e-02,  -1.83734513e-02,
         1.52051754e+00,  -1.93853771e+00,   1.20128300e+00,
         2.85727458e-01,  -1.48051646e+00,  -5.05749660e-01,
         9.43145302e-01,  -4.72462521e-02,   1.55506918e+00,
         4.17501378e-01,

In [78]:
b = a
b[0] = 0.0
a

array([ 0.        , -1.25816816,  0.03333618])

In [79]:
a = np.random.randn(3)
a

array([ 0.37690151, -1.28222838,  0.72245413])

In [85]:
b = np.empty_like(a)  # empty array with same shape as a
np.copyto(b, a)  # copy to b from a
b

array([  2.91638581e-01,   2.42784345e-01,   2.33710646e+00,
        -6.96044207e-01,   4.60511362e-01,  -6.49202051e-01,
        -8.39553462e-02,   8.68987943e-01,   6.96767834e-02,
        -8.24149508e-01,   3.85390114e-01,  -3.43996345e-01,
         1.20540066e-01,  -1.36572275e+00,  -4.05463903e-04,
         6.69444507e-01,   9.93043264e-01,   3.38248317e-01,
        -8.09202218e-01,  -2.10620280e-01,   1.35700018e+00,
        -2.03065926e-01,   5.04881797e-02,   1.10618688e+00,
         5.46635850e-01,   8.13529136e-01,   1.23276488e-01,
         8.00796699e-02,   1.57495731e+00,   6.04325776e-01,
        -4.03571463e-01,   2.97363024e-01,  -2.86676768e-01,
        -1.15603449e-01,   1.56534739e+00,   9.11298344e-01,
        -1.11753740e+00,   8.94128575e-02,  -1.83734513e-02,
         1.52051754e+00,  -1.93853771e+00,   1.20128300e+00,
         2.85727458e-01,  -1.48051646e+00,  -5.05749660e-01,
         9.43145302e-01,  -4.72462521e-02,   1.55506918e+00,
         4.17501378e-01,

In [86]:
b[:] = 1
b

array([ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.])

In [87]:
a

array([  2.91638581e-01,   2.42784345e-01,   2.33710646e+00,
        -6.96044207e-01,   4.60511362e-01,  -6.49202051e-01,
        -8.39553462e-02,   8.68987943e-01,   6.96767834e-02,
        -8.24149508e-01,   3.85390114e-01,  -3.43996345e-01,
         1.20540066e-01,  -1.36572275e+00,  -4.05463903e-04,
         6.69444507e-01,   9.93043264e-01,   3.38248317e-01,
        -8.09202218e-01,  -2.10620280e-01,   1.35700018e+00,
        -2.03065926e-01,   5.04881797e-02,   1.10618688e+00,
         5.46635850e-01,   8.13529136e-01,   1.23276488e-01,
         8.00796699e-02,   1.57495731e+00,   6.04325776e-01,
        -4.03571463e-01,   2.97363024e-01,  -2.86676768e-01,
        -1.15603449e-01,   1.56534739e+00,   9.11298344e-01,
        -1.11753740e+00,   8.94128575e-02,  -1.83734513e-02,
         1.52051754e+00,  -1.93853771e+00,   1.20128300e+00,
         2.85727458e-01,  -1.48051646e+00,  -5.05749660e-01,
         9.43145302e-01,  -4.72462521e-02,   1.55506918e+00,
         4.17501378e-01,

### Vectorized Functions

In [88]:
z = np.array([0, 2, 3])
np.sin(z)

array([ 0.        ,  0.90929743,  0.14112001])

In [90]:
# alternative way
n = len(z)
y = np.empty(n)
for i in range(n):
    y[i] = np.sin(z[i])

In [91]:
y

array([ 0.        ,  0.90929743,  0.14112001])

In [92]:
z

array([0, 2, 3])

In [93]:
(1 / np.sqrt(2 * np.pi)) * np.exp(- 0.5 * z**2)

array([ 0.39894228,  0.05399097,  0.00443185])

In [94]:
import numpy as np

x = np.random.randn(4)
x

array([-0.3125394 ,  0.09168042, -1.61006062, -1.22882829])

In [95]:
np.where(x > 0, 1, 0)  # Insert 1 if x > 0 true, otherwise 0       

array([0, 1, 0, 0])

In [97]:
def f(x):
    """Return 1 when x is strictly positive, otherwise 0."""
    # Guard-style form of the indicator function for x > 0.
    if x > 0:
        return 1
    return 0

f = np.vectorize(f)
f(x)                # Passing the same vector x as in the previous example

array([0, 1, 0, 0])

### Comparisons

In [98]:
z = np.array([2, 3])
y = np.array([2, 3])
z == y

array([ True,  True], dtype=bool)

In [99]:
y[0] = 5
z == y

array([False,  True], dtype=bool)

In [100]:
z != y

array([ True, False], dtype=bool)

In [101]:
z = np.linspace(0, 10, 5)
z

array([  0. ,   2.5,   5. ,   7.5,  10. ])

In [102]:
z > 3

array([False, False,  True,  True,  True], dtype=bool)

In [103]:
b = z > 3
b

array([False, False,  True,  True,  True], dtype=bool)

In [104]:
z[b]

array([  5. ,   7.5,  10. ])

In [105]:
z[z > 3]

array([  5. ,   7.5,  10. ])

In [106]:
z = np.random.randn(10000)  # Generate standard normals
y = np.random.binomial(10, 0.5, size=1000)    # 1,000 draws from Bin(10, 0.5)
y.mean()

4.944

In [197]:
np.random.binomial?

In [108]:
A = np.array([[1, 2], [3, 4]])

In [109]:
np.linalg.inv(A)           # Compute the inverse

array([[-2. ,  1. ],
       [ 1.5, -0.5]])