In [2]:
import numpy as np

# Numpy


*   A NumPy array is a **grid of values**, all of the **same type**, indexed by a **tuple of nonnegative integers**
*   Provides many numeric data types (np.int8, np.int32, np.float32, np.half (float16), etc.), plus boolean, string and object dtypes<br>
* Number of dimensions is **rank** of array.
  - array of 2x3x4 has **rank 3**
* Shape of an array is a **tuple of integers** giving the **size** of the array along each dimension

## Caution!
* Python lists and NumPy arrays are different objects!
  - `type()` works on both, but only NumPy arrays have a *shape* attribute (use `len()` for a Python list)

In [3]:
# Build a rank 2 array from nested lists, then show its type, shape and first element
a = np.array([
    [1, 2, 3],
    [2, 3, 4],
])
print(type(a), a.shape, a[0, 0])
print(a)

<class 'numpy.ndarray'> (2, 3) 1
[[1 2 3]
 [2 3 4]]


In [9]:
# Shape is your hammer. Good tool to debug np array
print(a.shape)

(2, 3)


In [10]:
a = np.zeros((2,2))
print(a)

[[0. 0.]
 [0. 0.]]


In [3]:
b = np.ones((1,2,5,6,7))
print(b.shape)

(1, 2, 5, 6, 7)


In [4]:
c = np.full((2,2), 7)
print(c)

[[7 7]
 [7 7]]


In [5]:
d = np.eye(3)
print(d)

[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


In [6]:
e = np.random.random((2,2)) * 100 # 2x2 array of random floats, scaled from [0, 1) up to [0, 100); no seed is set, so values differ per run
print(e)

[[55.79767934 17.32041205]
 [96.71761295 88.50905637]]


# Arrays
* Slicing: similar to python lists, numpy arrays can be sliced
* since arrays may be multidimensional, you must specify a slice for each dimension of the array
* Example matrix of **shape** (3,4) (3 rows by 4 columns) and **rank 2**<br>

In [5]:
a = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]])
# use slicing to pull out the subarray consisting of the first 2 rows and columns 1 and 2
b = a[:2, 1:3]
print(b)

[[2 3]
 [6 7]]


A **slice of an array is a view into the same data**, so modifying it will modify the original array

In [12]:
b[0,0] = 77
print(a)

[[ 1 77  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


In [4]:
# How can we make a copy (not a reference/view) of an array?
# np.copy returns a new array, so writing into c leaves a untouched
c = np.copy(a)
c[0,0] = 100
print(c) # c has the changes but a does not
print(a)

[[100  77   3   4]
 [  5   6   7   8]
 [  9  10  11  12]]
[[ 1 77  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


In [6]:
# mix integer indexing with slice indexing
# (an integer index drops that axis; a slice or a list index keeps it)
row_r1 = a[1, :] # Rank 1 view of the second row of a
row_r2 = a[1:2, :] # Rank 2 view of the second row of a
row_r3 = a[[1], :] # Rank 2 COPY of the second row of a (a list index is integer array indexing, not a view)
print(row_r1, row_r1.shape)
print(row_r2, row_r2.shape)
print(row_r3, row_r3.shape)

[5 6 7 8] (4,)
[[5 6 7 8]] (1, 4)
[[5 6 7 8]] (1, 4)


## Integer array indexing
- When you index into numpy arrays using slicing, resulting array view will always be a subarray of original array.
- In contrast, integer array indexing allows you to construct arbitrary arrays using the data from another array.

In [7]:
# we can make the same distinction when accessing columns of an array:
col_r1 = a[:, 1]
col_r2 = a[:, 1:2]
print(col_r1, col_r1.shape)
print(col_r2, col_r2.shape)

[77  6 10] (3,)
[[77]
 [ 6]
 [10]] (3, 1)


In [6]:
# an example of integer array indexing
# returned array will have shape (2,)
print (' => ')
print(a[[0,1], [0, 1]]) # first argument is rows and second is columns (r, c) -> (0,0) and 1, 1

 => 
[1 6]


In [9]:
a = np.array([[1,2], [3,4], [5,6]])

In [10]:
# returned array will have shape(3,)
print( " => ")
print( a[[0,1,2], [0,1,0]])

 => 
[1 4 5]


In [12]:
# above example is equiv to this
print(np.array([a[0,0], a[1,1], a[2,0]]))

[1 4 5]


In [14]:
# when using integer array indexing, you can reuse the same
# element from the source array
print(a[[0,0], [1,1]])

# equivalent to previous integer array indexing example
print(np.array([a[0,1], a[0,1]]))

[2 2]
[2 2]


- One useful trick with integer array indexing is **selecting** or mutating **one element** from each row of a matrix

In [15]:
# create a new array from which we will select elements
a = np.array([[1,2,3], [4,5,6], [7,8,9], [10,11,12]])
print(a)

[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]


In [16]:
# create an array of indices
b = np.array([0,2,0,1])
print(b)

[0 2 0 1]


In [18]:
print(np.arange(4))
# select one element from each row of a using the indices in b
print(a[np.arange(4), b]) # prints [1 6 7 11]

[0 1 2 3]
[ 1  6  7 11]


In [19]:
# mutate one element from each row of a using the indices in b
a[np.arange(4), b] += 10
print(a)

[[11  2  3]
 [ 4  5 16]
 [17  8  9]
 [10 21 12]]


## Boolean indexing
- pick out arbitrary elements of an array
- typically used to select elements of an array that satisfy some condition

In [20]:
bool_idx = (a > 2)
print(bool_idx)

[[ True False  True]
 [ True  True  True]
 [ True  True  True]
 [ True  True  True]]


In [21]:
# we use boolean array indexing to construct a "rank 1 array"
# consisting of element of a corresponding to the True values
# of bool_idx
print(a[bool_idx])

[11  3  4  5 16 17  8  9 10 21 12]


In [22]:
# can do all of the above in a single concise statement:
# the comparison builds the boolean mask inline and it is used directly as the index
print(a[a >2])

[11  3  4  5 16 17  8  9 10 21 12]


## Datatypes
- Every numpy array is a grid of elements of the **same type**
- Provides a large set of numeric datatypes that you can use to construct arrays
- Numpy tries to guess a datatype when you create an array
  - functions that construct arrays usually also include optional argument to explicitly specify the datatype

In [23]:
x = np.array([1,2])
y = np.array([1.0,2.0])
z = np.array([1,2], dtype=np.int64)

print(x.dtype, y.dtype, z.dtype)

int64 float64 int64


## Array math
- basic math funcs operate elementwise on arrays
- available as both operator overloads, and funcs in numpy module

In [24]:
x = np.array([[1,2],[3,4]], dtype=np.float64)
y = np.array([[5,6],[7,8]], dtype=np.float64)

# elementwise sum; both produce the array
print(x + y) # operator overload
print(np.add(x,y)) # add available as func

[[ 6.  8.]
 [10. 12.]]
[[ 6.  8.]
 [10. 12.]]


In [25]:
# elementwise difference; both produce the array
print(x-y)
print(np.subtract(x,y))

[[-4. -4.]
 [-4. -4.]]
[[-4. -4.]
 [-4. -4.]]


In [26]:
# elementwise product; both produce the array
print(x * y)
print(np.multiply(x,y))

[[ 5. 12.]
 [21. 32.]]
[[ 5. 12.]
 [21. 32.]]


In [28]:
# elementwise division
print(x/y)
print(np.divide(x,y))

[[0.2        0.33333333]
 [0.42857143 0.5       ]]
[[0.2        0.33333333]
 [0.42857143 0.5       ]]


In [29]:
# elementwise square root
print(np.sqrt(x))

[[1.         1.41421356]
 [1.73205081 2.        ]]


## elementwise multiplication vs dot product
- `*` is elementwise multiplication, **not matrix multiplication**
- use the *dot* function to compute inner products of vectors, to multiply a vector by a matrix, and to multiply matrices
- *dot* is available both as a func in numpy module and as an instance method of array objects

In [30]:
# Inner product of two rank 1 arrays: 9*11 + 10*12
v = np.array([9, 10])
w = np.array([11, 12])

# the instance method and the module-level function return the same scalar
print(v.dot(w), np.dot(v, w), sep="\n")

219
219


In [31]:
x = np.array([[1,2], [3,4]])
# matrix / vector product; both produce rank 1 array
print(x.dot(v))
print(np.dot(x, v))

[29 67]
[29 67]


In [33]:
y = np.array([[5,6], [7,8]])
# contrast the two products: `*` multiplies element by element, while np.dot is the
# matrix / matrix product; both give a rank 2 array but with different values
print(x * y)
print(np.dot(x,y))

[[ 5 12]
 [21 32]]
[[19 22]
 [43 50]]


In [36]:
# NumPy ships many helpers for computing over arrays; sum is one of the most useful.
# Each label and its value are printed on separate lines.
x = np.array([[1, 2], [3, 4]])
print("x = ", x, sep="\n")
print("sum = ", x.sum(), sep="\n")  # total of every element
print("sum along axis 0 = ", x.sum(axis=0), sep="\n")  # one sum per column
print("sum along axis 1 =", x.sum(axis=1), sep="\n")  # one sum per row

x = 
[[1 2]
 [3 4]]
sum = 
10
sum along axis 0 = 
[4 6]
sum along axis 1 =
[3 7]


- To transpose a matrix, simply use the T attribute of an array object

In [37]:
print(x)
print(x.T)

[[1 2]
 [3 4]]
[[1 3]
 [2 4]]


- Numpy arrays can also be reshaped using `.reshape()`

In [40]:
print(x.shape)

y = np.reshape(x, (1,4))
print(y.shape)
print(y)

(2, 2)
(1, 4)
[[1 2 3 4]]


## Broadcasting
- powerful mechanism **allows numpy to work with arrays of different shapes** when performing arithmetic operations
- Frequently we have a **smaller array and a larger array**, we want to use the **smaller array multiple times to perform some operation on larger array**

In [42]:
# we will add the vector v to each row of the matrix x,
# storing result in matrix y
x = np.array([[1,2,3], [4,5,6], [7,8,9], [10,11,12]])
v = np.array([1,0,1])
y = np.empty_like(x) # create empty matrix with same shape as x
print("x : ", x)
print("v : ", v)

x :  [[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]
v :  [1 0 1]


We would like to add `v` to `x`. Here is naive way of doing it (**method 1**)

In [43]:
# Method 1: explicit Python loop that adds v to one row of x at a time and
# stores the result in the matching row of y.
# Iterate over x.shape[0] instead of a hard-coded 4 so the cell keeps working
# when x has a different number of rows.
for i in range(x.shape[0]):
  y[i, :] = x[i, :] + v

print(y)

[[ 2  2  4]
 [ 5  5  7]
 [ 8  8 10]
 [11 11 13]]


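- The above example works; however, when x is large the explicit for loop can be **very slow**
- Adding v to each row of x is equivalent to stacking copies of v into a matrix `vv` with `np.tile` and then adding x and vv elementwise (**method 2**)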

In [46]:
vv = np.tile(v, (4,1))
print(vv)

[[1 0 1]
 [1 0 1]
 [1 0 1]
 [1 0 1]]


In [47]:
y = x + vv # add x and vv elementwise
print(y)

[[ 2  2  4]
 [ 5  5  7]
 [ 8  8 10]
 [11 11 13]]


- Numpy broadcasting allows us to perform this computation **without actually creating multiple copies of v**
- Consider this version using **broadcasting** **(method 3)**

In [50]:
# Method 3: let broadcasting add the vector v to every row of the matrix x
x = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
    [10, 11, 12],
])
v = np.array([1, 0, 1])
y = np.add(x, v)  # v is broadcast across all rows of x; no tiled copy is built

# show the operands and the result, separated by blank lines
print(x, v, y, sep='\n\n')

[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]

[1 0 1]

[[ 2  2  4]
 [ 5  5  7]
 [ 8  8 10]
 [11 11 13]]


## Broadcasting
- the term "broadcasting" describes how numpy treats arrays with different shapes during arithmetic operations
- the smaller array is "broadcast" across the larger array so that they have compatible shapes
- simplest broadcasting occurs when an array and a scalar value are combined in an operation

In [51]:
a = np.array([1.0, 2.0, 3.0])
b = 2.0
print(a*b)

[2. 4. 6.]


## Explanation
- we can think of the scalar b being stretched **during the arithmetic operation** into an array with the same shape as a
- new elements in b are simple copies of original scalar
- **stretching analogy is only conceptual**
  - NumPy is smart enough to use the original scalar value without actually making copies, so that broadcasting operations are as **memory and computationally efficient** as possible

In [53]:
x = np.arange(4)
y = np.ones(5)
print(x, x.shape)
print(y, y.shape)

[0 1 2 3] (4,)
[1. 1. 1. 1. 1.] (5,)


In [54]:
# What will be output? Error - trailing dims do not match (4 vs 5).
# The ValueError is the point of this cell, so catch and print it; an uncaught
# exception would stop "Restart & Run All" before the remaining cells run.
try:
  print((x + y).shape)
except ValueError as err:
  print("ValueError:", err)

ValueError: operands could not be broadcast together with shapes (4,) (5,) 

In [55]:
xx = x.reshape(4,1)
print(xx, xx.shape)
print(y, y.shape)

[[0]
 [1]
 [2]
 [3]] (4, 1)
[1. 1. 1. 1. 1.] (5,)


In [57]:
# what will be output? (4,5)
print((xx + y).shape)

(4, 5)


# Outer Product using Broadcasting
- broadcasting provides convenient way of taking the outer product (or any other outer operation) of two arrays
- following example shows an **outer product operation** of two 1-d arrays:

In [58]:
a = np.array([0.0, 10.0, 20.0, 30.0])
b = np.array([1.0, 2.0, 3.0])

print(a, a.shape)
print(b, b.shape)

[ 0. 10. 20. 30.] (4,)
[1. 2. 3.] (3,)


In [59]:
# will produce error due to shapes: (4,) and (3,) cannot be broadcast together.
# Catch and print the ValueError so the notebook still runs top to bottom.
try:
  print( a * b)
except ValueError as err:
  print("ValueError:", err)

ValueError: operands could not be broadcast together with shapes (4,) (3,) 

In [60]:
# reshape turns b into a 2-d 3x1 column; broadcasting it against a (shape (4,))
# gives a 3x4 outer product whose entry [i, j] is b[i] * a[j]
print(a * b.reshape(3,1))

[[ 0. 10. 20. 30.]
 [ 0. 20. 40. 60.]
 [ 0. 30. 60. 90.]]
