# Arrays and Vectorized Computation

In [3]:
import numpy as np

## The NumPy ndarray: A Multidimensional Array Object

In [4]:
# Draw a 2x3 array of random values to use as sample data.
# NOTE(review): no seed is set in the visible cells, so these values change on every run.
data = np.random.randn(2, 3)

data

array([[ 0.45996374, -0.16796351, -1.02037328],
       [-0.69594963,  0.63766283, -0.52513676]])

We can then apply mathematical operations to `data`:

In [5]:
# Arithmetic operators act element by element across the whole array.
print(data + data)  # each element added to itself

print(data * 10)  # each element multiplied by the scalar 10


[[ 0.91992747 -0.33592701 -2.04074656]
 [-1.39189927  1.27532566 -1.05027352]]
[[  4.59963736  -1.67963506 -10.20373281]
 [ -6.95949633   6.3766283   -5.25136761]]


Every array has a *shape*, a tuple indicating the size of each dimension, and a *dtype*, an object describing the data type of the array.

In [6]:
# shape: the size of each dimension; dtype: the type of the stored elements.
print(data.shape)

data.dtype  # last expression of the cell, so the notebook echoes it

(2, 3)


dtype('float64')

## Creating ndarrays

The easiest way to create an array is to use the **array** function. It accepts any sequence-like object and produces a new NumPy array containing the passed data.

In [7]:
# A flat list mixing ints and floats is converted to a one-dimensional float array.
data1 = [6, 7.5, 8, 0, 1]

arr1 = np.array(data1)
arr1

array([6. , 7.5, 8. , 0. , 1. ])

In [8]:
# Nested lists of equal length are converted to a two-dimensional array.
data2 = [[1, 2, 3, 4], [5, 6, 7, 8]]
arr2 = np.array(data2)

# ndim is the number of dimensions; shape gives the length of each one.
# (A bare `arr2` expression here would show nothing: a cell only echoes its
# last expression, so the no-op line was dropped.)
print(arr2.ndim)
print(arr2.shape)

2
(2, 4)


There are other ways to create NumPy arrays, such as **zeros** and **ones**.
To create a multidimensional array, pass a tuple for the shape.

In [9]:
np.zeros(10)  # an integer argument gives a one-dimensional array of that many zeros

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [10]:
np.zeros((2,6))  # a tuple argument gives the shape: 2 rows by 6 columns

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [11]:
np.empty((2, 3, 2))
# Do not assume that np.empty returns an array of zeros:
# it returns uninitialized memory, which may contain non-zero garbage values.

array([[[0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.]]])

**arange** is an array-valued version of the built-in Python *range* function:

In [12]:
np.arange(5)  # the integers 0 through 4 as an array

array([0, 1, 2, 3, 4])

Other array creation functions:

In [13]:
np.array((2, 3)) # convert the input sequence (here a tuple) to an ndarray

array([2, 3])

In [14]:
np.asarray(1) # convert the input to an ndarray, but do not copy if the input is already an ndarray

array(1)

In [15]:
np.ones((1, 3)) # produce an ndarray of all 1s with the given shape

array([[1., 1., 1.]])

In [16]:
np.ones_like(np.arange(5)) # produce an ndarray of all 1s with the same shape as the input array

array([1, 1, 1, 1, 1])

In [17]:
# np.full builds an array of the given shape filled with the given value.
# NOTE: this first result is not shown; a cell only echoes its last expression.
np.full((2, 3), 11)
# np.full_like takes the shape from an existing array instead.
np.full_like(np.arange(5), 11)

array([11, 11, 11, 11, 11])

In [18]:
np.eye(3) # create a square N x N identity matrix

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [19]:
np.identity(3) # gives the same 3x3 identity matrix as np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

## Arithmetic with NumPy Arrays (Vectorization)

In [20]:
arr = np.array([[1, 2, 3], [4, 5, 6]])

# Arithmetic between equal-size arrays is applied element by element.
print(arr*arr)
print(arr+arr)
print(arr-arr)

[[ 1  4  9]
 [16 25 36]]
[[ 2  4  6]
 [ 8 10 12]]
[[0 0 0]
 [0 0 0]]


Work with scalars:

In [21]:
# A scalar operand is applied to every element of the array.
print(1/arr)  # division yields floats even though arr holds integers
print(arr ** 2)  # each element squared


[[1.         0.5        0.33333333]
 [0.25       0.2        0.16666667]]
[[ 1  4  9]
 [16 25 36]]


## Basic Indexing and Slicing

One-dimensional arrays are simple:

In [22]:
arr = np.arange(10)
print(arr[5])  # a single element

print(arr[5:8])  # the elements at positions 5, 6 and 7

# Assigning a scalar to a slice writes that value to every selected element.
arr[5:8] = 12
print(arr)

# A slice is a view on the original array, not a copy of its data.
arr_slice = arr[5:8]
arr_slice[:] = 64

print('arr: ', arr) # changing the slice also changes the original array's values
print('arr_slice: ', arr_slice)

5
[5 6 7]
[ 0  1  2  3  4 12 12 12  8  9]
arr:  [ 0  1  2  3  4 64 64 64  8  9]
arr_slice:  [64 64 64]


With higher dimensional arrays, you have more options.

In [23]:
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
arr2d[2]  # a single index on a two-dimensional array selects a whole row

array([7, 8, 9])

In [24]:
# Two equivalent ways to reach one element: chained indexing or a
# comma-separated list of indices.
print(arr2d[2][2])
print(arr2d[2, 2])

9
9


In multidimensional arrays, if you omit the later indices, the returned object will be a lower-dimensional ndarray consisting of all the data along the higher dimensions.

In [25]:
# A 2x2x3 array built from nested lists.
arr3d = np.array([[[1,2,3], [4,5,6]], [[7,8,9], [10, 11, 12]]])
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [26]:
arr3d[0]  # only the first index is given, so the result is the first 2x3 sub-array

array([[1, 2, 3],
       [4, 5, 6]])

In [27]:
# Take a copy first so the original values can be restored afterwards.
old_values = arr3d[0].copy()

# Assign a scalar value to every element of the first sub-array.
arr3d[0] = 64
arr3d

array([[[64, 64, 64],
        [64, 64, 64]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [28]:
# Arrays can be assigned as well as scalars: write the saved copy back
# into the first sub-array.
arr3d[0] = old_values
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [29]:
arr2d[:2]  # slice along the first axis: the first two rows

array([[1, 2, 3],
       [4, 5, 6]])

In [30]:
arr2d[:2, 1:]  # the first two rows, columns from index 1 onward

array([[2, 3],
       [5, 6]])

In [31]:
arr2d[1, :2]  # second row, first two columns; the integer index drops that axis

array([4, 5])

In [32]:
# Select the third column, but only from the first two rows.
arr2d[:2, 2]

array([3, 6])

In [33]:
# A bare colon takes the entire axis, so only the column axis is sliced here;
# because both indices are slices the result stays two-dimensional.
arr2d[:, :1]

array([[1],
       [4],
       [7]])

## Boolean Indexing

In [34]:
# Seven names, one for each of the seven rows of `data` created below.
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])

# NOTE(review): no seed is set in the visible cells, so these values change on every run.
data = np.random.randn(7, 4)
data

array([[ 1.50131853e+00, -1.12490070e+00, -1.70362433e+00,
        -1.43187829e-03],
       [-6.89257675e-01,  1.84104465e+00,  6.75925945e-01,
        -1.18901459e+00],
       [ 7.03464617e-01, -4.66442479e-01, -8.13374236e-01,
        -4.72077783e-01],
       [ 4.80373275e-02, -7.76589196e-01, -1.87979159e-01,
        -4.28504853e-01],
       [ 2.78891111e-01, -2.48707258e-01, -9.12340494e-01,
        -7.55059851e-02],
       [ 1.02167062e+00,  3.61987286e-01,  6.47320568e-01,
         8.93650992e-02],
       [-4.88822072e-01,  4.72883920e-01, -6.29989751e-01,
        -4.56234721e-01]])

Suppose we want to select all the rows whose name is 'Bob':

In [35]:
# Comparing the array with a string yields a boolean array that is True
# wherever the name is 'Bob'. It is stored once instead of being computed,
# discarded (a cell only echoes its last expression) and computed again.
is_bob = names == 'Bob'

# Indexing with the boolean array keeps only the rows whose entry is True.
data[is_bob] # the first and fourth rows

array([[ 1.50131853e+00, -1.12490070e+00, -1.70362433e+00,
        -1.43187829e-03],
       [ 4.80373275e-02, -7.76589196e-01, -1.87979159e-01,
        -4.28504853e-01]])

Select everything except 'Bob':

In [36]:
# Keep every row whose name is not 'Bob' (the inequality is the negation
# of the `names == 'Bob'` mask).
data[names != 'Bob']

array([[-0.68925767,  1.84104465,  0.67592595, -1.18901459],
       [ 0.70346462, -0.46644248, -0.81337424, -0.47207778],
       [ 0.27889111, -0.24870726, -0.91234049, -0.07550599],
       [ 1.02167062,  0.36198729,  0.64732057,  0.0893651 ],
       [-0.48882207,  0.47288392, -0.62998975, -0.45623472]])

## Fancy Indexing

We can specify the desired order:

In [37]:
# Build an 8x4 float array in which every entry of row i equals i.
arr = np.empty((8, 4))
# Broadcast a column of row numbers across all four columns; this vectorized
# assignment replaces an explicit Python loop over the rows.
arr[:] = np.arange(8)[:, np.newaxis]

# Passing a list of integers selects those rows, in the order listed.
arr[[4, 3, 0, 6]]

array([[4., 4., 4., 4.],
       [3., 3., 3., 3.],
       [0., 0., 0., 0.],
       [6., 6., 6., 6.]])

Using negative indices selects rows from the end:

In [38]:
arr[[-3, -5, -7]]  # counted back from the end of the 8 rows: rows 5, 3 and 1

array([[5., 5., 5., 5.],
       [3., 3., 3., 3.],
       [1., 1., 1., 1.]])

Passing multiple index arrays does something different: it selects the elements at the paired positions.

Regardless of how many dimensions the array has (here, two), indexing every axis with a one-dimensional integer array always yields a one-dimensional result.

In [39]:
# Rebuild arr as an 8x4 grid holding 0..31, filled row by row.
arr = np.arange(32).reshape(8, 4)

print('arr: ', arr)

# The two index lists are paired element by element, so this picks the
# elements at (1, 0), (5, 3), (7, 1) and (2, 2).
row_idx = [1, 5, 7, 2]
col_idx = [0, 3, 1, 2]
arr[row_idx, col_idx]

arr:  [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]
 [24 25 26 27]
 [28 29 30 31]]


array([ 4, 23, 29, 10])

In [40]:
# Select rows 1, 5, 7 and 2 first, then reorder the columns of that result:
# this gives the rectangular region formed by a subset of the matrix's rows and columns.
arr[[1, 5, 7, 2]][:, [0, 3, 1, 2]]

array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])

## Transposing and Swapping Axes


In [41]:
arr = np.arange(15).reshape((3, 5))
arr.T  # transpose: the 3x5 array is shown as 5x3

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

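Transposes come up often in matrix computations, for example when computing the inner matrix product of an array with itself using `dot`: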

In [42]:
# Random 6x3 matrix (no seed is set here, so the values differ between runs).
arr = np.random.randn(6, 3)

# Matrix product of the transpose (3x6) with the matrix itself (6x3): a 3x3 result.
np.dot(arr.T, arr)

array([[ 8.04950286,  1.30030947,  1.10502496],
       [ 1.30030947,  1.8886346 , -1.00356211],
       [ 1.10502496, -1.00356211,  4.38575261]])

For higher dimensional arrays, transpose will accept a tuple of axis numbers to permute the axes:

In [45]:
arr = np.arange(16).reshape((2, 2, 4))
print(arr)

# The axes are reordered: the second axis comes first, the first axis second,
# and the last axis is left unchanged.
arr.transpose((1, 0, 2))

[[[ 0  1  2  3]
  [ 4  5  6  7]]

 [[ 8  9 10 11]
  [12 13 14 15]]]


array([[[ 0,  1,  2,  3],
        [ 8,  9, 10, 11]],

       [[ 4,  5,  6,  7],
        [12, 13, 14, 15]]])

### swapaxes function

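Simple transposing with `.T` is a special case of swapping axes. The `swapaxes` method takes a pair of axis numbers and switches the indicated axes.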

In [47]:
arr.swapaxes(1, 2)  # exchange the last two axes: shape (2, 2, 4) becomes (2, 4, 2)

array([[[ 0,  4],
        [ 1,  5],
        [ 2,  6],
        [ 3,  7]],

       [[ 8, 12],
        [ 9, 13],
        [10, 14],
        [11, 15]]])

## Universal Functions: Fast Element-wise Array Functions
A *ufunc* (universal function) is a function that performs element-wise operations on the data in ndarrays.

In [53]:
arr = np.arange(10)

# Unary ufuncs take one array and transform each element.
print(np.sqrt(arr))  # square root of each element

print(np.exp(arr))  # e raised to the power of each element

[0.         1.         1.41421356 1.73205081 2.         2.23606798
 2.44948974 2.64575131 2.82842712 3.        ]
[1.00000000e+00 2.71828183e+00 7.38905610e+00 2.00855369e+01
 5.45981500e+01 1.48413159e+02 4.03428793e+02 1.09663316e+03
 2.98095799e+03 8.10308393e+03]


In [52]:
# NOTE(review): no seed is set in the visible cells, so these values change on every run.
x = np.random.randn(8)
y = np.random.randn(8)
print("x: ", x)
print("y: ", y)

# Binary ufunc: takes two arrays and keeps the larger value at each position.
print("np.maximum: ", np.maximum(x, y))

x:  [-0.26570779 -0.26371088  0.68686184 -0.52609841 -0.24222426 -0.86322635
  0.21120893  0.62800258]
y:  [ 0.46495894  0.31932212  1.27071797 -0.47231706  0.88867521 -0.8089495
 -0.05069973  0.38988494]
np.maximum:  [ 0.46495894  0.31932212  1.27071797 -0.47231706  0.88867521 -0.8089495
  0.21120893  0.62800258]


In [55]:
# Random values scaled by 5 so that most have a non-zero whole part.
arr = np.random.randn(7) * 5
print("arr: ", arr)
# np.modf returns two arrays: the fractional parts first, then the whole
# parts; both keep the sign of the input value.
remainder, whole_part = np.modf(arr)

print("remainder: ", remainder)
print("whole_part: ", whole_part)

arr:  [-2.38819465  0.08207963 -0.50617598 -2.95507123  1.29799226  4.15605928
  2.28935057]
remainder:  [-0.38819465  0.08207963 -0.50617598 -0.95507123  0.29799226  0.15605928
  0.28935057]
whole_part:  [-2.  0. -0. -2.  1.  4.  2.]
