# Chapter 4 NumPy Basics: Arrays and Vectorized Computation

In [1]:
import numpy as np

In [2]:
data = np.array([[2,3,4],[4,5,6]])
data*10

array([[20, 30, 40],
       [40, 50, 60]])

In [3]:
data.shape

(2, 3)

In [4]:
data.dtype

dtype('int32')

## 4.1 ndarray
### 4.1.1 Creating ndarray

In [6]:
data1 = [6,7.5,8,0,1]
arr1 = np.array(data1)
arr1

array([6. , 7.5, 8. , 0. , 1. ])

In [9]:
data2 = [[1,2,3,4],[5,6,7,8]]
arr2 = np.array(data2)
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [10]:
arr2.ndim

2

In [11]:
arr2.shape

(2, 4)

In [12]:
arr2.dtype

dtype('int32')

In [13]:
#create a (2,3) ndarray with 0s
np.zeros((2,3))

array([[0., 0., 0.],
       [0., 0., 0.]])

In [14]:
#create a (1,4)ndarray with 1s
np.ones((1,4))

array([[1., 1., 1., 1.]])

In [16]:
# Create a (2,3,2) ndarray WITHOUT initializing its contents: np.empty only
# allocates the memory, so the values shown are arbitrary leftovers, not random numbers.
np.empty((2,3,2))

array([[[1.46524926e-311, 3.16202013e-322],
        [0.00000000e+000, 0.00000000e+000],
        [1.89146896e-307, 4.31181435e-033]],

       [[1.06144942e-046, 1.46605010e-075],
        [2.23239670e+180, 1.65615171e-047],
        [1.00517899e-070, 2.67897793e-032]]])

In [19]:
#array-valued range
np.arange(15)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [21]:
list(range(15))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]

In [23]:
#create a square N*N identity matrix
np.eye(2)

array([[1., 0.],
       [0., 1.]])

### 4.1.2 Data Types for ndarrays

In [28]:
data2 = [['1','2'],['3','4']]
arr2 = np.array(data2)
arr2

array([['1', '2'],
       ['3', '4']], dtype='<U1')

In [29]:
arr2.dtype

dtype('<U1')

In [32]:
#use arr.astype() to change data type of the ndarray
arr2 = arr2.astype(np.float16)
arr2

array([[1., 2.],
       [3., 4.]], dtype=float16)

### 4.1.3 Operations Between Arrays and Scalars

In [37]:
1/arr2

array([[1.    , 0.5   ],
       [0.3333, 0.25  ]], dtype=float16)

### 4.1.4 Basic Indexing and Slicing

In [39]:
arr3 = np.arange(10)
arr3

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [40]:
#1-dimensional array slicing: the same as list
arr3[2:5]

array([2, 3, 4])

In [41]:
arr3[8:]=10
arr3

array([ 0,  1,  2,  3,  4,  5,  6,  7, 10, 10])

In [47]:
#array slices are views on the original array. This means that
#the data is not copied, and any modifications to the view will be reflected in the source array
arr_sl = arr3[:3]
arr_sl[:3]=1000
arr3              #changing arr_sl will also change arr3

array([1000, 1000, 1000,    3,    4,    5,    6,    7,   10,   10])

In [49]:
#arr.copy() to really copy the array
arr_s = arr3[:3].copy()
arr_s[:3]=8888
#changing arr_s will not change arr3 because of the copy. 
print(arr_s,arr3)

[8888 8888 8888] [1000 1000 1000    3    4    5    6    7   10   10]


In [50]:
#the same features apply to multidimensional array as well.
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
arr2d[0]

array([1, 2, 3])

In [51]:
arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
arr3d[0]

array([[1, 2, 3],
       [4, 5, 6]])

In [52]:
arr3d[0]=888
arr3d

array([[[888, 888, 888],
        [888, 888, 888]],

       [[  7,   8,   9],
        [ 10,  11,  12]]])

In [53]:
arr3d[1,1]

array([10, 11, 12])

In [54]:
arr3d[1,1,2]

12

In [56]:
arr3d

array([[[888, 888, 888],
        [888, 888, 888]],

       [[  7,   8,   9],
        [ 10,  11,  12]]])

In [58]:
arr3d[1:,1:]

array([[[10, 11, 12]]])

### 4.1.5 Boolean Indexing

In [59]:
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])

In [62]:
data = np.random.randn(7,4)
data

array([[-1.56992665, -1.00001343, -0.86964957, -0.3993106 ],
       [ 0.2822295 , -1.21674227, -1.42109866, -0.9266151 ],
       [ 0.684471  ,  0.60672401,  1.08628593,  1.22635205],
       [ 0.34954163, -1.37691162, -0.50209029,  0.68999291],
       [ 0.16649509, -1.25837929,  0.0261368 , -0.69970378],
       [-1.25257818, -0.12620997, -2.04252086,  2.0648896 ],
       [-0.12513561, -2.23069525,  1.59048565, -0.08139282]])

In [63]:
data[names=='Bob']

array([[-1.56992665, -1.00001343, -0.86964957, -0.3993106 ],
       [ 0.34954163, -1.37691162, -0.50209029,  0.68999291]])

In [64]:
data[names!='Bob']

array([[ 0.2822295 , -1.21674227, -1.42109866, -0.9266151 ],
       [ 0.684471  ,  0.60672401,  1.08628593,  1.22635205],
       [ 0.16649509, -1.25837929,  0.0261368 , -0.69970378],
       [-1.25257818, -0.12620997, -2.04252086,  2.0648896 ],
       [-0.12513561, -2.23069525,  1.59048565, -0.08139282]])

In [67]:
data[~(names=='Bob')]

array([[ 0.2822295 , -1.21674227, -1.42109866, -0.9266151 ],
       [ 0.684471  ,  0.60672401,  1.08628593,  1.22635205],
       [ 0.16649509, -1.25837929,  0.0261368 , -0.69970378],
       [-1.25257818, -0.12620997, -2.04252086,  2.0648896 ],
       [-0.12513561, -2.23069525,  1.59048565, -0.08139282]])

In [68]:
data[~(names=='Bob'),2:]

array([[-1.42109866, -0.9266151 ],
       [ 1.08628593,  1.22635205],
       [ 0.0261368 , -0.69970378],
       [-2.04252086,  2.0648896 ],
       [ 1.59048565, -0.08139282]])

In [69]:
# The Python keywords `and` / `or` do not work element-wise on boolean arrays;
# combine masks with `&` (and) and `|` (or), parenthesizing each comparison.
con = (names=='Joe')|(names=='Will')
data[con]

array([[ 0.2822295 , -1.21674227, -1.42109866, -0.9266151 ],
       [ 0.684471  ,  0.60672401,  1.08628593,  1.22635205],
       [ 0.16649509, -1.25837929,  0.0261368 , -0.69970378],
       [-1.25257818, -0.12620997, -2.04252086,  2.0648896 ],
       [-0.12513561, -2.23069525,  1.59048565, -0.08139282]])

In [70]:
data[data<0]

array([-1.56992665, -1.00001343, -0.86964957, -0.3993106 , -1.21674227,
       -1.42109866, -0.9266151 , -1.37691162, -0.50209029, -1.25837929,
       -0.69970378, -1.25257818, -0.12620997, -2.04252086, -0.12513561,
       -2.23069525, -0.08139282])

In [72]:
data[data<0] = 0
data

array([[0.        , 0.        , 0.        , 0.        ],
       [0.2822295 , 0.        , 0.        , 0.        ],
       [0.684471  , 0.60672401, 1.08628593, 1.22635205],
       [0.34954163, 0.        , 0.        , 0.68999291],
       [0.16649509, 0.        , 0.0261368 , 0.        ],
       [0.        , 0.        , 0.        , 2.0648896 ],
       [0.        , 0.        , 1.59048565, 0.        ]])

In [73]:
data[names=='Bob']=8
data

array([[8.        , 8.        , 8.        , 8.        ],
       [0.2822295 , 0.        , 0.        , 0.        ],
       [0.684471  , 0.60672401, 1.08628593, 1.22635205],
       [8.        , 8.        , 8.        , 8.        ],
       [0.16649509, 0.        , 0.0261368 , 0.        ],
       [0.        , 0.        , 0.        , 2.0648896 ],
       [0.        , 0.        , 1.59048565, 0.        ]])

In [74]:
# Selecting data from an array by boolean indexing always creates a copy of the
# data, so writing into the selection must leave `data` untouched.
test = data[names=='Bob']
# Write in place with test[:]; a plain `test = 16` would merely rebind the name
# `test` to an int and would not exercise the copy at all.
test[:] = 16
data

array([[8.        , 8.        , 8.        , 8.        ],
       [0.2822295 , 0.        , 0.        , 0.        ],
       [0.684471  , 0.60672401, 1.08628593, 1.22635205],
       [8.        , 8.        , 8.        , 8.        ],
       [0.16649509, 0.        , 0.0261368 , 0.        ],
       [0.        , 0.        , 0.        , 2.0648896 ],
       [0.        , 0.        , 1.59048565, 0.        ]])

In [75]:
# A boolean mask must have the same length as the axis it indexes. Here `names`
# has 7 entries but `data` has only 4 rows, so indexing `data` with a mask built
# from `names` raises IndexError.
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
data = np.random.randn(4,4)

In [76]:
data[names=='Joe']

IndexError: boolean index did not match indexed array along dimension 0; dimension is 4 but corresponding boolean dimension is 7

In [77]:
data[names=='Bob']

IndexError: boolean index did not match indexed array along dimension 0; dimension is 4 but corresponding boolean dimension is 7

### 4.1.6 Fancy Indexing

In [8]:
import numpy as np
# Build a 6x4 float array in which every entry of row i equals i.
arr = np.empty((6, 4))
for i in range(arr.shape[0]):
    arr[i, :] = i  # the scalar is broadcast across the whole row
arr

array([[0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [3., 3., 3., 3.],
       [4., 4., 4., 4.],
       [5., 5., 5., 5.]])

In [9]:
#simply pass a list or ndarray of integers specifying the desired order:
arr[[1,5,4]]

array([[1., 1., 1., 1.],
       [5., 5., 5., 5.],
       [4., 4., 4., 4.]])

In [10]:
#Using negative indices select rows from the end
arr[[-3,-5]]

array([[3., 3., 3., 3.],
       [1., 1., 1., 1.]])

In [15]:
arr = np.arange(32).reshape((8,4))
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [16]:
# Passing several index arrays pairs them up element-wise: this selects the
# elements at positions (1, 0) and (3, 1) and returns them as a 1-D array.
arr[[1,3],[0,1]]

array([ 4, 13])

In [25]:
#the way to select a square region, different from the one above:
arr[[1,3]][:,[0,1]]

array([[ 4,  5],
       [12, 13]])

In [26]:
#using np.ix_ to get the same result by selecting a square region:
arr[np.ix_([1,3],[0,1])]

array([[ 4,  5],
       [12, 13]])

### 4.1.7 Transposing Arrays and Swapping Axes

In [29]:
arr = np.arange(12).reshape((3,4))
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [30]:
#Transposing
arr.T

array([[ 0,  4,  8],
       [ 1,  5,  9],
       [ 2,  6, 10],
       [ 3,  7, 11]])

In [31]:
# Compute the matrix product X X^T with np.dot. arr is 3x4, so the result is 3x3;
# X^T X would instead be np.dot(arr.T, arr) and give a 4x4 matrix.
np.dot(arr,arr.T)

array([[ 14,  38,  62],
       [ 38, 126, 214],
       [ 62, 214, 366]])

In [32]:
arr = np.arange(16).reshape((2, 2, 4))
arr

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])

In [33]:
#For higher dimensional arrays, transpose will accept a tuple of axis numbers to permute the axes
arr.transpose((1, 0, 2))

array([[[ 0,  1,  2,  3],
        [ 8,  9, 10, 11]],

       [[ 4,  5,  6,  7],
        [12, 13, 14, 15]]])

In [35]:
# swapaxes takes a pair of axis numbers and exchanges them: swapping axes 1 and 2
# turns the (2, 2, 4) array into a (2, 4, 2) one. For a 2-D array, arr.T is the
# same as arr.swapaxes(0, 1).
arr.swapaxes(1,2)

array([[[ 0,  4],
        [ 1,  5],
        [ 2,  6],
        [ 3,  7]],

       [[ 8, 12],
        [ 9, 13],
        [10, 14],
        [11, 15]]])

## 4.2 Universal Functions: Fast Element-wise Array Functions

In [36]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [37]:
np.sqrt(arr)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ])

In [38]:
np.exp(arr)

array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
       2.98095799e+03, 8.10308393e+03])

In [42]:
arr1 = np.random.randn(10)
arr2 = np.random.randn(10)
print(arr1, arr2)

[-0.78603208 -0.42644553 -0.33913205 -0.33694152  0.28347007  0.63655634
  1.62051896 -0.17159606 -0.21971768  1.87358015] [-0.49006499  0.52034803 -0.81398938  0.25132747  0.1517171  -0.0048776
 -0.63151183 -0.82107553  1.76792108 -1.22637048]


In [43]:
# np.maximum is a binary ufunc: it takes two arrays and returns a single array
# holding the element-wise maximum.
np.maximum(arr1,arr2)

array([-0.49006499,  0.52034803, -0.33913205,  0.25132747,  0.28347007,
        0.63655634,  1.62051896, -0.17159606,  1.76792108,  1.87358015])

In [44]:
# np.modf returns two arrays: the fractional parts and the integral parts of a
# floating point array (each keeps the sign of the input).
np.modf(arr1)

(array([-0.78603208, -0.42644553, -0.33913205, -0.33694152,  0.28347007,
         0.63655634,  0.62051896, -0.17159606, -0.21971768,  0.87358015]),
 array([-0., -0., -0., -0.,  0.,  0.,  1., -0., -0.,  1.]))

In [45]:
np.abs(arr1)

array([0.78603208, 0.42644553, 0.33913205, 0.33694152, 0.28347007,
       0.63655634, 1.62051896, 0.17159606, 0.21971768, 1.87358015])

In [46]:
np.ceil(arr1)

array([-0., -0., -0., -0.,  1.,  1.,  2., -0., -0.,  2.])

In [47]:
np.greater(arr1,arr2)

array([False, False,  True, False,  True,  True,  True,  True, False,
        True])

In [48]:
arr3 = np.random.randn(10)
# NOTE: np.maximum is a binary ufunc, so a third positional argument is NOT a
# third operand -- it is the `out` array. The call therefore computes
# max(arr1, arr2) element-wise and overwrites arr3 with that result; the random
# values drawn above are discarded. `out=` is spelled out to make this visible.
# For an element-wise maximum over three arrays use
# np.maximum.reduce([arr1, arr2, arr3]) instead.
np.maximum(arr1, arr2, out=arr3)

array([-0.49006499,  0.52034803, -0.33913205,  0.25132747,  0.28347007,
        0.63655634,  1.62051896, -0.17159606,  1.76792108,  1.87358015])