One of the key features of NumPy is its n-dimensional array object, or ndarray, which is a fast, flexible container for large datasets in Python. Arrays enable you to perform mathematical operations on whole blocks of data using syntax similar to the equivalent operations between scalar elements.

In [1]:
import numpy as np

# Draw the sample from a seeded Generator so the printed values are the same on
# every Restart & Run All (the unseeded global np.random state is not reproducible).
rng = np.random.default_rng(seed=0)
data = rng.standard_normal((2, 3))
print(data)
# Arithmetic on an ndarray is applied element by element to the whole block of data.
print(data * 10)
print(data + data)
# An ndarray is a generic multidimensional container for homogeneous data:
# all of its elements must be the same type.
print(data.shape)  # size of each dimension
print(data.dtype)  # data type shared by every element

[[-0.18362671  0.279699   -1.08547965]
 [-0.55566853  2.08891049 -0.64020807]]
[[ -1.83626706   2.79698999 -10.85479651]
 [ -5.55668533  20.88910486  -6.40208067]]
[[-0.36725341  0.559398   -2.1709593 ]
 [-1.11133707  4.17782097 -1.28041613]]


In [4]:
# Creating ndarrays from ordinary Python sequences with np.array.
# A flat list gives a 1-D array; the single float in the list makes the
# whole array floating point.
data1 = [6, 7.5, 8, 0, 1]
arr1 = np.array(data1)
# A list of equal-length lists gives a multidimensional (here 2-D) array.
data2 = [[1, 2, 3, 4], [5, 6, 7, 8]]
arr2 = np.array(data2)

# Show both arrays, then the rank and shape of the nested one, then the
# dtype np.array inferred for each -- one value per line.
print(arr1, arr2, sep="\n")
print(arr2.ndim, arr2.shape, sep="\n")
print(arr1.dtype, arr2.dtype, sep="\n")

[6.  7.5 8.  0.  1. ]
[[1 2 3 4]
 [5 6 7 8]]
2
(2, 4)
float64
int32


In [15]:
# np.zeros (and likewise np.ones) creates an array filled with 0s (or 1s).
# The result is float64 unless another dtype is requested.
zeros_int = np.zeros(shape=10, dtype=int)
zeros_ = np.zeros(shape=10)
print(zeros_int, zeros_int.dtype)
print(zeros_, zeros_.dtype)
# Passing a tuple as the shape gives a multidimensional array.
print(np.zeros(shape=(3, 6)))
# np.empty allocates the array without initializing its values, so it is not
# safe to assume the result is all zeros -- the contents are arbitrary.
print(np.empty(shape=(2, 3, 2)))
# np.arange is the array-valued version of the built-in Python range function.
print(np.arange(0, 15))
# Related constructors:
#   ones_like: takes another array and produces a ones array of the same shape and dtype
#   full:      produces an array of the given shape and dtype with every value
#              set to the indicated "fill value"
#   eye:       N x M array with 1s on the diagonal and 0s elsewhere (3 x 4 below)
#   identity:  square N x N identity matrix
print(np.eye(N=3, M=4), np.identity(n=3), sep="\n")

[0 0 0 0 0 0 0 0 0 0] int32
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] float64
[[0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]]
[[[1. 0.]
  [0. 0.]
  [0. 1.]]

 [[0. 0.]
  [0. 0.]
  [1. 0.]]]
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14]
[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]]
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


In [19]:
# Data types for ndarrays.
# Frequently used dtypes include np.float64, np.int32 and the complex types
# np.complex64 / np.complex128 (np.complex256 is only available on some platforms).
# astype() casts an array from one dtype to another.
arr = np.array([1, 2, 3, 4, 5])
print(arr.dtype)
float_arr = arr.astype(np.float64)
print(float_arr.dtype)
# An array of strings representing numbers can be converted to numeric form
# with astype. np.bytes_ is used for the fixed-size byte-string dtype because
# the old np.string_ alias was removed in NumPy 2.0.
# Be cautious with fixed-size string dtypes: NumPy may truncate input without warning.
numeric_strings = np.array(['1.25', '-9.6', '42'], dtype=np.bytes_)
# By default astype creates a new array, even when the new dtype is the same as
# the old one. The cast below succeeds because every element parses as a float;
# a string that cannot be converted to float64 would raise a ValueError.
numeric_strings.astype(float)

int32
float64


array([ 1.25, -9.6 , 42.  ])

In [26]:
"""
Arrays are important because they enable you to express batch operations on data without writing any "for" loops; this practice is called vectorization.
"""
# A small float64 matrix used for the element-wise arithmetic examples.
arr = np.array([[1., 2., 3.], [4., 5., 6.]])
print(arr)
# Arithmetic between arrays of equal shape is applied element by element.
print(np.multiply(arr, arr))
print(np.subtract(arr, arr))
# With a scalar operand, the scalar is combined with every element.
print(np.divide(1, arr))
# Comparisons between arrays of the same size yield boolean arrays.
arr2 = np.array([[0., 4., 1.], [7., 2., 12.]])
print(arr2.dtype, arr2, sep="\n")
np.greater(arr2, arr)

[[1. 2. 3.]
 [4. 5. 6.]]
[[ 1.  4.  9.]
 [16. 25. 36.]]
[[0. 0. 0.]
 [0. 0. 0.]]
[[1.         0.5        0.33333333]
 [0.25       0.2        0.16666667]]
float64
[[ 0.  4.  1.]
 [ 7.  2. 12.]]


array([[False,  True, False],
       [ True, False,  True]])

In [31]:
"""
NumPy array indexing is a rich topic
"""
# One-dimensional arrays index and slice much like Python lists.
arr = np.arange(10)
print(arr, arr[5], arr[5:8], sep="\n")
# Assigning a scalar to a slice propagates (broadcasts) the value to the
# entire selection.
arr[5:8] = 23
print(arr)
# Array slices are views on the original array: the data is not copied, so
# any modification made through the view is reflected in the source array.
arr_slice = arr[5:8]
print(arr_slice)
arr_slice[1] = 233
print(arr)
# Ask for a copy explicitly when an independent array is wanted; changes to
# the copy leave the source array untouched.
arr_copied = np.copy(arr[5:8])
print(arr_copied)
arr_copied[1] = 23
print(arr_copied, arr, sep="\n")

[0 1 2 3 4 5 6 7 8 9]
5
[5 6 7]
[ 0  1  2  3  4 23 23 23  8  9]
[23 23 23]
[  0   1   2   3   4  23 233  23   8   9]
[ 23 233  23]
[23 23 23]
[  0   1   2   3   4  23 233  23   8   9]


In [32]:
# In a two-dimensional array a single index selects a whole row (a 1-D array).
arr2d = np.arange(1, 10).reshape(3, 3)
print(arr2d[2])
# An individual element can be reached by indexing twice or, equivalently,
# by passing a comma-separated list of indices.
print(arr2d[0][2], arr2d[0, 2])
# Slices can be given per axis as well: rows from index 1 onward, first two columns.
print(arr2d[1:, :2])

[7 8 9]
3 3


In [6]:
import numpy as np
# Boolean indexing: each of the 7 names labels one row of the 7 x 4 data array.
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
# Seeded Generator so the random rows (and every selection printed below) are
# the same on every Restart & Run All.
rng = np.random.default_rng(seed=0)
data = rng.standard_normal((7, 4))
print(names)
# Comparing the array with a scalar yields a boolean array, one flag per name.
print(names == 'Bob')
print(data)
# A boolean array can be passed when indexing the array: it selects the rows
# whose flag is True, and can be combined with a slice on the other axis.
print(data[names == 'Bob'])
print(data[names == 'Bob', :2])
cond = names == 'Bob'
print(data[~cond])  # the ~ operator inverts a general condition
# Combine conditions with | (or) and & (and); each comparison needs its own parentheses.
mask = (names == 'Bob') | (names == 'Will')
print(mask)
print(data[mask])
# Setting values with boolean arrays works in a common-sense way.
# To set all of the negative values in data to 0 we need only do:
data[data < 0] = 0
# Setting whole rows or columns with a one-dimensional boolean array is just
# as easy, e.g. data[names != 'Joe'] = 7.

['Bob' 'Joe' 'Will' 'Bob' 'Will' 'Joe' 'Joe']
[ True False False  True False False False]
[[ 0.26239774  1.50561548 -0.421066    0.45083176]
 [-0.06576134  0.87952908 -1.9085828  -0.39794606]
 [ 0.08709249  0.17193017  0.64598925 -0.33727065]
 [ 0.77748831 -0.12773154 -1.35663818 -0.47997675]
 [-1.23570473  0.39664837 -0.43904985 -0.87115413]
 [-1.03589337  0.20914278 -0.90238772 -0.39897316]
 [-1.70337297 -1.00744276  0.35979811 -0.66685602]]
[[ 0.26239774  1.50561548 -0.421066    0.45083176]
 [ 0.77748831 -0.12773154 -1.35663818 -0.47997675]]
[[ 0.26239774  1.50561548]
 [ 0.77748831 -0.12773154]]
[[-0.06576134  0.87952908 -1.9085828  -0.39794606]
 [ 0.08709249  0.17193017  0.64598925 -0.33727065]
 [-1.23570473  0.39664837 -0.43904985 -0.87115413]
 [-1.03589337  0.20914278 -0.90238772 -0.39897316]
 [-1.70337297 -1.00744276  0.35979811 -0.66685602]]
[ True False  True  True  True False False]


In [11]:
# "Fancy indexing" is the term NumPy uses for indexing with integer arrays (or lists).
# Build an 8 x 4 array in which every row is filled with its own row number.
arr = np.empty((8, 4))
arr[...] = np.arange(8).reshape(8, 1)
print(arr)
# A list of row indices selects those rows in the requested order; negative
# indices count from the end.
print(arr[[4, 3, 0, 6]], arr[[-3, -5, -7]], sep="\n")
# Passing multiple index arrays does something different: it selects a
# one-dimensional array of elements corresponding to each tuple of indices,
# here (1, 0), (5, 3), (7, 1) and (2, 2).
arr = np.arange(32).reshape((8, 4))
print(arr)
row_ids = [1, 5, 7, 2]
col_ids = [0, 3, 1, 2]
print(arr[row_ids, col_ids])
# Indexing in two steps instead picks the rows first and then reorders the
# columns, giving a rectangular block.
print(arr[row_ids][:, col_ids])
# Unlike slicing, fancy indexing always copies the data into a new array.

[[0. 0. 0. 0.]
 [1. 1. 1. 1.]
 [2. 2. 2. 2.]
 [3. 3. 3. 3.]
 [4. 4. 4. 4.]
 [5. 5. 5. 5.]
 [6. 6. 6. 6.]
 [7. 7. 7. 7.]]
[[4. 4. 4. 4.]
 [3. 3. 3. 3.]
 [0. 0. 0. 0.]
 [6. 6. 6. 6.]]
[[5. 5. 5. 5.]
 [3. 3. 3. 3.]
 [1. 1. 1. 1.]]
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]
 [24 25 26 27]
 [28 29 30 31]]
[ 4 23 29 10]
[[ 4  7  5  6]
 [20 23 21 22]
 [28 31 29 30]
 [ 8 11  9 10]]


In [17]:
# Transposing arrays and swapping axes.
arr = np.arange(15).reshape(3, 5)
print(arr, arr.T, sep="\n")
# The transpose is typically needed for matrix computations such as the
# inner matrix product of arr with itself, computed here with dot.
print(arr.T.dot(arr))
arr = np.arange(16).reshape(2, 2, 4)
print(arr)
# For higher-dimensional arrays transpose takes a permutation of the axis
# numbers: (1, 0, 2) exchanges the first two axes and keeps the last in place.
print(np.transpose(arr, (1, 0, 2)))
# swapaxes exchanges a pair of axes and returns a view without making a copy.
print(np.swapaxes(arr, 1, 2))

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]]
[[ 0  5 10]
 [ 1  6 11]
 [ 2  7 12]
 [ 3  8 13]
 [ 4  9 14]]
[[125 140 155 170 185]
 [140 158 176 194 212]
 [155 176 197 218 239]
 [170 194 218 242 266]
 [185 212 239 266 293]]
[[[ 0  1  2  3]
  [ 4  5  6  7]]

 [[ 8  9 10 11]
  [12 13 14 15]]]
[[[ 0  1  2  3]
  [ 8  9 10 11]]

 [[ 4  5  6  7]
  [12 13 14 15]]]
[[[ 0  4]
  [ 1  5]
  [ 2  6]
  [ 3  7]]

 [[ 8 12]
  [ 9 13]
  [10 14]
  [11 15]]]
