# 第4章_NumPy基础：数组和矢量计算
## NumPy 的 ndarray：一种多维数组对象

### 创建 ndarray

In [4]:
import numpy as np

In [2]:
data1 = [6, 7.5, 8, 0, 1]

In [5]:
arr1 = np.array(data1)

In [6]:
arr1

array([ 6. ,  7.5,  8. ,  0. ,  1. ])

In [7]:
data2 = [[1, 2, 3, 4], [5, 6, 7, 8]]

In [8]:
arr2 = np.array(data2)

In [9]:
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [10]:
arr2.ndim

2

In [11]:
arr2.shape

(2, 4)

In [12]:
arr1.dtype

dtype('float64')

In [13]:
arr2.dtype

dtype('int64')

In [14]:
np.zeros(10)

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

In [17]:
np.zeros((3, 6))

array([[ 0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.]])

In [19]:
np.empty((2, 3, 2))

array([[[  1.49166815e-154,  -2.68679353e+154],
        [  2.37663529e-312,   2.56761491e-312],
        [  8.48798317e-313,   9.33678148e-313]],

       [[  1.08221785e-312,   6.79038653e-313],
        [  8.70018275e-313,   5.79467237e+170],
        [  3.99910963e+252,   8.34402697e-309]]])

In [21]:
np.arange(15)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

### ndarray 的数据类型

In [22]:
arr1 = np.array([1, 2, 3], dtype=np.float64)

In [27]:
arr2 = np.array([1, 2, 3], dtype=np.int32)

In [25]:
arr1.dtype

dtype('float64')

In [28]:
arr2.dtype

dtype('int32')

In [31]:
arr = np.array([1, 2, 3, 4, 5])

In [32]:
arr.dtype

dtype('int64')

In [34]:
float_arr = arr.astype(np.float64)

In [35]:
float_arr.dtype

dtype('float64')

In [36]:
arr.dtype

dtype('int64')

In [37]:
arr = np.array([3.7, -1.2, -2.6, 0.5, 12.9, 10.1])

In [38]:
arr

array([  3.7,  -1.2,  -2.6,   0.5,  12.9,  10.1])

In [40]:
arr.astype(np.int32)
# Casting floats to an integer dtype truncates the fractional part (toward zero).

array([ 3, -1, -2,  0, 12, 10], dtype=int32)

In [43]:
# Byte-string array of numeric literals. np.bytes_ is the supported name for the
# fixed-width bytes dtype; the old np.string_ alias was removed in NumPy 2.0.
numeric_strings = np.array(['1.25', '-9.6', '42'], dtype=np.bytes_)

In [49]:
numeric_strings.astype(dtype=np.float64).dtype

dtype('float64')

In [50]:
numeric_strings.astype(dtype=float).dtype

dtype('float64')

In [51]:
int_array = np.arange(10)

In [52]:
int_array

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [54]:
calibers = np.array([.22, .270, .357, .380, .44, .50], dtype=np.float64)

In [55]:
int_array.astype(calibers.dtype)

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.])

In [57]:
empty_uint32 = np.empty(8, dtype='u4')

In [58]:
empty_uint32

array([         0, 1075314688,          0, 1075707904,          0,
       1075838976,          0, 1072693248], dtype=uint32)

### 数组和标量之间的运算

In [60]:
arr = np.array([[1., 2., 3.], [4., 5., 6.]])

In [61]:
arr

array([[ 1.,  2.,  3.],
       [ 4.,  5.,  6.]])

In [62]:
arr * arr

array([[  1.,   4.,   9.],
       [ 16.,  25.,  36.]])

In [63]:
arr ** 2

array([[  1.,   4.,   9.],
       [ 16.,  25.,  36.]])

In [64]:
arr - arr

array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.]])

In [65]:
1 / arr

array([[ 1.        ,  0.5       ,  0.33333333],
       [ 0.25      ,  0.2       ,  0.16666667]])

In [66]:
arr ** 0.5

array([[ 1.        ,  1.41421356,  1.73205081],
       [ 2.        ,  2.23606798,  2.44948974]])

### 基本的索引和切片

In [67]:
arr = np.arange(10)

In [68]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [69]:
arr[5]

5

In [70]:
arr[5: 8]

array([5, 6, 7])

In [71]:
arr[5: 8] = 12

In [72]:
arr

array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])

In [73]:
arr_slice = arr[5: 8]

In [74]:
arr_slice

array([12, 12, 12])

In [77]:
arr_slice[1] = 12345

In [79]:
arr_slice

array([   12, 12345,    12])

In [78]:
arr

array([    0,     1,     2,     3,     4,    12, 12345,    12,     8,     9])

In [80]:
arr_slice[:] = 64

In [81]:
arr

array([ 0,  1,  2,  3,  4, 64, 64, 64,  8,  9])

In [82]:
arr_slice2 = arr[5: 8]

In [83]:
arr_slice2

array([64, 64, 64])

In [84]:
arr_slice2 = 0

In [85]:
arr_slice2

0

In [86]:
arr

array([ 0,  1,  2,  3,  4, 64, 64, 64,  8,  9])

In [87]:
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

In [88]:
arr2d[2]

array([7, 8, 9])

In [89]:
arr2d[0][2]

3

In [90]:
arr2d[0, 2]

3

In [91]:
arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])

In [92]:
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [93]:
arr3d[0]

array([[1, 2, 3],
       [4, 5, 6]])

In [95]:
old_values = arr3d[0].copy()

In [96]:
arr3d[0] = 42

In [97]:
arr3d

array([[[42, 42, 42],
        [42, 42, 42]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [100]:
arr3d[0] = old_values

In [101]:
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [102]:
arr3d[1, 0]

array([7, 8, 9])

#### 切片索引

In [105]:
arr[1: 6]

array([ 1,  2,  3,  4, 64])

In [106]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [109]:
arr2d[: 2]

array([[1, 2, 3],
       [4, 5, 6]])

In [108]:
arr2d[: 2, 1:]

array([[2, 3],
       [5, 6]])

In [110]:
arr2d[: 2][1:]

array([[4, 5, 6]])

In [112]:
arr2d[1, :2]

array([4, 5])

In [116]:
arr2d[:1, :2]

array([[1, 2]])

In [117]:
arr2d[2, :1]

array([7])

In [118]:
arr2d[:, :1]

array([[1],
       [4],
       [7]])

In [120]:
arr2d[2, 1:] = 0

In [121]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 0, 0]])

### 布尔型索引

In [131]:
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])

In [124]:
data = np.random.randn(7, 4)

In [132]:
names

array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], 
      dtype='<U4')

In [125]:
data

array([[-0.29318409, -0.14232978, -0.18910031, -1.68378314],
       [ 0.41217842,  1.77061584,  1.02559452,  1.04043027],
       [-0.24125081, -0.16887629,  0.21315701, -0.93995218],
       [ 1.01435621,  0.79018092, -0.5315254 ,  1.03949619],
       [ 1.48060455,  0.12797672, -1.2676072 ,  0.80664928],
       [-1.09434741, -0.26976141, -0.87272221, -2.59640712],
       [-0.27152476,  1.44287419,  0.06347334, -0.12319806]])

In [133]:
names == 'Bob'

array([ True, False, False,  True, False, False, False], dtype=bool)

In [134]:
data[names == 'Bob']

array([[-0.29318409, -0.14232978, -0.18910031, -1.68378314],
       [ 1.01435621,  0.79018092, -0.5315254 ,  1.03949619]])

In [135]:
data[names == 'Bob', 2:]

array([[-0.18910031, -1.68378314],
       [-0.5315254 ,  1.03949619]])

In [137]:
data[names == 'Bob', 3]

array([-1.68378314,  1.03949619])

In [139]:
data[names == 'Bob', 3:]

array([[-1.68378314],
       [ 1.03949619]])

In [140]:
names != 'Bob'

array([False,  True,  True, False,  True,  True,  True], dtype=bool)

In [141]:
# Negate the boolean mask with np.logical_not: unary minus on a boolean array
# was deprecated and raises TypeError in current NumPy releases.
data[np.logical_not(names == 'Bob')]


array([[ 0.41217842,  1.77061584,  1.02559452,  1.04043027],
       [-0.24125081, -0.16887629,  0.21315701, -0.93995218],
       [ 1.48060455,  0.12797672, -1.2676072 ,  0.80664928],
       [-1.09434741, -0.26976141, -0.87272221, -2.59640712],
       [-0.27152476,  1.44287419,  0.06347334, -0.12319806]])

In [142]:
data[~(names == 'Bob')]

array([[ 0.41217842,  1.77061584,  1.02559452,  1.04043027],
       [-0.24125081, -0.16887629,  0.21315701, -0.93995218],
       [ 1.48060455,  0.12797672, -1.2676072 ,  0.80664928],
       [-1.09434741, -0.26976141, -0.87272221, -2.59640712],
       [-0.27152476,  1.44287419,  0.06347334, -0.12319806]])

In [149]:
mask = (names == 'Bob') | (names == 'Will')
# Bitwise OR operator: a result bit is 1 when at least one of the two corresponding bits is 1.

In [150]:
mask

array([ True, False,  True,  True,  True, False, False], dtype=bool)

In [151]:
data[mask]

array([[-0.29318409, -0.14232978, -0.18910031, -1.68378314],
       [-0.24125081, -0.16887629,  0.21315701, -0.93995218],
       [ 1.01435621,  0.79018092, -0.5315254 ,  1.03949619],
       [ 1.48060455,  0.12797672, -1.2676072 ,  0.80664928]])

In [152]:
mask2 = (names == 'Bob' or 'Will')
# The Python keywords `and` and `or` do not work with boolean arrays; this line raises ValueError.

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [153]:
data[data < 0] = 0

In [154]:
data

array([[ 0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.41217842,  1.77061584,  1.02559452,  1.04043027],
       [ 0.        ,  0.        ,  0.21315701,  0.        ],
       [ 1.01435621,  0.79018092,  0.        ,  1.03949619],
       [ 1.48060455,  0.12797672,  0.        ,  0.80664928],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  1.44287419,  0.06347334,  0.        ]])

In [155]:
data[names != 'Joe'] = 7

In [156]:
data

array([[ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 0.41217842,  1.77061584,  1.02559452,  1.04043027],
       [ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  1.44287419,  0.06347334,  0.        ]])

In [157]:
arr = np.empty((8, 4))

In [159]:
# Fill every column of row i with the value i, so each row is recognisable
# by its contents in the indexing examples that follow.
for i in range(8):
    arr[i, :] = i

In [160]:
arr

array([[ 0.,  0.,  0.,  0.],
       [ 1.,  1.,  1.,  1.],
       [ 2.,  2.,  2.,  2.],
       [ 3.,  3.,  3.,  3.],
       [ 4.,  4.,  4.,  4.],
       [ 5.,  5.,  5.,  5.],
       [ 6.,  6.,  6.,  6.],
       [ 7.,  7.,  7.,  7.]])

In [161]:
arr2 = np.empty((8, 4))

In [164]:
arr[[3, 4, 0, 6]]

array([[ 3.,  3.,  3.,  3.],
       [ 4.,  4.,  4.,  4.],
       [ 0.,  0.,  0.,  0.],
       [ 6.,  6.,  6.,  6.]])

In [165]:
arr[[-3, -5, -7]]

array([[ 5.,  5.,  5.,  5.],
       [ 3.,  3.,  3.,  3.],
       [ 1.,  1.,  1.,  1.]])