### Numpy 其实就是多维数组对象

In [2]:
import numpy as np
data = [1,2,3,4,5]
n = np.array(data*2)
print(data)
print(n)

[1, 2, 3, 4, 5]
[1 2 3 4 5 1 2 3 4 5]


In [3]:
# shape属性表示获得np数组的维度（长度）
n.shape

(10,)

In [4]:
# dtype属性表示获取数组的类型
n.dtype

dtype('int32')

### 嵌套序列：是由一组等长的列表组成的列表

In [12]:
arr = [[1,2,3],[2,3,4]]
arr2 = np.array(arr)
print(arr2)
print(arr2.ndim) # ndim 代表外层维度 2
print(arr2.shape) # 代表整个维度 （2，3）


[[1 2 3]
 [2 3 4]]
2
(2, 3)


### 数据类型的判断

In [15]:
# 不好的示范来进行判断
arr = [['1','2',3,4],[5,6,7,8]]
arr2 = np.array(arr)
print(arr2)
print(arr2.dtype) # unicode 类型

[['1' '2' '3' '4']
 ['5' '6' '7' '8']]
<U11


In [16]:
arr = [[1,2,3],[2,3,4]]
arr2 = np.array(arr)
print(arr2)
print(arr2.dtype)

[[1 2 3]
 [2 3 4]]
int32


In [17]:
arr = [[1.0,2,3],[2,3,4]]
arr2 = np.array(arr)
print(arr2)
print(arr2.dtype) # 当成员当中有一个为float时，那么numpy将会推断成float64类型

[[1. 2. 3.]
 [2. 3. 4.]]
float64


### 指定长度数组的创建

In [18]:
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [20]:
np.ones((2,3))

array([[1., 1., 1.],
       [1., 1., 1.]])

In [21]:
np.empty((2,3,4))

array([[[1.37470822e-311, 1.37470821e-311, 1.37470821e-311,
         1.37470821e-311],
        [1.37470821e-311, 1.37470821e-311, 1.37470822e-311,
         1.37470798e-311],
        [1.37470821e-311, 1.37470821e-311, 1.37470821e-311,
         1.37470821e-311]],

       [[1.37470821e-311, 1.37470821e-311, 1.37470821e-311,
         1.37468617e-311],
        [1.37470821e-311, 1.37472454e-311, 1.37471269e-311,
         1.37470822e-311],
        [1.37473326e-311, 1.37473326e-311, 1.37472460e-311,
         1.37472416e-311]]])

In [23]:
np.arange(10) # arange是range函数的数组版本

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [24]:
arr = np.array([1.2,1.6,1.8,-2.3,-5.6])
print(arr)
print(arr.dtype)
print(arr.astype(np.int32))

[ 1.2  1.6  1.8 -2.3 -5.6]
float64
[ 1  1  1 -2 -5]


### 矢量化
数组通常不用在编写循环的情况下就可以进行批量运算


In [25]:
arr1 = np.array([1,2,3,4])
arr2 = np.array([5,6,7,8])
arr1+arr2

array([ 6,  8, 10, 12])

In [26]:
arr1 = np.array([[1,2,3,4],[1,2,3,4],[1,2,3,4]])
arr2 = np.array([[2,3,4,5],[2,3,4,5],[2,3,4,5]])
arr1+arr2


array([[3, 5, 7, 9],
       [3, 5, 7, 9],
       [3, 5, 7, 9]])

In [27]:
arr1 = np.array([[1,2,3,4],[1,2,3,4],[1,2,3,4]])
arr2 = np.array([[2,3,4,5],[2,3,4,5],[2,3,4,5]])
arr1*arr2

array([[ 2,  6, 12, 20],
       [ 2,  6, 12, 20],
       [ 2,  6, 12, 20]])

### 索引和切片

In [32]:
arr = np.arange(10)
print(arr)
print(arr[1])
print(arr[4:])

arr[0:4] = 11
print(arr)

arr_copy = arr.copy() # 如果不想修改原数组，可使用数组复制
print(arr_copy)

[0 1 2 3 4 5 6 7 8 9]
1
[4 5 6 7 8 9]
[11 11 11 11  4  5  6  7  8  9]
[11 11 11 11  4  5  6  7  8  9]


In [34]:
# 二维数组访问方式
arr1 = np.array([[1,2,3],[2,3,4]])
print(arr1[0][1])
print(arr1[0,1]) # 两者等价

2
2


In [38]:
names =np.array(['zhangbo','jack','robin'])
print(names == 'zhangbo')
print((names == 'zhangbo') & (names == 'robin')) # and 操作
print((names == 'zhangbo') | (names == 'robin')) # or 操作

[ True False False]
[False False False]
[ True False  True]


### 花式索引
利用整数数组进行索引

In [41]:
arr = np.empty((8,4))
print(arr)

for i in range(8):
    arr[i] = i
print(arr)

# 以特定的顺利来选取行中的子集，我们传入用于指定顺序的整数列表或数组
# 这里可以用负数索引来从末尾获取
print(arr[[4,3,0,6]])

[[0. 0. 0. 0.]
 [1. 1. 1. 1.]
 [2. 2. 2. 2.]
 [3. 3. 3. 3.]
 [4. 4. 4. 4.]
 [5. 5. 5. 5.]
 [6. 6. 6. 6.]
 [7. 7. 7. 7.]]
[[0. 0. 0. 0.]
 [1. 1. 1. 1.]
 [2. 2. 2. 2.]
 [3. 3. 3. 3.]
 [4. 4. 4. 4.]
 [5. 5. 5. 5.]
 [6. 6. 6. 6.]
 [7. 7. 7. 7.]]
[[4. 4. 4. 4.]
 [3. 3. 3. 3.]
 [0. 0. 0. 0.]
 [6. 6. 6. 6.]]


In [51]:
arr = np.arange(32).reshape(8,4)
print(arr)
print(arr[[1,5,7,2]])
print(arr[[1,5,7,2],[0,3,1,2]]) # 行，列 进行筛选，即4就是第2行第1列

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]
 [24 25 26 27]
 [28 29 30 31]]
[[ 4  5  6  7]
 [20 21 22 23]
 [28 29 30 31]
 [ 8  9 10 11]]
[ 4 23 29 10]


In [53]:
print(arr[[1,5,7,2]])
# 在选择完1,5,7,2行数据后，行不变，列进行选择（调换列的顺序）
print(arr[[1,5,7,2]][:,[0,3,1,2]])

[[ 4  5  6  7]
 [20 21 22 23]
 [28 29 30 31]
 [ 8  9 10 11]]
[[ 4  7  5  6]
 [20 23 21 22]
 [28 31 29 30]
 [ 8 11  9 10]]


### 数组转置和轴对换
转置就是重塑数组的特殊形式，常用方法T和 transpose

In [57]:
arr = np.arange(15).reshape(3,5)
print(arr)
print(arr.transpose())
print(arr.T)

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]]
[[ 0  5 10]
 [ 1  6 11]
 [ 2  7 12]
 [ 3  8 13]
 [ 4  9 14]]
[[ 0  5 10]
 [ 1  6 11]
 [ 2  7 12]
 [ 3  8 13]
 [ 4  9 14]]


In [59]:
arr = np.arange(24).reshape(2,3,4) # 2个，三行，4列
print(arr)
print(arr.transpose(1,2,0)) # 轴的转换

[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]]

 [[12 13 14 15]
  [16 17 18 19]
  [20 21 22 23]]]
[[[ 0 12]
  [ 1 13]
  [ 2 14]
  [ 3 15]]

 [[ 4 16]
  [ 5 17]
  [ 6 18]
  [ 7 19]]

 [[ 8 20]
  [ 9 21]
  [10 22]
  [11 23]]]


### 条件逻辑转数组 where
np.where 等同于 x if condition else y 矢量化的  

x if condition else y 表示当条件condition成立时，表达式的返回值是x，不成立时返回值是y  

np.where 的写法，用于根据一个数组产生一个新的数组，与map，reduce类似

In [63]:
x_arr = np.array([1.1,1.2,1.3])
y_arr = np.array([2.1,2.2,2.3])
condition = np.array([True,False,True])

# zip 接受的参数就是可迭代的对象
# zip()函数用于将可迭代的对象作为参数，将对象中对应的元素打包成一个个元组，然后返回有这些元组组成的列表
result = [(x if c else y) for x,y,c in zip(x_arr,y_arr,condition)]
print(result)

res = np.where(condition, x_arr, y_arr)
print(result)

[1.1, 2.2, 1.3]
[1.1, 2.2, 1.3]


In [66]:
# 值替换
arr = np.random.randn(4,4)
print(arr)

arr_1 = np.where(arr>0, 2 ,-2)
print(arr_1)

arr_2 = np.where(arr>0, 2 ,arr)
print(arr_2)

[[ 2.24785031  0.05942703 -0.97685776  0.42742218]
 [ 0.88873141 -1.22824346 -1.04129253 -0.84818713]
 [-0.82383826  0.56150997  0.18941541  0.16228071]
 [ 1.18228839 -1.5911772  -0.45523496  0.38988135]]
[[ 2  2 -2  2]
 [ 2 -2 -2 -2]
 [-2  2  2  2]
 [ 2 -2 -2  2]]
[[ 2.          2.         -0.97685776  2.        ]
 [ 2.         -1.22824346 -1.04129253 -0.84818713]
 [-0.82383826  2.          2.          2.        ]
 [ 2.         -1.5911772  -0.45523496  2.        ]]


### 数学运算
常用函数 sum mean std

In [73]:
arr = np.random.randn(4,4) # randn 生成符合标准正态分布的随机数
print(arr)
print(arr.mean()) # 均值
print(np.mean(arr))
print(arr.sum()) # 和
print(arr.std()) # 标准差

[[ 0.30729383 -1.01889955  0.17548779 -0.31025131]
 [ 0.15991876  1.31897145  1.92344924  0.7515496 ]
 [-1.04221485  0.36542056 -1.21553461  0.64050433]
 [-0.05006754 -0.6125637  -0.44927731 -0.02388955]]
0.057493571216505945
0.057493571216505945
0.9198971394640951
0.8262932652052362


In [75]:
# 在轴上进行运算
# 计算轴1上的平均值
print(arr.mean(axis=1))
# 计算0轴上的和
print(arr.sum(0))

[-0.21159231  1.03847226 -0.31295614 -0.28394952]
[-0.62506981  0.05292876  0.43412512  1.05791306]


### 排序方法

In [80]:
arr = np.random.randn(4)
print(arr)
arr.sort() # 从小到大排序
print(arr)

[ 0.33094259 -0.40751597 -1.50313919 -0.0729221 ]
[-1.50313919 -0.40751597 -0.0729221   0.33094259]


In [81]:
# 多维数组排序
arr = np.random.randn(4,4)
print(arr)
arr.sort(1) # 按轴1排序
print(arr)

[[-2.60591523  0.53345221 -0.38432862 -0.19111948]
 [ 0.99255264  1.83796089 -0.92757438  1.12701137]
 [-0.52732629  0.02744422  2.37113036  0.96989794]
 [ 2.31455464 -0.71345344 -0.73065654 -1.12411027]]
[[-2.60591523 -0.38432862 -0.19111948  0.53345221]
 [-0.92757438  0.99255264  1.12701137  1.83796089]
 [-0.52732629  0.02744422  0.96989794  2.37113036]
 [-1.12411027 -0.73065654 -0.71345344  2.31455464]]
