In [1]:
import numpy as np
np.__version__

'1.16.2'

# 关于数据的维度
* 数据组织的维度和数据属性的维度是两个不同的概念
* 数据组织的维度是以一维数组、二维数组、多维数组等形式来表示
* 数据属性的维度是一个数据对象具有多少个属性

注意数组的维度

一维数组是线型的，一边只有一个中括号
二维数组是面型的，一边有两个中括号
三维数组是体型的，一边有三个中括号

数组形状信息由shape属性表示，注意shape是一个元组，对于一维数组，它只有一个元素, 二维数组的shape有两个元素，先行后列

数组的维度信息由ndim属性表示，相当于len(shape)
数组元素的个数由size属性表示


In [2]:
l = [2, 4, 5, 6, 8]
a = np.array(l)
# Report the array together with its shape, number of dimensions and element count.
for label, value in (('array:', a), ('shape:', a.shape), ('ndim:', a.ndim), ('size:', a.size)):
    print(label, value)

array: [2 4 5 6 8]
shape: (5,)
ndim: 1
size: 5


In [3]:
# Turn the array into a single row of five elements and report its attributes.
a = a.reshape((1, 5))
for label, value in (('array:', a), ('shape:', a.shape), ('ndim:', a.ndim), ('size:', a.size)):
    print(label, value)

array: [[2 4 5 6 8]]
shape: (1, 5)
ndim: 2
size: 5


In [4]:
# Turn the array into a single column of five elements and report its attributes.
a = a.reshape((5, 1))
# The array is printed on its own line, below the label.
print('array:', a, sep='\n')
for label, value in (('shape:', a.shape), ('ndim:', a.ndim), ('size:', a.size)):
    print(label, value)

array:
[[2]
 [4]
 [5]
 [6]
 [8]]
shape: (5, 1)
ndim: 2
size: 5


# 基本运算操作

In [5]:
# Basic arithmetic operators: + - × ÷ (spelled +, -, *, / in code)
a = np.arange(4).reshape(2,2)
b = np.array([[4,3],[2,1]])

In [6]:
a

array([[0, 1],
       [2, 3]])

In [7]:
b

array([[4, 3],
       [2, 1]])

In [8]:
a + b

array([[4, 4],
       [4, 4]])

In [9]:
# * multiplies element by element; it is not the matrix product
a * b 

array([[0, 3],
       [4, 3]])

In [10]:
# Matrix multiplication: use the @ operator (requires Python 3.5+) or the dot method
a @ b

array([[ 2,  1],
       [14,  9]])

In [11]:
a.dot(b)

array([[ 2,  1],
       [14,  9]])

In [12]:
np.dot(a,b)

array([[ 2,  1],
       [14,  9]])

In [13]:
# Operators such as +=, -=, *= and /= modify the left-hand array in place.
# @= was not supported at the time of writing (NumPy 1.16); use a = a @ b instead.
a+=b

In [14]:
a

array([[4, 4],
       [4, 4]])

In [15]:
b

array([[4, 3],
       [2, 1]])

In [16]:
a*=b

In [17]:
a

array([[16, 12],
       [ 8,  4]])

In [18]:
# a holds integers, and true division produces floats that cannot be written
# back into an integer array in place, so a /= b raises TypeError.
# (Use a //= b for in-place integer floor division.)
# The error is caught and printed so the notebook still runs top to bottom.
try:
    a/=b
except TypeError as err:
    print('TypeError:', err)

TypeError: No loop matching the specified signature and casting
was found for ufunc true_divide

## 关于轴的问题

In [19]:
a = np.arange(6).reshape(3,2)
a

array([[0, 1],
       [2, 3],
       [4, 5]])

In [20]:
# axis=1 adds up the entries within each row, giving one total per row
a.sum(axis=1)

array([1, 5, 9])

In [21]:
# axis=0 adds up the entries within each column, giving one total per column
a.sum(axis=0)

array([6, 9])

In [22]:
a = np.arange(24).reshape(2,3,4)
a

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]]])

In [23]:
# summing over axis 0 collapses the first dimension: shape (2, 3, 4) -> (3, 4)
a.sum(axis=0)

array([[12, 14, 16, 18],
       [20, 22, 24, 26],
       [28, 30, 32, 34]])

In [24]:
# summing over axis 1 collapses the middle dimension: shape (2, 3, 4) -> (2, 4)
a.sum(axis=1)

array([[12, 15, 18, 21],
       [48, 51, 54, 57]])

In [25]:
# summing over axis 2 collapses the last dimension: shape (2, 3, 4) -> (2, 3)
a.sum(axis=2)

array([[ 6, 22, 38],
       [54, 70, 86]])

## 创建数组

### 从列表和元组中创建

### 使用numpy内置函数

In [26]:
# np.ones(shape)       np.ones_like(a)
# np.zeros(shape)      np.zeros_like(a)
# np.full(shape, val)  np.full_like(a, val)
# np.eye(n)            identity matrix
# np.empty(shape)      contents are left uninitialized
# np.arange(start, stop, step)
# np.linspace(start, stop, n_points)

In [27]:
# linspace takes the start point, the end point and the number of elements
np.linspace(0,10,4)

array([ 0.        ,  3.33333333,  6.66666667, 10.        ])

In [28]:
# The end point is included by default; pass endpoint=False to leave it out
np.linspace(0,10,4, endpoint=False)

array([0. , 2.5, 5. , 7.5])

In [29]:
# When the count is omitted, linspace creates 50 points
a = np.linspace(0,10)
print(a.size)
a

50


array([ 0.        ,  0.20408163,  0.40816327,  0.6122449 ,  0.81632653,
        1.02040816,  1.2244898 ,  1.42857143,  1.63265306,  1.83673469,
        2.04081633,  2.24489796,  2.44897959,  2.65306122,  2.85714286,
        3.06122449,  3.26530612,  3.46938776,  3.67346939,  3.87755102,
        4.08163265,  4.28571429,  4.48979592,  4.69387755,  4.89795918,
        5.10204082,  5.30612245,  5.51020408,  5.71428571,  5.91836735,
        6.12244898,  6.32653061,  6.53061224,  6.73469388,  6.93877551,
        7.14285714,  7.34693878,  7.55102041,  7.75510204,  7.95918367,
        8.16326531,  8.36734694,  8.57142857,  8.7755102 ,  8.97959184,
        9.18367347,  9.3877551 ,  9.59183673,  9.79591837, 10.        ])

In [30]:
# Ask for double precision with an explicit NumPy dtype.
# np.float64 replaces the np.float alias, which is deprecated since NumPy 1.20
# and removed in NumPy 1.24; the resulting dtype is the same float64.
a = np.array([4,5,3,2], dtype=np.float64)
a.dtype

dtype('float64')

In [31]:
np.float32([4,3,2])  # a NumPy scalar type can convert a whole sequence; np.float cannot

array([4., 3., 2.], dtype=float32)

### 从文本文件中读取

In [32]:
# Write a 5x5 identity matrix to a plain-text file ...
a = np.eye(5)
np.savetxt('eye.txt', a)
# ... and read it back, which is what this section is about.
# np.loadtxt parses the values as floats by default.
a_loaded = np.loadtxt('eye.txt')

## 拼接和分割

In [33]:
# concatenate
a = np.arange(5)
b = np.zeros(5)
# np.concatenate(a,b) # wrong: the arrays must be passed together as one sequence
np.concatenate((a,b))

array([0., 1., 2., 3., 4., 0., 0., 0., 0., 0.])

### 拼接

### 分割

In [34]:
a = np.arange(9).reshape(3,3)
a

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

平均分割，用一个标量数字表示要分成几等份

In [35]:
np.hsplit(a,3)

[array([[0],
        [3],
        [6]]), array([[1],
        [4],
        [7]]), array([[2],
        [5],
        [8]])]

指定分割，用元组(a,b)表示在列序号a、b之前分别切一刀，得到[:a]、[a:b]、[b:]三段

In [36]:
a = np.arange(15).reshape(3,5)
a

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [37]:
np.hsplit(a, (1,3))

[array([[ 0],
        [ 5],
        [10]]), array([[ 1,  2],
        [ 6,  7],
        [11, 12]]), array([[ 3,  4],
        [ 8,  9],
        [13, 14]])]

## 改变数组形状


In [38]:
a = np.ones((2,3,4))
a

array([[[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]],

       [[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]]])

In [39]:
# ndarray.reshape returns the reshaped result and leaves the original array unchanged
a.reshape(4,6)

array([[1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.]])

In [40]:
a

array([[[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]],

       [[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]]])

In [41]:
# ndarray.resize changes the original array itself
a.resize(4,6)   # modifies in place; returns nothing

In [42]:
a

array([[1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.]])

In [43]:
a = np.arange(24).reshape(2,3,4)
b = a.flatten()  # flatten gives b its own 1-D copy of the data
b

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23])

In [44]:
a[0,0] = 100
a

array([[[100, 100, 100, 100],
        [  4,   5,   6,   7],
        [  8,   9,  10,  11]],

       [[ 12,  13,  14,  15],
        [ 16,  17,  18,  19],
        [ 20,  21,  22,  23]]])

In [45]:
b

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23])

In [46]:
# np.ravel(a): flatten to 1-D; here b shares its data with a rather than copying it
a = np.arange(24).reshape(2,3,4)
b = np.ravel(a)
b

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23])

In [47]:
a[0,0] = 100
a

array([[[100, 100, 100, 100],
        [  4,   5,   6,   7],
        [  8,   9,  10,  11]],

       [[ 12,  13,  14,  15],
        [ 16,  17,  18,  19],
        [ 20,  21,  22,  23]]])

In [48]:
b

array([100, 100, 100, 100,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23])

In [49]:
# Transpose
a = np.arange(8).reshape(2,4)
a

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [50]:
a.T

array([[0, 4],
       [1, 5],
       [2, 6],
       [3, 7]])

In [51]:
a.transpose(1,0)

array([[0, 4],
       [1, 5],
       [2, 6],
       [3, 7]])

In [52]:
# Flip
a = np.arange(15).reshape(5,3)
a

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [53]:
# a step of -1 on the first axis reverses the row order
a[::-1,:]

array([[12, 13, 14],
       [ 9, 10, 11],
       [ 6,  7,  8],
       [ 3,  4,  5],
       [ 0,  1,  2]])

## 数据类型

### 数据类型转换

In [54]:
# ndarray.astype returns a new array by default, even when the requested dtype
# matches the current one.
# np.float64 replaces the np.float alias, which is deprecated since NumPy 1.20
# and removed in NumPy 1.24; the result is the same float64 array.
a = np.arange(24)
b = a.astype(np.float64)
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23])

In [55]:
b

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.,
       13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23.])

In [56]:
# NumPy's built-in type names can also be called directly to convert an array
a = np.arange(10)
b = np.float32(a)
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [57]:
b

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.], dtype=float32)

In [58]:
# NOTE: np.float was only an alias of Python's built-in float, which converts a
# single value and cannot convert a multi-element array; use np.float32 or
# np.float64 for arrays instead.
# The built-in is called directly because the alias no longer exists in newer
# NumPy releases, and the expected TypeError is caught and printed so the
# notebook still runs top to bottom.
try:
    b = float(a)
except TypeError as err:
    print('TypeError:', err)

TypeError: only size-1 arrays can be converted to Python scalars

In [59]:
a = np.full((4,3), 1.0)
a

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [60]:
b = np.int8(a)
b

array([[1, 1, 1],
       [1, 1, 1],
       [1, 1, 1],
       [1, 1, 1]], dtype=int8)

### 创建自定义数据类型

In [61]:
# Structured dtype with three fields: a string of up to 40 characters,
# a 32-bit integer and a 64-bit float.
# Explicitly sized types replace the np.int / np.float aliases (deprecated in
# NumPy 1.20, removed in 1.24) and give the same field layout on every platform.
t = np.dtype([('name', np.str_, 40), ('num_items', np.int32), ('price', np.float64)])
t

dtype([('name', '<U40'), ('num_items', '<i4'), ('price', '<f8')])

## 索引、切片和遍历

In [62]:
b = np.arange(6).reshape(3,2)
for i in b:    # iterating over a 2-D array walks through its rows
    print(i)

[0 1]
[2 3]
[4 5]


In [63]:
for i in b.flat:    # .flat visits every element one at a time
    print(i)

0
1
2
3
4
5


###  索引运算

In [64]:
a = np.arange(15).reshape(3,5)
a

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [65]:
a[2,3]

13

In [66]:
a[1:, 3]

array([ 8, 13])

In [67]:
# start:stop:step on each axis: rows 0 and 2, columns 0, 2 and 4
a[0:3:2, 0:5:2]

array([[ 0,  2,  4],
       [10, 12, 14]])

In [68]:
# the two index lists are paired up: picks a[0,3] and a[2,4]
a[[0,2],[3,4]]

array([ 3, 14])

# 通用函数

In [69]:
a = np.arange(8)
a

array([0, 1, 2, 3, 4, 5, 6, 7])

In [70]:
np.diff(a)

array([1, 1, 1, 1, 1, 1, 1])

In [71]:
a = np.random.randint(0,100,(3,4))
a

array([[49, 63, 74, 29],
       [22,  6, 23, 90],
       [86,  3, 70, 59]])

In [72]:
# on a 2-D array diff subtracts neighbouring entries within each row
np.diff(a)

array([[ 14,  11, -45],
       [-16,  17,  67],
       [-83,  67, -11]])

In [73]:
# Difference between np.maximum and np.max:
# np.maximum compares two arrays element by element and returns an array
# np.max is a global comparison

# 布尔运算

## 等于

In [74]:
a = np.arange(5)
b = (a==2)  # boolean mask: True where the element equals 2
c = a[b]  # keep only the elements where the mask is True
a

array([0, 1, 2, 3, 4])

In [75]:
b

array([False, False,  True, False, False])

In [76]:
c

array([2])

In [77]:
a = np.array([[2,4,5,4],[9,4,1,4]])
b = (a==4)  # boolean mask with the same shape as a
c = a[b]  # the selected elements come back as a 1-D array
a

array([[2, 4, 5, 4],
       [9, 4, 1, 4]])

In [78]:
b

array([[False,  True, False,  True],
       [False,  True, False,  True]])

In [79]:
c

array([4, 4, 4, 4])

In [80]:
# where function
a = np.arange(5)
b = np.array([4,3,2,4,9])
cond = np.array([True,False,False,True,True])
print(a)
print(b)
# take from a where cond is True, from b where it is False
np.where(cond,a,b)

[0 1 2 3 4]
[4 3 2 4 9]


array([0, 3, 2, 3, 4])

## 与或非 & | ~

# 拷贝问题

In [81]:
a = np.arange(6).reshape(2,3)
b = a  # plain assignment: b is the very same object as a
c = a.view()  # a new array object that looks at a's data
d = a.copy()  # an independent copy of the data
a

array([[0, 1, 2],
       [3, 4, 5]])

In [82]:
id(a)

2485935351888

In [83]:
id(b)

2485935351888

In [84]:
id(c)

2485935349968

In [85]:
id(d)

2485935350528

In [86]:
a[0] = 9999
a

array([[9999, 9999, 9999],
       [   3,    4,    5]])

In [87]:
b

array([[9999, 9999, 9999],
       [   3,    4,    5]])

In [88]:
c

array([[9999, 9999, 9999],
       [   3,    4,    5]])

In [89]:
d

array([[0, 1, 2],
       [3, 4, 5]])

In [90]:
# resize changes the shape of a in place; the element values stay the same
a.resize(3,2)
a

array([[9999, 9999],
       [9999,    3],
       [   4,    5]])

In [91]:
# b is exactly the same as a
b

array([[9999, 9999],
       [9999,    3],
       [   4,    5]])

In [92]:
# c and a point at the same underlying data.
# Sharing the data means the values are stored in memory in the same form,
# but the way that data is presented as a NumPy array is different.
# In other words, one block of data in memory can be mapped to different NumPy
# array representations, and the operations on each one behave accordingly.
c

array([[9999, 9999, 9999],
       [   3,    4,    5]])

In [93]:
d

array([[0, 1, 2],
       [3, 4, 5]])

# 随机函数

# 排序

In [94]:
# np.sort returns a sorted copy; by default it sorts along the last axis
# (each row here) in ascending order.
a = np.random.randint(0,100,(3,4))
print(a)
# Sort once and keep the result under a name that does not shadow the
# built-in sorted() function.
sorted_a = np.sort(a)
print(sorted_a)

[[23 96 36 50]
 [38 46 79 12]
 [36 48 90 71]]
[[23 36 50 96]
 [12 38 46 79]
 [36 48 71 90]]


In [97]:
# np.argsort orders along the last axis, ascending, by default, but it returns
# the positions of the values in the original array rather than the values.
argsorted = np.argsort(a)
argsorted

array([[0, 2, 3, 1],
       [3, 0, 1, 2],
       [0, 1, 3, 2]], dtype=int64)

# Broadcasting

# 矩阵乘法