In [3]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [4]:
import numpy as np

为什么要专门学习数组呢？看下面「numpy 数组」和「列表」之间的计算效率对比：两个大小都是 1000000，把每个元素翻倍，运行 10 次并用 %time 计时。

In [2]:
my_arr = np.arange(1000000)
my_list = list(range(1000000))

In [3]:
%time for _ in range(10): my_arr2 = my_arr * 2

Wall time: 22.9 ms


In [4]:
%time for _ in range(10): my_list2 = [x * 2 for x in my_list]

Wall time: 764 ms


我们发现「numpy 数组」的效率是「列表」效率的 33 (764/22.9) 倍左右。如果元素全是
数值型变量 (numerical variable)，那么 numpy 数组明显是个很好的数据结构。

# 数组的创建

## 引入：转置

In [5]:
arr = np.array([[1,2,3],[4,5,6]])
arr

array([[1, 2, 3],
       [4, 5, 6]])

In [6]:
arr.T

array([[1, 4],
       [2, 5],
       [3, 6]])

In [6]:
arr = np.arange(16).reshape((2,2,4))
arr

arr[0,1,1]

arr[:,:,0]
arr = arr.transpose(1,0,2)
arr[:,:,0]

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])

5

array([[ 0,  4],
       [ 8, 12]])

array([[ 0,  8],
       [ 4, 12]])

多维数组转置：交换相应维度的形状（shape）和跨度（stride）

## 初次印象

先沿着最后一个轴排，例如：三维数组，依次按照axis2, axis1, axis0的顺序

In [37]:
import numpy as np

arr = np.arange(24).reshape((4,3,2))
arr[3,0,0]
arr[3,0,1]

18

19

![jupyter](./numpy_axis_00.jpg)

![jupyter](./numpy_axis_01.jpg)

## 创建

带着上面这个对轴的认识，接下来我们用代码来创建 numpy 数组，有三种方式：
1. 按部就班的 np.array() 用在列表和元组上
2. 定隔定点的 np.arange() 和 np.linspace()
3. 一步登天的 np.ones(), np.zeros(), np.eye() 和 np.random.random()

### 按部就班

In [9]:
l = [3.5, 5, 2, 8, 4.2]
np.array(l)

array([3.5, 5. , 2. , 8. , 4.2])

In [10]:
t = (3.5, 5, 2, 8, 4.2)
np.array(t)

array([3.5, 5. , 2. , 8. , 4.2])

### 定隔定点法

更常见的两种创建 numpy 数组方法：

1. 定隔的 arange：固定元素大小间隔
2. 定点的 linspace：固定元素个数

In [11]:
print( np.arange(8) )
print( np.arange(2,8) )
print( np.arange(2,8,2))

[0 1 2 3 4 5 6 7]
[2 3 4 5 6 7]
[2 4 6]


In [12]:
print( np.linspace(2,6,3) )
print( np.linspace(3,8,11) )

[2. 4. 6.]
[3.  3.5 4.  4.5 5.  5.5 6.  6.5 7.  7.5 8. ]


### 一步登天法

NumPy 还提供一次性创建特定数组的方法：
1. 用 zeros() 创建全是 0 的 n 维数组
2. 用 ones() 创建全是 1 的 n 维数组
3. 用 random() 创建随机 n 维数组
4. 用 eye() 创建单位矩阵 (二维数组)，还可以用参数 k 把全是 1 的对角线向上或向下平移

In [4]:
np.zeros(5)
print( np.zeros(5) ) # 标量5代表形状(5,)
print( np.ones((2,3)) )
print( np.random.random((2,3,4)) )

array([0., 0., 0., 0., 0.])

[0. 0. 0. 0. 0.]
[[1. 1. 1.]
 [1. 1. 1.]]
[[[0.45830446 0.71002805 0.97473638 0.26554138]
  [0.11523616 0.87182653 0.03350133 0.84582963]
  [0.04185916 0.02869058 0.04212034 0.70041217]]

 [[0.50563228 0.60718029 0.72925016 0.6224785 ]
  [0.97917675 0.49110077 0.96074389 0.42802305]
  [0.51485229 0.35675727 0.03554558 0.45934356]]]


In [14]:
np.eye(4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

In [15]:
np.eye(4, k=1)

array([[0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 0.]])

In [16]:
np.eye(4, k=-1)

array([[0., 0., 0., 0.],
       [1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.]])

## 数组性质

### 一维数组

In [5]:
arr = np.array([3.5, 5, 2, 8, 4.2])
dir(arr)

['T',
 '__abs__',
 '__add__',
 '__and__',
 '__array__',
 '__array_finalize__',
 '__array_function__',
 '__array_interface__',
 '__array_prepare__',
 '__array_priority__',
 '__array_struct__',
 '__array_ufunc__',
 '__array_wrap__',
 '__bool__',
 '__class__',
 '__complex__',
 '__contains__',
 '__copy__',
 '__deepcopy__',
 '__delattr__',
 '__delitem__',
 '__dir__',
 '__divmod__',
 '__doc__',
 '__eq__',
 '__float__',
 '__floordiv__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__iand__',
 '__ifloordiv__',
 '__ilshift__',
 '__imatmul__',
 '__imod__',
 '__imul__',
 '__index__',
 '__init__',
 '__init_subclass__',
 '__int__',
 '__invert__',
 '__ior__',
 '__ipow__',
 '__irshift__',
 '__isub__',
 '__iter__',
 '__itruediv__',
 '__ixor__',
 '__le__',
 '__len__',
 '__lshift__',
 '__lt__',
 '__matmul__',
 '__mod__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__or__',
 '__pos__',
 '__pow__',
 '__radd__',
 '__rand__',
 '__rdivmod__',
 '__

In [7]:
# Inspect the basic attributes of the 1-D float array `arr`.
print( 'The type is', type(arr) )
print( 'The dimension is', arr.ndim )
print( 'The length of array is', len(arr) ) # len() is the size of the first axis; it only equals the element count for a 1-D array
print( 'The number of elements is', arr.size ) # total number of elements
print( 'The shape of array is', arr.shape )
# Strides: the number of bytes to step over to reach the next element along each
# axis (reported as a tuple). A float64 element occupies 8 bytes, so the stride is 8.
print( 'The stride of array is', arr.strides )  
print( 'The type of elements is', arr.dtype )

The type is <class 'numpy.ndarray'>
The dimension is 1
The length of array is 5
The number of elements is 5
The shape of array is (5,)
The stride of array is (8,)
The type of elements is float64


![jupyter](./numpy_stride_00.jpg)

In [10]:
# Build the integer array with an explicit 32-bit dtype. Without `dtype`, NumPy
# picks the platform's default integer type (int64 on most systems), which would
# give a stride of 8 instead of the 4 discussed below.
arr_int = np.array([3, 5, 2, 8, 4], dtype=np.int32)
print( 'The type is', type(arr_int) )
print( 'The dimension is', arr_int.ndim )
print( 'The length of array is', len(arr_int) ) # len() is the size of the first axis; it only equals the element count for a 1-D array
print( 'The number of elements is', arr_int.size ) # total number of elements
print( 'The shape of array is', arr_int.shape )
# Strides: the number of bytes to step over to reach the next element along each
# axis (reported as a tuple). An int32 element occupies 4 bytes, so the stride is 4.
print( 'The stride of array is', arr_int.strides )
print( 'The type of elements is', arr_int.dtype )

The type is <class 'numpy.ndarray'>
The dimension is 1
The length of array is 5
The number of elements is 5
The shape of array is (5,)
The stride of array is (4,)
The type of elements is int32


### 二维数组

In [11]:
# Build a 2x3 array from a nested list. The dtype is pinned to int32 so that the
# strides and element type reported in the next cell ((12, 4) and int32) are the
# same on every platform instead of depending on the default integer size.
l2 = [[1, 2, 3], [4, 5, 6]]
arr2d = np.array(l2, dtype=np.int32)
arr2d

array([[1, 2, 3],
       [4, 5, 6]])

In [12]:
print( 'The type is', type(arr2d) )
print( 'The dimension is', arr2d.ndim )
print( 'The length of array is', len(arr2d) ) # 第一个维度
print( 'The number of elements is', arr2d.size )
print( 'The shape of array is', arr2d.shape )
print( 'The stride of array is', arr2d.strides )
print( 'The type of elements is', arr2d.dtype )

The type is <class 'numpy.ndarray'>
The dimension is 2
The length of array is 2
The number of elements is 6
The shape of array is (2, 3)
The stride of array is (12, 4)
The type of elements is int32


![jupyter](./numpy_stride_01.jpg)

### 多维数组

In [13]:
# Build a random 4-D array of shape (3, 2, 2, 3) and print its basic attributes.
arr4d = np.random.random( (3,2,2,3) )

print( 'The type is', type(arr4d) )
print( 'The dimension is', arr4d.ndim )  # number of axes
print( 'The length of array is', len(arr4d) )  # size of the first axis
print( 'The number of elements is', arr4d.size )
print( 'The shape of array is', arr4d.shape )
print( 'The stride of array is', arr4d.strides )
print( 'The type of elements is', arr4d.dtype )

The type is <class 'numpy.ndarray'>
The dimension is 4
The length of array is 3
The number of elements is 36
The shape of array is (3, 2, 2, 3)
The stride of array is (96, 48, 24, 8)
The type of elements is float64


![jupyter](./numpy_stride_02.jpg)

# 数组的存载

## numpy 自身的 .npy 格式

用 np.save 函数将 numpy 数组保存为 .npy 格式，具体写法如下：np.save( "文件名", 数组 )

In [16]:
arr_disk = np.arange(8)
np.save("arr_disk", arr_disk)
arr_disk

array([0, 1, 2, 3, 4, 5, 6, 7])

arr_disk.npy 保存在 Jupyter Notebook 所在的根目录下。要加载它也很简单，用 np.load( "文件名" ) 即可：

In [17]:
np.load("arr_disk.npy")

array([0, 1, 2, 3, 4, 5, 6, 7])

## 文本 .txt 格式

用 np.savetxt 函数将 numpy 数组保存为 .txt 格式，具体写法如下：np.savetxt( "文件名", 数组 )

In [18]:
arr_text = np.array([[1., 2., 3.], [4., 5., 6.]])
np.savetxt("arr_from_text.txt", arr_text)

arr_from_text.txt 保存在 Jupyter Notebook 所在的根目录下，用 Notepad 打开看里面确实存储着 [[1,2,3], [4,5,6]]。

用 np.loadtxt( "文件名" ) 即可加载该文件

In [19]:
np.loadtxt("arr_from_text.txt")

array([[1., 2., 3.],
       [4., 5., 6.]])

## 文本 .csv 格式

In [15]:
arr = np.loadtxt("arr_from_text.txt")
np.savetxt("arr_from_csv.csv", arr, delimiter=';')

我们已经在 arr_from_csv 的 csv 文件里写进去了 [[1,2,3], [4,5,6]]，每行的元素是由「分号 ;」来分隔的，展示如下：

用 np.genfromtxt( "文件名" ) 即可加载该文件

In [16]:
np.genfromtxt("arr_from_csv.csv")

array([nan, nan])

奇怪的是数组里面都是 nan，原因是没有设定好「分隔符 ;」，那么函数 genfromtxt 读取的两个元素是

1;2;3

4;5;6

它们当然不是数字啦，NumPy 只能用两个 nan (Not a Number) 来代表上面的四不像了。

带上「分隔符 ;」再用 np.genfromtxt( "文件名"，分隔符 ) 即可加载该文件

In [17]:
np.genfromtxt("arr_from_csv.csv", delimiter=";")

array([[1., 2., 3.],
       [4., 5., 6.]])

# 数组的获取

获取数组是通过索引 (indexing) 和切片 (slicing) 来完成的，
1. 切片slicing是获取一段特定位置的元素
2. 索引indexing是获取一个特定位置的元素
索引和切片的方式和列表一模一样。


对于一维数组 arr,
1. 切片slicing写法是 arr[start : stop : step]
2. 索引indexing写法是 arr[index]

因此，切片的操作是可以用索引操作来实现的 (一个一个总能凑成一段)，只是没必要罢了。
为了简化，我们在本章三节标题里把切片和索引都叫做索引。
索引数组有三种形式，正规索引 (normal indexing)、布尔索引 (boolean indexing) 和花式索引 (fancy indexing)。

**切片得到的是原数组的一个视图 (view) ，修改切片中的内容会改变原数组**

**索引得到的是原数组的一个复制 (copy)，修改索引中的内容不会改变原数组**

## 正规索引

In [23]:
# Start from a fresh 0..9 array for the demonstration.
arr = np.arange(10)
arr

print('\n indexing: copy')
arr[6]
a = arr[6]  # `a` receives the single element read from position 6
a = 1000  # rebinds the name `a` only; nothing is written into arr
arr
arr[6] = 87  # assigning through the index does modify arr
arr

print('\n slicing: view')
arr[5:8]
b = arr[5:8]  # `b` is a view onto positions 5..7 of arr
b[1] = 12  # writes through the view, so arr[6] becomes 12
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])


 indexing: copy


6

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

array([ 0,  1,  2,  3,  4,  5, 87,  7,  8,  9])


 slicing: view


array([ 5, 87,  7])

array([ 0,  1,  2,  3,  4,  5, 12,  7,  8,  9])

只有每个维度均只有一个元素时为索引indexing，其余均为切片slicing。

因为indexing返回的类型为单个元素，而slicing返回的元素为np.ndarray。

时刻记得：万物皆对象。

In [48]:
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
arr2d

print('\n indexing: copy')
# arr2d[0][2] indexes row 0 and then element 2 of that row (first row, third column).
a = arr2d[0][2]
a = 102  # rebinds the name `a` only; arr2d is not modified
# Chaining one pair of brackets per axis gets tedious for higher-dimensional arrays;
# arr2d[0, 2] addresses the same element with a single pair of brackets.
arr2d[0,2]


print('\n slicing: view')
# arr2d[2] selects the third row, i.e. the element at position 2 along axis 0.
print('\n arr2d[2]')

a = arr2d[2]
a[1] = 15  # `a` is a view of that row, so this writes into arr2d[2, 1]
arr2d

a = 27  # rebinding `a` leaves arr2d untouched
arr2d

arr2d[2] = 15  # the scalar is assigned to every element of the third row
arr2d
# arr2d[:2] slices the first two rows (positions 0 and 1 along axis 0).
print('\n arr2d[:2]')
a = arr2d[:2]
a[1,1] = 23  # written through the view into arr2d[1, 1]
arr2d
# arr2d[:, [0,2]] picks the first and third columns. The integer list makes this
# fancy indexing, so unlike the slices above the result is a copy, not a view.
print('\n arr2d[:,[0,2]]')
arr2d[:,[0,2]]
# arr2d[1, :2] takes the first two elements of the second row.
print('\n arr2d[1,:2]')
arr2d[1, :2]
# arr2d[:2, 2] takes the first two elements of the third column.
print('\n arr2d[:2,2]')
arr2d[:2, 2]

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])


 indexing: copy


3


 slicing: view

 arr2d[2]


array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7, 15,  9]])

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7, 15,  9]])

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [15, 15, 15]])


 arr2d[:3]


array([[ 1,  2,  3],
       [ 4, 23,  6],
       [15, 15, 15]])


 arr2d[:,[0.2]]


array([[ 1,  3],
       [ 4,  6],
       [15, 15]])


 arr2d[1,:2]


array([ 4, 23])


 arr2d[:2,2]


array([3, 6])

In [43]:
a = arr2d[2]
a
b = arr2d[0,2]
b
type(a)

array([15, 15, 15])

3

numpy.ndarray

## 布尔索引

In [5]:
code = np.array(['BABA', 'FB', 'JD', 'BABA', 'JD', 'FB'])
code
price = np.array([[170,177,169],[150,159,153],
                  [24,27,26],[165,170,167],
                  [22,23,20],[155,116,157]])
price

array(['BABA', 'FB', 'JD', 'BABA', 'JD', 'FB'], dtype='<U4')

array([[170, 177, 169],
       [150, 159, 153],
       [ 24,  27,  26],
       [165, 170, 167],
       [ 22,  23,  20],
       [155, 116, 157]])

In [6]:
code == 'BABA'

price[ code == 'BABA' ]

price[ code == 'BABA', 1: ]

price[ (code == 'FB')|(code == 'JD') ]

price
price[ price < 25 ] = 0
price

array([ True, False, False,  True, False, False])

array([[170, 177, 169],
       [165, 170, 167]])

array([[177, 169],
       [170, 167]])

array([[150, 159, 153],
       [ 24,  27,  26],
       [ 22,  23,  20],
       [155, 116, 157]])

array([[170, 177, 169],
       [150, 159, 153],
       [ 24,  27,  26],
       [165, 170, 167],
       [ 22,  23,  20],
       [155, 116, 157]])

array([[170, 177, 169],
       [150, 159, 153],
       [  0,  27,  26],
       [165, 170, 167],
       [  0,   0,   0],
       [155, 116, 157]])

## 花式索引

In [11]:
arr = np.arange(32).reshape(8,4)
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [14]:
# Pick rows in a chosen order: indices 4, 3, 6 are the 5th, 4th and 7th rows.
arr[ [4,3,6] ]

# Negative indices count from the end: -4, -3, -6 are the 4th, 3rd and 6th rows
# from the bottom, i.e. the 5th, 6th and 3rd rows from the top (indices 4, 5, 2)
# of this 8-row array.
arr[ [-4,-3,-6] ]


# Row and column index lists can be paired up element by element, as below.
arr_t1 = arr[ [1,5,7,2], [0,3,1,2] ]
arr_t1

# Check: the elements selected above are 4, 23, 29 and 10. Without fancy indexing
# the same result needs the more verbose but equivalent code below.
arr_t2 = np.array( [ arr[1,0], arr[5,3], 
            arr[7,1], arr[2,2] ] )
arr_t2
np.allclose(arr_t1, arr_t2)

# Columns can be reordered too: replace the column order [0,1,2,3] with [0,3,1,2].
arr[:,[0,3,1,2]] 

array([[16, 17, 18, 19],
       [12, 13, 14, 15],
       [24, 25, 26, 27]])

array([[16, 17, 18, 19],
       [20, 21, 22, 23],
       [ 8,  9, 10, 11]])

array([ 4, 23, 29, 10])

array([ 4, 23, 29, 10])

True

array([[ 0,  3,  1,  2],
       [ 4,  7,  5,  6],
       [ 8, 11,  9, 10],
       [12, 15, 13, 14],
       [16, 19, 17, 18],
       [20, 23, 21, 22],
       [24, 27, 25, 26],
       [28, 31, 29, 30]])

# 数组的变形

## 重塑 (reshape) 和打平 (ravel, flatten)

重塑是从低维到高维,打平是从高维到低维

### reshape

In [1]:
import numpy as np

arr = np.arange(12)
print( arr )
arr.reshape((4,3))

[ 0  1  2  3  4  5  6  7  8  9 10 11]


array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

当重塑高维矩阵时，不想花时间算某一维度的元素个数时，可以用「-1」取代，程序会自动帮你计算出来。

In [5]:
arr.reshape((2,-1))
arr.reshape((-1,6))

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11]])

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11]])

### ravel & flatten

用 ravel() 或flatten() 函数将二维数组 arr 打平成一维数组。

In [15]:
arr = np.arange(12).reshape((4,-1))
arr

arr.ravel()

arr.flatten()

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [18]:
arr = np.arange(12)

# order='C' is row-major (C layout); order='F' is column-major (Fortran layout).
arr.reshape((4,-1), order = 'F')
# The reshape above was not assigned, so arr is still 1-D and both calls give 0..11.
arr.ravel()
arr.flatten()

# This time keep the column-major reshaped result.
arr = arr.reshape((4,-1), order = 'F')
# Both calls read the 4x3 array in their default row-major order: 0, 4, 8, 1, 5, 9, ...
arr.ravel()
arr.flatten()

array([[ 0,  4,  8],
       [ 1,  5,  9],
       [ 2,  6, 10],
       [ 3,  7, 11]])

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

array([ 0,  4,  8,  1,  5,  9,  2,  6, 10,  3,  7, 11])

array([ 0,  4,  8,  1,  5,  9,  2,  6, 10,  3,  7, 11])

1. ravel() 按「行主序」打平时没有复制原数组，按「列主序」在打平时复制了原数组
2. flatten() 在打平时复制了原数组

arr = np.arange(6).reshape((2,3))
arr

print('\n flatten 行主序：')
flatten_C = arr.flatten()
flatten_C
print(arr)
flatten_C[0] = 10000
flatten_C
print( arr )

print('\n flatten 列主序：')
flatten_F = arr.flatten(order = 'F')
flatten_F
print(arr)
flatten_F[0] = 10000
flatten_F
print( arr )

print('\n ravel 列主序：')
ravel_F = arr.ravel(order = 'F')
ravel_F
print(arr)
ravel_F[0] = 10000
ravel_F
print( arr )

print('\n ravel 行主序：')
ravel_C = arr.ravel()
ravel_C
print(arr)
ravel_C[0] = 10000
ravel_C
print( arr )

## 合并 (concatenate, stack) 和分裂 (split)

合并是多合一，分裂是一分多

### concatenate & stack

使用「合并」函数有三种选择：

1. 有通用的 concatenate
2. 有专门的 vstack（竖直）, hstack（水平）, dstack（深度）
3. 有极简的 r_, c_

In [28]:
arr1 = np.arange(6).reshape((2,3))
arr1
arr2 = np.arange(7,13,1).reshape((2,3))
arr2

array([[0, 1, 2],
       [3, 4, 5]])

array([[ 7,  8,  9],
       [10, 11, 12]])

In [31]:
np.concatenate([arr1,arr2], axis = 0)

np.concatenate([arr1,arr2], axis = 1)

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 7,  8,  9],
       [10, 11, 12]])

array([[ 0,  1,  2,  7,  8,  9],
       [ 3,  4,  5, 10, 11, 12]])

![jupyter](./python_numpy_stack.png)

In [41]:
np.vstack((arr1,arr2))
np.hstack((arr1,arr2))
np.dstack((arr1,arr2))


np.dstack((arr1,arr2)).shape

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 7,  8,  9],
       [10, 11, 12]])

array([[ 0,  1,  2,  7,  8,  9],
       [ 3,  4,  5, 10, 11, 12]])

array([[[ 0,  7],
        [ 1,  8],
        [ 2,  9]],

       [[ 3, 10],
        [ 4, 11],
        [ 5, 12]]])

(2, 3, 2)

In [53]:
# Show the two 2x3 inputs first.
arr1
arr2

np.r_[arr1, arr2] # stack vertically: the inputs are joined row-wise
np.c_[arr1, arr2] # stack horizontally: the inputs are joined column-wise

print( np.r_[-2:2:1, [0]*3, 5, 6] ) # arguments may be slices, which are expanded into ranges

# A leading 'r' or 'c' string is a directive rather than data: the joined result
# is returned as a np.matrix instead of an ndarray.
np.r_['r', arr1, arr2]
np.r_['c', arr1, arr2]
np.c_['r', arr1, arr2]
np.c_['c', arr1, arr2]

# With 1-D inputs, np.r_ gives a single-row matrix for 'r' and a single-column
# matrix for 'c'; np.c_ gives the same 3x2 matrix for both directives.
np.r_['r', [1,2,3], [4,5,6]]
np.r_['c', [1,2,3], [4,5,6]]
np.c_['r', [1,2,3], [4,5,6]]
np.c_['c', [1,2,3], [4,5,6]]

# Reshaping each list to a 1x3 row first makes plain np.r_ stack them into a 2x3 array.
np.r_[np.array([1,2,3]).reshape((1,-1)), np.array([4,5,6]).reshape((1,-1))]


array([[0, 1, 2],
       [3, 4, 5]])

array([[ 7,  8,  9],
       [10, 11, 12]])

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 7,  8,  9],
       [10, 11, 12]])

array([[ 0,  1,  2,  7,  8,  9],
       [ 3,  4,  5, 10, 11, 12]])

[-2 -1  0  1  0  0  0  5  6]


matrix([[ 0,  1,  2],
        [ 3,  4,  5],
        [ 7,  8,  9],
        [10, 11, 12]])

matrix([[ 0,  1,  2],
        [ 3,  4,  5],
        [ 7,  8,  9],
        [10, 11, 12]])

matrix([[ 0,  1,  2,  7,  8,  9],
        [ 3,  4,  5, 10, 11, 12]])

matrix([[ 0,  1,  2,  7,  8,  9],
        [ 3,  4,  5, 10, 11, 12]])

matrix([[1, 2, 3, 4, 5, 6]])

matrix([[1],
        [2],
        [3],
        [4],
        [5],
        [6]])

matrix([[1, 4],
        [2, 5],
        [3, 6]])

matrix([[1, 4],
        [2, 5],
        [3, 6]])

array([[1, 2, 3],
       [4, 5, 6]])

第一个参数可以是控制参数，如果它写成 ‘a,b,c’ 的形式，其中

a：代表轴，按「轴 a」来合并

b：合并后数组维度至少是 b

c：在第 c 维上做维度提升

In [54]:
# Join the same two lists under each 'axis,ndmin,pos' directive string and print
# the results one after another for comparison.
left, right = [1,2,3], [4,5,6]
for directive in ('0,2,0', '0,2,1', '1,2,0', '1,2,1'):
    stacked = np.r_[directive, left, right]
    print( stacked )

[[1]
 [2]
 [3]
 [4]
 [5]
 [6]]
[[1 2 3]
 [4 5 6]]
[[1 4]
 [2 5]
 [3 6]]
[[1 2 3 4 5 6]]


![jupyter](./numpy_rc.jpg)

### split

使用「分裂」函数有两种选择

1. 有通用的 split
2. 有专门的 hsplit, vsplit

In [56]:
arr = np.arange(25).reshape((5,5))
print( arr )

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]
 [20 21 22 23 24]]


In [59]:
first, second, third = np.split(arr,[1,3])
first
second
third

array([[0, 1, 2, 3, 4]])

array([[ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

array([[15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [60]:
first, second, third = np.split(arr,[1,3], axis=1)
first
second
third

array([[ 0],
       [ 5],
       [10],
       [15],
       [20]])

array([[ 1,  2],
       [ 6,  7],
       [11, 12],
       [16, 17],
       [21, 22]])

array([[ 3,  4],
       [ 8,  9],
       [13, 14],
       [18, 19],
       [23, 24]])

![jupyter](./numpy_split.jpg)

In [61]:
np.hsplit(arr, [1,3])

[array([[ 0],
        [ 5],
        [10],
        [15],
        [20]]), array([[ 1,  2],
        [ 6,  7],
        [11, 12],
        [16, 17],
        [21, 22]]), array([[ 3,  4],
        [ 8,  9],
        [13, 14],
        [18, 19],
        [23, 24]])]

## 重复 (repeat) 和拼接 (tile)

### repeat

repeat() 复制的是数组的每一个元素，参数有几种设定方法：
1. 一维数组：用标量和列表来复制元素的个数
2. 多维数组：用标量和列表来复制元素的个数，用轴来控制复制的行和列

In [65]:
import numpy as np

arr = np.arange(3)
arr
arr.repeat(4)
arr.repeat([2,3,4])

array([0, 1, 2])

array([0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2])

array([0, 0, 1, 1, 1, 2, 2, 2, 2])

In [71]:
arr2d = np.arange(6).reshape((2,3))
arr2d
arr2d.repeat(2, axis = 0)
arr2d.repeat([2,3,4], axis=1)

array([[0, 1, 2],
       [3, 4, 5]])

array([[0, 1, 2],
       [0, 1, 2],
       [3, 4, 5],
       [3, 4, 5]])

array([[0, 0, 1, 1, 1, 2, 2, 2, 2],
       [3, 3, 4, 4, 4, 5, 5, 5, 5]])

### tile

函数 tile() 复制的是数组本身，参数有几种设定方法：
1. 标量：把数组当成一个元素，一列一列复制
2. 形状：把数组当成一个元素，按形状复制

In [76]:
arr2d = np.arange(6).reshape((2,3))
arr2d
np.tile(arr2d,2)
np.tile(arr2d, (2,3))
np.tile(arr2d,(2,1))

array([[0, 1, 2],
       [3, 4, 5]])

array([[0, 1, 2, 0, 1, 2],
       [3, 4, 5, 3, 4, 5]])

array([[0, 1, 2, 0, 1, 2, 0, 1, 2],
       [3, 4, 5, 3, 4, 5, 3, 4, 5],
       [0, 1, 2, 0, 1, 2, 0, 1, 2],
       [3, 4, 5, 3, 4, 5, 3, 4, 5]])

array([[0, 1, 2],
       [3, 4, 5],
       [0, 1, 2],
       [3, 4, 5]])

## 其他操作 (sort, insert, delete, copy)

### sort

#### 直接排序

In [79]:
arr = np.array([5,3,2,6,1,4])
print( 'Before sorting', arr )
arr.sort()
print( 'After sorting', arr )
arr[::-1]

Before sorting [5 3 2 6 1 4]
After sorting [1 2 3 4 5 6]


array([6, 5, 4, 3, 2, 1])

numpy 数组排序有两种方式：
1. arr.sort()
2. np.sort( arr )

第一种 sort 会改变 arr，第二种 sort 在排序时创建了 arr 的一个复制品，不会改变 arr。

In [87]:
arr = np.random.randint( 40, size=(3,4) )
arr

arr[:, 0].sort() 
arr

np.sort(arr[:,1])
arr


arr.sort(axis=1)
arr

array([[39,  9, 25, 23],
       [ 5, 27, 33, 20],
       [26, 25, 13,  3]])

array([[ 5,  9, 25, 23],
       [26, 27, 33, 20],
       [39, 25, 13,  3]])

array([ 9, 25, 27])

array([[ 5,  9, 25, 23],
       [26, 27, 33, 20],
       [39, 25, 13,  3]])

array([[ 5,  9, 23, 25],
       [20, 26, 27, 33],
       [ 3, 13, 25, 39]])

#### 间接排序

In [90]:
score = np.array([100, 60, 99, 80, 91])
idx = score.argsort()
idx
score[idx]

arr = np.random.randint( 40, size=(3,4) )
arr
arr[:, arr[0].argsort()]

array([1, 3, 4, 2, 0], dtype=int64)

array([ 60,  80,  91,  99, 100])

array([[29,  2, 29, 31],
       [ 0, 39, 19, 22],
       [ 4,  9, 16, 12]])

array([[ 2, 29, 29, 31],
       [39,  0, 19, 22],
       [ 9,  4, 16, 12]])

### 插入、删除

1. 用insert()函数在某个特定位置之前插入元素
2. 用delete()函数删除某些特定元素

In [92]:
a = np.arange(6)
a
np.insert(a, 1, 100)
a
np.delete(a, [1,3])
a

array([0, 1, 2, 3, 4, 5])

array([  0, 100,   1,   2,   3,   4,   5])

array([0, 1, 2, 3, 4, 5])

array([0, 2, 4, 5])

array([0, 1, 2, 3, 4, 5])

### copy

In [94]:
a = np.arange(6)
a_copy = a.copy()
print( 'Before changing value, a is', a )
print( 'Before changing value, a_copy is', a_copy )
a_copy[-1] = 99
print( 'After changing value, a_copy is', a_copy )
print( 'After changing value, a is', a )

Before changing value, a is [0 1 2 3 4 5]
Before changing value, a_copy is [0 1 2 3 4 5]
After changing value, a_copy is [ 0  1  2  3  4 99]
After changing value, a is [0 1 2 3 4 5]


# 数组运算

1. 元素层面 (element-wise) 计算
2. 线性代数 (linear algebra) 计算
3. 元素整合 (element aggregation) 计算
4. 广播机制 (broadcasting) 计算

## element-wise

1. 二元运算 (binary operation)：加减乘除
2. 数学函数：倒数、平方、指数、对数
3. 比较运算 (comparison)

In [96]:
arr1 = np.array([[1., 2., 3.], [4., 5., 6.]])
arr1
arr2 = np.ones((2,3)) * 2
arr2

array([[1., 2., 3.],
       [4., 5., 6.]])

array([[2., 2., 2.],
       [2., 2., 2.]])

In [101]:
arr1 + arr2 + 1
arr1 - arr2
arr1 * arr2
arr1 / arr2

1 / arr1
arr1 ** 2
np.exp(arr1)
np.log(arr1)

arr1 > arr2
arr1 > 3
np.allclose(arr1, arr2)

array([[4., 5., 6.],
       [7., 8., 9.]])

array([[-1.,  0.,  1.],
       [ 2.,  3.,  4.]])

array([[ 2.,  4.,  6.],
       [ 8., 10., 12.]])

array([[0.5, 1. , 1.5],
       [2. , 2.5, 3. ]])

array([[1.        , 0.5       , 0.33333333],
       [0.25      , 0.2       , 0.16666667]])

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

array([[  2.71828183,   7.3890561 ,  20.08553692],
       [ 54.59815003, 148.4131591 , 403.42879349]])

array([[0.        , 0.69314718, 1.09861229],
       [1.38629436, 1.60943791, 1.79175947]])

array([[False, False,  True],
       [ True,  True,  True]])

array([[False, False, False],
       [ True,  True,  True]])

False

## linear algebra

如果你非要对二维数组 arr2d 进行矩阵运算，那么可以通过调用以下函数来实现：
1. A = np.mat(arr2d) (注意：np.mat 在 NumPy 2.0 中已被移除，新版本请用下一种写法)
2. A = np.asmatrix(arr2d)

下面我们分别对「数组」和「矩阵」从创建、转置、求逆和相乘四个方面看看它们的同异。

In [104]:
# Create the same 2x2 data as an ndarray and as a matrix, then transpose both.
arr2d = np.array([[1,2],[3,1]])
arr2d
# np.asmatrix is used instead of np.mat: np.mat was removed in NumPy 2.0, while
# np.asmatrix is available in both old and new releases.
A = np.asmatrix(arr2d)
A

arr2d.T
arr2d.transpose()
A.T

array([[1, 2],
       [3, 1]])

matrix([[1, 2],
        [3, 1]])

array([[1, 3],
       [2, 1]])

array([[1, 3],
       [2, 1]])

matrix([[1, 3],
        [2, 1]])

In [107]:
np.linalg.inv(arr2d)
A.I
A**-1

array([[-0.2,  0.4],
       [ 0.6, -0.2]])

matrix([[-0.2,  0.4],
        [ 0.6, -0.2]])

matrix([[-0.2,  0.4],
        [ 0.6, -0.2]])

In [113]:
arr2d
A

arr = np.array([1,2])
arr
b = np.asmatrix(arr).T

arr2d*arr
A*b

arr2d*arr2d
A*A


array([[1, 2],
       [3, 1]])

matrix([[1, 2],
        [3, 1]])

array([1, 2])

array([[1, 4],
       [3, 2]])

matrix([[5],
        [5]])

array([[1, 4],
       [9, 1]])

matrix([[7, 4],
        [6, 7]])

In [114]:
arr2d
arr

np.dot(arr2d, arr)
np.dot(arr2d, arr2d)

array([[1, 2],
       [3, 1]])

array([1, 2])

array([5, 5])

array([[7, 4],
       [6, 7]])

## element aggregation

在数组中，元素可以以不同方式整合 (aggregation)。拿求和 (sum) 函数来说，我们可以对数组
1. 所有的元素求和
2. 在某个轴 (axis) 上的元素求和

In [2]:
import numpy as np

arr = np.arange(1,7).reshape((2,3))
arr

array([[1, 2, 3],
       [4, 5, 6]])

In [6]:
print( 'The total sum is', arr.sum() )
print( 'The sum across rows is', arr.sum(axis=0) )
print( 'The sum across columns is', arr.sum(axis=1) )

arr.sum(axis = 0)
arr.sum(axis=0, keepdims=True) # 保持维度

The total sum is 21
The sum across rows is [5 7 9]
The sum across columns is [ 6 15]


array([5, 7, 9])

array([[5, 7, 9]])

![jupyter](./numpy_axis_03.jpg)

![jupyter](./numpy_axis_sum_4dim.jpg)

In [9]:
arr = np.arange(1,25).reshape((2,2,2,3))
print(arr)


print( 'The total sum is', arr.sum() )
print( 'The sum on axis0 is', arr.sum(axis=0) )
print( 'The sum on axis1 is', arr.sum(axis=1) )
print( 'The sum on axis2 is', arr.sum(axis=2) )
print( 'The sum on axis3 is', arr.sum(axis=3) )

[[[[ 1  2  3]
   [ 4  5  6]]

  [[ 7  8  9]
   [10 11 12]]]


 [[[13 14 15]
   [16 17 18]]

  [[19 20 21]
   [22 23 24]]]]
The total sum is 300
The sum on axis0 is [[[14 16 18]
  [20 22 24]]

 [[26 28 30]
  [32 34 36]]]
The sum on axis1 is [[[ 8 10 12]
  [14 16 18]]

 [[32 34 36]
  [38 40 42]]]
The sum on axis2 is [[[ 5  7  9]
  [17 19 21]]

 [[29 31 33]
  [41 43 45]]]
The sum on axis3 is [[[ 6 15]
  [24 33]]

 [[42 51]
  [60 69]]]


除了sum 函数，整合函数还包括 min, max, mean, std 和 cumsum，分别是求最小值、最大值、均值、标准差和累加，这些函数对数组里的元素整合方式和sum 函数相同，就不多讲了。总结来说我们可以对数组
1. 所有的元素整合
2. 在某个轴 (axis) 上的元素整合

整合函数= {sum, min, max, mean, std, cumsum}

## broadcasting

当对两个形状不同的数组按元素操作时，可能会触发「广播机制」。具体做法，先适当复制元素使得这两个数组形状相同后再按元素操作，两个步骤：
1. 广播轴 (broadcast axis)：比对两个数组的维度，将形状小的数组的维度 (轴) 补齐
2. 复制元素：顺着补齐的轴，将形状小的数组里的元素复制，使得最终形状和另一个数组吻合

In [12]:
print('\n 例一：标量和一维数组')
arr = np.arange(5)
arr
arr + 2


print('\n 例二：一维数组和二维数组')
arr = np.arange(12).reshape((4,3))
arr
arr.mean(axis=0)
arr - arr.mean(axis=0)


 例一：标量和一维数组


array([0, 1, 2, 3, 4])

array([2, 3, 4, 5, 6])


 例二：一维数组和二维数组


array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

array([4.5, 5.5, 6.5])

array([[-4.5, -4.5, -4.5],
       [-1.5, -1.5, -1.5],
       [ 1.5,  1.5,  1.5],
       [ 4.5,  4.5,  4.5]])

当我们对两个数组操作时，如果它们的形状
1. 不相容 (incompatible)，广播机制不能进行
2. 相容 (compatible)，广播机制可以进行

因此，进行广播机制分两步：
1. 检查两个数组形状是否兼容，即从两个形状元组最后一个元素，来检查：它们是否相等、是否有一个等于1
2. 一旦它们形状兼容，确定两个数组的最终形状。

In [15]:
# 用个例子来应用以上广播机制规则
print('\n 例三：维度一样，形状不一样')

a = np.array([[1,2,3]])
b = np.array([[4],[5],[6]])
a
b
print( 'The shape of a is', a.shape )
print( 'The shape of b is', b.shape )

c = a + b
print( 'The shape of c is', c.shape )
print( 'a is', a )
print( 'b is', b )
print( 'c = a + b =', c )


 例三：维度一样，形状不一样


array([[1, 2, 3]])

array([[4],
       [5],
       [6]])

The shape of a is (1, 3)
The shape of b is (3, 1)
The shape of c is (3, 3)
a is [[1 2 3]]
b is [[4]
 [5]
 [6]]
c = a + b = [[5 6 7]
 [6 7 8]
 [7 8 9]]


1. 检查数组 a 和 b 形状是否兼容，从两个形状元组 (1, 3) 和 (3, 1)最后一个元素开始检查，发现它们都满足『有一个等于 1』的条件。
2. 因此它们形状兼容，两个数组的最终形状为 (max(1,3), max(3,1)) = (3, 3)

In [18]:
# Example 4: operands with a different number of dimensions.
print('\n 例四：维度不一样')

a = np.arange(5)
b = np.array(2)
a
b
print( 'The dimension of a is', a.ndim, 'and the shape of a is', a.shape )
print( 'The dimension of b is', b.ndim, 'and the shape of b is', b.shape )

# b has shape (); padding the missing axis with 1 gives the shapes (5,) and (1,)
# to compare, so b is broadcast across all five elements of a.
c = a + b
print( 'The dimension of c is', c.ndim, 'and the shape of c is', c.shape, '\n' )
print( 'a is', a )
print( 'b is', b )
print( 'c = a + b =', c )


 例四：维度不一样


array([0, 1, 2, 3, 4])

array(2)

The dimension of a is 1 and the shape of a is (5,)
The dimension of b is 0 and the shape of b is ()
The dimension of c is 1 and the shape of c is (5,) 

a is [0 1 2 3 4]
b is 2
c = a + b = [2 3 4 5 6]


In [22]:
# 练习：

a = np.array( [[[1,2,3], [4,5,6]]] )
b1 = np.array( [[1,1,1], [2,2,2], [3,3,3]] )
b2 = np.arange(3).reshape((1,3))
b3 = np.arange(6).reshape((2,3))
b4 = np.arange(12).reshape((2,2,3))
b5 = np.arange(6).reshape((2,1,3))
a
b1
b2
b3
b4
b5

array([[[1, 2, 3],
        [4, 5, 6]]])

array([[1, 1, 1],
       [2, 2, 2],
       [3, 3, 3]])

array([[0, 1, 2]])

array([[0, 1, 2],
       [3, 4, 5]])

array([[[ 0,  1,  2],
        [ 3,  4,  5]],

       [[ 6,  7,  8],
        [ 9, 10, 11]]])

array([[[0, 1, 2]],

       [[3, 4, 5]]])

The dimension of a is 3 and the shape of a is (1, 2, 3)
The dimension of b1 is 2 and the shape of b1 is (3, 3) 



ValueError: operands could not be broadcast together with shapes (1,2,3) (3,3) 

In [26]:
print( 'The dimension of a is', a.ndim, 'and the shape of a is', a.shape )
print( 'The dimension of b1 is', b1.ndim, 'and the shape of b1 is', b1.shape, '\n')
# Compare the shapes from the last axis backwards: the last entries are both 3,
# so they are compatible; the next pair is 2 and 3, which are neither equal nor
# 1, so the shapes are incompatible and the addition raises ValueError.
# The error is the point of this exercise, so it is caught and printed instead
# of aborting a "Run All" of the notebook.
try:
    c1 = a + b1
except ValueError as err:
    print( 'ValueError:', err )
else:
    print( c1 )
    print( c1.shape )

The dimension of a is 3 and the shape of a is (1, 2, 3)
The dimension of b1 is 2 and the shape of b1 is (3, 3) 



ValueError: operands could not be broadcast together with shapes (1,2,3) (3,3) 

In [27]:
print( 'The dimension of a is', a.ndim, 'and the shape of a is', a.shape )
print( 'The dimension of b2 is', b2.ndim, 'and the shape of b2 is', b2.shape, '\n' )
c2 = a + b2
a
b2
c2
print( c2.shape )

The dimension of a is 3 and the shape of a is (1, 2, 3)
The dimension of b2 is 2 and the shape of b2 is (1, 3) 



array([[[1, 2, 3],
        [4, 5, 6]]])

array([[0, 1, 2]])

array([[[1, 3, 5],
        [4, 6, 8]]])

[[[1 3 5]
  [4 6 8]]]
(1, 2, 3)


In [28]:
print( 'The dimension of a is', a.ndim, 'and the shape of a is', a.shape )
print( 'The dimension of b3 is', b3.ndim, 'and the shape of b3 is', b3.shape, '\n' )
c3 = a + b3
a
b3
c3
print( c3.shape )

The dimension of a is 3 and the shape of a is (1, 2, 3)
The dimension of b3 is 2 and the shape of b3 is (2, 3) 



array([[[1, 2, 3],
        [4, 5, 6]]])

array([[0, 1, 2],
       [3, 4, 5]])

array([[[ 1,  3,  5],
        [ 7,  9, 11]]])

(1, 2, 3)


In [29]:
print( 'The dimension of a is', a.ndim, 'and the shape of a is', a.shape )
print( 'The dimension of b4 is', b4.ndim, 'and the shape of b4 is', b4.shape, '\n' )
c4 = a + b4
a
b4
c4
print( c4.shape )

The dimension of a is 3 and the shape of a is (1, 2, 3)
The dimension of b4 is 3 and the shape of b4 is (2, 2, 3) 



array([[[1, 2, 3],
        [4, 5, 6]]])

array([[[ 0,  1,  2],
        [ 3,  4,  5]],

       [[ 6,  7,  8],
        [ 9, 10, 11]]])

array([[[ 1,  3,  5],
        [ 7,  9, 11]],

       [[ 7,  9, 11],
        [13, 15, 17]]])

(2, 2, 3)


In [30]:
print( 'The dimension of a is', a.ndim, 'and the shape of a is', a.shape )
print( 'The dimension of b5 is', b5.ndim, 'and the shape of b5 is', b5.shape )
c5 = a + b5
a
b5
c5
print( c5.shape )

The dimension of a is 3 and the shape of a is (1, 2, 3)
The dimension of b5 is 3 and the shape of b5 is (2, 1, 3)


array([[[1, 2, 3],
        [4, 5, 6]]])

array([[[0, 1, 2]],

       [[3, 4, 5]]])

array([[[ 1,  3,  5],
        [ 4,  6,  8]],

       [[ 4,  6,  8],
        [ 7,  9, 11]]])

(2, 2, 3)
