# Numpy
- Numpy是Python的一个library库
- Numpy主要支持矩阵操作和运算
- Numpy非常高效，core代码由C语言写成
- pandas也是基于Numpy构建的一个library
- 现在比较流行的机器学习库框架（Tensorflow/PyTorch等），语法都与Numpy比较接近

# 目录
- 数组简介和数组的构造（ndarray）
- 数组的取值和赋值
- 数学运算
- broadcasting广播
- 文件输入输出
- 线性代数运算
- 小项目：用Numpy写一个softmax

In [2]:
import numpy as np

In [2]:
np

<module 'numpy' from 'd:\\anaconda\\envs\\myenv\\lib\\site-packages\\numpy\\__init__.py'>

## ndarray 数组

用np.array从一个list初始化一个数组

In [3]:
a = np.array([1,2,3])

In [4]:
a

array([1, 2, 3])

In [5]:
type(a)

numpy.ndarray

In [6]:
a[1]

2

In [7]:
a[2]=5

In [8]:
a

array([1, 2, 5])

In [10]:
b = np.array([[1,2,3],[4,5,6]])
b

array([[1, 2, 3],
       [4, 5, 6]])

In [11]:
b.shape

(2, 3)

In [12]:
a.shape

(3,)

查看每个element的大小

In [13]:
b.itemsize #每一个元素占4个byte

4

In [14]:
b.dtype #元素是32位整型数据

dtype('int32')

In [15]:
b.size #元素数

6

In [16]:
a.size

3

内置创建数组函数

In [17]:
a = np.zeros((2,3))
a

array([[0., 0., 0.],
       [0., 0., 0.]])

In [18]:
a.dtype

dtype('float64')

In [24]:
b = np.ones((1,2))
b

array([[1., 1.]])

In [23]:
c = np.full((2,2),7)
c

array([[7, 7],
       [7, 7]])

In [25]:
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [27]:
np.random.random((2,4)) # uniform random floats in the half-open interval [0, 1); shape passed as one tuple

array([[0.75931231, 0.1284825 , 0.54649077, 0.7018235 ],
       [0.76176526, 0.20663917, 0.56164015, 0.97439269]])

In [28]:
np.random.rand(2,4) # uniform samples over [0, 1); the shape is given as separate arguments, not a tuple

array([[0.86668524, 0.46806235, 0.25200681, 0.26749928],
       [0.28515922, 0.4808885 , 0.00301024, 0.81069994]])

In [29]:
np.random.randn(2,4) #standard normal distribution标准正态分布

array([[ 0.78119371, -1.53380777, -0.17565157, -1.14084655],
       [-0.15985442,  0.47683354,  1.05812241,  0.84753375]])

In [30]:
np.empty((2,3,2)) #未初始化的

array([[[1.38814591e-311, 3.16202013e-322],
        [0.00000000e+000, 0.00000000e+000],
        [0.00000000e+000, 3.34881736e-061]],

       [[1.08778433e-071, 2.87933229e+180],
        [5.49983709e+170, 1.32391116e-071],
        [2.44298953e-052, 1.34890507e+161]]])

In [31]:
np.arange(15)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

linspace也是一个常见的初始化数据的手段，产生一连串等距的数组

In [32]:
np.linspace(2.0, 3.0 ,5) #规定开始和结尾值 以及数量，产生等间距的数组

array([2.  , 2.25, 2.5 , 2.75, 3.  ])

## tensor的形状

numpy可以很容易的把一维数组转换成二维，三维

In [35]:
arr = np.arange(8)
arr

array([0, 1, 2, 3, 4, 5, 6, 7])

In [34]:
arr.reshape(4,2) #未改变原arr

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

In [36]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7])

In [37]:
arr.shape = 2 ,4 

In [38]:
arr

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

在某一维度上写-1，会自动推导出正确维度

In [39]:
arr

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [40]:
arr.reshape(4,-1)

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

从其它ndarray中获取shape信息然后reshape

In [41]:
other_arr = np.ones((2,2,2))

In [42]:
other_arr

array([[[1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.]]])

In [44]:
arr.reshape(other_arr.shape) #变成别的array的维度

array([[[0, 1],
        [2, 3]],

       [[4, 5],
        [6, 7]]])

高维数组用ravel来拉平

In [45]:
arr.ravel()

array([0, 1, 2, 3, 4, 5, 6, 7])

In [46]:
other_arr.ravel()

array([1., 1., 1., 1., 1., 1., 1., 1.])

In [47]:
arr.reshape(-1)

array([0, 1, 2, 3, 4, 5, 6, 7])

## 数组的数据类型  dtype

生成数组时可以指定数组类型，若不指定则自动匹配合适类型

In [48]:
arr = np.array([1,2,3])
arr.dtype

dtype('int32')

In [51]:
arr = np.array([4,5,6], dtype=np.int64)
arr.dtype

dtype('int64')

有时候需要ndarray是一个特定的数据类型，可以用astype复制数组并转换数据类型

In [52]:
int_arr = np.array([1,2,3,4,5])
int_arr.dtype

dtype('int32')

In [53]:
int_arr.astype(np.float64)

array([1., 2., 3., 4., 5.])

In [54]:
int_arr

array([1, 2, 3, 4, 5])

In [55]:
float_arr = np.array([3,6, 1.2, 7.0 ,-3.4])
float_arr

array([ 3. ,  6. ,  1.2,  7. , -3.4])

In [56]:
float_arr.astype(int) # float -> integer cast drops the fractional part (truncates toward zero); the np.int alias was removed in NumPy 1.24, builtin int is its exact equivalent

array([ 3,  6,  1,  7, -3])

In [70]:
str_arr = np.array(['1.4','2.7','3.8'])
str_arr

array(['1.4', '2.7', '3.8'], dtype='<U3')

In [76]:
# np.bytes_ is the fixed-width byte-string type; the old np.string_ alias was removed in NumPy 2.0.
str_arr = np.array(['1.4','2.7','3.5'], dtype=np.bytes_)
str_arr

array([b'1.4', b'2.7', b'3.5'], dtype='|S3')

In [77]:
str_arr.dtype

dtype('S3')

In [73]:
str_arr.astype(dtype=np.float64) # astype parses the byte strings into float64 numbers and returns a new array

array([1.4, 2.7, 3.5])

In [79]:
int_arr = np.arange(10)
int_arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [80]:
float_arr

array([ 3. ,  6. ,  1.2,  7. , -3.4])

In [83]:
int_arr.astype(float_arr.dtype)

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

##  Array indexing 取值赋值 

像list一样切片（多维数组可以从各个维度同时切片）

In [86]:
a = np.array([[1,2,3,4],[5,6,7,8],[9,10,11,12]])
a

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [87]:
a[1,3]

8

In [88]:
a[0,0]

1

In [89]:
c = [[1,2,3,4],[5,6,7,8],[9,10,11,12]]
c

[[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]

In [90]:
c[1,3]

TypeError: list indices must be integers or slices, not tuple

In [91]:
c[1][3]

8

In [92]:
a[1:3,2:4] # 3,4位置不包括

array([[ 7,  8],
       [11, 12]])

注意：切片得到的是原数组的视图（view），与原数组共享内存，因此修改切片会同时修改原数组（不建议用这种方式赋值）

In [94]:
b = a[1:3,2:4]
b

array([[ 7,  8],
       [11, 12]])

In [95]:
b[0,0] = 77
b

array([[77,  8],
       [11, 12]])

In [96]:
a

array([[ 1,  2,  3,  4],
       [ 5,  6, 77,  8],
       [ 9, 10, 11, 12]])

复制出一个新的数组，需要copy()这个方法

In [97]:
d = a.copy()
d

array([[ 1,  2,  3,  4],
       [ 5,  6, 77,  8],
       [ 9, 10, 11, 12]])

In [98]:
d[0,0]=88888
d

array([[88888,     2,     3,     4],
       [    5,     6,    77,     8],
       [    9,    10,    11,    12]])

In [100]:
a # 表明复制的矩阵里的值并不是原内存的值

array([[ 1,  2,  3,  4],
       [ 5,  6, 77,  8],
       [ 9, 10, 11, 12]])

再回到数组切片问题上，探讨高维数组

In [101]:
a = np.arange(12).reshape(3,4)
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [103]:
a[1,:] # take the row at index 1 (i.e. the second row); an integer index drops that axis, so the result is 1-D

array([4, 5, 6, 7])

In [104]:
a[1,:].shape

(4,)

In [105]:
a[[1],:] #将第一维度作为一个list传进去，第一维度就不会被压扁

array([[4, 5, 6, 7]])

In [106]:
a[[1],:].shape

(1, 4)

In [107]:
a[1:2,:]

array([[4, 5, 6, 7]])

In [108]:
a[1:2,:].shape

(1, 4)

省略号 `...`（Ellipsis）：代替中间所有没有写出来的维度，相当于连续若干个 `:`

In [3]:
c = np.arange(360).reshape(2,3,3,4,5)
c

array([[[[[  0,   1,   2,   3,   4],
          [  5,   6,   7,   8,   9],
          [ 10,  11,  12,  13,  14],
          [ 15,  16,  17,  18,  19]],

         [[ 20,  21,  22,  23,  24],
          [ 25,  26,  27,  28,  29],
          [ 30,  31,  32,  33,  34],
          [ 35,  36,  37,  38,  39]],

         [[ 40,  41,  42,  43,  44],
          [ 45,  46,  47,  48,  49],
          [ 50,  51,  52,  53,  54],
          [ 55,  56,  57,  58,  59]]],


        [[[ 60,  61,  62,  63,  64],
          [ 65,  66,  67,  68,  69],
          [ 70,  71,  72,  73,  74],
          [ 75,  76,  77,  78,  79]],

         [[ 80,  81,  82,  83,  84],
          [ 85,  86,  87,  88,  89],
          [ 90,  91,  92,  93,  94],
          [ 95,  96,  97,  98,  99]],

         [[100, 101, 102, 103, 104],
          [105, 106, 107, 108, 109],
          [110, 111, 112, 113, 114],
          [115, 116, 117, 118, 119]]],


        [[[120, 121, 122, 123, 124],
          [125, 126, 127, 128, 129],
          [130, 131, 1

In [4]:
c[[1],...,3,:] 
# axis 0: the list index [1] selects the second entry and keeps the axis (length 1);
# second-to-last axis: the integer index 3 selects the fourth entry and drops the axis;
# last axis: everything; `...` stands for the axes in between, which are kept whole

array([[[[195, 196, 197, 198, 199],
         [215, 216, 217, 218, 219],
         [235, 236, 237, 238, 239]],

        [[255, 256, 257, 258, 259],
         [275, 276, 277, 278, 279],
         [295, 296, 297, 298, 299]],

        [[315, 316, 317, 318, 319],
         [335, 336, 337, 338, 339],
         [355, 356, 357, 358, 359]]]])

In [5]:
c[[1],...,3,:].shape #所以第四维度变成一个值，而第一维度不被压扁

(1, 3, 3, 5)

更高级操作，更自由的取值赋值

In [116]:
a = np.arange(6).reshape(2,3)
a

array([[0, 1, 2],
       [3, 4, 5]])

In [117]:
a[[0,1,1],[0,1,2]] #相当于下面操作

array([0, 4, 5])

In [119]:
np.array([a[0,0],a[1,1],a[1,2]])

array([0, 4, 5])

In [120]:
a = np.arange(4*5*6).reshape(4,5,6)
a

array([[[  0,   1,   2,   3,   4,   5],
        [  6,   7,   8,   9,  10,  11],
        [ 12,  13,  14,  15,  16,  17],
        [ 18,  19,  20,  21,  22,  23],
        [ 24,  25,  26,  27,  28,  29]],

       [[ 30,  31,  32,  33,  34,  35],
        [ 36,  37,  38,  39,  40,  41],
        [ 42,  43,  44,  45,  46,  47],
        [ 48,  49,  50,  51,  52,  53],
        [ 54,  55,  56,  57,  58,  59]],

       [[ 60,  61,  62,  63,  64,  65],
        [ 66,  67,  68,  69,  70,  71],
        [ 72,  73,  74,  75,  76,  77],
        [ 78,  79,  80,  81,  82,  83],
        [ 84,  85,  86,  87,  88,  89]],

       [[ 90,  91,  92,  93,  94,  95],
        [ 96,  97,  98,  99, 100, 101],
        [102, 103, 104, 105, 106, 107],
        [108, 109, 110, 111, 112, 113],
        [114, 115, 116, 117, 118, 119]]])

In [121]:
a[np.arange(4),np.arange(4),[1,3,4,2]]

array([  1,  39,  76, 110])

In [122]:
a[[0,0],[1,1]]

array([[ 6,  7,  8,  9, 10, 11],
       [ 6,  7,  8,  9, 10, 11]])

看懂下面的操作

In [124]:
a = np.arange(12).reshape(4,3)
a

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [125]:
b = np.array([0,2,0,1])
b

array([0, 2, 0, 1])

In [126]:
a[np.arange(4),b]

array([ 0,  5,  6, 10])

In [127]:
a[np.arange(4),b] += 100

In [128]:
a

array([[100,   1,   2],
       [  3,   4, 105],
       [106,   7,   8],
       [  9, 110,  11]])

## Numpy的条件判断

比较fashion的取法之一，用条件判断去取，但很好用

In [129]:
a = np.arange(6).reshape(3,2)
a

array([[0, 1],
       [2, 3],
       [4, 5]])

In [130]:
bool_idx = a >2
bool_idx

array([[False, False],
       [False,  True],
       [ True,  True]])

用刚才的布尔型数组作为下标就可以取出符合条件的元素了

In [131]:
a[bool_idx]

array([3, 4, 5])

In [132]:
a[a>2] # 一句话也可以 

array([3, 4, 5])

还有很多切片的细节操作，可以看官方文档

## 简单数学运算

逐个元素的计算

In [135]:
x = np.array([[1,2],[3,4]],dtype=np.float64)
x

array([[1., 2.],
       [3., 4.]])

In [136]:
y = np.array([[5,6],[7,8]],dtype=np.float64)
y

array([[5., 6.],
       [7., 8.]])

In [137]:
x + y

array([[ 6.,  8.],
       [10., 12.]])

In [138]:
np.add(x,y)

array([[ 6.,  8.],
       [10., 12.]])

In [139]:
x - y

array([[-4., -4.],
       [-4., -4.]])

In [140]:
np.subtract(x,y)

array([[-4., -4.],
       [-4., -4.]])

In [141]:
x * y

array([[ 5., 12.],
       [21., 32.]])

In [142]:
np.multiply(x,y)

array([[ 5., 12.],
       [21., 32.]])

In [143]:
x / y

array([[0.2       , 0.33333333],
       [0.42857143, 0.5       ]])

In [144]:
np.divide(x,y)

array([[0.2       , 0.33333333],
       [0.42857143, 0.5       ]])

In [145]:
np.sqrt(x) #平方根

array([[1.        , 1.41421356],
       [1.73205081, 2.        ]])

In [146]:
x ** 2

array([[ 1.,  4.],
       [ 9., 16.]])

一个数组的运算函数

In [147]:
x.sum()

10.0

In [148]:
np.sum(x)

10.0

In [149]:
x

array([[1., 2.],
       [3., 4.]])

In [150]:
np.sum(x,axis=0) #沿行方向求和

array([4., 6.])

In [152]:
np.sum(x,axis=1) #沿列方向求和

array([3., 7.])

In [6]:
x = np.arange(15).reshape(3,5)
x

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [8]:
x.sum(1)

array([10, 35, 60])

In [155]:
x.sum(0)

array([15, 18, 21, 24, 27])

In [156]:
np.mean(x)

7.0

In [157]:
np.mean(x,0)

array([5., 6., 7., 8., 9.])

In [158]:
np.mean(x,1)

array([ 2.,  7., 12.])

In [160]:
x.cumsum(axis=0) # running sum down the rows (axis 0), computed independently for each column

array([[ 0,  1,  2,  3,  4],
       [ 5,  7,  9, 11, 13],
       [15, 18, 21, 24, 27]], dtype=int32)

In [161]:
x.cumprod(axis=0) # running product down the rows (axis 0), computed independently for each column

array([[  0,   1,   2,   3,   4],
       [  0,   6,  14,  24,  36],
       [  0,  66, 168, 312, 504]], dtype=int32)

keepdims这个parameter可以保留维度

In [162]:
x.mean(1)

array([ 2.,  7., 12.])

In [163]:
x.mean(1,keepdims=True)

array([[ 2.],
       [ 7.],
       [12.]])

数组的排序

In [167]:
arr = np.random.rand(8)
arr

array([0.14685956, 0.57553253, 0.3857002 , 0.62841193, 0.38444008,
       0.48469927, 0.12313679, 0.42851911])

In [168]:
arr.sort()

In [169]:
arr

array([0.12313679, 0.14685956, 0.38444008, 0.3857002 , 0.42851911,
       0.48469927, 0.57553253, 0.62841193])

二维数组在某维度上排序

In [170]:
arr = np.random.randn(5,3)
arr

array([[ 1.01768248,  2.11742553,  1.47344098],
       [ 1.397114  , -0.96924718, -0.8250129 ],
       [-0.2870235 , -0.29374241, -1.65245566],
       [-1.1304505 ,  0.01760517,  0.37350355],
       [ 0.8580163 , -0.29153599,  0.61745194]])

In [172]:
arr.sort(0) #沿行方向排序
arr

array([[-1.1304505 , -0.96924718, -1.65245566],
       [-0.2870235 , -0.29374241, -0.8250129 ],
       [ 0.8580163 , -0.29153599,  0.37350355],
       [ 1.01768248,  0.01760517,  0.61745194],
       [ 1.397114  ,  2.11742553,  1.47344098]])

In [173]:
arr.sort(1) #沿列方向排序
arr

array([[-1.65245566, -1.1304505 , -0.96924718],
       [-0.8250129 , -0.29374241, -0.2870235 ],
       [-0.29153599,  0.37350355,  0.8580163 ],
       [ 0.01760517,  0.61745194,  1.01768248],
       [ 1.397114  ,  1.47344098,  2.11742553]])

如何找出某个dimension上最大值的index

In [178]:
arr = np.random.rand(5,3)
arr

array([[0.15375783, 0.92314492, 0.02765689],
       [0.20490729, 0.50965749, 0.92394432],
       [0.31128438, 0.71564327, 0.30380057],
       [0.59101766, 0.44116979, 0.25587543],
       [0.18881242, 0.26454143, 0.82213169]])

In [179]:
np.argmax(arr,1) #沿列方向取出最大值的坐标

array([1, 2, 1, 0, 2], dtype=int64)

找出最大的k个index

In [180]:
arr

array([[0.15375783, 0.92314492, 0.02765689],
       [0.20490729, 0.50965749, 0.92394432],
       [0.31128438, 0.71564327, 0.30380057],
       [0.59101766, 0.44116979, 0.25587543],
       [0.18881242, 0.26454143, 0.82213169]])

In [181]:
arr.argsort(1) #沿列方向index做一个排序

array([[2, 0, 1],
       [0, 1, 2],
       [2, 0, 1],
       [2, 1, 0],
       [0, 1, 2]], dtype=int64)

In [183]:
arr.argsort(1)[:, -2:] #行留下，列上找到最大的两个数字的index

array([[0, 1],
       [1, 2],
       [0, 1],
       [1, 0],
       [1, 2]], dtype=int64)

In [185]:
arr.argsort(1)[:, -2:][:, ::-1] #比起上面多了一个排序颠倒

array([[1, 0],
       [2, 1],
       [1, 0],
       [0, 1],
       [2, 1]], dtype=int64)

## broadcasting 广播
- 如果用一个小矩阵去和一个大矩阵操作，那么希望小矩阵能和大矩阵一块一块逐个操作

In [186]:
x = np.array([[1,2,3],[4,5,6],[7,8,9],[10,11,12]])
x

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [187]:
y = np.zeros_like(x)
y

array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]])

In [189]:
v = np.array([1,0,1])
v

array([1, 0, 1])

我们需要x逐行加v最后输出到y

粗暴方式：for循环

In [192]:
for i in range(x.shape[0]):
    for j in range(x.shape[1]):
        y[i,j] = x[i,j] + v[j]

In [193]:
y

array([[ 2,  2,  4],
       [ 5,  5,  7],
       [ 8,  8, 10],
       [11, 11, 13]])

In [194]:
y = np.zeros_like(x)
y

array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]])

In [195]:
for i in range(x.shape[0]):
    y[i] = x[i] + v

In [196]:
y

array([[ 2,  2,  4],
       [ 5,  5,  7],
       [ 8,  8, 10],
       [11, 11, 13]])

上述方法不高效，下面看看数据较大时运算时间

In [204]:
import time
start = time.time()
x = 200 * np.ones((5000,6000))
v = 300 * np.ones((6000))
y = np.zeros_like(x)
for i in range(x.shape[0]):
    for j in range(x.shape[1]):
        y[i,j] = x[i,j] + v[j]
print(y)
print("TIME:{}".format(time.time()-start))

[[500. 500. 500. ... 500. 500. 500.]
 [500. 500. 500. ... 500. 500. 500.]
 [500. 500. 500. ... 500. 500. 500.]
 ...
 [500. 500. 500. ... 500. 500. 500.]
 [500. 500. 500. ... 500. 500. 500.]
 [500. 500. 500. ... 500. 500. 500.]]
TIME:22.06438636779785


In [205]:
import time
start = time.time()
x = 200 * np.ones((5000,6000))
v = 300 * np.ones((6000))
y = np.zeros_like(x)
for i in range(x.shape[0]):
    y[i] = x[i] + v
print(y)
print("TIME:{}".format(time.time()-start))

[[500. 500. 500. ... 500. 500. 500.]
 [500. 500. 500. ... 500. 500. 500.]
 [500. 500. 500. ... 500. 500. 500.]
 ...
 [500. 500. 500. ... 500. 500. 500.]
 [500. 500. 500. ... 500. 500. 500.]
 [500. 500. 500. ... 500. 500. 500.]]
TIME:0.9967741966247559


下面用broadcasting运算

In [206]:
import time
start = time.time()
x = 200 * np.ones((5000,6000))
v = 300 * np.ones((6000))
# v has shape (6000,), so it is broadcast across every row of the (5000, 6000) array x.
# x + v allocates its own result, so y is NOT pre-allocated with zeros_like here:
# that array would be discarded immediately and would only add a wasted
# 5000x6000 allocation to the measured time.
y = x + v
print(y)
print("TIME:{}".format(time.time()-start))

[[500. 500. 500. ... 500. 500. 500.]
 [500. 500. 500. ... 500. 500. 500.]
 [500. 500. 500. ... 500. 500. 500.]
 ...
 [500. 500. 500. ... 500. 500. 500.]
 [500. 500. 500. ... 500. 500. 500.]
 [500. 500. 500. ... 500. 500. 500.]]
TIME:0.7612249851226807


broadcasting是如何工作的
当操作两个array时，numpy会从最后一个维度开始向前逐个比较它们的shape，在下述情况下，两个维度是兼容的，可以输出broadcasting结果
- 相等
- 其中一个为1（进而可以进行拷贝拓展，以至shape匹配）
- 当两个ndarray维度数不相同时，rank较小的那个ndarray会自动在前面补上长度为1的维度，直到与另外一个ndarray的rank相同，再检查是否匹配

例:A和B加结果为C，先在B前面补一维度1，再匹配
- A 8 x 1 x 6 x 1
- B ___7 x 1 x 5
- C 8 x 7 x 6 x 5

个人理解，维度补全，拉伸成相同ndarray最后相加

In [207]:
v = np.array([1,2,3])
x = np.array([4,5])

In [208]:
v.reshape(3,1) + x

array([[5, 6],
       [6, 7],
       [7, 8]])

In [209]:
v[:, None] + x

array([[5, 6],
       [6, 7],
       [7, 8]])

In [9]:
x = np.array([[1,2,3],[4,5,6]])
x

array([[1, 2, 3],
       [4, 5, 6]])

In [10]:
w = np.array([4,5])

In [11]:
x+w

ValueError: operands could not be broadcast together with shapes (2,3) (2,) 

In [12]:
(x.T+w).T

array([[ 5,  6,  7],
       [ 9, 10, 11]])

In [13]:
x + w[:, None]

array([[ 5,  6,  7],
       [ 9, 10, 11]])

In [14]:
x * 2

array([[ 2,  4,  6],
       [ 8, 10, 12]])

In [21]:
w.T

array([4, 5])

## 逻辑运算

where可以帮我们选择是取第一个ndarray的元素还是第二个的

In [25]:
x_arr = np.arange(5)
y_arr = np.arange(5,10)
print(x_arr)
print(y_arr)

[0 1 2 3 4]
[5 6 7 8 9]


In [24]:
cond = np.array([True, False, True, True, False])
np.where(cond, x_arr, y_arr)

array([0, 6, 2, 3, 9])

In [26]:
arr = np.random.randn(4,4)
arr

array([[-1.10346681,  0.77870452,  0.08964861, -0.00510951],
       [-1.21259575, -0.3548166 , -0.35604442, -1.97321147],
       [ 1.2331396 , -1.74761151, -0.6555187 ,  0.58260273],
       [-0.11983156, -0.0340349 , -0.71988099,  1.31681718]])

In [27]:
np.where(arr>0, 2, -2)

array([[-2,  2,  2, -2],
       [-2, -2, -2, -2],
       [ 2, -2, -2,  2],
       [-2, -2, -2,  2]])

In [34]:
cond1 = np.array([True, False, True, True, False])
cond2 = np.array([False, True, False, True, False])
cond1 & cond2 #&表示and |表示or

array([False, False, False,  True, False])

In [35]:
np.where(cond1 & cond2, 0 ,np.where(cond1, 1, np.where(cond2, 2, 3)))

array([1, 2, 1, 0, 3])

In [36]:
arr

array([[-1.10346681,  0.77870452,  0.08964861, -0.00510951],
       [-1.21259575, -0.3548166 , -0.35604442, -1.97321147],
       [ 1.2331396 , -1.74761151, -0.6555187 ,  0.58260273],
       [-0.11983156, -0.0340349 , -0.71988099,  1.31681718]])

In [37]:
(arr>0).sum() # booleans are summed as True = 1 and False = 0, so this counts the positive entries

5

In [38]:
cond

array([ True, False,  True,  True, False])

In [39]:
cond.any() #有没有一个是True

True

In [40]:
cond.all() #是不是全部为True

False

## 数组的拼接
- 百度numpy中的拼接（详细）

连接法concatenate

In [41]:
arr1 = np.arange(6).reshape(2,3)
arr2 = np.arange(6,12).reshape(2,3)
arr1

array([[0, 1, 2],
       [3, 4, 5]])

In [42]:
arr2

array([[ 6,  7,  8],
       [ 9, 10, 11]])

In [43]:
np.concatenate([arr1,arr2], axis=0) #在行上拼接，即行数变化

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [44]:
np.concatenate([arr1,arr2], axis=1) #在列上拼接，即列数变化

array([[ 0,  1,  2,  6,  7,  8],
       [ 3,  4,  5,  9, 10, 11]])

堆叠法stack

In [45]:
np.vstack([arr1,arr2]) #vertical stack按垂直方向堆叠

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [46]:
np.hstack([arr1,arr2]) #horizontal stack按水平方向堆叠

array([[ 0,  1,  2,  6,  7,  8],
       [ 3,  4,  5,  9, 10, 11]])

反向操作拆分一个数组，split方法

In [48]:
arr = np.random.rand(6,6)
arr

array([[0.76535589, 0.04800562, 0.43452466, 0.559973  , 0.06974988,
        0.87514558],
       [0.7767015 , 0.87198279, 0.6826433 , 0.69419767, 0.97502371,
        0.6765657 ],
       [0.26749094, 0.46399763, 0.01506181, 0.51474054, 0.07564475,
        0.19092801],
       [0.47201797, 0.70955131, 0.88159592, 0.59167071, 0.1514742 ,
        0.4072713 ],
       [0.53428597, 0.04674683, 0.96138137, 0.97024739, 0.22658554,
        0.46542114],
       [0.97287491, 0.54089063, 0.53213074, 0.01459712, 0.9763929 ,
        0.222065  ]])

In [49]:
first, second, third = np.split(arr,[1,3],axis=0) #在1和3 index上做拆分 在行上
print(first)
print()
print(second)
print()
print(third)

[[0.76535589 0.04800562 0.43452466 0.559973   0.06974988 0.87514558]]

[[0.7767015  0.87198279 0.6826433  0.69419767 0.97502371 0.6765657 ]
 [0.26749094 0.46399763 0.01506181 0.51474054 0.07564475 0.19092801]]

[[0.47201797 0.70955131 0.88159592 0.59167071 0.1514742  0.4072713 ]
 [0.53428597 0.04674683 0.96138137 0.97024739 0.22658554 0.46542114]
 [0.97287491 0.54089063 0.53213074 0.01459712 0.9763929  0.222065  ]]


In [50]:
arr

array([[0.76535589, 0.04800562, 0.43452466, 0.559973  , 0.06974988,
        0.87514558],
       [0.7767015 , 0.87198279, 0.6826433 , 0.69419767, 0.97502371,
        0.6765657 ],
       [0.26749094, 0.46399763, 0.01506181, 0.51474054, 0.07564475,
        0.19092801],
       [0.47201797, 0.70955131, 0.88159592, 0.59167071, 0.1514742 ,
        0.4072713 ],
       [0.53428597, 0.04674683, 0.96138137, 0.97024739, 0.22658554,
        0.46542114],
       [0.97287491, 0.54089063, 0.53213074, 0.01459712, 0.9763929 ,
        0.222065  ]])

In [51]:
first, second, third = np.split(arr,[1,3],axis=1) #在1和3 index上做拆分 在列上
print(first)
print()
print(second)
print()
print(third)

[[0.76535589]
 [0.7767015 ]
 [0.26749094]
 [0.47201797]
 [0.53428597]
 [0.97287491]]

[[0.04800562 0.43452466]
 [0.87198279 0.6826433 ]
 [0.46399763 0.01506181]
 [0.70955131 0.88159592]
 [0.04674683 0.96138137]
 [0.54089063 0.53213074]]

[[0.559973   0.06974988 0.87514558]
 [0.69419767 0.97502371 0.6765657 ]
 [0.51474054 0.07564475 0.19092801]
 [0.59167071 0.1514742  0.4072713 ]
 [0.97024739 0.22658554 0.46542114]
 [0.01459712 0.9763929  0.222065  ]]


In [52]:
blocks = np.split(arr, 3, axis=1) #在列上平均分3份
blocks

[array([[0.76535589, 0.04800562],
        [0.7767015 , 0.87198279],
        [0.26749094, 0.46399763],
        [0.47201797, 0.70955131],
        [0.53428597, 0.04674683],
        [0.97287491, 0.54089063]]),
 array([[0.43452466, 0.559973  ],
        [0.6826433 , 0.69419767],
        [0.01506181, 0.51474054],
        [0.88159592, 0.59167071],
        [0.96138137, 0.97024739],
        [0.53213074, 0.01459712]]),
 array([[0.06974988, 0.87514558],
        [0.97502371, 0.6765657 ],
        [0.07564475, 0.19092801],
        [0.1514742 , 0.4072713 ],
        [0.22658554, 0.46542114],
        [0.9763929 , 0.222065  ]])]

In [53]:
np.concatenate(blocks, axis=1)

array([[0.76535589, 0.04800562, 0.43452466, 0.559973  , 0.06974988,
        0.87514558],
       [0.7767015 , 0.87198279, 0.6826433 , 0.69419767, 0.97502371,
        0.6765657 ],
       [0.26749094, 0.46399763, 0.01506181, 0.51474054, 0.07564475,
        0.19092801],
       [0.47201797, 0.70955131, 0.88159592, 0.59167071, 0.1514742 ,
        0.4072713 ],
       [0.53428597, 0.04674683, 0.96138137, 0.97024739, 0.22658554,
        0.46542114],
       [0.97287491, 0.54089063, 0.53213074, 0.01459712, 0.9763929 ,
        0.222065  ]])

用repeat来重复ndarray中的元素，按元素重复

In [54]:
arr = np.arange(3)
arr

array([0, 1, 2])

In [55]:
arr.repeat(3)

array([0, 0, 0, 1, 1, 1, 2, 2, 2])

In [56]:
arr.repeat([2,3,4])

array([0, 0, 1, 1, 1, 2, 2, 2, 2])

In [58]:
arr = np.random.rand(2,2)
arr

array([[0.41316995, 0.51632592],
       [0.3397071 , 0.6134727 ]])

In [59]:
arr.repeat(2, axis=0)

array([[0.41316995, 0.51632592],
       [0.41316995, 0.51632592],
       [0.3397071 , 0.6134727 ],
       [0.3397071 , 0.6134727 ]])

In [60]:
arr.repeat(2, axis=1)

array([[0.41316995, 0.41316995, 0.51632592, 0.51632592],
       [0.3397071 , 0.3397071 , 0.6134727 , 0.6134727 ]])

Tile函数按整体重复

In [65]:
np.tile(arr, 3)

array([[0.41316995, 0.51632592, 0.41316995, 0.51632592, 0.41316995,
        0.51632592],
       [0.3397071 , 0.6134727 , 0.3397071 , 0.6134727 , 0.3397071 ,
        0.6134727 ]])

In [62]:
np.tile(arr, (2,3))

array([[0.41316995, 0.51632592, 0.41316995, 0.51632592, 0.41316995,
        0.51632592],
       [0.3397071 , 0.6134727 , 0.3397071 , 0.6134727 , 0.3397071 ,
        0.6134727 ],
       [0.41316995, 0.51632592, 0.41316995, 0.51632592, 0.41316995,
        0.51632592],
       [0.3397071 , 0.6134727 , 0.3397071 , 0.6134727 , 0.3397071 ,
        0.6134727 ]])

## numpy的文件输入和输出

In [4]:
arr = np.random.randn(6,4)
arr

array([[-0.01556003, -0.44477499,  0.09058559, -1.11226661],
       [ 0.1429796 ,  2.62464656,  1.69414247, -0.50330806],
       [ 0.40486065,  1.5169045 ,  1.58838425, -0.48910856],
       [ 1.458767  , -2.15770117,  0.50698204,  0.97149063],
       [-0.28222268,  0.09853853, -0.39342081, -0.20261822],
       [ 0.73099748,  1.34228062,  1.97594662, -1.50691372]])

In [4]:
np.save("some_array",arr)

In [6]:
np.load("some_array.npy")

array([[ 0.34184045, -2.24448633,  2.16313997, -0.38300608],
       [-0.21931653,  0.41009724,  0.75957645,  0.15118029],
       [-0.5985444 , -0.17403422, -0.52248099,  0.38464227],
       [-0.4671466 , -1.26710896, -0.35290132,  0.35180721],
       [-0.68708955,  0.5865243 ,  0.91319589, -0.27517805],
       [-1.18681677,  0.88260384,  1.33412138, -0.34808684]])

In [5]:
np.savetxt("array.csv",arr,delimiter=',')

In [6]:
arr2 = np.loadtxt("array.csv",delimiter=',')

In [7]:
arr2

array([[-0.01556003, -0.44477499,  0.09058559, -1.11226661],
       [ 0.1429796 ,  2.62464656,  1.69414247, -0.50330806],
       [ 0.40486065,  1.5169045 ,  1.58838425, -0.48910856],
       [ 1.458767  , -2.15770117,  0.50698204,  0.97149063],
       [-0.28222268,  0.09853853, -0.39342081, -0.20261822],
       [ 0.73099748,  1.34228062,  1.97594662, -1.50691372]])

多个数组一起压缩存储

In [8]:
arr1 = np.arange(15).reshape(3,5)

In [9]:
np.savez("array_archive.npz", a=arr1 ,b=arr2)

In [10]:
arch = np.load("array_archive.npz")
arch['a']

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [11]:
arch['b']

array([[-0.01556003, -0.44477499,  0.09058559, -1.11226661],
       [ 0.1429796 ,  2.62464656,  1.69414247, -0.50330806],
       [ 0.40486065,  1.5169045 ,  1.58838425, -0.48910856],
       [ 1.458767  , -2.15770117,  0.50698204,  0.97149063],
       [-0.28222268,  0.09853853, -0.39342081, -0.20261822],
       [ 0.73099748,  1.34228062,  1.97594662, -1.50691372]])

## numpy和scipy的相关数学运算
- 高级线性代数运算

In [19]:
x = np.array([[1,2],[3,4]],dtype=np.float64)
x

array([[1., 2.],
       [3., 4.]])

In [20]:
y = np.array([[5,6],[7,8]],dtype=np.float64)
y

array([[5., 6.],
       [7., 8.]])

In [21]:
v = np.array([9,10])
w = np.array([11,12])

### 内积dot

In [16]:
v.dot(w)

219

In [17]:
np.dot(v,w)

219

### 矩阵乘法dot

In [22]:
np.dot(x,y)

array([[19., 22.],
       [43., 50.]])

In [18]:
x.dot(v)

array([29., 67.])

In [19]:
np.dot(x,v)

array([29., 67.])

In [20]:
v.dot(x)

array([39., 58.])

In [21]:
np.dot(v,x)

array([39., 58.])

In [22]:
m = np.arange(12).reshape(3,4)
m

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [25]:
n = np.arange(4*5).reshape(4,5)
n

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [26]:
np.dot(m,n)

array([[ 70,  76,  82,  88,  94],
       [190, 212, 234, 256, 278],
       [310, 348, 386, 424, 462]])

In [27]:
np.dot(n,m)

ValueError: shapes (4,5) and (3,4) not aligned: 5 (dim 1) != 3 (dim 0)

In [28]:
m.dot(n)

array([[ 70,  76,  82,  88,  94],
       [190, 212, 234, 256, 278],
       [310, 348, 386, 424, 462]])

In [29]:
n.dot(m)

ValueError: shapes (4,5) and (3,4) not aligned: 5 (dim 1) != 3 (dim 0)

### 内积inner

In [30]:
x

array([[1., 2.],
       [3., 4.]])

In [31]:
y

array([[5., 6.],
       [7., 8.]])

In [32]:
np.inner(x,y)  #逐行做点积

array([[17., 23.],
       [39., 53.]])

In [45]:
X = np.arange(24).reshape(2,3,4) #高维数组上，即在最后一个维度上的乘积的和
Y = np.arange(36).reshape(3,3,4)
np.inner(X,Y).shape

(2, 3, 3, 3)

### 转置

In [34]:
x.T

array([[1., 3.],
       [2., 4.]])

In [35]:
v.T

array([ 9, 10])

### 高维tensor的转置

In [42]:
X.shape

(2, 3, 4)

In [40]:
X.transpose((1,0,2)).shape #相当于变维度的顺序,数字顺序像转置一样变化

(3, 2, 4)

In [41]:
X.swapaxes(1,2).shape #第2和第3维度反转

(2, 4, 3)

### matmul
- matrix multiply
- 非常常用，用于计算矩阵乘法

In [4]:
x = np.arange(24).reshape(2,3,4)
y = np.arange(8).reshape(4,2)
np.matmul(x,y).shape

(2, 3, 2)

In [6]:
x

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]]])

In [7]:
y

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

In [5]:
np.matmul(x,y)

array([[[ 28,  34],
        [ 76,  98],
        [124, 162]],

       [[172, 226],
        [220, 290],
        [268, 354]]])

In [3]:
np.dot(x,y)

array([[[ 28,  34],
        [ 76,  98],
        [124, 162]],

       [[172, 226],
        [220, 290],
        [268, 354]]])

从下面这个例子可以看出dot和matmul的区别

In [8]:
z = np.arange(16).reshape(2,4,2)
z

array([[[ 0,  1],
        [ 2,  3],
        [ 4,  5],
        [ 6,  7]],

       [[ 8,  9],
        [10, 11],
        [12, 13],
        [14, 15]]])

In [18]:
np.matmul(x,z).shape 
#视为两个（3，4）和（4，2）矩阵相乘得到（3，2）然后拼接在一起，多一个维度2

(2, 3, 2)

In [10]:
np.dot(x,z).shape
# dot sums over the last axis of x and the second-to-last axis of z (both of length 4);
# the remaining axes of x (2, 3) and of z (2, 2) are all kept and concatenated -> (2, 3, 2, 2)

(2, 3, 2, 2)

In [14]:
w = np.arange(24).reshape(3,4,2)
x.shape

(2, 3, 4)

In [12]:
np.matmul(x,w).shape

ValueError: operands could not be broadcast together with remapped shapes [original->remapped]: (2,3,4)->(2,newaxis,newaxis) (3,4,2)->(3,newaxis,newaxis) and requested shape (3,2)

In [13]:
np.dot(x,w).shape

(2, 3, 3, 2)

In [15]:
v = np.arange(24).reshape(2,3,4)

In [16]:
np.matmul(x,v).shape

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 3 is different from 4)

In [17]:
np.dot(x,v)

ValueError: shapes (2,3,4) and (2,3,4) not aligned: 4 (dim 2) != 3 (dim 1)

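##### 个人总结：上面实例说明，对于三维数组x与y，dot是把x的最后一个维度与y的倒数第二个维度做乘积求和，两者其余的维度全部保留并拼接，结果shape为 x.shape[:-1] + y.shape[:-2] + y.shape[-1:]（例子中为 (2,3)+(2,)+(2,) = (2,3,2,2)）；而matmul把最后两个维度视为矩阵，前面的维度视为batch维度，batch维度必须相同或可以broadcast（例子中为2），即视为有2个二维矩阵分别做矩阵乘法运算，然后拼接，所以结果 (2,3,2) 比dot少一个维度2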

### outer product
- 向量张量积

In [23]:
a = np.linspace(-5,5,3)
a

array([-5.,  0.,  5.])

In [24]:
b = np.arange(2,5)
b

array([2, 3, 4])

In [25]:
np.outer(a ,b)

array([[-10., -15., -20.],
       [  0.,   0.,   0.],
       [ 10.,  15.,  20.]])

#### 计算行列式determinant
- 使用了linear algebra模块，缩写linalg

In [29]:
x = np.array([[1,5],[2,1]])
np.linalg.det(x)

-9.000000000000002

#### 计算逆矩阵inverse

In [30]:
x_inv = np.linalg.inv(x)
x_inv

array([[-0.11111111,  0.55555556],
       [ 0.22222222, -0.11111111]])

#### 计算伪逆矩阵pseudo-inverse

In [31]:
x = np.array([[1,2,3],[2,4,6],[1,3,5]]) #奇异矩阵
np.linalg.det(x)

0.0

In [32]:
np.linalg.inv(x)

LinAlgError: Singular matrix

In [34]:
np.linalg.pinv(x)

array([[ 0.43333333,  0.86666667, -1.33333333],
       [ 0.13333333,  0.26666667, -0.33333333],
       [-0.16666667, -0.33333333,  0.66666667]])

In [35]:
y = np.arange(12).reshape(3,4)
np.linalg.pinv(y)

array([[-0.3375    , -0.1       ,  0.1375    ],
       [-0.13333333, -0.03333333,  0.06666667],
       [ 0.07083333,  0.03333333, -0.00416667],
       [ 0.275     ,  0.1       , -0.075     ]])

#### 计算matrix的norm范数

In [36]:
x = np.arange(15).reshape(3,5)
np.linalg.norm(x, "fro")

31.85906464414798

#### 计算singular value decomposition(SVD)奇异值分解

In [37]:
x

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [41]:
# NOTE: np.linalg.svd returns the right singular vectors already transposed (usually
# called Vh), so `V` here holds V^T; s contains the singular values as a 1-D array.
U, s, V =np.linalg.svd(x)

In [42]:
U

array([[-0.15425367,  0.89974393,  0.40824829],
       [-0.50248417,  0.28432901, -0.81649658],
       [-0.85071468, -0.3310859 ,  0.40824829]])

In [43]:
s

array([3.17420265e+01, 2.72832424e+00, 8.10792259e-16])

In [44]:
np.dot(U, U.T) # 说明U V为正交矩阵

array([[ 1.00000000e+00,  3.55968130e-17,  8.81306529e-17],
       [ 3.55968130e-17,  1.00000000e+00, -8.56119755e-17],
       [ 8.81306529e-17, -8.56119755e-17,  1.00000000e+00]])

In [45]:
np.dot(V, V.T)

array([[ 1.00000000e+00, -1.07948583e-16,  1.03010797e-16,
        -4.31553747e-17,  3.14828113e-17],
       [-1.07948583e-16,  1.00000000e+00, -2.65729438e-17,
        -4.65085723e-18, -1.85790042e-17],
       [ 1.03010797e-16, -2.65729438e-17,  1.00000000e+00,
        -5.80855733e-17, -5.45277588e-17],
       [-4.31553747e-17, -4.65085723e-18, -5.80855733e-17,
         1.00000000e+00, -1.05262553e-16],
       [ 3.14828113e-17, -1.85790042e-17, -5.45277588e-17,
        -1.05262553e-16,  1.00000000e+00]])

### 随堂小项目
- 用numpy写一个softmax

什么是softmax函数
一维softmax：对向量 $f$ 的第 $i$ 个分量，

$$ \mathrm{softmax}(f)_i = \frac{e^{f_i}}{\sum_j e^{f_j}} $$

自己先试一下(答案正确)：

In [26]:
import numpy as np
# 20 random integers drawn from [0, 10) (upper bound exclusive).
a = np.random.randint(0,10,20)
a

array([4, 3, 2, 8, 0, 3, 3, 8, 9, 6, 9, 0, 0, 8, 7, 4, 7, 4, 7, 9])

In [27]:
# Softmax numerators: element-wise e**a.
b = np.exp(a)
b

array([5.45981500e+01, 2.00855369e+01, 7.38905610e+00, 2.98095799e+03,
       1.00000000e+00, 2.00855369e+01, 2.00855369e+01, 2.98095799e+03,
       8.10308393e+03, 4.03428793e+02, 8.10308393e+03, 1.00000000e+00,
       1.00000000e+00, 2.98095799e+03, 1.09663316e+03, 5.45981500e+01,
       1.09663316e+03, 5.45981500e+01, 1.09663316e+03, 8.10308393e+03])

In [28]:
# Softmax denominator: the sum of all exponentials.
c = np.sum(b)
c

37179.89412959738

In [29]:
# Normalise so that the entries of d sum to 1.
d = b/c
d

array([1.46848589e-03, 5.40225770e-04, 1.98737954e-04, 8.01766131e-02,
       2.68962573e-05, 5.40225770e-04, 5.40225770e-04, 8.01766131e-02,
       2.17942630e-01, 1.08507246e-02, 2.17942630e-01, 2.68962573e-05,
       2.68962573e-05, 8.01766131e-02, 2.94953276e-02, 1.46848589e-03,
       2.94953276e-02, 1.46848589e-03, 2.94953276e-02, 2.17942630e-01])

In [30]:
def softmax_dim1(n):
    """Return the softmax of ``n`` random integers drawn from [0, 10).

    The input vector is generated inside the function, so the caller only
    controls its length and every call produces a different result.

    Returns a 1-D float array of length ``n`` whose entries sum to 1
    (up to rounding) when ``n >= 1``.
    """
    exps = np.exp(np.random.randint(0, 10, n))
    return exps / exps.sum()

In [31]:
softmax_dim1(20)

array([4.77375504e-05, 2.60638194e-03, 3.86821377e-01, 4.77375504e-05,
       5.23505807e-02, 4.77375504e-05, 5.23505807e-02, 1.29764116e-04,
       3.86821377e-01, 2.60638194e-03, 7.08488066e-03, 1.92587024e-02,
       5.23505807e-02, 1.29764116e-04, 1.92587024e-02, 7.08488066e-03,
       3.52735438e-04, 2.60638194e-03, 9.58834331e-04, 7.08488066e-03])

In [32]:
# Sanity check: a softmax output must sum to 1.
np.sum(softmax_dim1(20))

1.0

再来试一下更高维且更大的数据，对行做softmax

In [79]:
# 10x10 integers taken from {1010, 1020, ..., 1100}: large enough that a naive exp() overflows.
m = np.random.randint(1,11,(10,10)) * 10 +1000
m

array([[1010, 1100, 1030, 1010, 1090, 1090, 1090, 1080, 1070, 1020],
       [1020, 1040, 1050, 1040, 1030, 1040, 1010, 1080, 1100, 1020],
       [1010, 1020, 1020, 1050, 1070, 1050, 1010, 1010, 1010, 1090],
       [1040, 1040, 1080, 1030, 1060, 1010, 1030, 1060, 1080, 1100],
       [1070, 1090, 1030, 1060, 1020, 1080, 1090, 1050, 1100, 1070],
       [1080, 1100, 1060, 1090, 1100, 1090, 1080, 1070, 1010, 1030],
       [1100, 1040, 1080, 1060, 1100, 1010, 1050, 1060, 1070, 1030],
       [1010, 1090, 1050, 1070, 1010, 1080, 1080, 1020, 1040, 1100],
       [1080, 1100, 1010, 1030, 1090, 1090, 1050, 1080, 1020, 1010],
       [1030, 1020, 1040, 1090, 1080, 1040, 1050, 1090, 1050, 1090]])

In [80]:
# Naive attempt: e**1010 and above exceed the float64 range, so every entry becomes inf.
np.exp(m)

RuntimeWarning: overflow encountered in exp
  """Entry point for launching an IPython kernel.


array([[inf, inf, inf, inf, inf, inf, inf, inf, inf, inf],
       [inf, inf, inf, inf, inf, inf, inf, inf, inf, inf],
       [inf, inf, inf, inf, inf, inf, inf, inf, inf, inf],
       [inf, inf, inf, inf, inf, inf, inf, inf, inf, inf],
       [inf, inf, inf, inf, inf, inf, inf, inf, inf, inf],
       [inf, inf, inf, inf, inf, inf, inf, inf, inf, inf],
       [inf, inf, inf, inf, inf, inf, inf, inf, inf, inf],
       [inf, inf, inf, inf, inf, inf, inf, inf, inf, inf],
       [inf, inf, inf, inf, inf, inf, inf, inf, inf, inf],
       [inf, inf, inf, inf, inf, inf, inf, inf, inf, inf]])

指数函数很容易数据溢出（float64下，$e^{x}$ 在 $x$ 超过约709时就会变成inf）。解决办法是先把每个元素减去所在行的最大值：这样所有指数都不大于0，$e^{x}$ 不会超过1，也就不会上溢；而由下面的公式可知，softmax的结果保持不变。

$$ \frac{e^{f_i}}{\sum_j e^{f_j}}=\frac{Ce^{f_i}}{C\sum_j e^{f_j}}=\frac{e^{f_i+\log C}}{\sum_j e^{f_j+\log C}}=\frac{e^{f_i-\max_k f_k}}{\sum_j e^{f_j-\max_k f_k}} $$

其中 $C$ 可以是任意正常数，最后一步取 $\log C=-\max_k f_k$。

自己先来试一下(结果正确，操作上有不同，可以简化)。注意：下面 In [64]、In [76] 的输出来自更早一次随机生成的 m，与上面 In [79] 显示的 m 并不对应：

In [64]:
# Subtract each row's maximum before exponentiating.
# m[list(range(10)),[np.argmax(m,axis=1)]] uses integer-array indexing to pick
# the maximum of every row as a (1, 10) array; it is broadcast against m.T
# (one column per original row) and the difference is transposed back.
# Equivalent to m - m.max(axis=1, keepdims=True); the 10 is hard-coded to m's row count.
m1 = np.exp((m.T - m[list(range(10)),[np.argmax(m,axis=1)]]).T)
m1

array([[8.75651076e-27, 9.35762297e-14, 1.92874985e-22, 4.24835426e-18,
        1.80485139e-35, 1.00000000e+00, 8.75651076e-27, 4.24835426e-18,
        1.92874985e-22, 1.92874985e-22],
       [4.24835426e-18, 4.24835426e-18, 3.97544974e-31, 1.92874985e-22,
        4.53999298e-05, 4.24835426e-18, 2.06115362e-09, 9.35762297e-14,
        1.00000000e+00, 8.19401262e-40],
       [2.06115362e-09, 1.80485139e-35, 8.75651076e-27, 8.75651076e-27,
        2.06115362e-09, 1.00000000e+00, 1.00000000e+00, 1.92874985e-22,
        1.80485139e-35, 1.92874985e-22],
       [8.75651076e-27, 4.53999298e-05, 1.00000000e+00, 1.92874985e-22,
        1.92874985e-22, 1.92874985e-22, 2.06115362e-09, 8.19401262e-40,
        1.80485139e-35, 1.00000000e+00],
       [1.00000000e+00, 4.53999298e-05, 4.53999298e-05, 8.75651076e-27,
        8.75651076e-27, 1.80485139e-35, 8.75651076e-27, 4.53999298e-05,
        4.53999298e-05, 9.35762297e-14],
       [1.80485139e-35, 8.75651076e-27, 1.00000000e+00, 2.06115362e-09,
   

In [76]:
# Normalise every row of m1 by its own sum. np.sum(m1,axis=1) is 1-D, so the
# double transpose lines the sums up with the rows (same effect as dividing by
# m1.sum(axis=1, keepdims=True)). Note: softmax_dim2 is the result array, not a function.
softmax_dim2=(m1.T/np.sum(m1,axis=1)).T
softmax_dim2

array([[8.75651076e-27, 9.35762297e-14, 1.92874985e-22, 4.24835426e-18,
        1.80485139e-35, 1.00000000e+00, 8.75651076e-27, 4.24835426e-18,
        1.92874985e-22, 1.92874985e-22],
       [4.24816138e-18, 4.24816138e-18, 3.97526925e-31, 1.92866228e-22,
        4.53978686e-05, 4.24816138e-18, 2.06106005e-09, 9.35719813e-14,
        9.99954600e-01, 8.19364062e-40],
       [1.03057681e-09, 9.02425692e-36, 4.37825537e-27, 4.37825537e-27,
        1.03057681e-09, 4.99999999e-01, 4.99999999e-01, 9.64374922e-23,
        9.02425692e-36, 9.64374922e-23],
       [4.37815599e-27, 2.26994496e-05, 4.99988650e-01, 9.64353032e-23,
        9.64353032e-23, 9.64353032e-23, 1.03055342e-09, 4.09691331e-40,
        9.02405208e-36, 4.99988650e-01],
       [9.99818433e-01, 4.53916866e-05, 4.53916866e-05, 8.75492087e-27,
        8.75492087e-27, 1.80452369e-35, 8.75492087e-27, 4.53916866e-05,
        4.53916866e-05, 9.35592394e-14],
       [1.80476944e-35, 8.75611318e-27, 9.99954596e-01, 2.06106004e-09,
   

In [77]:
# Sanity check: every row should sum to 1.
np.sum(softmax_dim2,1)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

课程中的答案：

In [81]:
# Per-row maximum; keepdims=True keeps it as a (10, 1) column so it broadcasts against m row by row.
m_row_max = m.max(axis=1, keepdims=True)
m_row_max

array([[1100],
       [1100],
       [1090],
       [1100],
       [1100],
       [1100],
       [1100],
       [1100],
       [1100],
       [1090]])

In [82]:
# Shift every row so that its maximum becomes 0 (all entries end up <= 0).
# NOTE: this overwrites m in place; the original values are gone after this cell runs.
m -= m_row_max
m

array([[-90,   0, -70, -90, -10, -10, -10, -20, -30, -80],
       [-80, -60, -50, -60, -70, -60, -90, -20,   0, -80],
       [-80, -70, -70, -40, -20, -40, -80, -80, -80,   0],
       [-60, -60, -20, -70, -40, -90, -70, -40, -20,   0],
       [-30, -10, -70, -40, -80, -20, -10, -50,   0, -30],
       [-20,   0, -40, -10,   0, -10, -20, -30, -90, -70],
       [  0, -60, -20, -40,   0, -90, -50, -40, -30, -70],
       [-90, -10, -50, -30, -90, -20, -20, -80, -60,   0],
       [-20,   0, -90, -70, -10, -10, -50, -20, -80, -90],
       [-60, -70, -50,   0, -10, -50, -40,   0, -40,   0]])

In [83]:
# All exponents are now <= 0, so exp() cannot overflow; each row's maximum maps to exactly 1.
m_exp = np.exp(m)
m_exp

array([[8.19401262e-40, 1.00000000e+00, 3.97544974e-31, 8.19401262e-40,
        4.53999298e-05, 4.53999298e-05, 4.53999298e-05, 2.06115362e-09,
        9.35762297e-14, 1.80485139e-35],
       [1.80485139e-35, 8.75651076e-27, 1.92874985e-22, 8.75651076e-27,
        3.97544974e-31, 8.75651076e-27, 8.19401262e-40, 2.06115362e-09,
        1.00000000e+00, 1.80485139e-35],
       [1.80485139e-35, 3.97544974e-31, 3.97544974e-31, 4.24835426e-18,
        2.06115362e-09, 4.24835426e-18, 1.80485139e-35, 1.80485139e-35,
        1.80485139e-35, 1.00000000e+00],
       [8.75651076e-27, 8.75651076e-27, 2.06115362e-09, 3.97544974e-31,
        4.24835426e-18, 8.19401262e-40, 3.97544974e-31, 4.24835426e-18,
        2.06115362e-09, 1.00000000e+00],
       [9.35762297e-14, 4.53999298e-05, 3.97544974e-31, 4.24835426e-18,
        1.80485139e-35, 2.06115362e-09, 4.53999298e-05, 1.92874985e-22,
        1.00000000e+00, 9.35762297e-14],
       [2.06115362e-09, 1.00000000e+00, 4.24835426e-18, 4.53999298e-05,
   

In [84]:
# Row-wise softmax denominator, kept as a (10, 1) column for broadcasting.
m_exp_row_sum = m_exp.sum(axis=1, keepdims=True)
m_exp_row_sum

array([[1.0001362],
       [1.       ],
       [1.       ],
       [1.       ],
       [1.0000908],
       [2.0000908],
       [2.       ],
       [1.0000454],
       [1.0000908],
       [3.0000454]])

In [85]:
# Divide each row by its own sum (broadcast across the columns) to obtain the row-wise softmax.
m_softmax = m_exp/m_exp_row_sum
m_softmax

array([[8.19289674e-40, 9.99863817e-01, 3.97490835e-31, 8.19289674e-40,
        4.53937471e-05, 4.53937471e-05, 4.53937471e-05, 2.06087293e-09,
        9.35634862e-14, 1.80460560e-35],
       [1.80485138e-35, 8.75651074e-27, 1.92874984e-22, 8.75651074e-27,
        3.97544973e-31, 8.75651074e-27, 8.19401261e-40, 2.06115362e-09,
        9.99999998e-01, 1.80485138e-35],
       [1.80485138e-35, 3.97544973e-31, 3.97544973e-31, 4.24835425e-18,
        2.06115362e-09, 4.24835425e-18, 1.80485138e-35, 1.80485138e-35,
        1.80485138e-35, 9.99999998e-01],
       [8.75651073e-27, 8.75651073e-27, 2.06115361e-09, 3.97544972e-31,
        4.24835424e-18, 8.19401259e-40, 3.97544972e-31, 4.24835424e-18,
        2.06115361e-09, 9.99999996e-01],
       [9.35677336e-14, 4.53958077e-05, 3.97508879e-31, 4.24796853e-18,
        1.80468752e-35, 2.06096648e-09, 4.53958077e-05, 1.92857473e-22,
        9.99909206e-01, 9.35677336e-14],
       [1.03053002e-09, 4.99977300e-01, 2.12408069e-18, 2.26989343e-05,
   

In [86]:
# Sanity check: each row of the softmax sums to 1.
m_softmax.sum(axis=1)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])