# numpy基础


- Numpy 是python语言的一个library
- Numpy主要支持矩阵操作和运算
- Numpy高效主要是因为，其core代码是由c语言实现的
- Pandas是基于Numpy的一个library
- 机器学习框架大部分的语法和Numpy比较像

## 目录
- 数组的构造（ndarray）
- 数组取值和赋值
- 数学运算
- broadcasting
- 逻辑运算
- 数组的高级操作
- 文件输入输出
- 小项目case：用Numpy写一个softmax

In [1]:
import numpy as np

## Arrays 数组

In [2]:
a = np.array([1,2,3])
print(a)
print(type([1,2,3]))
type(a)

[1 2 3]
<class 'list'>


numpy.ndarray

In [3]:
a[2]

3

In [4]:
a[2] = 5
a

array([1, 2, 5])

In [5]:
b = np.array([[1,2,3], [1,2,3]])
b

array([[1, 2, 3],
       [1, 2, 3]])

In [6]:
type(b)

numpy.ndarray

In [7]:
b.ndim

2

In [8]:
b.shape

(2, 3)

In [9]:
# Size of axis 1: the number of columns (i.e. features) of the 2-D array b.
b.shape[1]

3

内置的创建数组的函数

In [10]:

a = np.zeros((2,3))
a

array([[0., 0., 0.],
       [0., 0., 0.]])

In [11]:
b = np.ones((1,2))
b

array([[1., 1.]])

In [12]:
d = np.eye(3)
d

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [13]:
e = np.random.random((2, 3))
e

array([[0.34804413, 0.57167783, 0.12353118],
       [0.0269126 , 0.79242161, 0.69324411]])

In [14]:
f = np.empty((2,3,4))
f

array([[[5.58863166e-316, 7.50979782e-322, 0.00000000e+000,
         0.00000000e+000],
        [8.45593933e-307, 1.15998412e-028, 2.44171989e+232,
         8.00801729e+159],
        [9.45007090e-076, 1.14349800e-071, 1.31351131e-071,
         6.98348940e-077]],

       [[9.73487228e-072, 6.01391519e-154, 1.30358173e-076,
         3.79489742e-096],
        [7.49118156e-067, 1.03277308e-047, 8.18935154e+140,
         3.77780862e+180],
        [1.15998412e-028, 6.48224638e+170, 3.67145870e+228,
         1.22575208e+295]]])

In [15]:
f.shape

(2, 3, 4)

In [16]:
g = np.arange(15)
g

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [17]:
g.ndim

1

In [18]:
g.shape

(15,)

数组可以有不同的数据类型的数值

In [19]:
arr = np.array([1, 2,3])
print(arr.dtype)

int32


In [20]:
arr = np.array([1,2,3], dtype = np.float64)
arr.dtype

dtype('float64')

In [21]:
arr

array([1., 2., 3.])

In [22]:
# astype returns a copy of an array converted to the requested dtype.
# Start from an integer array to convert.
int_arr = np.array([1,2,3,4,5])
print(int_arr, int_arr.dtype)

[1 2 3 4 5] int32


In [23]:
float_arr =  int_arr.astype(np.float64)
print(float_arr.dtype,float_arr)

float64 [1. 2. 3. 4. 5.]


In [24]:
float_arr = np.array([3.5,2.3,0.8,-2.2])
float_arr

array([ 3.5,  2.3,  0.8, -2.2])

In [25]:
int_arr = float_arr.astype(np.int64)
int_arr

array([ 3,  2,  0, -2], dtype=int64)

In [26]:
# astype can also parse numeric strings into numbers.
# Build an array of byte strings; the last entry ('asas') is not numeric, so
# converting this array to float raises ValueError.
# np.bytes_ is used because the np.string_ alias was removed in NumPy 2.0.
str_arr = np.array(['1.24', '2.2', '5.8', 'asas'], dtype=np.bytes_)
str_arr

array([b'1.24', b'2.2', b'5.8', b'asas'], dtype='|S4')

In [27]:
# Convert the byte strings to floats. np.float64 replaces np.float, an alias
# of the builtin float that was removed in NumPy 1.24; with the old alias this
# line fails with AttributeError instead of the ValueError for the non-numeric
# entry.
float_arr = str_arr.astype(dtype=np.float64)
float_arr

ValueError: could not convert string to float: b'asas'

In [28]:
# astype can take its target dtype from another array's dtype attribute.
int_arr = np.arange(10)
float_arr = np.array([2.3, 4.6, 9.8])
print(int_arr.dtype, float_arr.dtype)

int32 float64


In [29]:
int_arr.astype(dtype = float_arr.dtype)

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

## Array indexing / 数组取值和赋值

Numpy 提供了多种的取值方式

In [30]:
a = np.array([[1,2,3,4], [5,6,7,8],[9,10,11,12]])
print(a)
a.shape

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


(3, 4)

In [31]:
# Arrays can be sliced like lists, and a multi-dimensional array can be sliced
# along several axes at once.
# .copy() makes b an independent array, so writing to b does not modify a.
b = a[0:2,2:4].copy()
b

array([[3, 4],
       [7, 8]])

In [32]:
b[0,0] = 11111
print(b)
print(a)

[[11111     4]
 [    7     8]]
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


In [33]:
# No .copy() here: the slice c is a view of a, so assigning into c also
# changes the corresponding element of a.
c = a[0:2,2:4]
c[0,0] = 11111
print(c)
print(a)

[[11111     4]
 [    7     8]]
[[    1     2 11111     4]
 [    5     6     7     8]
 [    9    10    11    12]]


In [34]:
row_rl = a[1, :]
row_rl

array([5, 6, 7, 8])

In [35]:
cor_rl = a[:, 1]
cor_rl

array([ 2,  6, 10])

In [36]:
#随意组合
a = np.array([[1,2], [3,4],[5,6]])
print(a)

[[1 2]
 [3 4]
 [5 6]]


In [37]:
print(a[[0,1,2],[0,1,0]])#a[0,0] a[1,1] a[2,0]

[1 4 5]


In [38]:
a = np.array([[1,2,3,],[4,5,6],[7,8,9],[10,11,12]])
a

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [39]:
b = np.array([0,2,0,1])
b

array([0, 2, 0, 1])

In [40]:
a[np.arange(4), b]#a[0,0] a[1,2] a[2,0] a[3,1]

array([ 1,  6,  7, 11])

In [41]:
a[np.arange(4), b] +=10
print(a)

[[11  2  3]
 [ 4  5 16]
 [17  8  9]
 [10 21 12]]


In [42]:
#比较fashion ，用条件判断
a = np.array([[1,2],[3,4], [5,6]])
a

array([[1, 2],
       [3, 4],
       [5, 6]])

In [43]:
bool_index = (a >2)
bool_index

array([[False, False],
       [ True,  True],
       [ True,  True]])

In [44]:
#基于上面的比较结果可以做进一步处理
print(a[bool_index].shape)
print(a[bool_index])

(4,)
[3 4 5 6]


In [45]:
print(a[a>2])

[3 4 5 6]


切片取值方式，如下图所示

![image.png](attachment:image.png)

![](http://old.sebug.net/paper/books/scipydoc/_images/numpy_intro_02.png)
![](http://old.sebug.net/paper/books/scipydoc/_images/numpy_intro_03.png)

### 数学运算

逐个元素的运算

In [46]:
x = np.array([[1,2], [3,4]], dtype= np.float64)
y = np.array([[5,6], [7,8]], dtype= np.float64)

In [47]:
print(x)
print(y)

[[1. 2.]
 [3. 4.]]
[[5. 6.]
 [7. 8.]]


In [48]:
x+y

array([[ 6.,  8.],
       [10., 12.]])

In [49]:
np.add(x,y)

array([[ 6.,  8.],
       [10., 12.]])

In [50]:
x-y

array([[-4., -4.],
       [-4., -4.]])

In [51]:
np.subtract(x,y)

array([[-4., -4.],
       [-4., -4.]])

In [52]:
x*y

array([[ 5., 12.],
       [21., 32.]])

In [53]:
np.multiply(x,y)

array([[ 5., 12.],
       [21., 32.]])

In [54]:
x/y

array([[0.2       , 0.33333333],
       [0.42857143, 0.5       ]])

In [55]:
np.divide(x,y)

array([[0.2       , 0.33333333],
       [0.42857143, 0.5       ]])

In [56]:
np.sqrt(x)

array([[1.        , 1.41421356],
       [1.73205081, 2.        ]])

In [57]:
v = np.array([9,10])
w = np.array([10,11])
print(v.shape)

(2,)


In [58]:
#求向量内积

In [59]:
v.dot(w)#v.T . w

200

矩阵乘法

In [60]:
x = np.array([[1,2], [3,4]])
y = np.array([[5,6], [7,8]])
print(x)
print()
print(y)

[[1 2]
 [3 4]]

[[5 6]
 [7 8]]


In [61]:
v

array([ 9, 10])

In [62]:
print((x.dot(v)).shape)

(2,)


In [63]:
x.dot(y)

array([[19, 22],
       [43, 50]])

In [64]:
np.dot(x,y)

array([[19, 22],
       [43, 50]])

In [65]:
#转置
x

array([[1, 2],
       [3, 4]])

In [66]:
x.T

array([[1, 3],
       [2, 4]])

In [67]:
v.shape

(2,)

In [68]:
print(v)
print(v.T)
v.T.shape

[ 9 10]
[ 9 10]


(2,)

In [69]:
#2维的
w = np.array([[1,2,3]])
print(w, w.shape)

[[1 2 3]] (1, 3)


In [70]:
print(w.T)
w.T.shape

[[1]
 [2]
 [3]]


(3, 1)

In [71]:
# Note: a matrix product needs matching inner dimensions. w is (1, 3), so
# (1, 3) dot (1, 3) is not aligned and raises ValueError.
w.dot(w)

ValueError: shapes (1,3) and (1,3) not aligned: 3 (dim 1) != 1 (dim 0)

In [72]:
w.dot(w.T)

array([[14]])

In [73]:
w.T.dot(w)

array([[1, 2, 3],
       [2, 4, 6],
       [3, 6, 9]])

In [74]:
#利用矩阵的转置做 dot product
arr = np.random.randn(6,3)
arr

array([[ 1.3740313 , -0.67215132, -1.82013668],
       [ 0.10895326, -1.16297891, -0.86488755],
       [ 0.9361081 ,  0.12629152, -0.70539237],
       [ 0.86139564,  1.60972119, -1.47348967],
       [ 0.8448607 , -0.15669775, -0.74574189],
       [ 1.96990022,  0.51429368, -0.27291067]])

In [75]:
print(arr.T.dot(arr))

[[ 8.11243014  1.33528149 -5.69239297]
 [ 1.33528149  4.70051137 -0.25523951]
 [-5.69239297 -0.25523951  7.36028938]]


In [76]:
np.dot(arr, arr)

ValueError: shapes (6,3) and (6,3) not aligned: 3 (dim 1) != 6 (dim 0)

In [77]:
#高维的tensor也可以做转置
arr = np.arange(16).reshape(2,2,4)#arr[x,y,z]
arr

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])

In [78]:
#X轴用0表示，Y轴用1表示， Z轴用2表示
print(arr.transpose((1,0,2)))#2*2*4

[[[ 0  1  2  3]
  [ 8  9 10 11]]

 [[ 4  5  6  7]
  [12 13 14 15]]]


In [79]:
print(arr.transpose((0,2,1)))#2*4*2

[[[ 0  4]
  [ 1  5]
  [ 2  6]
  [ 3  7]]

 [[ 8 12]
  [ 9 13]
  [10 14]
  [11 15]]]


In [80]:
print(arr.transpose((2,1,0)))#4*2*2

[[[ 0  8]
  [ 4 12]]

 [[ 1  9]
  [ 5 13]]

 [[ 2 10]
  [ 6 14]]

 [[ 3 11]
  [ 7 15]]]


In [81]:
print(arr.swapaxes(1,2))#1维和2维交换位置：2*4*2
print()
print(arr.transpose((0,2,1)))

[[[ 0  4]
  [ 1  5]
  [ 2  6]
  [ 3  7]]

 [[ 8 12]
  [ 9 13]
  [10 14]
  [11 15]]]

[[[ 0  4]
  [ 1  5]
  [ 2  6]
  [ 3  7]]

 [[ 8 12]
  [ 9 13]
  [10 14]
  [11 15]]]


In [82]:
x = np.arange(24).reshape(2,3,4)
y = np.arange(8).reshape(4,2)
print(x)
print()
print(y)

[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]]

 [[12 13 14 15]
  [16 17 18 19]
  [20 21 22 23]]]

[[0 1]
 [2 3]
 [4 5]
 [6 7]]


In [83]:
print(np.matmul(x,y).shape)#降维->升维

(2, 3, 2)


In [84]:
print(np.dot(x,y).shape)

(2, 3, 2)


In [85]:
x = np.arange(24).reshape(2,3,4)
y = np.arange(16).reshape(2,4,2)
# For N-D inputs, dot contracts the last axis of x with the second-to-last
# axis of y and keeps every remaining axis of both, so the result gains a
# dimension: (2, 3, 4) dot (2, 4, 2) -> (2, 3, 2, 2).
print(x.dot(y).shape)

(2, 3, 2, 2)


In [86]:
# matmul treats the leading axis as a batch of matrices and multiplies them
# pairwise, so the result stays 3-D: (2, 3, 4) @ (2, 4, 2) -> (2, 3, 2).
np.matmul(x,y).shape

(2, 3, 2)

参考url：https://mathworld.wolfram.com/MatrixMultiplication.html

矩阵内部的运算

In [87]:
x = np.array([[1,2], [3,4],[5,6]])
x

array([[1, 2],
       [3, 4],
       [5, 6]])

In [88]:
print(np.sum(x))
print(x.sum())

21
21


In [89]:
print(np.sum(x, axis = 0))#把矩阵映射到axis轴上，可以理解为消掉axis轴。0 - x轴 ； 1- y轴

[ 9 12]


In [90]:
print(np.sum(x, axis = 1))

[ 3  7 11]


In [91]:
# Mean of column 1 computed by hand: sum of that column divided by the number
# of rows.
np.sum(x[:, 1])/x.shape[0]

4.0

In [92]:
print(np.mean(x))
print(np.mean(x, axis =0))
print(np.mean(x, axis =1))

3.5
[3. 4.]
[1.5 3.5 5.5]


其他运算：cumulative sum（累加）, cumulative product（累乘）

In [93]:
x

array([[1, 2],
       [3, 4],
       [5, 6]])

In [94]:
# axis=0: running sum down each column (accumulating one row after another).
print(x.cumsum(axis = 0))
# axis=1: running sum across each row (accumulating one column after another).
print(x.cumsum(axis = 1))

[[ 1  2]
 [ 4  6]
 [ 9 12]]
[[ 1  3]
 [ 3  7]
 [ 5 11]]


In [95]:
# axis=0: running product down each column (multiplying one row after another).
print(x.cumprod(axis = 0))
# axis=1: running product across each row (multiplying one column after another).
print(x.cumprod(axis = 1))

[[ 1  2]
 [ 3  8]
 [15 48]]
[[ 1  2]
 [ 3 12]
 [ 5 30]]


In [96]:
# 一维数组的排序
arr = np.random.randn(8)*10
arr

array([ -8.24635252,  -7.01779586,  -0.91377139,   2.28879645,
         4.12082102, -11.25382417,  10.32114363,  25.77765223])

In [97]:
arr.sort()
arr

array([-11.25382417,  -8.24635252,  -7.01779586,  -0.91377139,
         2.28879645,   4.12082102,  10.32114363,  25.77765223])

In [98]:
#二维数组需要指定在某些维度上进行排序
arr = np.random.randn(5,3)*10
arr

array([[ -5.43368574,   5.09823806,  -1.73316106],
       [ 17.34117625,  -6.76701038,   2.39898862],
       [ -7.3119323 ,  -4.19981339, -12.8939226 ],
       [ 12.12975327, -12.16976942,   5.66736658],
       [  1.77988842,  17.67703864,  -8.09784901]])

In [101]:
arr.sort(1)#在第1维上做排序也就是在行上做了排序
arr

array([[-12.8939226 ,  -7.3119323 ,  -4.19981339],
       [-12.16976942,  -1.73316106,   5.09823806],
       [ -8.09784901,   1.77988842,  12.12975327],
       [ -6.76701038,   2.39898862,  17.34117625],
       [ -5.43368574,   5.66736658,  17.67703864]])

In [102]:
#小的应用：找出排序后位置在5%的数字
large_arr = np.random.randn(1000)
large_arr.sort()
print(large_arr[int(0.05*len(large_arr))])

-1.5937987537770693


## Broadcasting

In [105]:
x = np.array([[1,2,3],[4,5,6],[7,8,9],[10,11,12]])
v = np.array([1,0,1])
print(x)
print()
print(v)

[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]

[1 0 1]


In [106]:
# Allocate y with the same shape and dtype as x. empty_like does not
# initialize the contents, so the values shown are arbitrary until y is
# filled in.
y = np.empty_like(x)
y

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [108]:
# Add v to every row of x using an explicit Python loop.
for i in range(x.shape[0]):
    # y[i, :] selects row i (y[:, i] would select column i instead).
    y[i,:]  = x[i, :] + v
print(y)

[[ 2  2  4]
 [ 5  5  7]
 [ 8  8 10]
 [11 11 13]]


In [110]:
# Efficient replacement for the explicit loop: broadcasting stretches the
# (3,) vector v across every row of the (4, 3) array x.
print(x.shape, v.shape)
x+v

(4, 3) (3,)


array([[ 2,  2,  4],
       [ 5,  5,  7],
       [ 8,  8, 10],
       [11, 11, 13]])

In [117]:
v = np.array([1,2,3])
w = np.array([4,5])
print(v.shape, w.shape)
v+w

(3,) (2,)


ValueError: operands could not be broadcast together with shapes (3,) (2,) 

In [114]:
v = v.reshape(3,1)
print(v.shape, v.ndim)
v

(3, 1) 2


array([[1],
       [2],
       [3]])

In [115]:
v + w

array([[5, 6],
       [6, 7],
       [7, 8]])

In [118]:
x = np.array([[1,2,3],[4,5,6]])#2*3
w = np.array([4,5])#2

In [119]:
(x.T + w).T

array([[ 5,  6,  7],
       [ 9, 10, 11]])

In [120]:
x + np.reshape(w, (2,1))

array([[ 5,  6,  7],
       [ 9, 10, 11]])

总结一下broadcasting，可以看看下面的图：<br>
![](http://www.astroml.org/_images/fig_broadcast_visual_1.png)

## 逻辑运算

In [121]:
x_arr = np.array([1.1,1.2,1.3,1.4,1.5])
y_arr = np.array([2.1,2.2,2.3,2.4,2.5])

In [122]:
cond = np.array([True, False, True, True, False])

In [123]:
print(np.where(cond, x_arr, y_arr))

[1.1 2.2 1.3 1.4 2.5]


In [124]:
arr = np.random.randn(4,4)
arr

array([[ 2.08248592,  1.71724065,  1.41781534,  1.16418441],
       [ 1.26042013, -0.13165595, -1.79392788, -0.39911374],
       [ 0.74865842,  0.8945284 , -0.10937078, -0.1528018 ],
       [-1.63120161,  1.15354742,  0.03791908, -0.56634068]])

In [125]:
arr > 0

array([[ True,  True,  True,  True],
       [ True, False, False, False],
       [ True,  True, False, False],
       [False,  True,  True, False]])

In [126]:
print(np.where(arr>0, 1, -1))

[[ 1  1  1  1]
 [ 1 -1 -1 -1]
 [ 1  1 -1 -1]
 [-1  1  1 -1]]


In [127]:
print(np.where(arr>0, 1, arr))

[[ 1.          1.          1.          1.        ]
 [ 1.         -0.13165595 -1.79392788 -0.39911374]
 [ 1.          1.         -0.10937078 -0.1528018 ]
 [-1.63120161  1.          1.         -0.56634068]]


### 部分高级的ndarray处理

In [128]:
#reshape改变tensor形状
arr = np.arange(8)
print(arr.shape)

(8,)


In [130]:
arr.reshape(-1,1)

array([[0],
       [1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7]])

In [132]:
other_arr = np.ones((2,4))
arr.reshape(other_arr.shape)

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [136]:
# ravel() flattens an array of any dimensionality into a 1-D array.
print(arr.ravel())
print(arr.shape)

[0 1 2 3 4 5 6 7]
(8,)


In [137]:
#连接两个二维数组
arr1 = np.array([[1,2,3], [4,5,6]])
arr2 = np.array([[7,8,9], [10,11,12]])
print(arr1, '\n', arr2)

[[1 2 3]
 [4 5 6]] 
 [[ 7  8  9]
 [10 11 12]]


In [139]:
print(np.concatenate([arr1, arr2], axis = 0).shape)
np.concatenate([arr1, arr2], axis = 0)

(4, 3)


array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [140]:
print(np.concatenate([arr1, arr2], axis = 1).shape)
np.concatenate([arr1, arr2], axis = 1)

(2, 6)


array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [141]:
#垂直和水平的stack
np.vstack((arr1, arr2))

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [142]:
np.hstack((arr1, arr2))

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

拆分数组

In [143]:
arr = np.random.rand(5,5)
arr

array([[0.50702132, 0.15888777, 0.93060981, 0.78623694, 0.07973019],
       [0.81741589, 0.93701971, 0.09738919, 0.98118426, 0.78377869],
       [0.48553044, 0.25520382, 0.38399155, 0.62553177, 0.88163441],
       [0.25852321, 0.0160842 , 0.21591975, 0.6265276 , 0.97559886],
       [0.44870712, 0.014099  , 0.97365993, 0.26794636, 0.22164263]])

In [145]:
# axis=0 splits along rows; [1, 3] are the cut positions, giving
# rows [0:1], rows [1:3] and rows [3:].
first, second, third = np.split(arr, [1,3], axis = 0)
print(first, "\n\n", second,"\n\n",third  )

[[0.50702132 0.15888777 0.93060981 0.78623694 0.07973019]] 

 [[0.81741589 0.93701971 0.09738919 0.98118426 0.78377869]
 [0.48553044 0.25520382 0.38399155 0.62553177 0.88163441]] 

 [[0.25852321 0.0160842  0.21591975 0.6265276  0.97559886]
 [0.44870712 0.014099   0.97365993 0.26794636 0.22164263]]


In [146]:
# axis=1 splits along columns; [1, 3] are the cut positions, giving
# columns [0:1], columns [1:3] and columns [3:].
first, second, third = np.split(arr, [1,3], axis = 1)
print(first, "\n\n", second,"\n\n",third  )

[[0.50702132]
 [0.81741589]
 [0.48553044]
 [0.25852321]
 [0.44870712]] 

 [[0.15888777 0.93060981]
 [0.93701971 0.09738919]
 [0.25520382 0.38399155]
 [0.0160842  0.21591975]
 [0.014099   0.97365993]] 

 [[0.78623694 0.07973019]
 [0.98118426 0.78377869]
 [0.62553177 0.88163441]
 [0.6265276  0.97559886]
 [0.26794636 0.22164263]]


In [148]:
#堆叠辅助
arr = np.arange(6)
arr1 = arr.reshape((3,2))
arr2 = np.random.randn(3,2)
print(arr1)
arr2

[[0 1]
 [2 3]
 [4 5]]


array([[-0.70465053, -2.70128648],
       [-1.12555538, -0.51268212],
       [-0.61581933, -0.99617491]])

In [151]:
#r_用于按行堆叠
print(np.r_[arr1, arr2])

[[ 0.          1.        ]
 [ 2.          3.        ]
 [ 4.          5.        ]
 [-0.70465053 -2.70128648]
 [-1.12555538 -0.51268212]
 [-0.61581933 -0.99617491]]


In [153]:
print(np.c_[np.r_[arr1, arr2], arr])

[[ 0.          1.          0.        ]
 [ 2.          3.          1.        ]
 [ 4.          5.          2.        ]
 [-0.70465053 -2.70128648  3.        ]
 [-1.12555538 -0.51268212  4.        ]
 [-0.61581933 -0.99617491  5.        ]]


In [154]:
np.c_[1:6, -5:0]

array([[ 1, -5],
       [ 2, -4],
       [ 3, -3],
       [ 4, -2],
       [ 5, -1]])

In [155]:
arr = np.arange(3)
arr

array([0, 1, 2])

In [156]:
arr.repeat(3)

array([0, 0, 0, 1, 1, 1, 2, 2, 2])

In [159]:
arr.repeat([2,3,4])

array([0, 0, 1, 1, 1, 2, 2, 2, 2])

In [160]:
#指定axis来重复
arr = np.random.rand(2,2)
arr

array([[0.08699867, 0.85071535],
       [0.91178645, 0.67003641]])

In [161]:
arr.repeat(2, axis = 0)

array([[0.08699867, 0.85071535],
       [0.08699867, 0.85071535],
       [0.91178645, 0.67003641],
       [0.91178645, 0.67003641]])

In [162]:
arr.repeat(2, axis = 1)

array([[0.08699867, 0.08699867, 0.85071535, 0.85071535],
       [0.91178645, 0.91178645, 0.67003641, 0.67003641]])

In [164]:
#贴瓷砖:tile
print(arr)
print()
print(np.tile(arr, 2))

[[0.08699867 0.85071535]
 [0.91178645 0.67003641]]

[[0.08699867 0.85071535 0.08699867 0.85071535]
 [0.91178645 0.67003641 0.91178645 0.67003641]]


In [166]:
print(np.tile(arr, (2,3)))

[[0.08699867 0.85071535 0.08699867 0.85071535 0.08699867 0.85071535]
 [0.91178645 0.67003641 0.91178645 0.67003641 0.91178645 0.67003641]
 [0.08699867 0.85071535 0.08699867 0.85071535 0.08699867 0.85071535]
 [0.91178645 0.67003641 0.91178645 0.67003641 0.91178645 0.67003641]]


### Numpy的文件输入和输出

In [167]:
#读取csv文件作为数组
arr = np.loadtxt('array_ex.txt', delimiter=',')
arr

array([[ 0.580052,  0.18673 ,  1.040717,  1.134411],
       [ 0.194163, -0.636917, -0.938659,  0.124094],
       [-0.12641 ,  0.268607, -0.695724,  0.047428],
       [-1.484413,  0.004176, -0.744203,  0.005487],
       [ 2.302869,  0.200131,  1.670238, -1.88109 ],
       [-0.19323 ,  1.047233,  0.482803,  0.960334]])

In [168]:
#数组文件的读写
arr = np.arange(50).reshape(2,5,5)
arr

array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19],
        [20, 21, 22, 23, 24]],

       [[25, 26, 27, 28, 29],
        [30, 31, 32, 33, 34],
        [35, 36, 37, 38, 39],
        [40, 41, 42, 43, 44],
        [45, 46, 47, 48, 49]]])

In [169]:
np.save('some_arrary', arr)

In [170]:
arr2 = np.load("some_arrary.npy")
arr2

array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19],
        [20, 21, 22, 23, 24]],

       [[25, 26, 27, 28, 29],
        [30, 31, 32, 33, 34],
        [35, 36, 37, 38, 39],
        [40, 41, 42, 43, 44],
        [45, 46, 47, 48, 49]]])

In [172]:
arr3 = np.arange(15).reshape(3,5)
np.savez("array_archive.npz", arr=arr, b = arr2, c = arr3)

In [175]:
arch = np.load("array_archive.npz")
arch['arr']

array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19],
        [20, 21, 22, 23, 24]],

       [[25, 26, 27, 28, 29],
        [30, 31, 32, 33, 34],
        [35, 36, 37, 38, 39],
        [40, 41, 42, 43, 44],
        [45, 46, 47, 48, 49]]])

In [177]:
arch['b']

array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19],
        [20, 21, 22, 23, 24]],

       [[25, 26, 27, 28, 29],
        [30, 31, 32, 33, 34],
        [35, 36, 37, 38, 39],
        [40, 41, 42, 43, 44],
        [45, 46, 47, 48, 49]]])

In [178]:
#多个数组可以一起压缩存储

##  随堂练习
用numpy写一个softmax（用于多分类）

- 计算exponential
- 按行求和
- 每一行的每个元素都除以该行求得的和

In [2]:
import numpy as np
# 10x10 matrix of scores drawn around 1000 (standard normal scaled by 10 and
# shifted by 1000); values this large make a naive np.exp overflow.
m = np.random.randn(10,10)*10 +1000
m

array([[1000.97799645,  997.95089837, 1017.2559933 , 1025.22721818,
         980.37504505,  999.49665629,  998.55083009, 1000.80243081,
        1010.11635525, 1000.96322177],
       [1005.57297946,  977.10284859, 1007.77222347,  991.12046297,
         986.84594884,  993.58507881,  992.42537237, 1009.25260663,
        1001.39962337, 1010.07707048],
       [1004.58562701, 1002.99988973, 1006.20790376, 1019.59997256,
        1011.01429799,  998.97773524, 1010.16960276, 1014.53577185,
        1014.00618075,  991.3792583 ],
       [1003.31974612,  983.45840574,  996.82792337,  993.58942725,
         987.48779735, 1004.34188783, 1009.2010255 ,  982.8863138 ,
        1011.59271184, 1017.0368725 ],
       [ 991.99700884, 1008.67062471,  981.96471762, 1016.95455282,
        1001.70320809, 1001.5463261 ,  987.16988486,  993.97190251,
         997.74011364,  998.17692442],
       [ 997.0473316 , 1019.92964481, 1013.12412855,  987.21658623,
         999.23372192,  990.38420199, 1007.4717346 ,  983

In [3]:
# The values are too large and np.exp overflows. The remedy is to subtract
# each row's maximum from every element of that row first; the shift cancels
# out in the softmax ratio, so the softmax result is unchanged.
np.exp(m)
m

array([[1000.97799645,  997.95089837, 1017.2559933 , 1025.22721818,
         980.37504505,  999.49665629,  998.55083009, 1000.80243081,
        1010.11635525, 1000.96322177],
       [1005.57297946,  977.10284859, 1007.77222347,  991.12046297,
         986.84594884,  993.58507881,  992.42537237, 1009.25260663,
        1001.39962337, 1010.07707048],
       [1004.58562701, 1002.99988973, 1006.20790376, 1019.59997256,
        1011.01429799,  998.97773524, 1010.16960276, 1014.53577185,
        1014.00618075,  991.3792583 ],
       [1003.31974612,  983.45840574,  996.82792337,  993.58942725,
         987.48779735, 1004.34188783, 1009.2010255 ,  982.8863138 ,
        1011.59271184, 1017.0368725 ],
       [ 991.99700884, 1008.67062471,  981.96471762, 1016.95455282,
        1001.70320809, 1001.5463261 ,  987.16988486,  993.97190251,
         997.74011364,  998.17692442],
       [ 997.0473316 , 1019.92964481, 1013.12412855,  987.21658623,
         999.23372192,  990.38420199, 1007.4717346 ,  983

In [182]:
# Per-row maximum, kept as a column vector so it broadcasts against m when
# subtracted. keepdims=True replaces the hard-coded reshape(10, 1), so this
# works for any number of rows.
m_row_max = m.max(axis=1, keepdims=True)
print(m_row_max, '\n\n', m_row_max.shape)

[[1018.16452319]
 [1014.5380414 ]
 [1012.71045755]
 [1006.77069663]
 [1014.82442754]
 [1028.37895817]
 [1011.51241228]
 [1017.62933128]
 [1015.2951497 ]
 [1008.93130151]] 

 (10, 1)


In [183]:
m = m - m_row_max
m

array([[-1.81964313e+01, -2.93482876e+01, -1.67503472e+01,
        -2.36795277e+01, -2.36959119e+01, -2.40200257e+01,
        -2.86024471e+01, -1.10862701e+01,  0.00000000e+00,
        -7.70409385e+00],
       [-8.64965495e+00, -1.43523334e+01, -8.60055051e+00,
        -1.02426098e+01, -3.68017646e+00, -5.33071989e+00,
        -1.77659818e+01,  0.00000000e+00, -1.42032964e+01,
        -7.48140267e+00],
       [-1.37595590e+01, -8.49181243e+00, -1.55712494e+01,
        -1.94521207e+01, -2.73462035e+01, -1.40928722e+01,
         0.00000000e+00, -5.31372537e-01, -3.36545388e+01,
        -9.76950949e+00],
       [ 0.00000000e+00, -1.13563641e+01, -1.85141437e+01,
        -4.80250773e+00, -1.79246437e+01, -1.20925444e+01,
        -1.46502579e+01, -2.37567081e+01, -3.26893044e+00,
        -5.34803279e+00],
       [ 0.00000000e+00, -1.04313731e+01, -2.60339243e+01,
        -1.83045974e+01, -2.19250159e+01, -1.91124242e+01,
        -2.77179704e+01, -1.08441155e+01, -2.57313663e+01,
        -1.

In [184]:
m_exp = np.exp(m)
print(m_exp, m_exp.shape)

[[1.25138313e-08 1.79556298e-13 5.31394014e-08 5.20129863e-11
  5.11677389e-11 3.70028679e-11 3.78542839e-13 1.53212459e-05
  1.00000000e+00 4.50977166e-04]
 [1.75187286e-04 5.84602672e-07 1.84004470e-04 3.56197673e-05
  2.52185244e-02 4.84058409e-03 1.92456299e-08 1.00000000e+00
  6.78557666e-07 5.63466504e-04]
 [1.05754644e-06 2.05141120e-04 1.72779546e-07 3.56493243e-09
  1.32951951e-12 7.57780384e-07 1.00000000e+00 5.87797642e-01
  2.42113776e-15 5.71683870e-05]
 [1.00000000e+00 1.16948256e-05 9.10771710e-09 8.20913489e-03
  1.64220044e-08 5.60111790e-06 4.33984112e-07 4.81496097e-11
  3.80470990e-02 4.75750083e-03]
 [1.00000000e+00 2.94925433e-05 4.93867368e-12 1.12308951e-08
  3.00667578e-10 5.00702345e-09 9.16723582e-13 1.95191305e-05
  6.68358670e-12 4.92652856e-08]
 [5.85579703e-10 1.08158109e-09 2.48060952e-15 7.44806235e-16
  1.00000000e+00 2.66359876e-13 4.79229648e-07 2.35976305e-11
  2.18836631e-11 3.64939965e-20]
 [3.16004635e-15 1.00000000e+00 2.53858795e-13 3.95990518e

In [185]:
# Per-row sum of the exponentials, kept as a column vector so that dividing
# m_exp by it normalizes each row. keepdims=True replaces the hard-coded
# reshape(10, 1), so this works for any number of rows.
m_sex_row_sum = m_exp.sum(axis=1, keepdims=True)
print(m_sex_row_sum, m_sex_row_sum.shape)

[[1.00046636]
 [1.03101867]
 [1.58806194]
 [1.05103149]
 [1.00004908]
 [1.00000048]
 [1.99838388]
 [1.00013052]
 [1.05313054]
 [1.96608698]] (10, 1)


In [186]:
m_softmax = m_exp / m_sex_row_sum
print(m_softmax)

[[1.25079980e-08 1.79472599e-13 5.31146306e-08 5.19887406e-11
  5.11438872e-11 3.69856192e-11 3.78366383e-13 1.53141040e-05
  9.99533853e-01 4.50766944e-04]
 [1.69916696e-04 5.67014633e-07 1.78468611e-04 3.45481303e-05
  2.44598135e-02 4.69495290e-03 1.86666163e-08 9.69914542e-01
  6.58142948e-07 5.46514356e-04]
 [6.65935261e-07 1.29177027e-04 1.08798996e-07 2.24483211e-09
  8.37196254e-13 4.77173065e-07 6.29698359e-01 3.70135211e-01
  1.52458648e-15 3.59988395e-05]
 [9.51446279e-01 1.11269983e-05 8.66550354e-09 7.81055084e-03
  1.56246549e-08 5.32916278e-06 4.12912568e-07 4.58117670e-11
  3.61997708e-02 4.52650646e-03]
 [9.99950925e-01 2.94910960e-05 4.93843131e-12 1.12303439e-08
  3.00652823e-10 5.00677773e-09 9.16678593e-13 1.95181726e-05
  6.68325870e-12 4.92628679e-08]
 [5.85579421e-10 1.08158057e-09 2.48060833e-15 7.44805876e-16
  9.99999519e-01 2.66359748e-13 4.79229418e-07 2.35976191e-11
  2.18836526e-11 3.64939789e-20]
 [1.58130096e-15 5.00404357e-01 1.27032047e-13 1.98155380e

In [187]:
print(m_softmax.sum(axis = 1))#验证：每一行的结果和是1

[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]


更多的numpy细节和用法可以查看官网的[numpy指南](https://numpy.org/doc/stable/reference/)