NumPy is the fundamental package for scientific computing with Python. It contains among other things:

- 功能强大的N维数组对象。
- 精密广播功能函数。
- 集成 C/C++ 和 Fortran 代码的工具。
- 强大的线性代数、傅立叶变换和随机数功能

In [2]:
import numpy as np

### 数组的生成/创建

In [None]:

# permutation(n) with an integer returns a shuffled copy of arange(n).
print(np.random.permutation(10))

# permutation(seq) returns a shuffled copy and leaves the input untouched.
a = [0, 1, 2, 3, 4, 5]
print(np.random.permutation(a))

# shuffle(seq) reorders the sequence in place and returns None, so it is
# called as a statement and the sequence itself is printed afterwards
# (printing the call's return value would only show "None").
a = [0, 1, 2, 3, 4, 5]
np.random.shuffle(a)
print(a)

In [None]:
np.random.randn(300).shape   # shape of a 300-element random vector (standard-normal samples)

In [None]:
np.zeros(300).shape # shape of a 300-element zero vector

In [None]:
ars = np.arange(0,100000,1)
# ars

In [None]:
np.int8(12.334)

In [None]:
# Convert element-wise by requesting an explicit boolean dtype.
# The name `np.bool` is not portable across NumPy releases: it has been an
# alias of the builtin bool (one True for any non-empty list), then removed,
# then reintroduced as NumPy's own boolean scalar type.
np.array([1, 2, 0], dtype=bool)

In [None]:
# Build a 3-D array: the number of elements produced by arange must equal
# the product of the target dimensions (24 == 2 * 3 * 4).
arr = np.arange(2 * 3 * 4).reshape((2, 3, 4))
arr

In [404]:
np.linspace(0,np.pi,3) #  start,end,number_element

array([0.        , 1.57079633, 3.14159265])

In [None]:
def f(x, y):
    """Return ``10 * x + y`` (x is the first index i, y the second index j)."""
    return x * 10 + y


# fromfunction evaluates f on the index grids of a (5, 4) array.
df = np.fromfunction(f, (5, 4), dtype=int)
df

In [372]:
cc = np.random.choice(10, 3)
cc

array([1, 2, 9])

In [380]:
import random 
random.choice(cc)

2

In [379]:
import random 
random.sample(list(cc), 2)

[2, 9]

##### 数据的保存和加载

In [None]:
import os

# savez_compressed does not create missing parent directories, so make sure
# the target folder exists before writing.
os.makedirs("./data", exist_ok=True)

# A dict is not array-like: NumPy wraps it in a 0-d object array and pickles
# it inside the archive, so reading it back requires allow_pickle=True.
d_data = {"121": "1212121"}
np.savez_compressed("./data/compressed_data.npz", d_data=d_data)

In [None]:
# NOTE(review): allow_pickle=True unpickles arbitrary objects -- only load
# archives that come from a trusted source.
# np.load returns an NpzFile that keeps the file open; the with-block closes
# it once the value has been read.
with np.load("./data/compressed_data.npz", allow_pickle=True) as data:
    # The saved dict comes back as a 0-d object array; .item() unwraps it.
    print(data["d_data"].item())

### 数组ops

In [403]:
import numpy as np

# clip bounds every element to the closed interval [a_min, a_max]: values
# above a_max become a_max and values below a_min become a_min.
x = np.array([1, 2, 3, 5, 6, 7, 8, 9])
x.clip(3, 8)

array([3, 3, 3, 5, 6, 7, 8, 8])

In [400]:
# Element-wise absolute value, always returned as floats.
# Uses the `np` alias: the bare name `numpy` has not been imported at this
# point of the notebook.
np.fabs([1, -1])

array([1., 1.])

In [None]:
# squeeze drops every axis of length 1: shape (1, 3, 1) becomes (3,).
A = np.squeeze(np.array([[[0], [1], [2]]]))
A

##### ndarray的属性

In [None]:
import numpy as np
a = np.array([[3, 1, 2.0],[ 4, 6, 1]])

In [None]:
a.shape

In [None]:
a.size

In [None]:
a.itemsize # itemsize属性返回数组中各个元素所占用的字节数大小。

In [None]:
a.nbytes # size*itemsize

##### 转置 transpose

In [390]:
a = np.array([3, 1, 2])
b = np.array([4, 6, 2])
b.shape

(3,)

In [392]:
br = b.reshape(b.shape[0],1) #  "transpose" of a 1-D array: reshape it into a column of shape (n, 1)
br.shape

(3, 1)

In [None]:
b.reshape(b.shape[0],1) * a 

In [None]:
b = np.arange(24).reshape(4,6)
b

In [None]:
b.T

##### 复数

In [None]:
d = np.array([1.2+2j, 2+3j])
d

In [None]:
d.real

In [None]:
d.imag

##### flat属性，返回一个numpy.flatiter对象，即可迭代的对象。

In [None]:
e = np.arange(6).reshape(2,3)
e
f = e.flat  # numpy.flatiter: a 1-D iterator over the elements of e
print(f[2])  # the iterator also supports flat (1-D) indexing
for item in f:  # visits every element of e in turn
    print(item)

#### 筛选/filter/map/reduce

In [None]:
a =  np.arange(7)
a[1:4] 

In [None]:
# 每间隔2个取一个数
a[ : 6: 2]

In [None]:
u = np.array([1,2,4,5,1,2,3])
np.unique(u)

In [None]:
np.ptp(u)#数组沿指定轴返回最大值减去最小值，即（max-min）

In [None]:
np.var(u) #返回方差（variance）

In [None]:
np.cumprod(u) #返回累乘积值

In [None]:
np.cumsum(u)  # 返回累加值

In [None]:
# Index of the maximum along the last axis. For a 2-D array this is the same
# as axis=1 (one result per row); unlike axis=1 it also works when `a` is
# 1-D, where axis=1 raises AxisError.
np.argmax(a, axis=-1)

In [None]:
np.argmin(a) # 找整个矩阵的最小值

In [None]:
d = [1, 2, 3, 4, 5, 10]
# Locate the maximum: compare element-wise against the max and ask where()
# for the matching indices. The list is converted explicitly so the
# comparison is visibly an array operation.
np.where(np.asarray(d) == np.max(d))

In [None]:
# Correlated filtering across parallel arrays.
# The dtype is set explicitly when the arrays are created.
xs = np.array([0, 1, 2, 2, 5]).astype("float64")
vs = np.array([-1, 1, 1, -1, 1]).astype("float64")
ws = np.array([0, 1, 2, 3, 0]).astype("float64")

vd = np.diff(vs)
print(vd)
xd = np.diff(xs)
print(xd)

# Keep the xs-differences at the positions where the vs-difference is -2,
# then only the strictly positive ones. The filter must run on the array:
# reducing with np.min first yields a scalar, which cannot be
# boolean-indexed.
xd = xd[np.where(vd == -2)[0]]
xd = xd[xd > 0]

# Always bind min_gap; fall back to 0.0 when no difference qualifies so that
# code using min_gap afterwards does not hit a NameError.
min_gap = np.min(xd) if len(xd) > 0 else 0.0
min_gap

In [None]:
a = np.array([])
b = np.array([1,2,3])
b[np.where(a>0)]

In [None]:
sum(ws)

In [None]:
xs += vs*(min_gap/2)
xs

In [None]:
ws[np.where(xs<0)]  #  条件筛选 下标/元素

In [None]:
di = np.diff(xs)  # differences between neighbouring elements
# Left index of every neighbouring pair whose difference is the minimum.
poses = np.where(di == np.min(di))[0]

# Sign mask: -1 for both members of each minimal pair, +1 everywhere else.
# Sized from xs (instead of a hard-coded 5) so it follows the input length,
# and filled with index arrays instead of a Python loop.
cvs = np.ones(len(xs))
cvs[poses] = -1
cvs[poses + 1] = -1
cvs

In [None]:
# update vs
vs = vs * cvs
vs

In [None]:
np.argmax(a,axis=0)

##### where sql

In [47]:
# Find the indices of the entries equal to 1.
y_train = np.array([1,1,2,3,4,5,6,7,8,9])
indexes = np.where(y_train==1)[0]  # where() returns a tuple of index arrays; [0] is the one for the only axis
indexes

array([0, 1])

In [48]:
indexes

array([0, 1])

In [53]:
np.where(y_train>4,"gt4","lt4")  # replace

array(['lt4', 'lt4', 'lt4', 'lt4', 'lt4', 'gt4', 'gt4', 'gt4', 'gt4',
       'gt4'], dtype='<U3')

In [None]:
digit_indices = [np.where(y_train == i)[0] for i in range(10)]
digit_indices

In [None]:
# vec = np.zeros(300)
vec = np.random.randn(300)
if np.where(vec!=0)[0].shape[0]==0:
    print("---")

In [None]:
np.where(vec!=0)[0].shape[0]

### 处理数组形状

##### reshape vs resize
函数resize（）的作用跟reshape（）类似，但是会改变所作用的数组，相当于有inplace=True的效果

In [None]:
b = np.arange(12).reshape(3,4)
b

In [None]:
b.reshape(4,3)

In [None]:
b

##### ravel()和flatten()，将多维数组转换成一维数组
两者的区别在于返回拷贝（copy）还是返回视图（view）

flatten()返回一份拷贝，需要分配新的内存空间，对拷贝所做的修改不会影响原始矩阵，

ravel()在可能的情况下返回视图（view），对视图的修改会影响原始矩阵；只有在无法返回视图时（例如数组在内存中不连续）才会返回拷贝。

In [None]:
b

In [None]:
bb = b.flatten()
bb[2] = 20
bb

In [None]:
b

In [None]:
bb = b.ravel()
bb[2] = 20
bb

In [None]:
b

In [None]:
# 用tuple指定数组的形状
b.shape=(6,2)
b

#### 复制

In [None]:
# d = np.array([[1,2,3],[4,5,6]])
c = a.view() # view (shallow copy): a new array object that shares a's data
d = a.copy() #  deep copy: a new array with its own data

### 两个数组的运算

#### 堆叠数组

##### 水平叠加

In [31]:
b = np.array([[1,2,3,4],[5,6,7,8],[9,10,11,12]])
c = np.array([[1,2,3,4],[5,6,7,8],[9,10,11,12]])
b.shape

(3, 4)

In [None]:
np.hstack((b,c))

In [None]:
np.column_stack((b,c))

##### 垂直叠加

In [None]:
np.vstack((b,c))

In [None]:
np.row_stack((b,c))

##### concatenate

In [None]:
np.concatenate((b,c),axis=1) # axis=1: join along the second axis (side by side, like hstack)

In [None]:
np.concatenate((b,c),axis=0) # axis=0: join along the first axis (stacked vertically, like vstack)

##### 深度叠加

In [None]:
np.dstack((b,c)).shape  # (3, 4, 2): the two (3, 4) inputs are stacked along a new third axis

#### intersection

In [43]:
a = np.array([3, 1, 2])
b = np.array([4, 6, 2])
np.intersect1d(a,b)

array([2])

In [36]:
bb = np.array([[1,2,3,4],[5,6,7,8],[9,10,11,12]])
cc = np.array([[1,2,3,4],[5,6,7,8],[9,10,11,13]])
np.intersect1d(bb,cc)

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

### allclose

In [46]:
# provide you with a great way of checking if two arrays are similar,
#  absolute(`a` - `b`) <= (`atol` + `rtol` * absolute(`b`))
np.allclose(a,b,0.8)

False

#### 数据拆分

“水平”是horizontal

“竖直”是vertical 

In [None]:
b = np.array([[1,2,3,4],[5,6,7,8],[9,10,11,12],[13,14,15,16]])
b

In [None]:
np.hsplit(b, 2)  

In [None]:
np.split(b,2,axis=1)

In [None]:
b

In [None]:
np.vsplit(b, 2)

In [None]:
bb = np.dstack((b,b))
np.dsplit(bb,2)[0].shape

In [None]:
a = np.array([3, 1, 2])
b = np.array([4, 6, 2])
a * b  # 

In [None]:
np.dot(a,b) # 矩阵乘法可以使用dot函数或创建矩阵对象实现

In [None]:
sum(a*b)

##### 星号（*）乘法运算
对数组执行对应位置相乘

对矩阵执行矩阵乘法运算

In [None]:
A=np.array([[1, 2],
         [3, 4]])
B=np.array([[0, 1],
         [2, 3]])
A*B

In [None]:
# On matrix objects `*` is the matrix product (on plain arrays it is
# element-wise). np.asmatrix is used because the `np.mat` alias is no longer
# available in the main namespace of NumPy 2.0.
np.asmatrix(A) * np.asmatrix(B)

In [None]:
a ** b

In [None]:
a+b

In [None]:
l = [0,1,3,3]
r = np.arange(4)
e = np.equal(l, r)

In [None]:
np.where(l==r) # 两个数组的比较

In [None]:
xs1 = np.array([-0.5,1.5,1.5,2.5,5.5])
xs2 = np.array([ 1.5,1.5,2.5,5.5,-0.5])

In [None]:
ises = np.equal(xs1,xs2).astype("int")
ises

In [None]:
##### ix_()函数

In [None]:
a = np.array([2,3,4,5]) # 
b = np.array([8,5,4])
c = np.array([5,4,6,8,3])
def ufunc_reduce(ufct, *vectors):
    """Combine 1-D vectors with a binary ufunc over their open mesh.

    ``np.ix_`` reshapes the i-th vector so that it varies only along axis i;
    folding those grids with ``ufct`` broadcasts them against each other, so
    the result has one axis per input vector.

    Args:
        ufct: Binary NumPy ufunc, e.g. ``np.add``.
        *vectors: 1-D sequences to combine.

    Returns:
        The broadcast result of folding ``ufct`` over all vectors, or
        ``ufct.identity`` when no vectors are given.
    """
    grids = np.ix_(*vectors)
    # Seed the fold with the ufunc's identity. Some ufuncs (np.maximum, for
    # example) have no identity and report None; for those the first grid
    # seeds the fold instead of being combined with None.
    acc = ufct.identity
    for grid in grids:
        acc = grid if acc is None else ufct(acc, grid)
    return acc
ufunc_reduce(np.add,a,b,c).shape

### broadcast
当数组跟一个标量进行数学运算时，标量需要根据数组的形状进行扩展(广播)，然后执行运算。

In [None]:
a = np.array([[1,3,4],[4,5,6]])
a*2

In [None]:
import numpy
import pylab

# Build a vector of 10000 normal deviates with variance 0.5^2 and mean 2
mu, sigma = 2, 0.5
v = numpy.random.normal(mu, sigma, 10000)

# Plot a normalized histogram with 50 bins (matplotlib version, draws a plot).
# `density=True` replaces the `normed` keyword, which current matplotlib no
# longer accepts.
pylab.hist(v, bins=50, density=True)
pylab.show()

# Compute the same normalized histogram with NumPy (no plot) and draw it as
# a line through the bin centres. `density=True` likewise replaces the
# removed `normed` keyword of numpy.histogram.
(n, bins) = numpy.histogram(v, bins=50, density=True)
pylab.plot(.5 * (bins[1:] + bins[:-1]), n)
pylab.show()

### 排序

In [393]:
import numpy as np

# Structured dtype: a 10-byte string field 'name' and an integer field 'age'.
dt = np.dtype([('name', 'S10'), ('age', int)])
a = np.array(
    [
        ("raju", 21),
        ("anil", 25),
        ("ravi", 17),
        ("amar", 27),
    ],
    dtype=dt,
)
# Print a copy of the records sorted by the 'name' field.
print(np.sort(a, order='name'))

[(b'amar', 27) (b'anil', 25) (b'raju', 21) (b'ravi', 17)]


In [None]:
ass = []

##### numpy.argsort() 
函数返回的是数组值从小到大的索引值。

In [397]:
x = np.array([3,  1,  2]) 
np.argsort(x)[0:2]

array([1, 2])

In [399]:
# numpy.lexsort() 用于对多个序列进行排序。把它想象成对电子表格进行排序，每一列代表一个序列，排序时优先照顾靠后的列
nm =  ('raju','anil','ravi','amar') 
dv =  ('f.y.',  's.y.',  's.y.',  'f.y.') 
np.lexsort((dv,nm))  

array([3, 1, 0, 2])

In [3]:
# Most frequent value in the array. bincount(c)[k] is the number of times k
# occurs, for every k from 0 up to the largest value in c (unlike
# list.count, which counts a single value), so the position of the largest
# count is the most frequent value.
c = np.array([1, 2, 5, 9, 9, 9, 3])
d = np.bincount(c).argmax()
d

9

In [6]:
np.newaxis?

Type:        NoneType
String form: None
Docstring:   <no docstring>


### Sparse Matrix
高级数组之稀疏矩阵(https://www.cnblogs.com/chenzhijuan-324/p/10637028.html)

In [23]:
import numpy as np
import scipy.sparse as sp
A=np.array([[1,0,2,0],[0,0,0,0],[3,0,0,0],[1,0,0,4],[9,5,4,5]])
A.shape

(5, 4)

######  压缩稀疏行（CSR，Compressed Sparse Row）

In [15]:
AR=sp.csr_matrix(A)
AR

<5x4 sparse matrix of type '<class 'numpy.int64'>'
	with 9 stored elements in Compressed Sparse Row format>

In [16]:
AR.indptr  # 行偏移

array([0, 2, 2, 3, 5, 9], dtype=int32)

######  稀疏列矩阵CSC（Compressed Sparse Column）

In [17]:
AS=sp.csc_matrix(A)
AS

<5x4 sparse matrix of type '<class 'numpy.int64'>'
	with 9 stored elements in Compressed Sparse Column format>

In [18]:
AS.data

array([1, 3, 1, 9, 5, 2, 4, 4, 5], dtype=int64)

In [19]:
AS.indptr  # 偏移量  矩阵元素的个数的累加量   列数+1

array([0, 4, 5, 7, 9], dtype=int32)

In [20]:
AS.indices #索引  通过indptr来确定该元素属于哪一列(列索引)，通过列索引和行索引共同确认元素的位置

array([0, 2, 3, 4, 4, 0, 4, 3, 4], dtype=int32)

In [21]:
AS.nnz

9

In [22]:
AS.toarray()

array([[1, 0, 2, 0],
       [0, 0, 0, 0],
       [3, 0, 0, 0],
       [1, 0, 0, 4],
       [9, 5, 4, 5]], dtype=int64)

### Structured Arrays

In [29]:
import numpy as np

name = ['Alice', 'Bob', 'Cathy', 'Doug']
age = [25, 45, 37, 19]
weight = [55.0, 85.5, 68.0, 61.5]

# One record per person, described by a compound (structured) dtype:
# a 10-character unicode name, a 32-bit integer age and a 64-bit float weight.
data = np.zeros(4, dtype=[('name', 'U10'), ('age', 'i4'), ('weight', 'f8')])

# Fill the array one field (column) at a time.
for field, values in (('name', name), ('age', age), ('weight', weight)):
    data[field] = values

In [30]:
data

array([('Alice', 25, 55. ), ('Bob', 45, 85.5), ('Cathy', 37, 68. ),
       ('Doug', 19, 61.5)],
      dtype=[('name', '<U10'), ('age', '<i4'), ('weight', '<f8')])