# Numpy and Pandas

## array的属性

In [1]:
import numpy as np
# Build a small 2-D array and inspect its basic attributes.
array = np.array([[1, 2, 3], [2, 3, 4]])
print(array)
# ndim: number of dimensions (axes)
print("number of dim;", array.ndim)
# shape: length along each axis, here (rows, columns)
print("shape:", array.shape)
# size: total number of elements in the array
print("size:", array.size)

[[1 2 3]
 [2 3 4]]
number of dim; 2
shape: (2, 3)
size: 6


## 创建array

In [2]:
# Create an array from a Python list; the dtype is inferred from the values.
a = np.array([1, 2, 3])
print(a)

# The element type can be set explicitly when the array is created.
a = np.array([1, 2, 3], dtype=int)
print(a)
print(a.dtype)

a = np.array([1, 2, 3], dtype=float)
print(a)
print(a.dtype)

# Constructors for commonly used arrays
## 3x4 array filled with zeros
a = np.zeros((3, 4))
print(a)
## 3x4 array filled with ones
a = np.ones((3, 4))
print(a)
## arange: start at 10, step by 2, stop before 20 (the stop value is excluded)
a = np.arange(10, 20, 2)
print(a)
## Reshape a 12-element sequence into a 3x4 matrix
a = np.arange(12).reshape(3, 4)
print(a)
## linspace: 5 evenly spaced points from 0 to 10, both endpoints included
a = np.linspace(0, 10, 5)
print(a)

[1 2 3]
[1 2 3]
int32
[ 1.  2.  3.]
float64
[[ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]]
[[ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]]
[10 12 14 16 18]
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[  0.    2.5   5.    7.5  10. ]


## array的运算

In [14]:
# Element-wise arithmetic, comparisons, matrix product and whole-array reductions.
a = np.array([10, 20, 30, 40])
b = np.arange(4)
print('a:', a)
print("b:", b)
# Subtraction is applied element by element
print("a - b:", a - b)
# Addition is applied element by element
print("a + b:", a + b)
# Multiplication is applied element by element
print("a * b:", a * b)
# Division is applied element by element.
# NOTE: b[0] is 0, so the first quotient is inf and NumPy reports a
# divide-by-zero RuntimeWarning instead of raising.
print("a / b:", a / b)
# Power: the ** operator, also element by element
print("b的平方:", b ** 2)
# NumPy's math functions operate on every element
print("sin(b):", np.sin(b))
print("cos(b):", np.cos(b))
# Comparing with a scalar yields a boolean array.
# The first label used to read "greater than 3" although the test is b < 3.
print("哪些元素小于3：", b < 3)
print("哪些元素等于3：", b == 3)
# Element-wise product versus matrix product
a = np.array([[1, 1], [0, 1]])
b = np.arange(4).reshape((2, 2))
print(a)
print(b)
c = a * b
print("a * b:", c)
c_dot = np.dot(a, b)
print("矩阵乘法：", c_dot)
# 2x4 matrix of random floats
a = np.random.random((2, 4))
print(a)
# Reductions over the whole array
print(np.sum(a))
print(np.min(a))
print(np.max(a))

a: [10 20 30 40]
b: [0 1 2 3]
a - b: [10 19 28 37]
a + b: [10 21 32 43]
a * b: [  0  20  60 120]
a / b: [         inf  20.          15.          13.33333333]
b的平方: [0 1 4 9]
sin(b): [ 0.          0.84147098  0.90929743  0.14112001]
cos(b): [ 1.          0.54030231 -0.41614684 -0.9899925 ]
哪些元素小于3： [ True  True  True False]
哪些元素等于3： [False False False  True]
[[1 1]
 [0 1]]
[[0 1]
 [2 3]]
a * b: [[0 1]
 [0 3]]
矩阵乘法： [[2 4]
 [2 3]]
[[ 0.48828859  0.48115898  0.93287271  0.87184191]
 [ 0.45004799  0.04470621  0.89000832  0.34918644]]
4.50811115079
0.044706208959
0.93287271136




In [18]:
# Reductions along a single axis of `a` (the 2x4 random matrix from the previous cell)
## axis=1 collapses the column dimension: one result per row
print(np.sum(a, axis=1))
## axis=0 collapses the row dimension: one result per column
print(np.sum(a, axis=0))

print(np.min(a, axis=1))
print(np.min(a, axis=0))

print(np.max(a, axis=1))
print(np.max(a, axis=0))

[ 2.77416218  1.73394897]
[ 0.93833658  0.52586519  1.82288104  1.22102835]
[ 0.48115898  0.04470621]
[ 0.45004799  0.04470621  0.89000832  0.34918644]
[ 0.93287271  0.89000832]
[ 0.48828859  0.48115898  0.93287271  0.87184191]


## array的运算【2】

In [38]:
# Statistics, cumulative operations, sorting, transposition and clipping on a 3x4 matrix.
A = np.arange(2, 14).reshape(3, 4)
print(A)
# Index of the smallest / largest element, counted over the flattened array
print(np.argmin(A))
print(np.argmax(A))
# Reductions come in two equivalent spellings: np.func(A) and A.func()
print(np.mean(A))
print(A.mean())
print(np.sum(A))
print(A.sum())
# Some statistics are only available as module-level functions, not as array methods
print(np.median(A))
print(np.average(A))

# Cumulative sum: over the flattened array by default, or along the given axis
print(np.cumsum(A))
print(np.cumsum(A, axis=0))
print(np.cumsum(A, axis=1))

# Difference between neighbouring elements (along the last axis by default)
print(np.diff(A))
print(np.diff(A, axis=0))
print(np.diff(A, axis=1))

# Non-zero elements: returns one index array per dimension,
# i.e. the row indices and the column indices of the non-zero entries
print(np.nonzero(A))

# Sorting: np.sort orders the values within each row (last axis)
A = np.arange(14, 2, -1).reshape(3, 4)
print(np.sort(A))

# Transpose (two spellings) and matrix product
print(np.transpose(A))
print(A.T)
print(A.T.dot(A))

# clip: values below 5 become 5, values above 9 become 9,
# and values in between are kept unchanged
print(np.clip(A, 5, 9))

[[ 2  3  4  5]
 [ 6  7  8  9]
 [10 11 12 13]]
0
11
7.5
7.5
90
90
7.5
7.5
[ 2  5  9 14 20 27 35 44 54 65 77 90]
[[ 2  3  4  5]
 [ 8 10 12 14]
 [18 21 24 27]]
[[ 2  5  9 14]
 [ 6 13 21 30]
 [10 21 33 46]]
[[1 1 1]
 [1 1 1]
 [1 1 1]]
[[4 4 4 4]
 [4 4 4 4]]
[[1 1 1]
 [1 1 1]
 [1 1 1]]
(array([0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2], dtype=int64), array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3], dtype=int64))
[[11 12 13 14]
 [ 7  8  9 10]
 [ 3  4  5  6]]
[[14 10  6]
 [13  9  5]
 [12  8  4]
 [11  7  3]]
[[14 10  6]
 [13  9  5]
 [12  8  4]
 [11  7  3]]
[[332 302 272 242]
 [302 275 248 221]
 [272 248 224 200]
 [242 221 200 179]]
[[9 9 9 9]
 [9 9 8 7]
 [6 5 5 5]]


## 索引

In [50]:
# 1-D array: a single index selects one element
A = np.arange(3, 15)
print(A)
print(A[3])

# 2-D array: A[i][j] and A[i, j] select the same element
A = np.arange(3, 15).reshape(3, 4)
print(A)
print(A[0][0])
print(A[0, 0])
## A bare colon selects every element along that axis (a whole column / a whole row)
print(A[:, 0])
print(A[0, :])
## n1:n2 selects indices n1 up to n2 - 1
print(A[0, 0:2])

# Iterating with for loops
## Iterating over the array yields its rows
for row in A:
    print(row)
## Iterating over the transpose yields its columns
for column in A.T:
    print(column)
## Element by element: flatten() gives a 1-D array, .flat an iterator over the elements
print(A.flatten())
for item in A.flat:
    print(item)

[ 3  4  5  6  7  8  9 10 11 12 13 14]
6
[[ 3  4  5  6]
 [ 7  8  9 10]
 [11 12 13 14]]
3
3
[ 3  7 11]
[3 4 5 6]
[3 4]
[3 4 5 6]
[ 7  8  9 10]
[11 12 13 14]
[ 3  7 11]
[ 4  8 12]
[ 5  9 13]
[ 6 10 14]
[ 3  4  5  6  7  8  9 10 11 12 13 14]
3
4
5
6
7
8
9
10
11
12
13
14


## array的合并

In [65]:
# Combining arrays with vstack, hstack and concatenate.
A = np.array([1, 1, 1])
B = np.array([2, 2, 2])
print("A:", A)
print("B:", B)
# Vertical stack: each 1-D input becomes one row of the result
C = np.vstack((A, B))
print(C)
print(A.shape, C.shape)
# Horizontal stack: 1-D inputs are joined end to end into a longer 1-D array
C = np.hstack((A, B))
print(C)
print(A.shape, C.shape)

# Can a 1-D array be transposed?
# print(A.T)
## It does not raise, but .T on a 1-D array is a no-op: the result still has
## shape (3,), so transposing cannot turn it into a column.

# Insert a new axis to turn the 1-D array into a 2-D row or column
print(A[np.newaxis, :])
print(A[np.newaxis, :].shape)
print(A[:, np.newaxis])
print(A[:, np.newaxis].shape)

# Column-wise merge: stack the two arrays side by side as (3, 1) columns
D = np.hstack((A[:, np.newaxis], B[:, np.newaxis]))
print(D)
print(D.shape)

# concatenate joins any number of arrays along the chosen axis
A = np.array([1, 1, 1])[:, np.newaxis]
B = np.array([2, 2, 2])[:, np.newaxis]
print(np.concatenate((A, B, B, A), axis=0))
print(np.concatenate((A, B, B, A), axis=1))

A: [1 1 1]
B: [2 2 2]
[[1 1 1]
 [2 2 2]]
(3,) (2, 3)
[1 1 1 2 2 2]
(3,) (6,)
[[1 1 1]]
(1, 3)
[[1]
 [1]
 [1]]
(3, 1)
[[1 2]
 [1 2]
 [1 2]]
(3, 2)
[[1]
 [1]
 [1]
 [2]
 [2]
 [2]
 [2]
 [2]
 [2]
 [1]
 [1]
 [1]]
[[1 2 2 1]
 [1 2 2 1]
 [1 2 2 1]]
