## 06 numpy 合并与分割

In [1]:
import numpy as np

In [6]:
# Two 1-D arrays of length 3 (shape (3,)): plain vectors, not 1x3 matrices.
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])

In [9]:
x.ndim  # number of dimensions; 1 confirms x is a 1-D array

1

In [11]:
y.ndim  # number of dimensions; 1 confirms y is a 1-D array

1

### 合并 concatenate
`np.concatenate([A, A], axis=1)`  
`np.vstack([A, z])`  
`np.hstack([A, B])`

In [13]:
np.concatenate([x, y]) # join two length-3 1-D vectors into one length-6 1-D vector

array([1, 2, 3, 3, 2, 1])

In [14]:
z = np.array([666, 666, 666])  # a third length-3 1-D vector

In [16]:
np.concatenate([x, y, z])  # concatenate takes a sequence, so three arrays are joined in one call

array([  1,   2,   3,   3,   2,   1, 666, 666, 666])

In [18]:
A = np.array([[1, 2, 3],
            [4, 5, 6]])
# A is a 2x3 matrix: two rows, each holding three values.
# Read as a dataset: two samples, each with three features.

In [19]:
np.concatenate([A, A])
# With the default axis=0 the rows are stacked, giving a 4x3 result.
# Read as a dataset: two samples joined with two more samples that share the same features.

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [20]:
# axis selects the dimension to join along. The default axis=0 is the first
# dimension (rows are stacked); axis=1 is the second dimension (columns are
# joined), i.e. the two original samples each gain three new features.
np.concatenate([A, A], axis=1)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

In [24]:
Az = np.concatenate([A,z.reshape(1,-1)])
# z is a 1-D vector, so it is first reshaped into a 1x3 2-D matrix;
# only then can it be concatenated with the 2-D matrix A.

In [27]:
Az  # 3x3 result: the two rows of A followed by z as a third row

array([[  1,   2,   3],
       [  4,   5,   6],
       [666, 666, 666]])

`np.vstack()` vertical  
即使传入数组的维数不同（例如二维的 `A` 和一维的 `z`），也可以在垂直方向上叠加：一维数组会被当作一行，但它的长度必须与另一个数组的列数相同  

`np.hstack()` horizontal  

但无论是 `vstack` 还是 `hstack`，都不能拼接形状不匹配的数组，否则会抛出 `ValueError`

In [28]:
np.vstack([A,z])  # vstack takes the 1-D z as-is and stacks it under A as a new row (no reshape needed)

array([[  1,   2,   3],
       [  4,   5,   6],
       [666, 666, 666]])

In [30]:
B = np.full((2, 2), 100) # 2x2 matrix with every element equal to 100
B

array([[100, 100],
       [100, 100]])

In [31]:
np.hstack([A, B])  # A (2x3) and B (2x2) have the same number of rows, so they join side by side into 2x5

array([[  1,   2,   3, 100, 100],
       [  4,   5,   6, 100, 100]])

In [32]:
# Concatenating A (2x3) with B (2x2) along the default axis=0 is invalid:
# every dimension except the concatenation axis must match, and here the
# column counts differ (3 vs 2). The error is caught and printed so that the
# failure is still demonstrated while the notebook keeps running under
# "Restart Kernel -> Run All".
try:
    np.concatenate([A, B])
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 3 and the array at index 1 has size 2

### 分割
`np.split(x1, (x2, x3))`  
`x1`：被分割的数组  
`x2`, `x3`：分割点所在的索引位置，例如 `[3, 7]` 会把数组分成 `[:3]`、`[3:7]`、`[7:]` 三段
```
x = np.arange(10)
np.split(x, [3, 7])  

```  
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])  

[array([0, 1, 2]), array([3, 4, 5, 6]), array([7, 8, 9])]  
`A1, A2 = np.split(A, [2], axis = 1)`  
`upper, lower = np.vsplit(A, [2])`  
`left, right = np.hsplit(A, [2])`

In [33]:
x = np.arange(10) # create a 1-D vector with the 10 elements 0..9
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [34]:
np.split(x, [3, 7])  # split points 3 and 7 give three pieces: x[:3], x[3:7], x[7:]

[array([0, 1, 2]), array([3, 4, 5, 6]), array([7, 8, 9])]

In [35]:
np.split(x, [3, 4, 7])  # three split points give four pieces; x[3:4] holds a single element

[array([0, 1, 2]), array([3]), array([4, 5, 6]), array([7, 8, 9])]

In [39]:
x1, x2 = np.split(x, [3])  # one split point gives two pieces, unpacked directly

In [40]:
x1  # the part before index 3

array([0, 1, 2])

In [41]:
x2  # the part from index 3 onwards

array([3, 4, 5, 6, 7, 8, 9])

In [43]:
# 4x4 matrix holding 0..15; note this rebinds A, replacing the earlier 2x3 matrix.
A = np.arange(16).reshape((4, 4))
A

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [44]:
A1, A2 = np.split(A, [2]) # default axis=0: split along the row dimension at row index 2


In [45]:
A1  # rows 0-1 of A

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [46]:
A2  # rows 2-3 of A

array([[ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [51]:
A1, A2 = np.split(A, [2], axis = 1)  # axis=1: split along the column dimension at column index 2

In [52]:
A1  # columns 0-1 of A

array([[ 0,  1],
       [ 4,  5],
       [ 8,  9],
       [12, 13]])

In [53]:
A2  # columns 2-3 of A

array([[ 2,  3],
       [ 6,  7],
       [10, 11],
       [14, 15]])

In [56]:
upper, lower = np.vsplit(A, [2])  # vertical split at row 2; same result as np.split(A, [2]) above

In [57]:
upper  # rows 0-1 of A

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [58]:
lower  # rows 2-3 of A

array([[ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [59]:
left, right = np.hsplit(A, [2])  # horizontal split at column 2; same result as np.split(A, [2], axis=1) above

In [60]:
left  # columns 0-1 of A

array([[ 0,  1],
       [ 4,  5],
       [ 8,  9],
       [12, 13]])

In [61]:
right  # columns 2-3 of A

array([[ 2,  3],
       [ 6,  7],
       [10, 11],
       [14, 15]])

In [63]:
data = np.arange(16).reshape(4, 4)
data
# Treated as a toy dataset: the first three columns are features, the last column is the label.

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [71]:
# Split off the last column as the label. Index -1 separates data[:, :-1]
# from data[:, -1:], so y stays 2-D (4x1). For this 4-column data it is
# equivalent to the alternative below. Note that this rebinds y.
# X, y = np.split(data, [3], axis=1)
X, y = np.hsplit(data, [-1])

In [72]:
X  # 4x3 feature matrix (first three columns of data)

array([[ 0,  1,  2],
       [ 4,  5,  6],
       [ 8,  9, 10],
       [12, 13, 14]])

In [73]:
y  # 4x1 column matrix (last column of data), still 2-D

array([[ 3],
       [ 7],
       [11],
       [15]])

In [76]:
y[:, 0] # take column 0 of every row: turns the 4x1 matrix into a 1-D array of shape (4,)

array([ 3,  7, 11, 15])