In [1]:
import pandas as pd
import numpy as np

# 横向合并

In [3]:
# Left frame: key column 'one' contains repeated keys, 'two' counts 0..5.
data1 = pd.DataFrame({'one': list('abaacb'), 'two': range(6)})
# Right frame: one row per key; key 'd' has no counterpart in data1.
data2 = pd.DataFrame({'one': list('abcd'), 'two': range(10, 14)})

In [4]:
data1

Unnamed: 0,one,two
0,a,0
1,b,1
2,a,2
3,a,3
4,c,4
5,b,5


In [5]:
data2

Unnamed: 0,one,two
0,a,10
1,b,11
2,c,12
3,d,13


In [6]:
# Inner join on the shared 'one' column: only keys present in both frames
# are kept (intersection). The clashing 'two' columns get _x/_y suffixes.
data = pd.merge(left=data1, right=data2, how='inner', on='one')

In [7]:
data

Unnamed: 0,one,two_x,two_y
0,a,0,10
1,a,2,10
2,a,3,10
3,b,1,11
4,b,5,11
5,c,4,12


In [10]:
# Outer join on 'one': keys from either frame are kept (union); a key that
# is missing on one side gets NaN in that side's columns.
data = pd.merge(left=data1, right=data2, how='outer', on='one')

In [11]:
data

Unnamed: 0,one,two_x,two_y
0,a,0.0,10
1,a,2.0,10
2,a,3.0,10
3,b,1.0,11
4,b,5.0,11
5,c,4.0,12
6,d,,13


In [12]:
# Left join on 'one': data1 is the reference frame, so every one of its
# rows is kept and matching data2 values are attached.
data = pd.merge(left=data1, right=data2, how='left', on='one')

In [13]:
data

Unnamed: 0,one,two_x,two_y
0,a,0,10
1,b,1,11
2,a,2,10
3,a,3,10
4,c,4,12
5,b,5,11


In [14]:
# Right join on 'one': data2 is the reference frame, so every one of its
# keys is kept; keys absent from data1 get NaN in data1's columns.
data = pd.merge(left=data1, right=data2, how='right', on='one')

In [15]:
data

Unnamed: 0,one,two_x,two_y
0,a,0.0,10
1,a,2.0,10
2,a,3.0,10
3,b,1.0,11
4,b,5.0,11
5,c,4.0,12
6,d,,13


In [16]:
# Same data as before, but the two frames no longer share any column name.
data1 = pd.DataFrame({'one1': list('abaacb'), 'two1': range(6)})
data2 = pd.DataFrame({'one2': list('abcd'), 'two2': range(10, 14)})

In [17]:
data1

Unnamed: 0,one1,two1
0,a,0
1,b,1
2,a,2
3,a,3
4,c,4
5,b,5


In [18]:
data2

Unnamed: 0,one2,two2
0,a,10
1,b,11
2,c,12
3,d,13


In [20]:
# Inner join where the key column is named differently on each side:
# data1 is matched on 'one1', data2 on 'one2'. Both key columns are kept.
data = pd.merge(
    left=data1,
    right=data2,
    how='inner',
    left_on='one1',
    right_on='one2',
)

In [21]:
data

Unnamed: 0,one1,two1,one2,two2
0,a,0,a,10
1,a,2,a,10
2,a,3,a,10
3,b,1,b,11
4,b,5,b,11
5,c,4,c,12


In [22]:
data1

Unnamed: 0,one1,two1
0,a,0
1,b,1
2,a,2
3,a,3
4,c,4
5,b,5


In [23]:
data2

Unnamed: 0,one2,two2
0,a,10
1,b,11
2,c,12
3,d,13


In [26]:
# Outer join that matches rows by index label on both sides (no key column
# is involved); index labels found in only one frame get NaN on the other side.
data = pd.merge(
    left=data1,
    right=data2,
    how='outer',
    left_index=True,
    right_index=True,
)

In [27]:
data

Unnamed: 0,one1,two1,one2,two2
0,a,0,a,10.0
1,b,1,b,11.0
2,a,2,c,12.0
3,a,3,d,13.0
4,c,4,,
5,b,5,,


In [34]:
# Outer join that matches the values of data1's 'two1' column against
# data2's index labels (left side: column, right side: index).
data = pd.merge(
    left=data1,
    right=data2,
    how='outer',
    left_on='two1',
    right_index=True,
)

In [35]:
data

Unnamed: 0,one1,two1,one2,two2
0,a,0,a,10.0
1,b,1,b,11.0
2,a,2,c,12.0
3,a,3,d,13.0
4,c,4,,
5,b,5,,


In [36]:
data1

Unnamed: 0,one1,two1
0,a,0
1,b,1
2,a,2
3,a,3
4,c,4
5,b,5


In [37]:
data2

Unnamed: 0,one2,two2
0,a,10
1,b,11
2,c,12
3,d,13


In [39]:
# Keep only the first four rows so data1 has as many rows as data2.
data1 = data1.iloc[:4, :]

In [40]:
# Quick index-based merge; it works here because the two frames share no
# column names. The two indexes do not have to be identical.
data1.join(other=data2, how='left')

Unnamed: 0,one1,two1,one2,two2
0,a,0,a,10
1,b,1,b,11
2,a,2,c,12
3,a,3,d,13


In [42]:
# Rebuild the full six-row left frame and the four-row right frame.
left_keys = ['a', 'b', 'a', 'a', 'c', 'b']
right_keys = ['a', 'b', 'c', 'd']
data1 = pd.DataFrame({'one1': left_keys, 'two1': range(6)})
data2 = pd.DataFrame({'one2': right_keys, 'two2': range(10, 14)})

In [43]:
# data1 is the reference (left) frame: all of its rows are kept and data2's
# columns are attached by index label.
data1.join(other=data2, how='left')

Unnamed: 0,one1,two1,one2,two2
0,a,0,a,10.0
1,b,1,b,11.0
2,a,2,c,12.0
3,a,3,d,13.0
4,c,4,,
5,b,5,,


In [44]:
data1

Unnamed: 0,one1,two1
0,a,0
1,b,1
2,a,2
3,a,3
4,c,4
5,b,5


In [45]:
# Add a column 'three' directly; the result is only displayed, not stored.
data1.assign(three=np.arange(0, 6))

Unnamed: 0,one1,two1,three
0,a,0,0
1,b,1,1
2,a,2,2
3,a,3,3
4,c,4,4
5,b,5,5


# 纵向合并

In [46]:
# Two random frames whose columns only partly overlap: data2 has no 'd'
# column and lists its columns in a different order.
# NOTE(review): no random seed is set in the visible cells, so the values
# differ on every run -- confirm whether that is intended.
data1 = pd.DataFrame(np.random.randn(3, 4), columns=list('abcd'))
data2 = pd.DataFrame(np.random.randn(2, 3), columns=list('bca'))

In [47]:
data1

Unnamed: 0,a,b,c,d
0,-0.314425,-0.571201,0.202219,0.823889
1,1.307029,-1.119135,0.734455,-0.43892
2,-0.222125,-0.569837,-0.992926,-1.197502


In [48]:
data2

Unnamed: 0,b,c,a
0,0.247811,-0.043823,-0.729572
1,-0.681311,1.23616,0.956227


In [49]:
# Stack data2's rows below data1's. Columns are aligned by name, each frame
# keeps its own row labels, and the missing 'd' values become NaN.
data = pd.concat(objs=[data1, data2], axis=0)

In [50]:
data

Unnamed: 0,a,b,c,d
0,-0.314425,-0.571201,0.202219,0.823889
1,1.307029,-1.119135,0.734455,-0.43892
2,-0.222125,-0.569837,-0.992926,-1.197502
0,-0.729572,0.247811,-0.043823,
1,0.956227,-0.681311,1.23616,


In [51]:
# Same vertical stack, but ignore_index=True discards the original row
# labels and renumbers the result from 0.
data = pd.concat(objs=[data1, data2], axis=0, ignore_index=True)

In [52]:
data

Unnamed: 0,a,b,c,d
0,-0.314425,-0.571201,0.202219,0.823889
1,1.307029,-1.119135,0.734455,-0.43892
2,-0.222125,-0.569837,-0.992926,-1.197502
3,-0.729572,0.247811,-0.043823,
4,0.956227,-0.681311,1.23616,
