# 使用pd.merge() 合并

### merge 与 concat 的区别在于：merge 需要依据某一共同的行或列来进行合并。使用 pd.merge() 合并时，会自动把两者中 column 名称相同的那一列作为 key 来进行合并，注意每一列元素的顺序不要求一致

In [2]:
import numpy as np
import pandas as pd
from pandas import Series,DataFrame

In [9]:
# Two small tables that share an 'employee' column. 'Dains' exists only in
# df1 and 'Bush' only in df2, so the chosen join type changes the result.
df1 = pd.DataFrame(
    {
        "employee": ["Polo", "Sara", "Dains"],
        "workgroup": ["sail", "counting", "marketing"],
    }
)
df2 = pd.DataFrame(
    {
        "employee": ["Polo", "Sara", "Bush"],
        "worktime": [2, 3, 5],
    }
)
display(df1, df2)

Unnamed: 0,employee,workgroup
0,Polo,sail
1,Sara,counting
2,Dains,marketing


Unnamed: 0,employee,worktime
0,Polo,2
1,Sara,3
2,Bush,5


In [10]:
# concat simply glues the frames side by side by row index; it does not
# match on 'employee', so row 2 pairs 'Dains' with 'Bush'.
pd.concat([df1, df2], axis="columns")

Unnamed: 0,employee,workgroup,employee.1,worktime
0,Polo,sail,Polo,2
1,Sara,counting,Sara,3
2,Dains,marketing,Bush,5


In [12]:
# Inner join on the shared 'employee' column: only keys present in both
# frames ('Polo', 'Sara') survive.
pd.merge(df1, df2, how="inner")

Unnamed: 0,employee,workgroup,worktime
0,Polo,sail,2
1,Sara,counting,3


In [13]:
# Outer join: keeps every key from either frame; cells with no match are
# filled with NaN (which is why 'worktime' is shown as float).
pd.merge(df1, df2, how="outer")

Unnamed: 0,employee,workgroup,worktime
0,Polo,sail,2.0
1,Sara,counting,3.0
2,Dains,marketing,
3,Bush,,5.0


## 1）使用 how= 设置连接方式：左连接（left）、右连接（right）、内连接（inner）和外连接（outer），不指定时默认为内连接（inner）

In [14]:
# Left join: every row of df1 is kept; 'Dains' has no match in df2, so its
# 'worktime' is NaN.
pd.merge(df1, df2, how="left")

Unnamed: 0,employee,workgroup,worktime
0,Polo,sail,2.0
1,Sara,counting,3.0
2,Dains,marketing,


In [15]:
# Right join: every row of df2 is kept; 'Bush' has no match in df1, so its
# 'workgroup' is NaN.
pd.merge(df1, df2, how="right")

Unnamed: 0,employee,workgroup,worktime
0,Polo,sail,2
1,Sara,counting,3
2,Bush,,5


In [16]:
# Rebuild the tables: 'Polo' now appears twice in df1, and the keys in df2
# are listed in a different order than in df1.
df1 = pd.DataFrame(
    {
        "employee": ["Polo", "Sara", "Polo"],
        "workgroup": ["sail", "counting", "marketing"],
    }
)
df2 = pd.DataFrame(
    {
        "employee": ["Sara", "Denis", "Polo"],
        "worktime": [2, 3, 5],
    }
)
display(df1, df2)

Unnamed: 0,employee,workgroup
0,Polo,sail
1,Sara,counting
2,Polo,marketing


Unnamed: 0,employee,worktime
0,Sara,2
1,Denis,3
2,Polo,5


In [17]:
# No `how` given, so this is an inner join. Both 'Polo' rows of df1 are
# matched with the single 'Polo' row of df2; row order in the inputs does
# not matter.
pd.merge(left=df1, right=df2)

Unnamed: 0,employee,workgroup,worktime
0,Polo,sail,5
1,Polo,marketing,5
2,Sara,counting,2


## 2）使用 on= 显式地指定哪一列作为 key，当两个表中有多个同名列时使用

In [18]:
# These two tables share TWO column names ('employee' and 'workgroup'),
# which makes the choice of join key matter.
df3 = pd.DataFrame(
    {
        "employee": ["Po", "James", "Dains"],
        "workgroup": ["sail", "counting", "marketing"],
        "salary": [5000, 8000, 10000],
    }
)
df4 = pd.DataFrame(
    {
        "employee": ["Po", "Susan", "Dains"],
        "workgroup": ["marketing", "counting", "marketing"],
        "work_time": [1, 2, 5],
    }
)
display(df3, df4)

Unnamed: 0,employee,workgroup,salary
0,Po,sail,5000
1,James,counting,8000
2,Dains,marketing,10000


Unnamed: 0,employee,workgroup,work_time
0,Po,marketing,1
1,Susan,counting,2
2,Dains,marketing,5


In [20]:
# Without `on`, every shared column ('employee' AND 'workgroup') is used as
# the key, so only the ('Dains', 'marketing') row matches.
pd.merge(left=df3, right=df4)

Unnamed: 0,employee,workgroup,salary,work_time
0,Dains,marketing,10000,5


In [21]:
# Join on 'employee' only; the other shared column, 'workgroup', is kept
# from both sides and disambiguated with the default _x / _y suffixes.
pd.merge(df3, df4, on="employee")

Unnamed: 0,employee,workgroup_x,salary,workgroup_y,work_time
0,Po,sail,5000,marketing,1
1,Dains,marketing,10000,marketing,5


In [24]:
# Join on 'workgroup' only; the clashing 'employee' columns are renamed
# with custom suffixes instead of the default _x / _y.
pd.merge(
    df3,
    df4,
    on="workgroup",
    suffixes=("_left", "_right"),
)

Unnamed: 0,employee_left,workgroup,salary,employee_right,work_time
0,James,counting,8000,Susan,2
1,Dains,marketing,10000,Po,1
2,Dains,marketing,10000,Dains,5


## 3）使用 left_on 和 right_on 分别指定左右两边作为 key 的列，当左右两边 key 的列名不相同时使用

In [25]:
# The key column has a different name on each side:
# 'employname' in df5 versus 'employee' in df6.
df5 = pd.DataFrame(
    {
        "employname": ["Polo", "Sara", "Polo"],
        "workgroup": ["sail", "counting", "marketing"],
    }
)
df6 = pd.DataFrame(
    {
        "employee": ["Sara", "Denis", "Polo"],
        "worktime": [2, 3, 5],
    }
)
display(df5, df6)

Unnamed: 0,employname,workgroup
0,Polo,sail
1,Sara,counting
2,Polo,marketing


Unnamed: 0,employee,worktime
0,Sara,2
1,Denis,3
2,Polo,5


In [26]:
# Name the key column separately for each side; both key columns
# ('employname' and 'employee') are retained in the result.
pd.merge(
    df5,
    df6,
    left_on="employname",
    right_on="employee",
)

Unnamed: 0,employname,workgroup,employee,worktime
0,Polo,sail,Polo,5
1,Polo,marketing,Polo,5
2,Sara,counting,Sara,2
