In [1]:
import pandas as pd
import numpy as np

In [2]:
# Left-hand frame for the join demos: five keys ('a'..'e') plus two integer columns.
df1 = pd.DataFrame(
    dict(
        key=['a', 'b', 'c', 'd', 'e'],
        A1=range(5),
        B1=range(5, 10),
    )
)

In [3]:
# Show the left-hand frame (bare last expression -> rich DataFrame display).
df1

Unnamed: 0,key,A1,B1
0,a,0,5
1,b,1,6
2,c,2,7
3,d,3,8
4,e,4,9


In [4]:
# Right-hand frame for the join demos: only keys 'a'..'c', with its own two integer columns.
df2 = pd.DataFrame(
    dict(
        key=['a', 'b', 'c'],
        A2=range(3),
        B2=range(3, 6),
    )
)

In [5]:
# Show the right-hand frame (bare last expression -> rich DataFrame display).
df2

Unnamed: 0,key,A2,B2
0,a,0,3
1,b,1,4
2,c,2,5


In [6]:
# Both frames at once; the cell value is a tuple, so it is shown as plain-text reprs.
(df1, df2)

(  key  A1  B1
 0   a   0   5
 1   b   1   6
 2   c   2   7
 3   d   3   8
 4   e   4   9,
   key  A2  B2
 0   a   0   3
 1   b   1   4
 2   c   2   5)

In [7]:
# Confirm that both inputs are pandas DataFrames.
tuple(map(type, (df1, df2)))

(pandas.core.frame.DataFrame, pandas.core.frame.DataFrame)

In [8]:
# inner join: only keys present in both frames ('a', 'b', 'c') survive
pd.merge(
    left=df1,
    right=df2,
    on='key',
    how='inner',
)

Unnamed: 0,key,A1,B1,A2,B2
0,a,0,5,0,3
1,b,1,6,1,4
2,c,2,7,2,5


In [9]:
# inner join, then keep only the key and the columns that came from df1
(
    pd.merge(left=df1, right=df2, on='key', how='inner')
    .loc[:, ['key', 'A1', 'B1']]
)

Unnamed: 0,key,A1,B1
0,a,0,5
1,b,1,6
2,c,2,7


In [10]:
# inner join, then keep only the key and the columns that came from df2
(
    pd.merge(left=df1, right=df2, on='key', how='inner')
    .loc[:, ['key', 'A2', 'B2']]
)

Unnamed: 0,key,A2,B2
0,a,0,3
1,b,1,4
2,c,2,5


In [11]:
# left join: every df1 row is kept; keys missing from df2 ('d', 'e') get NaN in A2/B2
pd.merge(
    left=df1,
    right=df2,
    on='key',
    how='left',
)

Unnamed: 0,key,A1,B1,A2,B2
0,a,0,5,0.0,3.0
1,b,1,6,1.0,4.0
2,c,2,7,2.0,5.0
3,d,3,8,,
4,e,4,9,,


In [12]:
# NaN is a float, which is why A2/B2 in the left join above show up as
# floats (0.0, 3.0, ...) once missing values appear in those columns.
type(np.nan)

float

In [13]:
# right join: every df2 row is kept; all of its keys also exist in df1, so nothing is NaN
pd.merge(
    left=df1,
    right=df2,
    on='key',
    how='right',
)

Unnamed: 0,key,A1,B1,A2,B2
0,a,0,5,0,3
1,b,1,6,1,4
2,c,2,7,2,5


In [14]:
# right join, then pick one column from each side (A1 from df1, B2 from df2)
(
    pd.merge(left=df1, right=df2, on='key', how='right')
    .loc[:, ['A1', 'B2']]
)

Unnamed: 0,A1,B2
0,0,3
1,1,4
2,2,5


In [15]:
# full join/outer join: union of the keys from both frames.
# df2's keys are a subset of df1's, so the result here matches the left join.
pd.merge(
    left=df1,
    right=df2,
    on='key',
    how='outer',
)

Unnamed: 0,key,A1,B1,A2,B2
0,a,0,5,0.0,3.0
1,b,1,6,1.0,4.0
2,c,2,7,2.0,5.0
3,d,3,8,,
4,e,4,9,,


In [16]:
# Left table for the composite-key join, written row by row: (key1, key2, A, B).
lefttbl = pd.DataFrame(
    [
        ('a', 'a', 'A0', 'B0'),
        ('a', 'b', 'A1', 'B1'),
        ('b', 'a', 'A2', 'B2'),
        ('c', 'b', 'A3', 'B3'),
    ],
    columns=['key1', 'key2', 'A', 'B'],
)

In [17]:
# Right table for the composite-key join, written row by row: (key1, key2, C, D).
righttbl = pd.DataFrame(
    [
        ('a', 'a', 'C0', 'D0'),
        ('b', 'b', 'C1', 'D1'),
        ('b', 'a', 'C2', 'D2'),
        ('c', 'a', 'C3', 'D3'),
    ],
    columns=['key1', 'key2', 'C', 'D'],
)

In [18]:
# Show the left table of the composite-key example.
lefttbl

Unnamed: 0,key1,key2,A,B
0,a,a,A0,B0
1,a,b,A1,B1
2,b,a,A2,B2
3,c,b,A3,B3


In [19]:
# Show the right table of the composite-key example.
righttbl

Unnamed: 0,key1,key2,C,D
0,a,a,C0,D0
1,b,b,C1,D1
2,b,a,C2,D2
3,c,a,C3,D3


In [20]:
# inner join on a composite key: a row matches only when key1 AND key2 both agree
pd.merge(
    left=lefttbl,
    right=righttbl,
    on=['key1', 'key2'],
    how='inner',
)

Unnamed: 0,key1,key2,A,B,C,D
0,a,a,A0,B0,C0,D0
1,b,a,A2,B2,C2,D2
