In [1]:
import pandas as pd

In [2]:
# Left-hand frame for the indicator demo: a join column plus one payload column.
df1 = pd.DataFrame(
    data=dict(col1=[0, 1], col_left=['a', 'b']),
)
df1

Unnamed: 0,col1,col_left
0,0,a
1,1,b


In [3]:
# Right-hand frame for the indicator demo; note the repeated join value 2.
df2 = pd.DataFrame(
    data=dict(col1=[1, 2, 2], col_right=[2, 2, 2]),
)
df2

Unnamed: 0,col1,col_right
0,1,2
1,2,2
2,2,2


In [4]:
# Outer-merge on col1; indicator=True appends a `_merge` column that
# labels each row as left_only, right_only or both.
df1.merge(df2, how='outer', on='col1', indicator=True)

Unnamed: 0,col1,col_left,col_right,_merge
0,0,a,,left_only
1,1,b,2.0,both
2,2,,2.0,right_only
3,2,,2.0,right_only


In [5]:
# Same outer merge, but a string passed as `indicator` names the
# provenance column instead of using the default `_merge`.
df1.merge(df2, how='outer', on='col1', indicator='indicator_column')

Unnamed: 0,col1,col_left,col_right,indicator_column
0,0,a,,left_only
1,1,b,2.0,both
2,2,,2.0,right_only
3,2,,2.0,right_only


In [6]:
# Single-row frame used to show how merge keys affect result dtypes.
left = pd.DataFrame(
    data=dict(key=[1], v1=[10]),
)
left

Unnamed: 0,key,v1
0,1,10


In [7]:
# Two-row counterpart that shares both column names with `left`.
right = pd.DataFrame(
    data=dict(key=[1, 2], v1=[20, 30]),
)
right

Unnamed: 0,key,v1
0,1,20
1,2,30


In [8]:
# No `on` is given, so the merge keys are the columns both frames
# share (key and v1); every distinct (key, v1) pair survives.
left.merge(right, how='outer')

Unnamed: 0,key,v1
0,1,10
1,1,20
2,2,30


In [9]:
# Inspect the dtypes of the same outer merge: both columns stay integer.
left.merge(right, how='outer').dtypes

key    int64
v1     int64
dtype: object

In [10]:
# Merging on `key` alone keeps both v1 columns, disambiguated with the
# default _x / _y suffixes; the unmatched key gets a missing v1_x.
left.merge(right, on='key', how='outer')

Unnamed: 0,key,v1_x,v1_y
0,1,10.0,20
1,2,,30


In [11]:
# Inspect the dtypes again: v1_x is upcast to float64 because the
# outer merge introduced a missing value in that column.
left.merge(right, on='key', how='outer').dtypes

key       int64
v1_x    float64
v1_y      int64
dtype: object

In [14]:
from pandas.api.types import CategoricalDtype
import numpy as np

In [25]:
# Draw ten labels from {'foo', 'bar'}. A seeded Generator is used so the
# sample is identical on every Restart & Run All; re-run the notebook to
# refresh the displayed output after this change.
X = pd.Series(np.random.default_rng(0).choice(['foo', 'bar'], size=(10,)))
X

0    bar
1    foo
2    bar
3    bar
4    bar
5    foo
6    bar
7    foo
8    foo
9    bar
dtype: object

In [26]:
# Convert the string labels to a categorical with an explicit
# category order of foo, bar.
cat_dtype = CategoricalDtype(['foo', 'bar'])
X = X.astype(cat_dtype)
X

0    bar
1    foo
2    bar
3    bar
4    bar
5    foo
6    bar
7    foo
8    foo
9    bar
dtype: category
Categories (2, object): ['foo', 'bar']

In [29]:
# Pair the categorical X with a random Y label. Y comes from a seeded
# Generator so the frame (and the merge built on it) is reproducible on
# every Restart & Run All; re-run the notebook to refresh the displayed output.
left = pd.DataFrame({
    'X': X,
    'Y': np.random.default_rng(1).choice(['one', 'two', 'three'], size=(10,)),
})
left

Unnamed: 0,X,Y
0,bar,one
1,foo,one
2,bar,three
3,bar,two
4,bar,two
5,foo,three
6,bar,one
7,foo,one
8,foo,three
9,bar,one


In [30]:
# Show the per-column dtypes of `left`.
left.dtypes

X    category
Y      object
dtype: object

In [32]:
# Lookup frame whose X column is categorical with categories foo, bar,
# mapping each label to an integer Z.
right = pd.DataFrame({
    'X': pd.Series(['foo', 'bar']).astype(CategoricalDtype(['foo', 'bar'])),
    'Z': [1, 2],
})
right

Unnamed: 0,X,Z
0,foo,1
1,bar,2


In [33]:
# Show the per-column dtypes of `right`.
right.dtypes

X    category
Z       int64
dtype: object

In [34]:
# Outer-merge on the shared column X (no `on` given, so the common
# column is used as the key).
result = left.merge(right, how='outer')
result

Unnamed: 0,X,Y,Z
0,bar,one,2
1,bar,three,2
2,bar,two,2
3,bar,two,2
4,bar,one,2
5,bar,one,2
6,foo,one,1
7,foo,three,1
8,foo,one,1
9,foo,three,1


In [35]:
# Show the per-column dtypes of the merged `result`.
result.dtypes

X    category
Y      object
Z       int64
dtype: object

In [36]:
# Frame labelled K0..K2 on the index; the calling side of the index joins.
left = pd.DataFrame(
    data={'A': ['A0', 'A1', 'A2'], 'B': ['B0', 'B1', 'B2']},
    index=['K0', 'K1', 'K2'],
)
left

Unnamed: 0,A,B
K0,A0,B0
K1,A1,B1
K2,A2,B2


In [37]:
# Frame labelled K0, K2, K3: it overlaps `left` on K0/K2 only.
right = pd.DataFrame(
    data={'C': ['C0', 'C2', 'C3'], 'D': ['D0', 'D2', 'D3']},
    index=['K0', 'K2', 'K3'],
)
right

Unnamed: 0,C,D
K0,C0,D0
K2,C2,D2
K3,C3,D3


In [38]:
# Index-on-index join; how='left' is join()'s default, spelled out here,
# so every row of `left` is kept.
result = left.join(other=right, how='left')
result

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K1,A1,B1,,
K2,A2,B2,C2,D2


In [39]:
# Outer index join: keep the union of both frames' index labels.
result = left.join(other=right, how='outer')
result

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K1,A1,B1,,
K2,A2,B2,C2,D2
K3,,,C3,D3


In [40]:
# Inner index join: keep only labels present in both frames.
result = left.join(other=right, how='inner')
result

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K2,A2,B2,C2,D2


In [41]:
# The merge() spelling of the outer index join: both sides are keyed
# on their index.
result = left.merge(
    right,
    how='outer',
    left_index=True,
    right_index=True,
)
result

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K1,A1,B1,,
K2,A2,B2,C2,D2
K3,,,C3,D3


In [42]:
# The merge() spelling of the inner index join: both sides are keyed
# on their index.
result = left.merge(
    right,
    how='inner',
    left_index=True,
    right_index=True,
)
result

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K2,A2,B2,C2,D2


In [43]:
# Frame that carries its join key as an ordinary column; the values
# K0 and K1 each appear twice.
left = pd.DataFrame(
    data={
        'A': ['A0', 'A1', 'A2', 'A3'],
        'B': ['B0', 'B1', 'B2', 'B3'],
        'key': ['K0', 'K1', 'K0', 'K1'],
    },
)
left

Unnamed: 0,A,B,key
0,A0,B0,K0
1,A1,B1,K1
2,A2,B2,K0
3,A3,B3,K1


In [44]:
# Lookup frame whose index labels (K0, K1) match the values in left['key'].
right = pd.DataFrame(
    data={'C': ['C0', 'C1'], 'D': ['D0', 'D1']},
    index=['K0', 'K1'],
)
right

Unnamed: 0,C,D
K0,C0,D0
K1,C1,D1


In [46]:
# Column-to-index join: match left['key'] against right's index.
result = left.join(other=right, on='key')
result

Unnamed: 0,A,B,key,C,D
0,A0,B0,K0,C0,D0
1,A1,B1,K1,C1,D1
2,A2,B2,K0,C0,D0
3,A3,B3,K1,C1,D1


In [47]:
# The merge() spelling of the column-to-index join: left is keyed on
# its `key` column, right on its index.
result = left.merge(
    right,
    how='left',
    left_on='key',
    right_index=True,
    sort=False,
)
result

Unnamed: 0,A,B,key,C,D
0,A0,B0,K0,C0,D0
1,A1,B1,K1,C1,D1
2,A2,B2,K0,C0,D0
3,A3,B3,K1,C1,D1


In [48]:
# Frame with a single-level index named 'key', matching the name of the
# first level of the MultiIndex it is joined against.
left = pd.DataFrame(
    data={'A': ['A0', 'A1', 'A2'], 'B': ['B0', 'B1', 'B2']},
    index=pd.Index(['K0', 'K1', 'K2'], name='key'),
)
left


Unnamed: 0_level_0,A,B
key,Unnamed: 1_level_1,Unnamed: 2_level_1
K0,A0,B0
K1,A1,B1
K2,A2,B2


In [49]:
# Two-level index (key, Y) built column-wise; K2 appears under two Y values.
index = pd.MultiIndex.from_arrays(
    [['K0', 'K1', 'K2', 'K2'],
     ['Y0', 'Y1', 'Y2', 'Y3']],
    names=['key', 'Y'],
)

index

MultiIndex([('K0', 'Y0'),
            ('K1', 'Y1'),
            ('K2', 'Y2'),
            ('K2', 'Y3')],
           names=['key', 'Y'])

In [50]:
# Frame carried on the (key, Y) MultiIndex defined in `index`.
right = pd.DataFrame(
    data={'C': ['C0', 'C1', 'C2', 'C3'], 'D': ['D0', 'D1', 'D2', 'D3']},
    index=index,
)
right

Unnamed: 0_level_0,Unnamed: 1_level_0,C,D
key,Y,Unnamed: 2_level_1,Unnamed: 3_level_1
K0,Y0,C0,D0
K1,Y1,C1,D1
K2,Y2,C2,D2
K2,Y3,C3,D3


In [52]:
# Re-display `left` (indexed by 'key') for reference.
left

Unnamed: 0_level_0,A,B
key,Unnamed: 1_level_1,Unnamed: 2_level_1
K0,A0,B0
K1,A1,B1
K2,A2,B2


In [51]:
# Inner join of the single-index `left` with the MultiIndexed `right`;
# the frames align on the shared index level name 'key'.
result = left.join(other=right, how='inner')
result

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C,D
key,Y,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
K0,Y0,A0,B0,C0,D0
K1,Y1,A1,B1,C1,D1
K2,Y2,A2,B2,C2,D2
K2,Y3,A2,B2,C3,D3
