In [3]:
import numpy as np
import pandas as pd
from pandas.api.types import CategoricalDtype

In [8]:
# Six-row demo frame: random floats in 'A', letter labels in 'B'.
# 'B' is cast to a categorical whose categories are declared in the order c, a, b.
df = pd.DataFrame(
    {'A': np.random.randn(6), 'B': list('aabbca')}
).astype({'B': CategoricalDtype(list('cab'))})
df

Unnamed: 0,A,B
0,0.182365,a
1,0.31759,a
2,0.788809,b
3,-0.560701,b
4,1.161556,c
5,0.26543,a


In [13]:
# The categories keep the order passed to CategoricalDtype ('c', 'a', 'b'), not alphabetical order.
df.B.cat.categories

Index(['c', 'a', 'b'], dtype='object')

In [17]:
# Use the categorical column 'B' as the index; the resulting index is a CategoricalIndex.
df2 = df.set_index('B')

In [19]:
df2.index

CategoricalIndex(['a', 'a', 'b', 'b', 'c', 'a'], categories=['c', 'a', 'b'], ordered=False, dtype='category', name='B')

In [22]:
# Three-row frame whose 'B' column is built directly with the 'category' dtype.
df3 = pd.DataFrame({
    'A': np.arange(3),
    'B': pd.Series(list('abc'), dtype='category'),
})

In [24]:
# Rebind df3 to a copy indexed by its categorical column 'B'.
# NOTE(review): this cell is not re-runnable on its own — after the first run 'B' is no longer a column.
df3 = df3.set_index('B')

In [28]:
# Reindexing with a plain list yields a plain object Index, even though df3's own index is categorical.
df3.reindex(['a', 'e']).index

Index(['a', 'e'], dtype='object', name='B')

In [30]:
# Reindexing with a Categorical yields a CategoricalIndex that carries the categories of the Categorical passed in.
df3.reindex(pd.Categorical(['a', 'e'], categories=list('abe'))).index

CategoricalIndex(['a', 'e'], categories=['a', 'b', 'e'], ordered=False, dtype='category', name='B')

In [32]:
# Three random frames of 3, 5 and 4 rows, stacked vertically.
# Each piece keeps its own 0-based row labels, so labels repeat in `g`.
# g2 draws from rand (values in [0, 1)); g1 and g3 draw from randn.
g1, g2, g3 = (
    pd.DataFrame(np.random.randn(3, 2)),
    pd.DataFrame(np.random.rand(5, 2)),
    pd.DataFrame(np.random.randn(4, 2)),
)
g = pd.concat([g1, g2, g3])
g

Unnamed: 0,0,1
0,-0.18935,-1.31548
1,-0.994848,-2.306822
2,-1.026077,1.031546
0,0.949769,0.079199
1,0.888296,0.198168
2,0.786889,0.082815
3,0.747009,0.703058
4,0.900227,0.635306
0,-0.063779,0.301822
1,0.069889,0.060957


In [36]:
# Two frames indexed by a categorical 'B' whose category sets differ:
# df4 uses categories (a, b), df5 uses categories (b, c).
df4 = (
    pd.DataFrame({'A': np.arange(2), 'B': list('ba')})
    .astype({'B': CategoricalDtype(list('ab'))})
    .set_index('B')
)
df5 = (
    pd.DataFrame({'A': np.arange(2), 'B': list('bc')})
    .astype({'B': CategoricalDtype(list('bc'))})
    .set_index('B')
)

In [42]:
df4.index

CategoricalIndex(['b', 'a'], categories=['a', 'b'], ordered=False, dtype='category', name='B')

In [43]:
df5.index

CategoricalIndex(['b', 'c'], categories=['b', 'c'], ordered=False, dtype='category', name='B')

In [41]:
# Stack df4 and df5, then drop duplicated rows.
# drop_duplicates() compares column values only and ignores the index, so df5's rows
# (whose 'A' values 0 and 1 already appear in df4) are removed and only df4's labels remain.
df6 = pd.concat([df4, df5]).drop_duplicates()
df6

Unnamed: 0_level_0,A
B,Unnamed: 1_level_1
b,0
a,1


In [45]:
# Frame indexed by plain string labels 'x' and 'y' (an object Index, not a categorical one).
df7 = pd.DataFrame({'A': np.arange(2), 'B': list('xy')}).set_index('B')

In [46]:
df7.index

Index(['x', 'y'], dtype='object', name='B')

In [49]:
# Concatenating a CategoricalIndex frame (df5) with an object-Index frame (df7) produces a plain object Index.
# NOTE(review): this rebinds `df`, which an earlier cell used for a different frame.
df = pd.concat([df5, df7])
df.index

Index(['b', 'c', 'x', 'y'], dtype='object', name='B')

In [52]:
# Two random frames stacked vertically: the first is labelled 0, 250, ..., 1000 and
# the second 4 * 250.1, ..., 9 * 250.1, so the combined index holds floats.
dfir = pd.concat(
    [
        pd.DataFrame(np.random.randn(5, 2), index=250 * np.arange(5), columns=['A', 'B']),
        pd.DataFrame(np.random.randn(6, 2), index=250.1 * np.arange(4, 10), columns=['A', 'B']),
    ]
)

In [57]:
dfir.iloc[0:5]

Unnamed: 0,A,B
0.0,-1.593315,-0.497128
250.0,-0.089892,0.941772
500.0,-0.456843,-2.031756
750.0,-0.558829,0.116213
1000.0,-1.112782,0.049663


In [61]:
# closed='neither' makes every unit interval open at both ends.
pd.interval_range(start=0, end=5, closed='neither')

IntervalIndex([(0, 1), (1, 2), (2, 3), (3, 4), (4, 5)], dtype='interval[int64, neither]')

In [66]:
# Positional start=0 and end=6; with no `closed` argument the intervals come out right-closed.
pd.interval_range(0, 6)

IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5], (5, 6]], dtype='interval[int64, right]')

In [50]:
# Three frames with identical cell values ('A0'..'D3') that differ only in their
# row labels: 0-3, 4-7 and 8-11 respectively.
df1, df2, df3 = (
    pd.DataFrame(
        {col: [f'{col}{row}' for row in range(4)] for col in 'ABCD'},
        index=np.arange(first, first + 4),
    )
    for first in (0, 4, 8)
)

In [19]:
# Frame with columns B, D, F on row labels 2, 3, 6, 7; each cell is "<column><label>".
df4 = pd.DataFrame(
    {col: [f'{col}{label}' for label in (2, 3, 6, 7)] for col in 'BDF'},
    index=[2, 3, 6, 7],
)

In [26]:
# Align df4 to df1's row labels before the column-wise concat, so the result has exactly df1's rows;
# labels that df4 lacks (0 and 1) are filled with NaN.
pd.concat([df1, df4.reindex(df1.index)], axis=1)

Unnamed: 0,A,B,C,D,B.1,D.1,F
0,A0,B0,C0,D0,,,
1,A1,B1,C1,D1,,,
2,A2,B2,C2,D2,B2,D2,F2
3,A3,B3,C3,D3,B3,D3,F3


In [40]:
# A named Series joins a column-wise concat as one extra column titled after its name ('X').
s1 = pd.Series(["X0", "X1", "X2", "X3"], name="X")
pd.concat([df1, s1], axis=1)

Unnamed: 0,A,B,C,D,X
0,A0,B0,C0,D0,X0
1,A1,B1,C1,D1,X1
2,A2,B2,C2,D2,X2
3,A3,B3,C3,D3,X3


In [43]:
# ignore_index=True on a column-wise concat discards the column names and numbers them 0..4;
# the rename then maps each position to 'col1'..'col5'.
pd.concat([df1, s1], axis=1, ignore_index=True).rename(
    columns={position: f'col{position + 1}' for position in range(5)}
)

Unnamed: 0,col1,col2,col3,col4,col5
0,A0,B0,C0,D0,X0
1,A1,B1,C1,D1,X1
2,A2,B2,C2,D2,X2
3,A3,B3,C3,D3,X3


In [51]:
# Concatenate a mapping of frames; the keys x, y, z become the outer level of the row MultiIndex.
pieces = dict(x=df1, y=df2, z=df3)
result = pd.concat(pieces, keys=list(pieces))

In [55]:
result.index.levels

FrozenList([['x', 'y', 'z'], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]])

In [56]:
# `levels` spells out the outer level's values explicitly (here including 'w', which no piece uses),
# and `names` labels that outer level 'group_keys'.
pd.concat(pieces, keys=['x', 'y', 'z'], levels=[['z', 'y', 'x', 'w']], names=['group_keys'])

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C,D
group_keys,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
x,0,A0,B0,C0,D0
x,1,A1,B1,C1,D1
x,2,A2,B2,C2,D2
x,3,A3,B3,C3,D3
y,4,A0,B0,C0,D0
y,5,A1,B1,C1,D1
y,6,A2,B2,C2,D2
y,7,A3,B3,C3,D3
z,8,A0,B0,C0,D0
z,9,A1,B1,C1,D1


In [73]:
# Relabel s1's positions 0..3 as 'A'..'D' so its index lines up with df1's column names.
s1 = s1.rename(index=dict(enumerate('ABCD')))

In [74]:
s1

A    X0
B    X1
C    X2
D    X3
Name: X, dtype: object

In [76]:
# Append s1 as one extra row. DataFrame.append was deprecated in pandas 1.4 and removed in 2.0,
# so the Series is turned into a one-row frame (its index labels A..D become the columns)
# and stacked with pd.concat; ignore_index=True renumbers the rows 0..4.
pd.concat([df1, s1.to_frame().T], ignore_index=True)

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
4,X0,X1,X2,X3


In [78]:
# Two row records with partly different keys; keys missing from a record show up as NaN.
dicts = [
    dict(A=1, B=2, C=3, D=4),
    dict(A=5, B=6, C=7, Y=8),
]
pd.DataFrame(dicts)

Unnamed: 0,A,B,C,D,Y
0,1,2,3,4.0,
1,5,6,7,,8.0


In [81]:
# Append the row records to df1. DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# the records are converted to a frame and stacked with pd.concat instead.
# Without ignore_index the new rows keep their own labels 0 and 1.
pd.concat([df1, pd.DataFrame(dicts)])

Unnamed: 0,A,B,C,D,Y
0,A0,B0,C0,D0,
1,A1,B1,C1,D1,
2,A2,B2,C2,D2,
3,A3,B3,C3,D3,
0,1,2,3,4.0,
1,5,6,7,,8.0


In [83]:
# Same append, with the rows renumbered 0..5 and the columns sorted.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and removed in 2.0.
pd.concat([df1, pd.DataFrame(dicts)], ignore_index=True, sort=True)

Unnamed: 0,A,B,C,D,Y
0,A0,B0,C0,D0,
1,A1,B1,C1,D1,
2,A2,B2,C2,D2,
3,A3,B3,C3,D3,
4,1,2,3,4.0,
5,5,6,7,,8.0


In [89]:
# Two frames sharing the same 'key' values K0..K3; `right` carries letter row labels a..d.
left = pd.DataFrame({
    'key': [f'K{n}' for n in range(4)],
    'A': [f'A{n}' for n in range(4)],
    'B': [f'B{n}' for n in range(4)],
})
right = pd.DataFrame(
    {
        'key': [f'K{n}' for n in range(4)],
        'C': [f'C{n}' for n in range(4)],
        'D': [f'D{n}' for n in range(4)],
    },
    index=list('abcd'),
)

In [90]:
# Merge on the shared 'key' column; the result gets fresh row labels 0..3 (right's a..d labels are not kept).
pd.merge(left, right, on='key')

Unnamed: 0,key,A,B,C,D
0,K0,A0,B0,C0,D0
1,K1,A1,B1,C1,D1
2,K2,A2,B2,C2,D2
3,K3,A3,B3,C3,D3


In [95]:
# Frames keyed by the pair (key1, key2). `right` repeats the pair (K1, K0), and some
# pairs occur in only one of the two frames.
left = pd.DataFrame(dict(
    key1=['K0', 'K0', 'K1', 'K2'],
    key2=['K0', 'K1', 'K0', 'K1'],
    A=['A0', 'A1', 'A2', 'A3'],
    B=['B0', 'B1', 'B2', 'B3'],
))
right = pd.DataFrame(dict(
    key1=['K0', 'K1', 'K1', 'K2'],
    key2=['K0', 'K0', 'K0', 'K0'],
    C=['C0', 'C1', 'C2', 'C3'],
    D=['D0', 'D1', 'D2', 'D3'],
))

In [96]:
# Merge on both key columns; only (key1, key2) pairs present in both frames survive,
# and the repeated (K1, K0) pair in `right` yields two result rows.
pd.merge(left, right, on=['key1', 'key2'])

Unnamed: 0,key1,key2,A,B,C,D
0,K0,K0,A0,B0,C0,D0
1,K1,K0,A2,B2,C1,D1
2,K1,K0,A2,B2,C2,D2


In [97]:
# Fetch the pandas merging guide over the network and parse its HTML tables;
# later cells index the result as table[0] and table[1].
table = pd.read_html('https://pandas.pydata.org/docs/user_guide/merging.html')

In [98]:
table[0]

Unnamed: 0,Merge method,SQL Join Name,Description
0,left,LEFT OUTER JOIN,Use keys from left frame only
1,right,RIGHT OUTER JOIN,Use keys from right frame only
2,outer,FULL OUTER JOIN,Use union of keys from both frames
3,inner,INNER JOIN,Use intersection of keys from both frames
4,cross,CROSS JOIN,Create the cartesian product of rows of both f...


In [100]:
# Print the first scraped table as HTML markup, leaving out the row labels.
print(table[0].to_html(index=False))

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th>Merge method</th>
      <th>SQL Join Name</th>
      <th>Description</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>left</td>
      <td>LEFT OUTER JOIN</td>
      <td>Use keys from left frame only</td>
    </tr>
    <tr>
      <td>right</td>
      <td>RIGHT OUTER JOIN</td>
      <td>Use keys from right frame only</td>
    </tr>
    <tr>
      <td>outer</td>
      <td>FULL OUTER JOIN</td>
      <td>Use union of keys from both frames</td>
    </tr>
    <tr>
      <td>inner</td>
      <td>INNER JOIN</td>
      <td>Use intersection of keys from both frames</td>
    </tr>
    <tr>
      <td>cross</td>
      <td>CROSS JOIN</td>
      <td>Create the cartesian product of rows of both frames</td>
    </tr>
  </tbody>
</table>


In [101]:
# Left join: every row of `left` is kept; key pairs with no match in `right` get NaN in C and D.
pd.merge(left, right, how='left', on=['key1', 'key2'])

Unnamed: 0,key1,key2,A,B,C,D
0,K0,K0,A0,B0,C0,D0
1,K0,K1,A1,B1,,
2,K1,K0,A2,B2,C1,D1
3,K1,K0,A2,B2,C2,D2
4,K2,K1,A3,B3,,


In [102]:
# Inner join spelled out explicitly; the result is the same as the merge without `how` shown earlier.
pd.merge(left, right, how='inner', on =['key1', 'key2'])

Unnamed: 0,key1,key2,A,B,C,D
0,K0,K0,A0,B0,C0,D0
1,K1,K0,A2,B2,C1,D1
2,K1,K0,A2,B2,C2,D2


In [104]:
# Cross join: every row of `left` paired with every row of `right`; no `on` keys are given,
# and the column names shared by both frames receive the _x / _y suffixes.
pd.merge(left, right, how='cross')

Unnamed: 0,key1_x,key2_x,A,B,key1_y,key2_y,C,D
0,K0,K0,A0,B0,K0,K0,C0,D0
1,K0,K0,A0,B0,K1,K0,C1,D1
2,K0,K0,A0,B0,K1,K0,C2,D2
3,K0,K0,A0,B0,K2,K0,C3,D3
4,K0,K1,A1,B1,K0,K0,C0,D0
5,K0,K1,A1,B1,K1,K0,C1,D1
6,K0,K1,A1,B1,K1,K0,C2,D2
7,K0,K1,A1,B1,K2,K0,C3,D3
8,K1,K0,A2,B2,K0,K0,C0,D0
9,K1,K0,A2,B2,K1,K0,C1,D1


In [110]:
# A frame with 'Let'/'Num' columns and a Series whose two index levels carry the same names,
# set up so the two can be merged on those names.
df = pd.DataFrame({'Let': list('ABC'), 'Num': [1, 2, 3]})
ser = pd.Series(
    list('abcdef'),
    index=pd.MultiIndex.from_arrays(
        [list('ABC') * 2, list(range(1, 7))],
        names=['Let', 'Num'],
    ),
)

In [111]:
df

Unnamed: 0,Let,Num
0,A,1
1,B,2
2,C,3


In [112]:
ser

Let  Num
A    1      a
B    2      b
C    3      c
A    4      d
B    5      e
C    6      f
dtype: object

In [113]:
# reset_index() turns the 'Let'/'Num' index levels into ordinary columns so the Series can be merged on them;
# the unnamed Series values end up in a column labelled 0.
pd.merge(df, ser.reset_index(), on=['Let', 'Num'])

Unnamed: 0,Let,Num,0
0,A,1,a
1,B,2,b
2,C,3,c


In [125]:
# `left` has unique 'B' keys (1, 2); `right` repeats the key 2 three times.
left = pd.DataFrame(dict(A=[1, 2], B=[1, 2]))
right = pd.DataFrame(dict(A=[4, 5, 6], B=[2] * 3))

In [118]:
# Merge on 'B'; the overlapping 'A' columns are disambiguated as A_x / A_y.
# NOTE(review): the saved output pairs A_x=1 with B=2, which the `left` defined in the preceding
# cell (B=[1, 2]) cannot produce; the execution counts (118 before 125) suggest the output comes
# from an earlier definition of `left`. Re-run top to bottom to refresh it.
pd.merge(left, right, on='B')

Unnamed: 0,A_x,B,A_y
0,1,2,4
1,1,2,5
2,1,2,6
3,2,2,4
4,2,2,5
5,2,2,6


In [129]:
# validate='1:m' asks merge to verify that the key is unique on the left side;
# left's 'B' values (1, 2) are unique, so the merge goes through.
pd.merge(left, right, on='B', validate='1:m')

Unnamed: 0,A_x,B,A_y
0,2,2,4
1,2,2,5
2,2,2,6


In [130]:
# validate='m:m' is accepted here and produces the same rows as the '1:m' merge above.
pd.merge(left, right, on='B', validate='m:m')

Unnamed: 0,A_x,B,A_y
0,2,2,4
1,2,2,5
2,2,2,6


In [131]:
table[1]

Unnamed: 0,Observation Origin,_merge value
0,Merge key only in 'left' frame,left_only
1,Merge key only in 'right' frame,right_only
2,Merge key in both frames,both


In [133]:
# Print the second scraped table (the _merge indicator values) as HTML markup, leaving out the row labels.
print(table[1].to_html(index=False))

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th>Observation Origin</th>
      <th>_merge value</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>Merge key only in 'left' frame</td>
      <td>left_only</td>
    </tr>
    <tr>
      <td>Merge key only in 'right' frame</td>
      <td>right_only</td>
    </tr>
    <tr>
      <td>Merge key in both frames</td>
      <td>both</td>
    </tr>
  </tbody>
</table>


In [134]:
# Small frames for the merge-indicator demo: col1 == 0 occurs only in df1, col1 == 2 only in df2,
# and col1 == 1 in both.
# NOTE(review): 'col_lef' looks like a typo for 'col_left' — confirm before renaming, since the
# saved output uses the current spelling.
df1 = pd.DataFrame({'col1': [0, 1], 'col_lef': list('ab')})
df2 = pd.DataFrame({'col1': [1, 2, 2], 'col_right': [2] * 3})

In [136]:
# Outer join with indicator=True: the extra '_merge' column records whether each key came from
# the left frame only, the right frame only, or both.
pd.merge(df1, df2, on='col1', how='outer', indicator=True)

Unnamed: 0,col1,col_lef,col_right,_merge
0,0,a,,left_only
1,1,b,2.0,both
2,2,,2.0,right_only
3,2,,2.0,right_only


In [138]:
from pandas.api.types import CategoricalDtype

In [145]:
# Ten random rows: 'X' is a categorical over (foo, bar), 'Y' is a plain string column.
left = pd.DataFrame({
    'X': pd.Series(
        np.random.choice(['foo', 'bar'], size=(10,)),
        dtype=CategoricalDtype(['foo', 'bar']),
    ),
    'Y': np.random.choice(['one', 'two', 'three'], size=(10,)),
})

In [148]:
left.dtypes

X    category
Y      object
dtype: object

In [149]:
# Lookup frame with the same categorical dtype for 'X' as `left`, mapping foo -> 1 and bar -> 2.
right = pd.DataFrame({
    'X': pd.Series(['foo', 'bar']).astype(CategoricalDtype(['foo', 'bar'])),
    'Z': [1, 2],
})

In [151]:
right.dtypes

X    category
Z       int64
dtype: object

In [153]:
# Merge on the categorical key 'X', then drop repeated (X, Y, Z) rows;
# the surviving rows keep the row labels they had in the merged frame.
pd.merge(left, right, on='X').drop_duplicates()

Unnamed: 0,X,Y,Z
0,foo,two,1
4,foo,three,1
5,foo,one,1
7,bar,three,2
9,bar,one,2


In [154]:
# Index-keyed frames: K0 and K2 appear in both, K1 only in `left`, K3 only in `right`.
left = pd.DataFrame(
    {'A': ['A0', 'A1', 'A2'], 'B': ['B0', 'B1', 'B2']},
    index=['K0', 'K1', 'K2'],
)
right = pd.DataFrame(
    {'C': ['C0', 'C2', 'C3'], 'D': ['D0', 'D2', 'D3']},
    index=['K0', 'K2', 'K3'],
)

In [155]:
# Index-aligned join keeping left's rows: K1 has no match in `right` (NaN in C and D), and K3 is left out.
left.join(right)

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K1,A1,B1,,
K2,A2,B2,C2,D2


In [156]:
# Outer join: the union of both indexes, with NaN wherever one side has no row for the label.
left.join(right, how='outer')

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K1,A1,B1,,
K2,A2,B2,C2,D2
K3,,,C3,D3


In [158]:
# `left` holds its two keys as ordinary columns; `right` holds the same kind of key pair
# as a two-level (unnamed) MultiIndex.
left = pd.DataFrame(dict(
    A=['A0', 'A1', 'A2', 'A3'],
    B=['B0', 'B1', 'B2', 'B3'],
    key1=['K0', 'K0', 'K1', 'K2'],
    key2=['K0', 'K1', 'K0', 'K1'],
))
index = pd.MultiIndex.from_tuples(
    [('K0', 'K0'), ('K1', 'K0'), ('K2', 'K0'), ('K2', 'K1')]
)
right = pd.DataFrame(
    dict(C=['C0', 'C1', 'C2', 'C3'], D=['D0', 'D1', 'D2', 'D3']),
    index=index,
)

In [160]:
# Match left's 'key1'/'key2' columns against right's two-level index;
# left rows without a matching pair get NaN in C and D.
left.join(right, on=['key1', 'key2'])

Unnamed: 0,A,B,key1,key2,C,D
0,A0,B0,K0,K0,C0,D0
1,A1,B1,K0,K1,,
2,A2,B2,K1,K0,C1,D1
3,A3,B3,K2,K1,C3,D3


In [161]:
# Same join restricted to matching pairs; the surviving rows keep left's original row labels (0, 2, 3).
left.join(right, on=['key1', 'key2'], how='inner')

Unnamed: 0,A,B,key1,key2,C,D
0,A0,B0,K0,K0,C0,D0
2,A2,B2,K1,K0,C1,D1
3,A3,B3,K2,K1,C3,D3


In [164]:
# The same columns-to-index left join written with pd.merge: left's key columns are matched
# against right's index.
pd.merge(left, right, left_on=['key1', 'key2'], right_index=True, how='left', sort=False)

Unnamed: 0,A,B,key1,key2,C,D
0,A0,B0,K0,K0,C0,D0
1,A1,B1,K0,K1,,
2,A2,B2,K1,K0,C1,D1
3,A3,B3,K2,K1,C3,D3


In [165]:
# Frame whose single-level index is named 'key'.
left = pd.DataFrame(
    {'A': ['A0', 'A1', 'A2'], 'B': ['B0', 'B1', 'B2']},
    index=pd.Index(['K0', 'K1', 'K2'], name='key'),
)

In [166]:
# `right` is indexed by the level pair ('key', 'Y'); the 'key' level shares its name with left's index.
index = pd.MultiIndex.from_tuples(
    [('K0', 'Y0'), ('K1', 'Y1'), ('K2', 'Y2'), ('K2', 'Y3')],
    names=['key', 'Y'],
)
right = pd.DataFrame(
    {'C': ['C0', 'C1', 'C2', 'C3'], 'D': ['D0', 'D1', 'D2', 'D3']},
    index=index,
)

In [167]:
# Long-hand version: move both indexes into columns, merge on 'key', then rebuild the ('key', 'Y') index.
pd.merge(left.reset_index(), right.reset_index(), on='key', how='inner').set_index(['key', 'Y'])

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C,D
key,Y,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
K0,Y0,A0,B0,C0,D0
K1,Y1,A1,B1,C1,D1
K2,Y2,A2,B2,C2,D2
K2,Y3,A2,B2,C3,D3


In [168]:
# Short-hand version: joining on the shared index-level name 'key' gives the same result as the
# reset_index / merge / set_index round trip.
left.join(right, how='inner')

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C,D
key,Y,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
K0,Y0,A0,B0,C0,D0
K1,Y1,A1,B1,C1,D1
K2,Y2,A2,B2,C2,D2
K2,Y3,A2,B2,C3,D3


In [171]:
# `left` is indexed by every (abc, xy, num) combination; `right` by every (abc, xy) pair,
# with v2 counting up in hundreds from 100 to 600.
leftindex = pd.MultiIndex.from_product(
    [list('abc'), list('xy'), [1, 2]],
    names=['abc', 'xy', 'num'],
)
left = pd.DataFrame({'v1': np.arange(12)}, index=leftindex)
rightindex = pd.MultiIndex.from_product(
    [list('abc'), list('xy')],
    names=['abc', 'xy'],
)
right = pd.DataFrame({'v2': list(range(100, 700, 100))}, index=rightindex)

In [177]:
# Join on the two index levels both frames share ('abc', 'xy'); each row of `right` is repeated
# for every 'num' value in `left`.
left.join(right, on=['abc', 'xy'], how='inner')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,v1,v2
abc,xy,num,Unnamed: 3_level_1,Unnamed: 4_level_1
a,x,1,0,100
a,x,2,1,100
a,y,1,2,200
a,y,2,3,200
b,x,1,4,300
b,x,2,5,300
b,y,1,6,400
b,y,2,7,400
c,x,1,8,500
c,x,2,9,500


In [178]:
# Two frames whose MultiIndexes share only the 'key' level: `left` is indexed by ('key', 'X'),
# `right` by ('key', 'Y').
# The level names are passed through the documented `names=` keyword on both indexes; the left
# index previously used `name=`, which is not part of the documented from_tuples signature.
leftindex = pd.MultiIndex.from_tuples(
    [('K0', 'X0'), ('K0', 'X1'), ('K1', 'X2')],
    names=['key', 'X'],
)
left = pd.DataFrame({'A': ['A0', 'A1', 'A2'], 'B': ['B0', 'B1', 'B2']}, index=leftindex)
rightindex = pd.MultiIndex.from_tuples(
    [("K0", "Y0"), ("K1", "Y1"), ("K2", "Y2"), ("K2", "Y3")],
    names=["key", "Y"],
)
right = pd.DataFrame({'C': ['C0', 'C1', 'C2', 'C3'], 'D': ['D0', 'D1', 'D2', 'D3']}, index=rightindex)


In [181]:
# Outer merge on the shared 'key' after moving both indexes into columns, then rebuild a
# three-level index; keys found only in `right` (K2) have no 'X' value and NaN in A and B.
pd.merge(left.reset_index(), right.reset_index(), on='key', how='outer').set_index(['key', 'X', 'Y'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,A,B,C,D
key,X,Y,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
K0,X0,Y0,A0,B0,C0,D0
K0,X1,Y0,A1,B1,C0,D0
K1,X2,Y1,A2,B2,C1,D1
K2,,Y2,,,C2,D2
K2,,Y3,,,C3,D3


In [182]:
# Both frames carry 'key1' as their (named) index and 'key2' as an ordinary column, so the
# merge below mixes an index-level name and a column name in `on`.
left_index = pd.Index(['K0', 'K0', 'K1', 'K2'], name='key1')
right_index = pd.Index(['K0', 'K1', 'K2', 'K2'], name='key1')
left = pd.DataFrame(
    dict(
        A=['A0', 'A1', 'A2', 'A3'],
        B=['B0', 'B1', 'B2', 'B3'],
        key2=['K0', 'K1', 'K2', 'K3'],
    ),
    index=left_index,
)
right = pd.DataFrame(
    dict(
        C=['C0', 'C1', 'C2', 'C3'],
        D=['D0', 'D1', 'D2', 'D3'],
        key2=['K0', 'K0', 'K0', 'K1'],
    ),
    index=right_index,
)
left.merge(right, on=['key1', 'key2'])

Unnamed: 0_level_0,A,B,key2,C,D
key1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
K0,A0,B0,K0,C0,D0


In [184]:
# Repeats the merge from the end of the previous cell: 'key1' is an index level and 'key2' a column,
# and only the pair (K0, K0) occurs in both frames.
left.merge(right, on=['key1', 'key2'])

Unnamed: 0_level_0,A,B,key2,C,D
key1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
K0,A0,B0,K0,C0,D0


In [185]:
# Both frames have a key column 'k' and an overlapping value column 'v'.
left = pd.DataFrame(dict(k=['K0', 'K1', 'K2'], v=[1, 2, 3]))
right = pd.DataFrame(dict(k=['K0', 'K0', 'K3'], v=[4, 5, 6]))

In [190]:
# Merge on 'k'; the overlapping 'v' columns are renamed with the custom suffixes to v_l / v_r.
pd.merge(left, right, on='k', suffixes=('_l', '_r'))

Unnamed: 0,k,v_l,v_r
0,K0,1,4
1,K0,1,5


In [192]:
# join() aligns on the row labels (0, 1, 2), not on 'k'; both 'k' and 'v' overlap, so each side
# gets its suffix.
left.join(right, lsuffix='_l', rsuffix='_r')

Unnamed: 0,k_l,v_l,k_r,v_r
0,K0,1,K0,4
1,K1,2,K0,5
2,K2,3,K3,6


In [193]:
# A third frame keyed by its index, with the label K1 repeated.
right2 = pd.DataFrame(
    {'v': [7, 8, 9]},
    index=['K1', 'K1', 'K2'],
)

In [208]:
right.set_index('k')

Unnamed: 0_level_0,v
k,Unnamed: 1_level_1
K0,4
K0,5
K3,6


In [209]:
left.set_index('k')

Unnamed: 0_level_0,v
k,Unnamed: 1_level_1
K0,1
K1,2
K2,3


In [210]:
right2

Unnamed: 0,v
K1,7
K1,8
K2,9


In [207]:
# Join several index-keyed frames in one call; the three overlapping 'v' columns come out as
# v_x, v_y and v.
left.set_index('k').join([right.set_index('k'), right2])

Unnamed: 0,v_x,v_y,v
K0,1,4.0,
K0,1,5.0,
K1,2,,7.0
K1,2,,8.0
K2,3,,9.0


In [211]:
left

Unnamed: 0,k,v
0,K0,1
1,K1,2
2,K2,3


In [212]:
right

Unnamed: 0,k,v
0,K0,4
1,K0,5
2,K3,6


In [227]:
# Outer merge on 'v' instead of 'k': no 'v' value is shared, so every row comes from one side only
# and the 'k' columns are split into k_x / k_y.
pd.merge(left, right, on=['v'], how='outer')

Unnamed: 0,k_x,v,k_y
0,K0,1,
1,K1,2,
2,K2,3,
3,,4,K0
4,,5,K0
5,,6,K3


In [225]:
# Repeats the earlier row-label join with suffixes, shown next to the merge on 'v' for comparison.
left.join(right, lsuffix='_l', rsuffix='_r')

Unnamed: 0,k_l,v_l,k_r,v_r
0,K0,1,K0,4
1,K1,2,K0,5
2,K2,3,K3,6


In [228]:
# Inputs for merge_ordered: `left` carries a grouping column 's' with one distinct value per row,
# `right` is keyed by 'k' only.
left = pd.DataFrame(dict(
    k=['K0', 'K1', 'K1', 'K2'],
    lv=[1, 2, 3, 4],
    s=list('abcd'),
))
right = pd.DataFrame(dict(k=['K1', 'K2', 'K4'], rv=[1, 2, 3]))

In [233]:
# left_by='s' runs the ordered merge once per value of left['s'], so every row of `right`
# shows up inside each group; 's' is then moved into the index for display.
pd.merge_ordered(left, right, left_by='s').set_index('s')

Unnamed: 0_level_0,k,lv,rv
s,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
a,K0,1.0,
a,K1,,1.0
a,K2,,2.0
a,K4,,3.0
b,K1,2.0,1.0
b,K2,,2.0
b,K4,,3.0
c,K1,3.0,1.0
c,K2,,2.0
c,K4,,3.0


In [234]:
left

Unnamed: 0,k,lv,s
0,K0,1,a
1,K1,2,b
2,K1,3,c
3,K2,4,d


In [235]:
right

Unnamed: 0,k,rv
0,K1,1
1,K2,2
2,K4,3
