# Merging, Joining, and Concatenating

pandas allows `DataFrame` objects to be combined in three ways: merging, joining, and concatenating (as the title and name of this notebook have already stated).

### Make some example `DataFrame`s.

In [66]:
import pandas as pd

In [67]:
# Three frames that share the columns A-D but carry disjoint integer row
# labels (0-3, 4-7 and 8-11). Every cell reads "<column> <row label>", so the
# origin of each value stays visible once the frames are combined.
# The data is written row by row; `columns` names the fields.
df1 = pd.DataFrame(
    [['A 00', 'B 00', 'C 00', 'D 00'],
     ['A 01', 'B 01', 'C 01', 'D 01'],
     ['A 02', 'B 02', 'C 02', 'D 02'],
     ['A 03', 'B 03', 'C 03', 'D 03']],
    columns=['A', 'B', 'C', 'D'],
    index=[0, 1, 2, 3])

df2 = pd.DataFrame(
    [['A 04', 'B 04', 'C 04', 'D 04'],
     ['A 05', 'B 05', 'C 05', 'D 05'],
     ['A 06', 'B 06', 'C 06', 'D 06'],
     ['A 07', 'B 07', 'C 07', 'D 07']],
    columns=['A', 'B', 'C', 'D'],
    index=[4, 5, 6, 7])

df3 = pd.DataFrame(
    [['A 08', 'B 08', 'C 08', 'D 08'],
     ['A 09', 'B 09', 'C 09', 'D 09'],
     ['A 10', 'B 10', 'C 10', 'D 10'],
     ['A 11', 'B 11', 'C 11', 'D 11']],
    columns=['A', 'B', 'C', 'D'],
    index=[8, 9, 10, 11])

In [68]:
df1

Unnamed: 0,A,B,C,D
0,A 00,B 00,C 00,D 00
1,A 01,B 01,C 01,D 01
2,A 02,B 02,C 02,D 02
3,A 03,B 03,C 03,D 03


In [69]:
df2

Unnamed: 0,A,B,C,D
4,A 04,B 04,C 04,D 04
5,A 05,B 05,C 05,D 05
6,A 06,B 06,C 06,D 06
7,A 07,B 07,C 07,D 07


In [70]:
df3

Unnamed: 0,A,B,C,D
8,A 08,B 08,C 08,D 08
9,A 09,B 09,C 09,D 09
10,A 10,B 10,C 10,D 10
11,A 11,B 11,C 11,D 11


### Glue data together with `pd.concat()`.

In [71]:
# Concatenate along axis 0 (the default): the frames are stacked on top of
# each other, and every row keeps its original index label.
pd.concat([df1, df2, df3])

Unnamed: 0,A,B,C,D
0,A 00,B 00,C 00,D 00
1,A 01,B 01,C 01,D 01
2,A 02,B 02,C 02,D 02
3,A 03,B 03,C 03,D 03
4,A 04,B 04,C 04,D 04
5,A 05,B 05,C 05,D 05
6,A 06,B 06,C 06,D 06
7,A 07,B 07,C 07,D 07
8,A 08,B 08,C 08,D 08
9,A 09,B 09,C 09,D 09
10,A 10,B 10,C 10,D 10
11,A 11,B 11,C 11,D 11


In [72]:
# Concatenate along axis 1: the frames are placed side by side and aligned on
# their index labels. df1, df2 and df3 share no labels, so each row is NaN in
# the columns that come from the other two frames.
pd.concat([df1, df2, df3], axis=1)

Unnamed: 0,A,B,C,D,A.1,B.1,C.1,D.1,A.2,B.2,C.2,D.2
0,A 00,B 00,C 00,D 00,,,,,,,,
1,A 01,B 01,C 01,D 01,,,,,,,,
2,A 02,B 02,C 02,D 02,,,,,,,,
3,A 03,B 03,C 03,D 03,,,,,,,,
4,,,,,A 04,B 04,C 04,D 04,,,,
5,,,,,A 05,B 05,C 05,D 05,,,,
6,,,,,A 06,B 06,C 06,D 06,,,,
7,,,,,A 07,B 07,C 07,D 07,,,,
8,,,,,,,,,A 08,B 08,C 08,D 08
9,,,,,,,,,A 09,B 09,C 09,D 09
10,,,,,,,,,A 10,B 10,C 10,D 10
11,,,,,,,,,A 11,B 11,C 11,D 11


### Make more example DataFrames.

In [73]:
# Two frames sharing a 'key' column with the same four values (K0-K3).
# `left` carries the A/B values and `right` carries the C/D values.
left = pd.DataFrame({
    'key': ['K%d' % n for n in range(4)],
    'A': ['A%d' % n for n in range(4)],
    'B': ['B%d' % n for n in range(4)],
})

right = pd.DataFrame({
    'key': ['K%d' % n for n in range(4)],
    'C': ['C%d' % n for n in range(4)],
    'D': ['D%d' % n for n in range(4)],
})

In [74]:
left

Unnamed: 0,A,B,key
0,A0,B0,K0
1,A1,B1,K1
2,A2,B2,K2
3,A3,B3,K3


In [75]:
right

Unnamed: 0,C,D,key
0,C0,D0,K0
1,C1,D1,K1
2,C2,D2,K2
3,C3,D3,K3


### `pd.merge()` DataFrames together with an SQL-like logic.

This performs a database-style join operation, so familiarity with SQL joins (inner, outer, left, right) will help in understanding it.

In [76]:
# Inner join on the shared 'key' column: only keys present in both frames are
# kept. Here every key (K0-K3) appears in both, so all four rows survive.
pd.merge(left, right, how='inner', on='key')

Unnamed: 0,A,B,key,C,D
0,A0,B0,K0,C0,D0
1,A1,B1,K1,C1,D1
2,A2,B2,K2,C2,D2
3,A3,B3,K3,C3,D3


### A more complicated example of `pd.merge()`.

In [77]:
# Two frames keyed on the pair (key1, key2), chosen so the pairs only partly
# overlap:
#   left  pairs: (K0, K0), (K0, K1), (K1, K0), (K2, K1)
#   right pairs: (K0, K0), (K1, K0), (K1, K0), (K2, K0)
left = pd.DataFrame({
    'key1': 'K0 K0 K1 K2'.split(),
    'key2': 'K0 K1 K0 K1'.split(),
    'A': 'A0 A1 A2 A3'.split(),
    'B': 'B0 B1 B2 B3'.split(),
})

right = pd.DataFrame({
    'key1': 'K0 K1 K1 K2'.split(),
    'key2': 'K0 K0 K0 K0'.split(),
    'C': 'C0 C1 C2 C3'.split(),
    'D': 'D0 D1 D2 D3'.split(),
})

In [78]:
left

Unnamed: 0,A,B,key1,key2
0,A0,B0,K0,K0
1,A1,B1,K0,K1
2,A2,B2,K1,K0
3,A3,B3,K2,K1


In [79]:
right

Unnamed: 0,C,D,key1,key2
0,C0,D0,K0,K0
1,C1,D1,K1,K0
2,C2,D2,K1,K0
3,C3,D3,K2,K0


In [80]:
# Merge on the (key1, key2) pair. No `how` is given, so pd.merge falls back to
# its default, an inner join: only pairs found in both frames are kept.
# (K1, K0) occurs once in `left` and twice in `right`, so it yields two rows.
pd.merge(left, right, on=['key1', 'key2'])

Unnamed: 0,A,B,key1,key2,C,D
0,A0,B0,K0,K0,C0,D0
1,A2,B2,K1,K0,C1,D1
2,A2,B2,K1,K0,C2,D2


In [81]:
# Outer join: keep every (key1, key2) pair that appears in either frame.
# Columns from the side that has no matching row are filled with NaN.
pd.merge(left, right, how='outer', on=['key1', 'key2'])

Unnamed: 0,A,B,key1,key2,C,D
0,A0,B0,K0,K0,C0,D0
1,A1,B1,K0,K1,,
2,A2,B2,K1,K0,C1,D1
3,A2,B2,K1,K0,C2,D2
4,A3,B3,K2,K1,,
5,,,K2,K0,C3,D3


In [82]:
# Right join: keep every row of `right`. (K2, K0) has no match in `left`,
# so its A and B values are NaN.
pd.merge(left, right, how='right', on=['key1', 'key2'])

Unnamed: 0,A,B,key1,key2,C,D
0,A0,B0,K0,K0,C0,D0
1,A2,B2,K1,K0,C1,D1
2,A2,B2,K1,K0,C2,D2
3,,,K2,K0,C3,D3


In [83]:
# Left join: keep every row of `left`. (K0, K1) and (K2, K1) have no match in
# `right`, so their C and D values are NaN.
pd.merge(left, right, how='left', on=['key1', 'key2'])

Unnamed: 0,A,B,key1,key2,C,D
0,A0,B0,K0,K0,C0,D0
1,A1,B1,K0,K1,,
2,A2,B2,K1,K0,C1,D1
3,A2,B2,K1,K0,C2,D2
4,A3,B3,K2,K1,,


### Use `DataFrame.join()` to combine columns from DataFrames with different indexes.

In [84]:
# Two frames labelled by string indexes that only partly overlap:
# `left` has K0, K1, K2 while `right` has K0, K2, K3.
# The data is written row by row; `columns` names the fields.
left = pd.DataFrame(
    [['A0', 'B0'],
     ['A1', 'B1'],
     ['A2', 'B2']],
    columns=['A', 'B'],
    index=['K0', 'K1', 'K2'])

right = pd.DataFrame(
    [['C0', 'D0'],
     ['C2', 'D2'],
     ['C3', 'D3']],
    columns=['C', 'D'],
    index=['K0', 'K2', 'K3'])

In [85]:
left

Unnamed: 0,A,B
K0,A0,B0
K1,A1,B1
K2,A2,B2


In [86]:
right

Unnamed: 0,C,D
K0,C0,D0
K2,C2,D2
K3,C3,D3


In [87]:
# DataFrame.join aligns the two frames on their index labels and defaults to a
# left join: all of `left`'s labels are kept. K1 has no match in `right`, so
# its C and D values are NaN; K3 exists only in `right` and is dropped.
left.join(right)

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K1,A1,B1,,
K2,A2,B2,C2,D2


In [89]:
# Outer join on the index: keep the union of both frames' labels (K0-K3) and
# fill the columns of whichever side lacks a label with NaN.
left.join(right, how='outer')

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K1,A1,B1,,
K2,A2,B2,C2,D2
K3,,,C3,D3
