# 

# <center> Join x Merge x Concatenate </center>

### Reference
https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html

In practice, things can be (and often are) more complicated than in the following examples, but they give you the idea of how things work.

___

In [1]:
import pandas as pd

---

## Comparison between join, concatenate and merge

In [2]:
# Two small frames indexed by key labels: K0 and K2 appear in both,
# K1 only in `left`, and K3 only in `right`.
left = pd.DataFrame(
    data={
        "A": ["A0", "A1", "A2"],
        "B": ["B0", "B1", "B2"],
    },
    index=["K0", "K1", "K2"],
)

right = pd.DataFrame(
    data={
        "C": ["C0", "C2", "C3"],
        "D": ["D0", "D2", "D3"],
    },
    index=["K0", "K2", "K3"],
)

In [3]:
# Display the left frame
left

Unnamed: 0,A,B
K0,A0,B0
K1,A1,B1
K2,A2,B2


In [4]:
# Display the right frame
right

Unnamed: 0,C,D
K0,C0,D0
K2,C2,D2
K3,C3,D3


# On index

## inner

In [5]:
# join: aligns on the index; "inner" keeps only the labels present in both frames
left.join(right, how="inner")

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K2,A2,B2,C2,D2


In [6]:
# concat: put the frames side by side (axis=1), keeping only the shared index labels
pd.concat([left, right], axis=1, join="inner")

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K2,A2,B2,C2,D2


In [7]:
# merge: use the index of both frames as the key, keeping only shared labels
pd.merge(left, right, left_index=True, right_index=True, how="inner")

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K2,A2,B2,C2,D2


## outer

In [8]:
# join: "outer" keeps the union of both indexes; cells without a match stay empty (NaN)
left.join(right, how="outer")

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K1,A1,B1,,
K2,A2,B2,C2,D2
K3,,,C3,D3


In [9]:
# concat: side by side (axis=1) over the union of both indexes
pd.concat([left, right], axis=1, join="outer")

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K1,A1,B1,,
K2,A2,B2,C2,D2
K3,,,C3,D3


In [10]:
# merge: index against index, keeping every label from either frame
pd.merge(left, right, left_index=True, right_index=True, how="outer")

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K1,A1,B1,,
K2,A2,B2,C2,D2
K3,,,C3,D3


## left

In [11]:
# join: "left" keeps exactly the index labels of `left`
left.join(right, how="left")

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K1,A1,B1,,
K2,A2,B2,C2,D2


In [19]:
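# concat
# not possible directly: pd.concat only accepts join = 'inner' or 'outer'
# (equivalent result: pd.concat([left, right], axis = 1).reindex(left.index))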

In [14]:
# merge: index against index, keeping exactly the labels of `left`
pd.merge(left, right, left_index=True, right_index=True, how="left")

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K1,A1,B1,,
K2,A2,B2,C2,D2


## right

In [20]:
# join: "right" keeps exactly the index labels of `right`
result = left.join(right, how="right")
result

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K2,A2,B2,C2,D2
K3,,,C3,D3


In [21]:
# concat
# not possible without defining a key (pd.concat only accepts join = 'inner' or 'outer')

In [23]:
# merge: index against index, keeping exactly the labels of `right`
pd.merge(left, right, left_index=True, right_index=True, how="right")

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K2,A2,B2,C2,D2
K3,,,C3,D3


## On Key

To make the example more interesting, the key is a column in the first dataset, while in the second dataset the key is in the index.

In [45]:
# New frames: `left` carries the key as an ordinary column (values repeat),
# while `right` carries the key as its index.
left = pd.DataFrame(
    data={
        "A": ["A0", "A1", "A2", "A3"],
        "B": ["B0", "B1", "B2", "B3"],
        "key": ["K0", "K1", "K0", "K1"],
    },
)

right = pd.DataFrame(
    data={"C": ["C0", "C1"], "D": ["D0", "D1"]},
    index=["K0", "K1"],
)

In [46]:
# join: match left's "key" column against right's index, keeping matched rows only
left.join(right, on="key", how="inner")

Unnamed: 0,A,B,key,C,D
0,A0,B0,K0,C0,D0
2,A2,B2,K0,C0,D0
1,A1,B1,K1,C1,D1
3,A3,B3,K1,C1,D1


In [51]:
# merge: left's "key" column against right's index, keeping matched rows only
pd.merge(left, right, left_on="key", right_index=True, how="inner")

Unnamed: 0,A,B,key,C,D
0,A0,B0,K0,C0,D0
2,A2,B2,K0,C0,D0
1,A1,B1,K1,C1,D1
3,A3,B3,K1,C1,D1


In [31]:
# join: left's "key" column against right's index, keeping keys from both sides
left.join(right, on="key", how="outer")

Unnamed: 0,A,B,key,C,D
0,A0,B0,K0,C0,D0
2,A2,B2,K0,C0,D0
1,A1,B1,K1,C1,D1
3,A3,B3,K1,C1,D1


In [52]:
# merge: left's "key" column against right's index, keeping keys from both sides
pd.merge(left, right, left_on="key", right_index=True, how="outer")

Unnamed: 0,A,B,key,C,D
0,A0,B0,K0,C0,D0
2,A2,B2,K0,C0,D0
1,A1,B1,K1,C1,D1
3,A3,B3,K1,C1,D1


In [32]:
# join: left's "key" column against right's index, keeping every row of `left`
left.join(right, on="key", how="left")

Unnamed: 0,A,B,key,C,D
0,A0,B0,K0,C0,D0
1,A1,B1,K1,C1,D1
2,A2,B2,K0,C0,D0
3,A3,B3,K1,C1,D1


In [53]:
# merge: left's "key" column against right's index, keeping every row of `left`
pd.merge(left, right, left_on="key", right_index=True, how="left")

Unnamed: 0,A,B,key,C,D
0,A0,B0,K0,C0,D0
1,A1,B1,K1,C1,D1
2,A2,B2,K0,C0,D0
3,A3,B3,K1,C1,D1


In [54]:
# join: left's "key" column against right's index, keeping every key of `right`
left.join(right, on="key", how="right")

Unnamed: 0,A,B,key,C,D
0,A0,B0,K0,C0,D0
2,A2,B2,K0,C0,D0
1,A1,B1,K1,C1,D1
3,A3,B3,K1,C1,D1


In [55]:
# merge: left's "key" column against right's index, keeping every key of `right`
pd.merge(left, right, left_on="key", right_index=True, how="right")

Unnamed: 0,A,B,key,C,D
0,A0,B0,K0,C0,D0
2,A2,B2,K0,C0,D0
1,A1,B1,K1,C1,D1
3,A3,B3,K1,C1,D1


---