In [63]:
import numpy as np
import pandas as pd

### Concatenning on the basis of rows.

In [64]:
df1 = pd.DataFrame(

    [
        ['A0', 'B0', 'C0', 'D0'],
        ['A1', 'B1', 'C1', 'D1'],
        ['A2', 'B2', 'C2', 'D2'],
        ['A3', 'B3', 'C3', 'D3']
    ],
    
    columns={'A', 'B', 'C', 'D'}

)
df2 = pd.DataFrame(

    [
        ['A4', 'B4', 'C4', 'D4'],
        ['A5', 'B5', 'C5', 'D5'],
        ['A6', 'B6', 'C6', 'D6'],
        ['A7', 'B7', 'C7', 'D7']
    ],
    
    columns={'A', 'B', 'C', 'D'}

)

In [65]:
df1

Unnamed: 0,B,D,C,A
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3


In [66]:
df2

Unnamed: 0,B,D,C,A
0,A4,B4,C4,D4
1,A5,B5,C5,D5
2,A6,B6,C6,D6
3,A7,B7,C7,D7


In [67]:
# Here axis should be zero.
pd.concat([df1, df2], axis=0)

Unnamed: 0,B,D,C,A
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
0,A4,B4,C4,D4
1,A5,B5,C5,D5
2,A6,B6,C6,D6
3,A7,B7,C7,D7


### Concatenning on the basis of columns.

In [68]:
df1 = pd.DataFrame(

    [
        ['A0', 'B0'],
        ['A1', 'B1'],
        ['A2', 'B2'],
        ['A3', 'B3']
    ],
    
    columns={'A', 'B'}

)
df2 = pd.DataFrame(

    [
        ['C4', 'D4'],
        ['C5', 'D5'],
        ['C6', 'D6'],
        ['C7', 'D7']
    ],
    
    columns={'C', 'D'}
)

In [69]:
df1

Unnamed: 0,B,A
0,A0,B0
1,A1,B1
2,A2,B2
3,A3,B3


In [70]:
df2

Unnamed: 0,D,C
0,C4,D4
1,C5,D5
2,C6,D6
3,C7,D7


In [71]:
# Since we want concatenate on the basis of columns that's axis is equals to 1.
pd.concat([df1, df2], axis=1)

Unnamed: 0,B,A,D,C
0,A0,B0,C4,D4
1,A1,B1,C5,D5
2,A2,B2,C6,D6
3,A3,B3,C7,D7


In [72]:
# If we try to concatenate on the basis of row NaN will be assigned to empty values. 
pd.concat([df1, df2], axis=0)

Unnamed: 0,B,A,D,C
0,A0,B0,,
1,A1,B1,,
2,A2,B2,,
3,A3,B3,,
0,,,C4,D4
1,,,C5,D5
2,,,C6,D6
3,,,C7,D7


In [73]:
# But we can achieve this by changing the name of columns
df2.columns=df1.columns
df2

Unnamed: 0,B,A
0,C4,D4
1,C5,D5
2,C6,D6
3,C7,D7


In [74]:
# Now cpncatenate will work as expected.
# The index remains same.
pd.concat([df1, df2], axis=0)

Unnamed: 0,B,A
0,A0,B0
1,A1,B1
2,A2,B2
3,A3,B3
0,C4,D4
1,C5,D5
2,C6,D6
3,C7,D7


## Merge

In [75]:
regs=pd.DataFrame({
    'reg_id':[1,2,3,4],
    'name':['Andrew', 'Bob', 'Clair', 'Ayush']
})
regs

Unnamed: 0,reg_id,name
0,1,Andrew
1,2,Bob
2,3,Clair
3,4,Ayush


In [76]:
logs=pd.DataFrame({
    'reg_id':[1,2,3,4],
    'name':['Xavier', 'Andrew', 'Yolanda', 'Bob']
})
logs

Unnamed: 0,reg_id,name
0,1,Xavier
1,2,Andrew
2,3,Yolanda
3,4,Bob


### Inner Merge

In [77]:
pd.merge(regs, logs, how='inner', on='name')

Unnamed: 0,reg_id_x,name,reg_id_y
0,1,Andrew,2
1,2,Bob,4


### Left Merge

In [78]:
# all left rows are present.
pd.merge(regs, logs, how='left', on='name')

Unnamed: 0,reg_id_x,name,reg_id_y
0,1,Andrew,2.0
1,2,Bob,4.0
2,3,Clair,
3,4,Ayush,


### Right Merge

In [79]:
# all right rows are present.
pd.merge(regs, logs, how='right', on='name')

Unnamed: 0,reg_id_x,name,reg_id_y
0,,Xavier,1
1,1.0,Andrew,2
2,,Yolanda,3
3,2.0,Bob,4


### Outer Merge

In [80]:
# all names are uniquely present.
pd.merge(regs, logs, how='outer', on='name')

Unnamed: 0,reg_id_x,name,reg_id_y
0,1.0,Andrew,2.0
1,2.0,Bob,4.0
2,3.0,Clair,
3,4.0,Ayush,
4,,Xavier,1.0
5,,Yolanda,3.0


In [81]:
regs=regs.set_index('name')
regs

Unnamed: 0_level_0,reg_id
name,Unnamed: 1_level_1
Andrew,1
Bob,2
Clair,3
Ayush,4


In [82]:
# Using index with column
pd.merge(regs, logs, left_index=True, right_on='name')

Unnamed: 0,reg_id_x,reg_id_y,name
1,1,2,Andrew
3,2,4,Bob


In [83]:
regs.reset_index()

Unnamed: 0,name,reg_id
0,Andrew,1
1,Bob,2
2,Clair,3
3,Ayush,4
