In [4]:
import pandas as pd
import numpy as np

In [5]:
# Each dict key becomes a column label; the value under that key supplies
# the entries of that column.
df1 = pd.DataFrame(
    {
        "key": list("abcde"),
        "A1": range(5),
        "B1": range(5, 10),
    }
)

# df2 shares only the keys 'a', 'b', 'c' with df1.
df2 = pd.DataFrame(
    {
        "key": list("abc"),
        "A2": range(3),
        "B2": range(3, 6),
    }
)

In [6]:
df1

Unnamed: 0,key,A1,B1
0,a,0,5
1,b,1,6
2,c,2,7
3,d,3,8
4,e,4,9


In [7]:
df2

Unnamed: 0,key,A2,B2
0,a,0,3
1,b,1,4
2,c,2,5


In [8]:
# Baseline dtypes before any merge: A1 and B1 are int64.
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   key     5 non-null      object
 1   A1      5 non-null      int64 
 2   B1      5 non-null      int64 
dtypes: int64(2), object(1)
memory usage: 252.0+ bytes


In [9]:
# Inner join on 'key': only keys present in both frames ('a', 'b', 'c') are kept,
# and every column from both frames is returned.
pd.merge(df1, df2, how = 'inner', on = 'key')

Unnamed: 0,key,A1,B1,A2,B2
0,a,0,5,0,3
1,b,1,6,1,4
2,c,2,7,2,5


In [12]:
# Same inner join, then keep only selected columns by indexing the result
# with a list of column names.
pd.merge(df1, df2, how = 'inner', on = 'key')[['key', 'A1', 'A2']]

Unnamed: 0,key,A1,A2
0,a,0,0
1,b,1,1
2,c,2,2


In [15]:
# NaN is a float value; this matters when a merge introduces missing values
# into a column that previously held integers.
type(np.nan)

float

In [16]:
pd.merge(df1, df2, how = 'left', on = 'key')

# A dtype applies to a whole column, not to the whole DataFrame.
# Keys 'd' and 'e' have no match in df2, so A2 and B2 get NaN for those rows.
# NaN is a float, so the integer columns A2 and B2 are upcast to float (0 -> 0.0).

Unnamed: 0,key,A1,B1,A2,B2
0,a,0,5,0.0,3.0
1,b,1,6,1.0,4.0
2,c,2,7,2.0,5.0
3,d,3,8,,
4,e,4,9,,


In [17]:
# Same keys as df2, but the value columns hold strings instead of integers.
df3 = pd.DataFrame(
    {
        "key": list("abc"),
        "A3": list("xyz"),
        "B3": list("pqr"),
    }
)
df3

Unnamed: 0,key,A3,B3
0,a,x,p
1,b,y,q
2,c,z,r


In [18]:
# Left join against string-valued columns: keys 'd' and 'e' have no match in df3,
# so A3 and B3 are missing for those rows.
df4 = pd.merge(df1, df3, how = 'left', on = 'key')
df4

Unnamed: 0,key,A1,B1,A3,B3
0,a,0,5,x,p
1,b,1,6,y,q
2,c,2,7,z,r
3,d,3,8,,
4,e,4,9,,


In [19]:
df4.info()

# A3 and B3 are reported as object dtype with only 3 of 5 entries non-null:
# the unmatched rows hold missing values; they are not converted to strings.

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   key     5 non-null      object
 1   A1      5 non-null      int64 
 2   B1      5 non-null      int64 
 3   A3      3 non-null      object
 4   B3      3 non-null      object
dtypes: int64(2), object(3)
memory usage: 332.0+ bytes


In [20]:
# Right join: keep every key of df2. All of them ('a', 'b', 'c') also exist in df1,
# so no missing values appear and the integer columns stay integer.
pd.merge(df1, df2, how = 'right', on = 'key')

Unnamed: 0,key,A1,B1,A2,B2
0,a,0,5,0,3
1,b,1,6,1,4
2,c,2,7,2,5


In [21]:
# Outer join: keep keys found in either frame. Every df2 key is also in df1,
# so the result has the same rows as the left join above.
pd.merge(df1, df2, how = 'outer', on = 'key')

Unnamed: 0,key,A1,B1,A2,B2
0,a,0,5,0.0,3.0
1,b,1,6,1.0,4.0
2,c,2,7,2.0,5.0
3,d,3,8,,
4,e,4,9,,


In [22]:
# Two frames that share a composite key (key1, key2) for the multi-key merge examples.
left = pd.DataFrame(
    dict(
        key1=["a", "a", "b", "c"],
        key2=["a", "b", "a", "b"],
        A=["A0", "A1", "A2", "A3"],
        B=["B0", "B1", "B2", "B3"],
    )
)

right = pd.DataFrame(
    dict(
        key1=["a", "b", "b", "c"],
        key2=["a", "b", "a", "a"],
        C=["C0", "C1", "C2", "C3"],
        D=["D0", "D1", "D2", "D3"],
    )
)

In [23]:
left

Unnamed: 0,key1,key2,A,B
0,a,a,A0,B0
1,a,b,A1,B1
2,b,a,A2,B2
3,c,b,A3,B3


In [24]:
right

Unnamed: 0,key1,key2,C,D
0,a,a,C0,D0
1,b,b,C1,D1
2,b,a,C2,D2
3,c,a,C3,D3


In [26]:
# Inner join on both keys: only the (key1, key2) pairs present in both frames
# survive, i.e. ('a', 'a') and ('b', 'a').
pd.merge(left, right, how = 'inner', on = ['key1', 'key2'])

Unnamed: 0,key1,key2,A,B,C,D
0,a,a,A0,B0,C0,D0
1,b,a,A2,B2,C2,D2


In [27]:
# Outer join on both keys: every (key1, key2) pair from either frame appears once;
# columns from the side without a match are left missing.
pd.merge(left, right, how = 'outer', on = ['key1', 'key2'])

Unnamed: 0,key1,key2,A,B,C,D
0,a,a,A0,B0,C0,D0
1,a,b,A1,B1,,
2,b,a,A2,B2,C2,D2
3,b,b,,,C1,D1
4,c,a,,,C3,D3
5,c,b,A3,B3,,


In [28]:
# Left join on both keys: all four rows of `left` are kept; ('a', 'b') and ('c', 'b')
# have no match in `right`, so C and D are missing for them.
pd.merge(left, right, how = 'left', on = ['key1', 'key2'])

Unnamed: 0,key1,key2,A,B,C,D
0,a,a,A0,B0,C0,D0
1,a,b,A1,B1,,
2,b,a,A2,B2,C2,D2
3,c,b,A3,B3,,


In [29]:
# Right join on both keys: all four rows of `right` are kept; ('b', 'b') and ('c', 'a')
# have no match in `left`, so A and B are missing for them.
pd.merge(left, right, how = 'right', on = ['key1', 'key2'])

Unnamed: 0,key1,key2,A,B,C,D
0,a,a,A0,B0,C0,D0
1,b,b,,,C1,D1
2,b,a,A2,B2,C2,D2
3,c,a,,,C3,D3


In [36]:
# Built row by row; `index` sets the row labels explicitly (0-3).
df5 = pd.DataFrame(
    [
        ("A0", "B0", "C0", "D0"),
        ("A1", "B1", "C1", "D1"),
        ("A2", "B2", "C2", "D2"),
        ("A3", "B3", "C3", "D3"),
    ],
    columns=["A", "B", "C", "D"],
    index=[0, 1, 2, 3],
)

df5

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3


In [37]:
# Same column labels as the A-D frame built earlier, with row labels 4-7,
# so the two frames can be stacked with pd.concat.
df6 = pd.DataFrame(
    [
        ("A4", "B4", "C4", "D4"),
        ("A5", "B5", "C5", "D5"),
        ("A6", "B6", "C6", "D6"),
        ("A7", "B7", "C7", "D7"),
    ],
    columns=["A", "B", "C", "D"],
    index=[4, 5, 6, 7],
)

df6

Unnamed: 0,A,B,C,D
4,A4,B4,C4,D4
5,A5,B5,C5,D5
6,A6,B6,C6,D6
7,A7,B7,C7,D7


In [38]:
# Stack df6 below df5 (row-wise concatenation); the columns line up and the
# original index labels 0-7 are preserved.
pd.concat([df5, df6])

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
4,A4,B4,C4,D4
5,A5,B5,C5,D5
6,A6,B6,C6,D6
7,A7,B7,C7,D7
