# MERGING DATA FRAMES THAT HAVE THE SAME ELEMENTS IN ONE VARIABLE

Pandas `merge` combines two DataFrames by matching rows on one or more key columns or on their indexes.

In [1]:
#Import pandas library
import pandas as pd

In [2]:
# Build df1 with an ID column and a Class column, then display it.
df1 = pd.DataFrame(
    {
        'ID': [1, 2, 3, 4],
        'Class': [9, 10, 11, 12],
    }
)
df1

Unnamed: 0,ID,Class
0,1,9
1,2,10
2,3,11
3,4,12


In [3]:
# Build df2 with the same ID values as df1 plus a Name column, then display it.
df2 = pd.DataFrame(
    {
        'ID': [1, 2, 3, 4],
        'Name': ['A', 'B', 'C', 'D'],
    }
)
df2

Unnamed: 0,ID,Name
0,1,A
1,2,B
2,3,C
3,4,D


In [4]:
# Merge the two data frames without naming a key: pandas joins on the
# column(s) the frames have in common, which here is ID.
pd.merge(df1, df2)

Unnamed: 0,ID,Class,Name
0,1,9,A
1,2,10,B
2,3,11,C
3,4,12,D


In [5]:
# Merge the two data frames, naming the key column explicitly.
pd.merge(df1, df2, on='ID')

Unnamed: 0,ID,Class,Name
0,1,9,A
1,2,10,B
2,3,11,C
3,4,12,D


In [6]:
# Merge on ID with the operands swapped: the left frame's columns (df2) now
# come first in the result.
# Every ID value appears in both df1 and df2, so all rows are matched.
pd.merge(df2, df1, on='ID')

Unnamed: 0,ID,Name,Class
0,1,A,9
1,2,B,10
2,3,C,11
3,4,D,12


# MERGING DATA FRAMES THAT HAVE A FEW DIFFERENT ELEMENTS IN ONE VARIABLE

In [6]:
# Build df3: like df2, but the last ID is 5 instead of 4, so one ID has no
# counterpart in df1. Display it afterwards.
df3 = pd.DataFrame(
    {
        'ID': [1, 2, 3, 5],
        'Name': ['A', 'B', 'C', 'D'],
    }
)
df3

Unnamed: 0,ID,Name
0,1,A
1,2,B
2,3,C
3,5,D


In [7]:
# Merge df1 and df3 with the default settings.
# ID 4 exists only in df1 and ID 5 only in df3, so those two rows are not
# merged and are left out of the result.
pd.merge(df1, df3)

Unnamed: 0,ID,Class,Name
0,1,9,A
1,2,10,B
2,3,11,C


# MERGE DATA WITH HOW PARAMETER

In [10]:
# Inner merge of df1 and df3 on ID.
# 'inner' keeps only the IDs found in both frames; mismatched rows are dropped.
pd.merge(df1, df3, on='ID', how='inner')

Unnamed: 0,ID,Class,Name
0,1,9,A
1,2,10,B
2,3,11,C


In [11]:
# Outer merge of df1 and df3 on ID.
# 'outer' keeps every ID from either frame; where one side has no matching
# row, its columns are left empty (NaN).
pd.merge(df1, df3, on='ID', how='outer')

Unnamed: 0,ID,Class,Name
0,1,9.0,A
1,2,10.0,B
2,3,11.0,C
3,4,12.0,
4,5,,D


In [12]:
# Left merge of df1 and df3 on ID: every row of the left frame (df1) is
# kept, and Name is empty where df3 has no matching ID.
pd.merge(df1, df3, on='ID', how='left')

Unnamed: 0,ID,Class,Name
0,1,9,A
1,2,10,B
2,3,11,C
3,4,12,


In [13]:
# Right merge of df1 and df3 on ID: every row of the right frame (df3) is
# kept, and Class is empty where df1 has no matching ID.
pd.merge(df1, df3, on='ID', how='right')

Unnamed: 0,ID,Class,Name
0,1,9.0,A
1,2,10.0,B
2,3,11.0,C
3,5,,D


# MERGE DATA WITH INDICATOR PARAMETER

In [14]:
# Outer merge of df1 and df3 on ID with the indicator switched on: the extra
# _merge column reports whether each row came from both frames, the left
# frame only, or the right frame only.
pd.merge(
    df1,
    df3,
    on='ID',
    how='outer',
    indicator=True,
)

Unnamed: 0,ID,Class,Name,_merge
0,1,9.0,A,both
1,2,10.0,B,both
2,3,11.0,C,both
3,4,12.0,,left_only
4,5,,D,right_only


# MERGE DATA WITH INDEX PARAMETER

In [15]:
# Build df4: its ID values (5-8) do not overlap with df1's at all.
# Display it afterwards.
df4 = pd.DataFrame(
    {
        'ID': [5, 6, 7, 8],
        'Name': ['A', 'B', 'C', 'D'],
    }
)
df4

Unnamed: 0,ID,Name
0,5,A
1,6,B
2,7,C
3,8,D


In [16]:
# Merge df1 and df4 by row index rather than by a key column.
# Both frames carry an ID column, so the result holds them side by side as
# ID_x (from df1) and ID_y (from df4).
pd.merge(
    df1,
    df4,
    left_index=True,
    right_index=True,
)

Unnamed: 0,ID_x,Class,ID_y,Name
0,1,9,5,A
1,2,10,6,B
2,3,11,7,C
3,4,12,8,D


# MERGE DATA WITH SUFFIXES PARAMETER (WHEN BOTH DATA FRAMES HAVE THE SAME COLUMN NAMES)

In [18]:
# Build df5 with the same columns and values as df1, so the non-key column
# name (Class) collides when the two are merged. Display it afterwards.
df5 = pd.DataFrame(
    {
        'ID': [1, 2, 3, 4],
        'Class': [9, 10, 11, 12],
    }
)
df5

Unnamed: 0,ID,Class
0,1,9
1,2,10
2,3,11
3,4,12


In [20]:
# Merge df1 and df5 on ID. Both frames have a Class column; by default
# pandas tells them apart with the suffixes _x (left) and _y (right).
pd.merge(df1, df5, on='ID')

Unnamed: 0,ID,Class_x,Class_y
0,1,9,9
1,2,10,10
2,3,11,11
3,4,12,12


In [22]:
# Merge df1 and df5 on ID, replacing the default suffixes: the overlapping
# Class columns become Class_Higher (from df1) and Class_Middle (from df5).
pd.merge(
    df1,
    df5,
    on='ID',
    suffixes=('_Higher', '_Middle'),
)

Unnamed: 0,ID,Class_Higher,Class_Middle
0,1,9,9
1,2,10,10
2,3,11,11
3,4,12,12
