# Pandas Merge
Join DataFrames
- on a common column (`merge`)
- row-wise (`concat` with axis=0)
- column-wise (`concat` with axis=1)

In [4]:
#libraries
import pandas as pd
import numpy as np

In [20]:
# Sample employee master data: five employees keyed by empID.
empID = ['E01', 'E02', 'E03', 'E04', 'E05']
empName = ['Dhiraj', 'Kunal', 'Sushma', 'Poonam', 'Upen']
gender = ['M', 'M', 'F', 'F', 'M']
# Show the three raw lists, one per line, before assembling the frame.
print(empID, empName, gender, sep=' \n ')
# columns= pins the column order explicitly.
employee = pd.DataFrame(
    data={'empID': empID, 'empName': empName, 'gender': gender},
    columns=['empID', 'empName', 'gender'],
)
employee.head()

['E01', 'E02', 'E03', 'E04', 'E05'] 
 ['Dhiraj', 'Kunal', 'Sushma', 'Poonam', 'Upen'] 
 ['M', 'M', 'F', 'F', 'M']


Unnamed: 0,empID,empName,gender
0,E01,Dhiraj,M
1,E02,Kunal,M
2,E03,Sushma,F
3,E04,Poonam,F
4,E05,Upen,M


In [48]:
# Two further employees with the same columns as `employee`; used later to
# demonstrate row-wise concatenation.
employee2 = pd.DataFrame(
    {
        'empID': ['E11', 'E12'],
        'empName': ['Tanvi', 'Kanika'],
        'gender': ['F', 'F'],
    }
)
employee2

Unnamed: 0,empID,empName,gender
0,E11,Tanvi,F
1,E12,Kanika,F


In [33]:
# Marks table keyed by empID. Compared with the employee list, E03 is
# absent here and E07 is extra, which is what makes the joins interesting.
empID2 = ['E01', 'E02', 'E04', 'E05', 'E07']
course1 = np.random.randint(low=50, high=100, size=5)  # 5 random ints in [50, 100)
course2 = [55, 66, np.nan, 77, 88]  # NaN marks a missing score
marks1 = pd.DataFrame(
    data={'empID': empID2, 'course1': course1, 'course2': course2},
    columns=['empID', 'course1', 'course2'],
)
print(marks1)

  empID  course1  course2
0   E01       71     55.0
1   E02       75     66.0
2   E04       55      NaN
3   E05       84     77.0
4   E07       97     88.0


In [34]:
# Same IDs and marks as marks1, but the key column is named 'ID' rather
# than 'empID' (used below to show joins on differently named keys).
marks2 = pd.DataFrame(
    {
        'ID': empID2,
        'course1': course1,
        'course2': course2,
    }
)
print(marks2)

    ID  course1  course2
0  E01       71     55.0
1  E02       75     66.0
2  E04       55      NaN
3  E05       84     77.0
4  E07       97     88.0


In [53]:
# Marks for two more courses. This frame is keyed by empID (the employee
# table's own ID list), so its rows are in the same order as `employee`.
# The earlier re-assignment of empID2 in this cell was never used and has
# been dropped to avoid suggesting that marks3 is keyed by empID2.
course3 = np.random.randint(55, 100, 5)  # 5 random ints in [55, 100)
course4 = np.random.randint(50, 100, 5)  # 5 random ints in [50, 100)
marks3 = pd.DataFrame({'empID': empID, 'course3': course3, 'course4': course4})
print(marks3)

  empID  course3  course4
0   E01       89       86
1   E02       72       93
2   E03       76       69
3   E04       65       79
4   E05       74       57


## Join

### Simple (default: inner join on the common column)

In [35]:
# merge() defaults to an inner join on the columns both frames share.
# Here the only shared column is empID, so both defaults are spelled out.
employee.merge(marks1, how='inner', on='empID')

Unnamed: 0,empID,empName,gender,course1,course2
0,E01,Dhiraj,M,71,55.0
1,E02,Kunal,M,75,66.0
2,E04,Poonam,F,55,
3,E05,Upen,M,84,77.0


### Left Join

In [43]:
# Left join: keep every row of employee (the left frame).
print(employee.merge(marks1, how='left', on='empID'))
# E03 has no row in marks1, so both of its course columns are NaN.
# E04 does have a row; only its course2 mark is missing (NaN in marks1).

  empID empName gender  course1  course2
0   E01  Dhiraj      M     71.0     55.0
1   E02   Kunal      M     75.0     66.0
2   E03  Sushma      F      NaN      NaN
3   E04  Poonam      F     55.0      NaN
4   E05    Upen      M     84.0     77.0


### Left Join with Indicator

In [44]:
# Left join again, keeping every row of employee. indicator=True appends a
# _merge column recording where each key was found: 'both' when it is in
# the two frames, 'left_only' when it is in employee alone.
print(employee.merge(marks1, on='empID', how='left', indicator=True))

  empID empName gender  course1  course2     _merge
0   E01  Dhiraj      M     71.0     55.0       both
1   E02   Kunal      M     75.0     66.0       both
2   E03  Sushma      F      NaN      NaN  left_only
3   E04  Poonam      F     55.0      NaN       both
4   E05    Upen      M     84.0     77.0       both


### Right Join 

In [38]:
# Right join: keep every row of marks1 (the right frame).
# E07 exists only in marks1, so its empName and gender come back empty.
employee.merge(marks1, on='empID', how='right')

Unnamed: 0,empID,empName,gender,course1,course2
0,E01,Dhiraj,M,71,55.0
1,E02,Kunal,M,75,66.0
2,E04,Poonam,F,55,
3,E05,Upen,M,84,77.0
4,E07,,,97,88.0


## Join with different column names

In [36]:
# A bare employee.merge(marks2) raises an error: the two frames share no
# column name to join on. Name the key column on each side instead.
employee.merge(
    marks2,
    how='inner',
    left_on='empID',
    right_on='ID',
)

Unnamed: 0,empID,empName,gender,ID,course1,course2
0,E01,Dhiraj,M,E01,71,55.0
1,E02,Kunal,M,E02,75,66.0
2,E04,Poonam,F,E04,55,
3,E05,Upen,M,E05,84,77.0


## Inner Join - Only Keys Common to Both Frames

In [None]:
# Inner join: only empIDs present in both employee and marks1 are kept.
print(employee.merge(marks1, on='empID', how='inner'))

## Outer Join - All Keys from Both Frames

In [45]:
# Outer join: every empID from either frame is kept; values missing on one
# side show up as NaN.
print(employee.merge(marks1, on='empID', how='outer'))

  empID empName gender  course1  course2
0   E01  Dhiraj      M     71.0     55.0
1   E02   Kunal      M     75.0     66.0
2   E03  Sushma      F      NaN      NaN
3   E04  Poonam      F     55.0      NaN
4   E05    Upen      M     84.0     77.0
5   E07     NaN    NaN     97.0     88.0


## Concatenate
- horizontal (side by side, adds columns): axis=1
- vertical (stacked, adds rows): axis=0

In [46]:
# Column-wise concat pairs rows by index label, not by empID, so the marks
# shown beside E03-E05 actually belong to E04, E05 and E07.
pd.concat(
    [employee, marks1],
    axis=1,        # 1 = horizontal: place the frames side by side
    join='inner',  # keep only index labels present in every frame
)

Unnamed: 0,empID,empName,gender,empID.1,course1,course2
0,E01,Dhiraj,M,E01,71,55.0
1,E02,Kunal,M,E02,75,66.0
2,E03,Sushma,F,E04,55,
3,E04,Poonam,F,E05,84,77.0
4,E05,Upen,M,E07,97,88.0


In [55]:
# Three frames side by side, paired by index label. marks3 is in the same
# empID order as employee, but marks1 is not, so only the course3/course4
# columns sit next to the right employee on every row.
pd.concat(
    [employee, marks1, marks3],
    axis=1,
    join='inner',
)

Unnamed: 0,empID,empName,gender,empID.1,course1,course2,empID.2,course3,course4
0,E01,Dhiraj,M,E01,71,55.0,E01,89,86
1,E02,Kunal,M,E02,75,66.0,E02,72,93
2,E03,Sushma,F,E04,55,,E03,76,69
3,E04,Poonam,F,E05,84,77.0,E04,65,79
4,E05,Upen,M,E07,97,88.0,E05,74,57


In [47]:
# join='outer' keeps the union of index labels. Both frames here use the
# labels 0-4, so the result has the same rows as the join='inner' version.
pd.concat(
    [employee, marks1],
    axis=1,
    join='outer',
)

Unnamed: 0,empID,empName,gender,empID.1,course1,course2
0,E01,Dhiraj,M,E01,71,55.0
1,E02,Kunal,M,E02,75,66.0
2,E03,Sushma,F,E04,55,
3,E04,Poonam,F,E05,84,77.0
4,E05,Upen,M,E07,97,88.0


In [58]:
# Column-wise concat of all four frames: the result gets wider, not longer.
# Rows are paired by index label with no regard to empID, so this is only
# meaningful when every frame lists the empIDs in the same order.
pd.concat(
    [employee, marks1, marks2, marks3],
    axis=1,
)

Unnamed: 0,empID,empName,gender,empID.1,course1,course2,ID,course1.1,course2.1,empID.2,course3,course4
0,E01,Dhiraj,M,E01,71,55.0,E01,71,55.0,E01,89,86
1,E02,Kunal,M,E02,75,66.0,E02,75,66.0,E02,72,93
2,E03,Sushma,F,E04,55,,E04,55,,E03,76,69
3,E04,Poonam,F,E05,84,77.0,E05,84,77.0,E04,65,79
4,E05,Upen,M,E07,97,88.0,E07,97,88.0,E05,74,57


In [60]:
# employee and marks3 list the empIDs in the same order, so pairing rows by
# index puts each employee next to their own marks. The result gains
# columns; the row count is unchanged.
pd.concat(
    [employee, marks3],
    axis=1,
)

Unnamed: 0,empID,empName,gender,empID.1,course3,course4
0,E01,Dhiraj,M,E01,89,86
1,E02,Kunal,M,E02,72,93
2,E03,Sushma,F,E03,76,69
3,E04,Poonam,F,E04,65,79
4,E05,Upen,M,E05,74,57


In [50]:
# Row-wise concat: employee2's rows are stacked below employee's.
# Each frame keeps its own index labels, so 0 and 1 appear twice.
pd.concat(
    [employee, employee2],
    axis=0,
)

Unnamed: 0,empID,empName,gender
0,E01,Dhiraj,M
1,E02,Kunal,M
2,E03,Sushma,F
3,E04,Poonam,F
4,E05,Upen,M
0,E11,Tanvi,F
1,E12,Kanika,F
