# Python Pandas Tutorial 5 - Concatenation and Merging

## Concat

In [45]:
import pandas as pd

In [47]:
# First sample frame for the concat examples: one row per person,
# with the same three columns (Name, Age, Income) as `two`.
one = pd.DataFrame(
    dict(
        Name=['Ram', 'Mohan', 'David'],
        Age=[27, 29, 30],
        Income=[70000, 90000, 61000],
    )
)

In [48]:
# Second sample frame for the concat examples; its columns match `one`
# so the two frames line up when stacked.
two = pd.DataFrame(
    dict(
        Name=['Rajesh', 'Ganesh', 'Suraj'],
        Age=[24, 39, 32],
        Income=[73000, 60000, 71000],
    )
)

In [49]:
# Stack `two` underneath `one` (row-wise, the default axis).
# Each frame keeps its own row labels, so 0..2 appears twice in the index.
df1 = pd.concat([one, two], axis='index')
df1

Unnamed: 0,Name,Age,Income
0,Ram,27,70000
1,Mohan,29,90000
2,David,30,61000
0,Rajesh,24,73000
1,Ganesh,39,60000
2,Suraj,32,71000


In [50]:
# Same row-wise stack, but drop the original row labels and
# renumber the combined rows 0..5.
df2 = pd.concat([one, two], axis='index', ignore_index=True)
df2

Unnamed: 0,Name,Age,Income
0,Ram,27,70000
1,Mohan,29,90000
2,David,30,61000
3,Rajesh,24,73000
4,Ganesh,39,60000
5,Suraj,32,71000


In [51]:
# Put the frames side by side (column-wise). With ignore_index=True the
# labels along the concat axis are discarded, so the columns become 0..5.
df3 = pd.concat([one, two], axis='columns', ignore_index=True)
df3

Unnamed: 0,0,1,2,3,4,5
0,Ram,27,70000,Rajesh,24,73000
1,Mohan,29,90000,Ganesh,39,60000
2,David,30,61000,Suraj,32,71000


In [52]:
# Side-by-side concat that keeps the original column names, so
# Name, Age and Income each appear twice in the result.
df3 = pd.concat([one, two], axis='columns')
df3

Unnamed: 0,Name,Age,Income,Name.1,Age.1,Income.1
0,Ram,27,70000,Rajesh,24,73000
1,Mohan,29,90000,Ganesh,39,60000
2,David,30,61000,Suraj,32,71000


In [53]:
# Row-wise stack with an outer index level: rows from `one` are tagged 'X'
# and rows from `two` are tagged 'Y', giving a two-level row index.
df3 = pd.concat([one, two], axis='index', keys=['X', 'Y'])
df3

Unnamed: 0,Unnamed: 1,Name,Age,Income
X,0,Ram,27,70000
X,1,Mohan,29,90000
X,2,David,30,61000
Y,0,Rajesh,24,73000
Y,1,Ganesh,39,60000
Y,2,Suraj,32,71000


In [54]:
# Pick out the rows filed under the outer key 'X' (the rows that came from `one`).
df3.loc['X']

Unnamed: 0,Name,Age,Income
0,Ram,27,70000
1,Mohan,29,90000
2,David,30,61000


In [55]:
# Pick out the rows filed under the outer key 'Y' (the rows that came from `two`).
df3.loc['Y']

Unnamed: 0,Name,Age,Income
0,Rajesh,24,73000
1,Ganesh,39,60000
2,Suraj,32,71000


## Merge

In [34]:
# Left-hand frame for the merge examples. `id` and `subject_id` are the
# columns used as join keys in the cells that follow.
one = pd.DataFrame(
    dict(
        id=[1, 2, 3],
        Name=['Ram', 'Mohan', 'David'],
        Income=[70000, 90000, 61000],
        subject_id=['sub1', 'sub2', 'sub4'],
    )
)

In [36]:
# Right-hand frame for the merge examples. It shares ids 1..3 with `one`,
# but its third subject_id ('sub5') has no counterpart there.
two = pd.DataFrame(
    dict(
        id=[1, 2, 3],
        Name=['Rajesh', 'Mohan', 'Suraj'],
        Income=[73000, 60000, 71000],
        subject_id=['sub1', 'sub2', 'sub5'],
    )
)

In [38]:
# Join the two frames on `id` (inner join by default). The remaining columns
# exist in both frames, so they come back suffixed _x (left) and _y (right).
df1 = one.merge(two, on='id')
df1

Unnamed: 0,id,Name_x,Income_x,subject_id_x,Name_y,Income_y,subject_id_y
0,1,Ram,70000,sub1,Rajesh,73000,sub1
1,2,Mohan,90000,sub2,Mohan,60000,sub2
2,3,David,61000,sub4,Suraj,71000,sub5


In [41]:
# Join on the pair (id, subject_id): a row survives only when both keys match,
# which drops id 3 because its subject_id differs between the frames.
df2 = one.merge(two, on=['id', 'subject_id'])
df2

Unnamed: 0,id,Name_x,Income_x,subject_id,Name_y,Income_y
0,1,Ram,70000,sub1,Rajesh,73000
1,2,Mohan,90000,sub2,Mohan,60000


In [43]:
# Left join on subject_id: every row of `one` is kept; where `two` has no
# matching subject_id ('sub4'), the right-hand columns are filled with NaN.
df3 = one.merge(two, how='left', on=['subject_id'])
df3

Unnamed: 0,id_x,Name_x,Income_x,subject_id,id_y,Name_y,Income_y
0,1,Ram,70000,sub1,1.0,Rajesh,73000.0
1,2,Mohan,90000,sub2,2.0,Mohan,60000.0
2,3,David,61000,sub4,,,


In [44]:
# Right join on subject_id: every row of `two` is kept; where `one` has no
# matching subject_id ('sub5'), the left-hand columns are filled with NaN.
df4 = one.merge(two, how='right', on=['subject_id'])
df4

Unnamed: 0,id_x,Name_x,Income_x,subject_id,id_y,Name_y,Income_y
0,1.0,Ram,70000.0,sub1,1,Rajesh,73000
1,2.0,Mohan,90000.0,sub2,2,Mohan,60000
2,,,,sub5,3,Suraj,71000
