In [1]:
# Concatenation: stacking two DataFrames together along an axis

In [2]:
import numpy as np
import pandas as pd

In [3]:
# Column-oriented sample data: two string columns ('A' and 'B'), four values each.
data_one = {
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
}

In [4]:
# Build the first frame from the column dictionary (keys become column labels).
one = pd.DataFrame(data=data_one)

In [5]:
# Show `one`: columns A and B, four rows labelled 0-3.
one

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
2,A2,B2
3,A3,B3


In [6]:
# Second frame: same number of rows as `one`, but with columns 'C' and 'D'.
two = pd.DataFrame(
    data={
        'C': ['C0', 'C1', 'C2', 'C3'],
        'D': ['D0', 'D1', 'D2', 'D3'],
    }
)

In [7]:
# Show `two`: columns C and D, four rows labelled 0-3.
two

Unnamed: 0,C,D
0,C0,D0
1,C1,D1
2,C2,D2
3,C3,D3


In [8]:
# Stack `two` underneath `one` along the row axis, keeping each frame's own
# row labels. The frames share no column names, so all four columns are kept,
# the cells with no source value come out empty, and labels 0-3 appear twice.
pd.concat(objs=[one, two], axis='index', ignore_index=False)

Unnamed: 0,A,B,C,D
0,A0,B0,,
1,A1,B1,,
2,A2,B2,,
3,A3,B3,,
0,,,C0,D0
1,,,C1,D1
2,,,C2,D2
3,,,C3,D3


In [9]:
# Assume both tables describe the same features, so their rows can be joined
# end to end. Start by inspecting the current column labels of `two`.
two.columns

Index(['C', 'D'], dtype='object')

In [10]:
# Give `two` the same column labels as `one` (matched by position) so the
# frames line up column-for-column when stacked.
two = two.set_axis(one.columns, axis='columns')

In [11]:
# `two` after relabelling: same values as before, now under columns A and B.
two

Unnamed: 0,A,B
0,C0,D0
1,C1,D1
2,C2,D2
3,C3,D3


In [12]:
# Stack the two frames (which now share columns A and B) row-wise.
# ignore_index=True discards the inputs' row labels and assigns a fresh
# 0..n-1 index, so the result never carries duplicate row labels.
mydf = pd.concat([one, two], axis=0, ignore_index=True)

In [13]:
# Replace the row labels with a clean 0..n-1 sequence; the old labels are
# dropped rather than kept as a column.
mydf = mydf.reset_index(drop=True)

In [14]:
# Combined frame: eight rows labelled 0-7, rows from `one` first, then `two`.
mydf

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
2,A2,B2
3,A3,B3
4,C0,D0
5,C1,D1
6,C2,D2
7,C3,D3


## merge

In [15]:
# Registration table: one row per registered user, with a numeric `reg_id`.
registerations = pd.DataFrame(
    data={
        'reg_id': [1, 2, 3, 4],
        'name': ['Gautam', 'Pankaj', 'Nikhil', 'Jatin'],
    }
)

In [16]:
# Login table: one row per login, with a numeric `login_id`. Only some of the
# names also appear in the registration table.
logins = pd.DataFrame(
    data={
        'login_id': [1, 2, 3, 4],
        'name': ['Aditya', 'Gautam', 'Vishal', 'Nikhil'],
    }
)

In [17]:
# Inner join on `name`: keep only the names present in both tables.
registerations.merge(logins, on='name', how='inner')

Unnamed: 0,reg_id,name,login_id
0,1,Gautam,2
1,3,Nikhil,4


In [18]:
# Left join on `name`: keep every registration row. Registrations with no
# matching login get a missing login_id, which is why that column is shown
# as floating point in the result.
registerations.merge(right=logins, on='name', how='left')

Unnamed: 0,reg_id,name,login_id
0,1,Gautam,2.0
1,2,Pankaj,
2,3,Nikhil,4.0
3,4,Jatin,


In [19]:
# Right join on `name`: keep every login row. Logins with no matching
# registration get a missing reg_id.
# The join key is passed explicitly instead of being inferred from whichever
# column names the two frames happen to share, so the result stays the same
# if the frames later gain another common column.
pd.merge(left=registerations, right=logins, how='right', on='name')

Unnamed: 0,reg_id,name,login_id
0,,Aditya,1
1,1.0,Gautam,2
2,,Vishal,3
3,3.0,Nikhil,4


In [20]:
# Outer join on `name`: keep every name from either table and leave the id
# from the other side missing where there is no match.
registerations.merge(logins, how='outer', on='name')

Unnamed: 0,reg_id,name,login_id
0,1.0,Gautam,2.0
1,2.0,Pankaj,
2,3.0,Nikhil,4.0
3,4.0,Jatin,
4,,Aditya,1.0
5,,Vishal,3.0


## merging based on index

In [21]:
# Move `name` out of the columns and into the row index so the next merge can
# match on the left frame's index.
registerations = registerations.set_index(keys='name')

In [22]:
# `name` is now the row index; `reg_id` is the only remaining column.
registerations

Unnamed: 0_level_0,reg_id
name,Unnamed: 1_level_1
Gautam,1
Pankaj,2
Nikhil,3
Jatin,4


In [23]:
# `logins` is unchanged: default integer index, with `login_id` and `name` as columns.
logins

Unnamed: 0,login_id,name
0,1,Aditya
1,2,Gautam
2,3,Vishal
3,4,Nikhil


In [24]:
# Match the left frame's row index (the names) against the right frame's
# `name` column.
#   left_index / right_index -> join on the row index
#   left_on / right_on       -> join on a column
pd.merge(
    left=registerations,
    right=logins,
    how='inner',
    left_index=True,
    right_on='name',
)

Unnamed: 0,reg_id,login_id,name
1,1,2,Gautam
3,3,4,Nikhil


In [25]:
# Merging tables whose key columns have different names.
# First turn the `name` index back into an ordinary column.
registerations = registerations.reset_index(drop=False)

In [27]:
# `name` is a regular column again, placed ahead of `reg_id`.
registerations

Unnamed: 0,name,reg_id
0,Gautam,1
1,Pankaj,2
2,Nikhil,3
3,Jatin,4


In [28]:
# Relabel the columns by position so the key column is called `reg_name`
# here while the login table keeps calling it `name`.
registerations = registerations.set_axis(['reg_name', 'reg_id'], axis='columns')

In [29]:
# The registration key column is now labelled `reg_name`.
registerations

Unnamed: 0,reg_name,reg_id
0,Gautam,1
1,Pankaj,2
2,Nikhil,3
3,Jatin,4


In [30]:
# `logins` still labels its key column `name`.
logins

Unnamed: 0,login_id,name
0,1,Aditya
1,2,Gautam
2,3,Vishal
3,4,Nikhil


In [32]:
# The key is called `reg_name` on the left and `name` on the right, so each
# side names its own key column. Both key columns are kept in the result.
results = registerations.merge(
    logins,
    how='inner',
    left_on='reg_name',
    right_on='name',
)

In [33]:
# Show the merge result without the redundant `reg_name` key column.
# The returned frame is only displayed; `results` itself is not reassigned.
results.drop(columns='reg_name')

Unnamed: 0,reg_id,login_id,name
0,1,2,Gautam
1,3,4,Nikhil


In [34]:
# Merging tables whose column names are all the same.
# Relabel both frames by position so each has a `name` and an `id` column.
registerations = registerations.set_axis(['name', 'id'], axis='columns')
logins = logins.set_axis(['id', 'name'], axis='columns')

In [35]:
# Registration table after relabelling: columns `name` and `id`.
registerations

Unnamed: 0,name,id
0,Gautam,1
1,Pankaj,2
2,Nikhil,3
3,Jatin,4


In [36]:
# Login table after relabelling: columns `id` and `name`.
logins

Unnamed: 0,id,name
0,1,Aditya
1,2,Gautam
2,3,Vishal
3,4,Nikhil


In [39]:
# Both frames have a non-key `id` column. pandas automatically separates the
# two by appending its default suffixes: '_x' for the left frame and '_y' for
# the right frame.
registerations.merge(logins, on='name', how='inner')

Unnamed: 0,name,id_x,id_y
0,Gautam,1,2
1,Nikhil,3,4


In [41]:
# Custom suffixes can be supplied in place of the defaults; the first applies
# to the left frame's clashing columns and the second to the right frame's.
registerations.merge(
    logins,
    on='name',
    how='inner',
    suffixes=('_reg', '_log'),
)

Unnamed: 0,name,id_reg,id_log
0,Gautam,1,2
1,Nikhil,3,4
