In [1]:
import numpy as np
import pandas as pd

In [3]:
# Employee master table, written one record per row:
# (employee_id, name, department). Ids 4 and 5 have no salary record.
employees = pd.DataFrame(
    [
        (1, 'john', 'HR'),
        (2, 'anna', 'IT'),
        (3, 'peter', 'Finance'),
        (4, 'linda', 'IT'),
        (5, 'bob', 'HR'),
    ],
    columns=['employee_id', 'name', 'department'],
)

# Salary table, one record per row: (employee_id, salary, bonus).
# Ids 6 and 7 do not exist in `employees`.
salaries = pd.DataFrame(
    [
        (1, 60000, 5000),
        (2, 80000, 10000),
        (3, 65000, 7000),
        (6, 70000, 8000),
        (7, 90000, 12000),
    ],
    columns=['employee_id', 'salary', 'bonus'],
)

In [4]:
# Preview the employees table (the left-hand input of the merge examples).
employees

Unnamed: 0,employee_id,name,department
0,1,john,HR
1,2,anna,IT
2,3,peter,Finance
3,4,linda,IT
4,5,bob,HR


In [5]:
# Preview the salaries table (the right-hand input of the merge examples).
salaries

Unnamed: 0,employee_id,salary,bonus
0,1,60000,5000
1,2,80000,10000
2,3,65000,7000
3,6,70000,8000
4,7,90000,12000


### Merging DataFrames

In [6]:
# Inner merge on the shared key `employee_id`: only ids that appear in BOTH
# frames survive (here 1, 2 and 3), so no missing values are introduced.
employees.merge(salaries, on="employee_id", how="inner")

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,john,HR,60000,5000
1,2,anna,IT,80000,10000
2,3,peter,Finance,65000,7000


In [7]:
# Outer merge: keep every employee_id found in EITHER frame. Cells with no
# counterpart on the other side are filled with NaN, which is also why the
# integer salary/bonus columns show up as floats in the result.
employees.merge(salaries, on="employee_id", how="outer")

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,john,HR,60000.0,5000.0
1,2,anna,IT,80000.0,10000.0
2,3,peter,Finance,65000.0,7000.0
3,4,linda,IT,,
4,5,bob,HR,,
5,6,,,70000.0,8000.0
6,7,,,90000.0,12000.0


In [8]:
# Left merge: every row of the left frame (employees) is kept; salary and
# bonus are NaN for employees without a matching row in salaries (ids 4, 5).
employees.merge(salaries, on="employee_id", how="left")

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,john,HR,60000.0,5000.0
1,2,anna,IT,80000.0,10000.0
2,3,peter,Finance,65000.0,7000.0
3,4,linda,IT,,
4,5,bob,HR,,


In [9]:
# Right merge: every row of the right frame (salaries) is kept; name and
# department are NaN for ids that are absent from employees (ids 6, 7).
employees.merge(salaries, on="employee_id", how="right")

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,john,HR,60000,5000
1,2,anna,IT,80000,10000
2,3,peter,Finance,65000,7000
3,6,,,70000,8000
4,7,,,90000,12000


### Concatenation of 2 DataFrames

In [11]:
# Two frames with identical columns A, B, C. Each cell is labelled
# "<column><row number>": df1 holds rows 0-2, df2 holds rows 3-5.
df1 = pd.DataFrame({col: [f'{col}{i}' for i in range(0, 3)] for col in 'ABC'})

df2 = pd.DataFrame({col: [f'{col}{i}' for i in range(3, 6)] for col in 'ABC'})

In [12]:
# Preview df1, the first input of the concatenation examples.
df1

Unnamed: 0,A,B,C
0,A0,B0,C0
1,A1,B1,C1
2,A2,B2,C2


In [13]:
# Preview df2, the second input of the concatenation examples.
df2

Unnamed: 0,A,B,C
0,A3,B3,C3
1,A4,B4,C4
2,A5,B5,C5


In [17]:
# Row-wise concatenation (axis=0, the default): df2's rows are appended below
# df1's and columns are matched by name. Each frame keeps its own index
# labels, hence the repeated 0, 1, 2 in the output.
pd.concat([df1, df2], axis=0)

Unnamed: 0,A,B,C
0,A0,B0,C0
1,A1,B1,C1
2,A2,B2,C2
0,A3,B3,C3
1,A4,B4,C4
2,A5,B5,C5


In [18]:
# Same row-wise stacking with the inputs swapped: the order of the list
# determines the order of the rows in the result.
pd.concat([df2, df1], axis=0)

Unnamed: 0,A,B,C
0,A3,B3,C3
1,A4,B4,C4
2,A5,B5,C5
0,A0,B0,C0
1,A1,B1,C1
2,A2,B2,C2


In [19]:
# Column-wise concatenation (axis=1): the frames are placed side by side,
# with rows aligned on their index labels.
pd.concat([df1, df2], axis="columns")

Unnamed: 0,A,B,C,A.1,B.1,C.1
0,A0,B0,C0,A3,B3,C3
1,A1,B1,C1,A4,B4,C4
2,A2,B2,C2,A5,B5,C5


### Joining 2 DataFrames

In [20]:
# Single-column frames with partially overlapping index labels:
# df1 is indexed 1-3 and df2 is indexed 2-4, so only labels 2 and 3 are shared.
df1 = pd.Series(['Alice', 'Bob', 'Charlie'], index=[1, 2, 3], name='name').to_frame()

df2 = pd.Series([85, 90, 75], index=[2, 3, 4], name='score').to_frame()

In [21]:
# Preview df1 (names, index labels 1-3) before joining.
df1

Unnamed: 0,name
1,Alice
2,Bob
3,Charlie


In [22]:
# Preview df2 (scores, index labels 2-4) before joining.
df2

Unnamed: 0,score
2,85
3,90
4,75


In [23]:
# join() aligns on the index. Inner join keeps only the index labels present
# in both frames (2 and 3).
df1.join(df2, how="inner")

Unnamed: 0,name,score
2,Bob,85
3,Charlie,90


In [24]:
# Outer join keeps the union of both indexes (1-4); a name or score with no
# counterpart in the other frame becomes NaN.
df1.join(df2, how="outer")

Unnamed: 0,name,score
1,Alice,
2,Bob,85.0
3,Charlie,90.0
4,,75.0


In [25]:
# join() defaults to how="left"; it is spelled out here. Every index label of
# the calling frame (df2) is kept, and name is NaN for label 4, which df1 lacks.
df2.join(df1, how="left")

Unnamed: 0,score,name
2,85,Bob
3,90,Charlie
4,75,
