In [1]:
import numpy as np
import pandas as pd

### Merging 2 Dataframes

In [2]:
# Employee master data, one record per row: (employee_id, name, department).
employees = pd.DataFrame(
    [
        (1, 'Jhon', 'HR'),
        (2, 'Anna', 'IT'),
        (3, 'Peter', 'Finance'),
        (4, 'Linda', 'IT'),
        (5, 'Bob', 'HR'),
    ],
    columns=['employee_id', 'name', 'department'],
)

# Pay data, one record per row: (employee_id, salary, bonus).
# Ids 6 and 7 do not appear in `employees`, and ids 4 and 5 have no pay record here.
salaries = pd.DataFrame(
    [
        (1, 60000, 5000),
        (2, 80000, 10000),
        (3, 65000, 7000),
        (6, 70000, 8000),
        (7, 90000, 12000),
    ],
    columns=['employee_id', 'salary', 'bonus'],
)

In [3]:
employees

Unnamed: 0,employee_id,name,department
0,1,Jhon,HR
1,2,Anna,IT
2,3,Peter,Finance
3,4,Linda,IT
4,5,Bob,HR


In [4]:
salaries

Unnamed: 0,employee_id,salary,bonus
0,1,60000,5000
1,2,80000,10000
2,3,65000,7000
3,6,70000,8000
4,7,90000,12000


In [5]:
# Match rows on the shared `employee_id` column; no `how` is given, so the default join type applies.
employees.merge(salaries, on='employee_id')

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,Jhon,HR,60000,5000
1,2,Anna,IT,80000,10000
2,3,Peter,Finance,65000,7000


In [6]:
# Spelling out how='inner' (the default): only ids found in both frames are kept.
employees.merge(salaries, how='inner', on='employee_id')

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,Jhon,HR,60000,5000
1,2,Anna,IT,80000,10000
2,3,Peter,Finance,65000,7000


In [7]:
# Outer join: keep every employee_id from either frame; cells with no match are left missing.
employees.merge(salaries, how='outer', on='employee_id')

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,Jhon,HR,60000.0,5000.0
1,2,Anna,IT,80000.0,10000.0
2,3,Peter,Finance,65000.0,7000.0
3,4,Linda,IT,,
4,5,Bob,HR,,
5,6,,,70000.0,8000.0
6,7,,,90000.0,12000.0


In [8]:
# Left join: keep every row of `employees`; salary/bonus are missing where no pay record matches.
employees.merge(salaries, how='left', on='employee_id')

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,Jhon,HR,60000.0,5000.0
1,2,Anna,IT,80000.0,10000.0
2,3,Peter,Finance,65000.0,7000.0
3,4,Linda,IT,,
4,5,Bob,HR,,


In [9]:
# Right join: keep every row of `salaries`; name/department are missing where no employee matches.
employees.merge(salaries, how='right', on='employee_id')

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,Jhon,HR,60000,5000
1,2,Anna,IT,80000,10000
2,3,Peter,Finance,65000,7000
3,6,,,70000,8000
4,7,,,90000,12000


### Concatenation of 2 Dataframes

In [10]:
# Two frames with the same columns A, B, C. Each value is the column letter followed by a
# running number: 0-2 in df1, 3-5 in df2.
df1 = pd.DataFrame({col: [f'{col}{n}' for n in range(3)] for col in 'ABC'})

df2 = pd.DataFrame({col: [f'{col}{n}' for n in range(3, 6)] for col in 'ABC'})

In [11]:
# Stack df2's rows under df1's (axis=0 is the default); each frame keeps its own index labels.
pd.concat([df1, df2], axis=0)

Unnamed: 0,A,B,C
0,A0,B0,C0
1,A1,B1,C1
2,A2,B2,C2
0,A3,B3,C3
1,A4,B4,C4
2,A5,B5,C5


In [12]:
# Place the frames side by side, df2's columns first, aligning rows on the index.
pd.concat([df2, df1], axis='columns')

Unnamed: 0,A,B,C,A.1,B.1,C.1
0,A3,B3,C3,A0,B0,C0
1,A4,B4,C4,A1,B1,C1
2,A5,B5,C5,A2,B2,C2


### Joining 2 Dataframes

In [13]:
# Single-column frames with explicit integer index labels.
# The labels overlap only on 2 and 3, which is what the joins below are matched on.
df1 = pd.DataFrame(['Alice', 'Bob', 'Charlie'], index=[1, 2, 3], columns=['name'])

df2 = pd.DataFrame([85, 90, 75], index=[2, 3, 4], columns=['score'])

In [14]:
# Index-based join; how='left' is the default, so every row of df1 is kept.
df1.join(df2, how='left')

Unnamed: 0,name,score
1,Alice,
2,Bob,85.0
3,Charlie,90.0


In [15]:
# Outer join on the index: keep the labels from both frames (1 through 4).
df1.join(other=df2, how='outer')

Unnamed: 0,name,score
1,Alice,
2,Bob,85.0
3,Charlie,90.0
4,,75.0
