# Merging and Joining
---
Data are often split across multiple tables or files. Pandas lets you combine them, much like joins in SQL.

In [2]:
import pandas as pd

In [3]:
# Employee table, one tuple per row. DeptID is the key later matched against
# the departments table; DeptID 30 has no counterpart there.
employees = pd.DataFrame(
    [
        (1, "Alice", 10),
        (2, "Bob", 20),
        (3, "Charlie", 30),
    ],
    columns=["EmpID", "Name", "DeptID"],
)


In [5]:
# Render the employees table so the left-hand join input is visible.
employees

Unnamed: 0,EmpID,Name,DeptID
0,1,Alice,10
1,2,Bob,20
2,3,Charlie,30


In [6]:
# Department lookup table, one tuple per row. DeptID 40 is not referenced by
# any employee, and employee DeptID 30 is absent here.
departments = pd.DataFrame(
    [
        (10, "HR"),
        (20, "Engineering"),
        (40, "Marketing"),
    ],
    columns=["DeptID", "DeptName"],
)

In [8]:
# Render the departments table so the right-hand join input is visible.
departments

Unnamed: 0,DeptID,DeptName
0,10,HR
1,20,Engineering
2,40,Marketing


# Merge Like SQL
---
**Inner Join** - Returns only the rows whose Department IDs match in both tables

In [10]:
# Inner join on DeptID; "inner" is merge's default, spelled out here for clarity.
employees.merge(departments, on="DeptID", how="inner")

Unnamed: 0,EmpID,Name,DeptID,DeptName
0,1,Alice,10,HR
1,2,Bob,20,Engineering


**Left Join** - Keeps all the employees and fills NaN where there is no matching department

In [17]:
# Left join: every employee row is kept; DeptName is missing for DeptID 30.
employees.merge(departments, on="DeptID", how="left")

Unnamed: 0,EmpID,Name,DeptID,DeptName
0,1,Alice,10,HR
1,2,Bob,20,Engineering
2,3,Charlie,30,


**Right Join** - Keeps all the departments, even if there is no matching employee

In [19]:
# Right join: every department row is kept. EmpID is shown as a float
# (1.0, 2.0) because the unmatched Marketing row has no EmpID value.
employees.merge(departments, on="DeptID", how="right")

Unnamed: 0,EmpID,Name,DeptID,DeptName
0,1.0,Alice,10,HR
1,2.0,Bob,20,Engineering
2,,,40,Marketing


**Outer Join** - Includes all rows from both tables and fills missing values with NaN

In [21]:
# Full outer join: rows from both tables are kept, with gaps left missing
# (Charlie has no DeptName; Marketing has no EmpID or Name).
employees.merge(departments, on="DeptID", how="outer")

Unnamed: 0,EmpID,Name,DeptID,DeptName
0,1.0,Alice,10,HR
1,2.0,Bob,20,Engineering
2,3.0,Charlie,30,
3,,,40,Marketing


# Concatenating DataFrames 
---
Use pd.concat() to stack datasets either vertically or horizontally.

In [22]:
# First example frame for the concatenation section: one "Name" column, two rows.
df1 = pd.DataFrame(["Alice", "Bob"], columns=["Name"])
df1

Unnamed: 0,Name
0,Alice
1,Bob


In [24]:
# Second example frame for the concatenation section: same "Name" column, two more rows.
df2 = pd.DataFrame(["Charlie", "David"], columns=["Name"])
df2

Unnamed: 0,Name
0,Charlie
1,David
