In [5]:
import pandas as pd

# merge() — SQL-style joins (inner, left, right, outer)
    merge() combines two DataFrames on one or more key columns, like a SQL join. The how=
    parameter selects the join type: inner (the default), left, right, or outer.
        (1) Inner Join:
         - Returns only the rows with keys that are present in both DataFrames.
        (2) Left Join:
         - Returns all rows from the left DataFrame and the matched rows from the
         right DataFrame.
         - Unmatched rows will have NaN for columns from the right DataFrame.
        (3) Right Join:
         - Returns all rows from the right DataFrame and the matched rows from the left DataFrame.
         - Unmatched rows will have NaN for columns from the left DataFrame.
        (4) Outer Join:
         - Returns all rows from both DataFrames, pairing them up wherever the keys match.
         - Unmatched rows will have NaN for columns from the other DataFrame.

# Difference between merge() and join()
    - merge() is column-based and more flexible.
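    - It behaves like SQL joins: pick the key columns with on=, or with left_on= / right_on= when
     the key is named differently in each DataFrame. It handles one-to-one, many-to-one and
     many-to-many relationships.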
    - join() is a DataFrame method that matches on the index by default and performs a left join
     unless how= says otherwise.
    - It is short and convenient when your keys are already indexes.
    - df1.merge(df2, left_index=True, right_index=True, how='left') gives the same result as
    df1.join(df2): join() defaults to a left join, while merge() defaults to an inner join.
    - merge() has more options like validate=, indicator=, and fine control over suffixes.

In [6]:
# Example data, written one record per line so each row can be read across.
# customers: one row per customer, keyed by CustomerID.
customers = pd.DataFrame(
    [
        (1, 'Alice', 'New York'),
        (2, 'Bob', 'Los Angeles'),
        (3, 'Charlie', 'Chicago'),
        (4, 'Diana', 'Houston'),
    ],
    columns=['CustomerID', 'Name', 'City'],
)

# orders: one row per order; CustomerID links each order back to customers.
orders = pd.DataFrame(
    [
        (101, 1, 'Laptop', 1200),
        (102, 2, 'Phone', 800),
        (103, 2, 'Tablet', 450),
        (104, 5, 'Monitor', 300),   # note CustomerID 5 does not exist in customers
        (105, 3, 'Mouse', 50),
    ],
    columns=['OrderID', 'CustomerID', 'Product', 'Amount'],
)

In [7]:
# Inner join: keep only the CustomerID values found in BOTH frames.
# Order 104 (CustomerID=5) disappears because no such customer exists, and
# Diana (CustomerID=4) disappears because she has placed no orders.
orders.merge(customers, how='inner', on='CustomerID')

Unnamed: 0,OrderID,CustomerID,Product,Amount,Name,City
0,101,1,Laptop,1200,Alice,New York
1,102,2,Phone,800,Bob,Los Angeles
2,103,2,Tablet,450,Bob,Los Angeles
3,105,3,Mouse,50,Charlie,Chicago


In [8]:
# Left join: every row of the left frame (orders) survives; customer columns
# are filled in wherever a matching CustomerID exists.
# Order 104 is kept, but its Name and City are NaN because CustomerID=5 is not
# in customers.
orders.merge(customers, how='left', on='CustomerID')

Unnamed: 0,OrderID,CustomerID,Product,Amount,Name,City
0,101,1,Laptop,1200,Alice,New York
1,102,2,Phone,800,Bob,Los Angeles
2,103,2,Tablet,450,Bob,Los Angeles
3,104,5,Monitor,300,,
4,105,3,Mouse,50,Charlie,Chicago


In [9]:
# Right join: every row of the right frame (customers) survives; order columns
# are filled in wherever a matching CustomerID exists.
# Diana has no orders, so her OrderID, Product and Amount are NaN. Order 104
# (CustomerID=5) is dropped because it matches no customer.
# OrderID and Amount display as floats because the integer columns now hold NaN.
orders.merge(customers, how='right', on='CustomerID')

Unnamed: 0,OrderID,CustomerID,Product,Amount,Name,City
0,101.0,1,Laptop,1200.0,Alice,New York
1,102.0,2,Phone,800.0,Bob,Los Angeles
2,103.0,2,Tablet,450.0,Bob,Los Angeles
3,105.0,3,Mouse,50.0,Charlie,Chicago
4,,4,,,Diana,Houston


In [10]:
# Outer join (all rows from both sides)
pd.merge(orders, customers, on='CustomerID', how='outer')
# Explanation: includes both the unmatched order (CustomerID 5) and unmatched customer (CustomerID 4).

Unnamed: 0,OrderID,CustomerID,Product,Amount,Name,City
0,101.0,1,Laptop,1200.0,Alice,New York
1,102.0,2,Phone,800.0,Bob,Los Angeles
2,103.0,2,Tablet,450.0,Bob,Los Angeles
3,105.0,3,Mouse,50.0,Charlie,Chicago
4,,4,,,Diana,Houston
5,104.0,5,Monitor,300.0,,
