In [1]:
import pandas as pd

# join() — index-based joining
    join():
        - is a DataFrame method that aligns on the index by default.
        - It is convenient when both datasets use the index as the key.
        - Under the hood, df1.join(df2) is equivalent to
          pd.merge(df1, df2, left_index=True, right_index=True, how='left')
          (join() defaults to a left join, whereas merge() defaults to an inner join).

In [2]:
# Customer master data, one row per customer.
customers = pd.DataFrame(
    [
        (1, 'Alice', 'New York'),
        (2, 'Bob', 'Los Angeles'),
        (3, 'Charlie', 'Chicago'),
        (4, 'Diana', 'Houston'),
    ],
    columns=['CustomerID', 'Name', 'City'],
)

# join() aligns on the index, so promote CustomerID from a column to the index.
df_customers = customers.set_index('CustomerID')

# Balances keyed directly by customer id: ids 1 and 4 have no balance,
# and id 5 has no matching row in df_customers.
balances = pd.Series([150, 80, 200], index=[2, 3, 5], name='Balance').to_frame()

In [3]:
# Left join, which is what join() does when `how` is not given.
# Every row of df_customers is kept. Customers 1 and 4 have no entry in
# balances, so their Balance is NaN; index 5 exists only in balances and
# is therefore not included.
df_customers.join(balances, how='left')

Unnamed: 0_level_0,Name,City,Balance
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Alice,New York,
2,Bob,Los Angeles,150.0
3,Charlie,Chicago,80.0
4,Diana,Houston,


In [4]:
# Outer join: keep the union of both indexes.
# Index 5 exists only in balances, so its Name and City are NaN;
# customers 1 and 4 exist only in df_customers, so their Balance is NaN.
df_customers.join(other=balances, how='outer')

Unnamed: 0,Name,City,Balance
1,Alice,New York,
2,Bob,Los Angeles,150.0
3,Charlie,Chicago,80.0
4,Diana,Houston,
5,,,200.0


In [5]:


# 1. Customers: one row per customer, indexed by CustomerID.
customers_data = dict(
    CustomerID=[101, 102, 103, 104, 105, 106],
    CustomerName=[
        'Aarav Patel', 'Priya Sharma', 'Rohan Mehta',
        'Sneha Desai', 'Vikram Singh', 'Anjali Gupta',
    ],
    Country=['India', 'USA', 'India', 'UK', 'USA', 'Canada'],
)
customers_df = pd.DataFrame(data=customers_data).set_index(keys='CustomerID')

# 2. Orders: one row per order, indexed by CustomerID (not unique: 101 has
#    two orders). Customers 104 and 106 have no orders.
orders_data = dict(
    OrderID=['ORD01', 'ORD02', 'ORD03', 'ORD04', 'ORD05'],
    CustomerID=[101, 102, 101, 103, 105],
    OrderDate=pd.to_datetime(
        ['2025-05-15', '2025-05-18', '2025-06-01', '2025-06-05', '2025-06-10']
    ),
    Amount=[5000, 8500, 3200, 12000, 7800],
)
orders_df = pd.DataFrame(data=orders_data).set_index(keys='CustomerID')

# 3. Sales targets: one row per customer with a target, indexed by CustomerID.
#    Customers 103 and 106 have no target.
sales_targets_data = dict(
    CustomerID=[101, 102, 104, 105],
    Year=[2025] * 4,
    TargetAmount=[50000, 75000, 60000, 80000],
)
sales_targets_df = pd.DataFrame(data=sales_targets_data).set_index(keys='CustomerID')


## Practice Questions
    Easy: Join all customer information with their order details.

    Medium: Show information for only those customers who have placed an order.

    Medium: Create a consolidated list of all customers and their sales targets, showing all records even if there isn't a match.

In [17]:
# Hard: Create a single DataFrame where the customer's name, country, their order amount, and their sales target are all in the same row.
final_result = (
    # Left join keeps every customer, including those without orders.
    customers_df.join(orders_df, how='left')
    # One row per customer: summing collapses repeat orders, and a customer
    # with no orders ends up with an Amount of 0.0.
    .groupby(['CustomerID', 'CustomerName', 'Country'])['Amount']
    .sum()
    # Move name and country back to columns; CustomerID stays as the index.
    .reset_index(level=['CustomerName', 'Country'])
    # Inner join: only customers that have a sales target remain.
    .join(sales_targets_df, how='inner')
)
print(final_result.loc[:, ['CustomerName', 'Country', 'Amount', 'TargetAmount']])

            CustomerName Country  Amount  TargetAmount
CustomerID                                            
101          Aarav Patel   India  8200.0         50000
102         Priya Sharma     USA  8500.0         75000
104          Sneha Desai      UK     0.0         60000
105         Vikram Singh     USA  7800.0         80000


In [37]:
# Hard: Find customers who have placed an order but for whom no sales target is set.
final_result = (
    # Inner join keeps only customers that appear in orders_df.
    customers_df.join(orders_df, how='inner')
    # Collapse repeat orders (customer 101 has two) into one row per customer.
    .groupby(['CustomerID', 'CustomerName'])['Amount'].sum().reset_index().set_index('CustomerID')
    # Left join: customers without a target get NaN in TargetAmount.
    .join(sales_targets_df).reset_index()
    # Test for the missing value explicitly. The previous filter,
    # "not TargetAmount>0", only worked because NaN > 0 is False, and it
    # would also have flagged a customer whose target is 0 or negative.
    .loc[lambda joined: joined['TargetAmount'].isna()]
)
print(final_result['CustomerName'])

2    Rohan Mehta
Name: CustomerName, dtype: object


In [48]:
# Hard: For each country, find the sum of the total order amount and the total target amount.

# Order totals per country. The left join keeps customers without orders,
# so a country whose customers never ordered still appears, with a total of 0.0.
final_result1 = (
    customers_df
    .join(orders_df, how='left')
    .groupby('Country')['Amount']
    .sum()
    .to_frame()
)

# Target totals per country, computed from a separate join so that customers
# with several orders do not have their target counted more than once.
final_result2 = (
    customers_df
    .join(sales_targets_df, how='left')
    .groupby('Country')['TargetAmount']
    .sum()
)

# Both results are indexed by Country, so join() lines them up side by side.
final_result = final_result1.join(final_result2, how='left')
print(final_result)

          Amount  TargetAmount
Country                       
Canada       0.0           0.0
India    20200.0       50000.0
UK           0.0       60000.0
USA      16300.0      155000.0
