In [9]:
import pandas as pd

# Walkthrough: build two small tables, join them, then derive calendar
# features from the order dates.


def _show(title, table):
    """Print a section heading followed by the given pandas object."""
    print(title)
    print(table)


# --- STEP 1: Sample tables ---
# Customer lookup table, keyed by 'ID'.
customers = pd.DataFrame({
    'ID': [101, 102, 103],
    'Name': ['Alice', 'Bob', 'Charlie'],
    'City': ['New York', 'Los Angeles', 'Chicago'],
})

# Order table. Cust_ID 104 deliberately has no matching customer row,
# so the join below has an unmatched key to demonstrate.
sales = pd.DataFrame({
    'Order_ID': [1, 2, 3, 4],
    'Cust_ID': [101, 102, 101, 104],
    'Amount': [250, 300, 150, 400],
    'Date_Str': ['2023-01-15', '2023-01-16', '2023-01-20', '2023-02-01'],
})

_show("--- Customers ---", customers)
_show("\n--- Sales ---", sales)

# --- STEP 2: Join customer details onto each order ---
# A left join keeps every row of 'sales'; orders whose Cust_ID is absent
# from 'customers' get NaN in the customer columns (and 'ID' becomes float).
full_data = sales.merge(
    customers,
    how='left',
    left_on='Cust_ID',
    right_on='ID',
)

_show("\n--- Merged Data (Notice NaN for ID 104) ---", full_data)

# --- STEP 3: Calendar features ---
# Parse the ISO date strings once and reuse the parsed series for every
# derived column.
order_dates = pd.to_datetime(full_data['Date_Str'])
full_data['Date'] = order_dates
full_data['Month'] = order_dates.dt.month_name()
full_data['Day'] = order_dates.dt.day_name()

report_columns = ['Date', 'Day', 'Amount', 'Name']
_show("\n--- Final Dataset with Time Features ---", full_data[report_columns])

# Bonus: count orders per weekday to see which day was busiest.
orders_per_day = full_data['Day'].value_counts()
_show("\n--- Busiest Days ---", orders_per_day)

--- Customers ---
    ID     Name         City
0  101    Alice     New York
1  102      Bob  Los Angeles
2  103  Charlie      Chicago

--- Sales ---
   Order_ID  Cust_ID  Amount    Date_Str
0         1      101     250  2023-01-15
1         2      102     300  2023-01-16
2         3      101     150  2023-01-20
3         4      104     400  2023-02-01

--- Merged Data (Notice NaN for ID 104) ---
   Order_ID  Cust_ID  Amount    Date_Str     ID   Name         City
0         1      101     250  2023-01-15  101.0  Alice     New York
1         2      102     300  2023-01-16  102.0    Bob  Los Angeles
2         3      101     150  2023-01-20  101.0  Alice     New York
3         4      104     400  2023-02-01    NaN    NaN          NaN

--- Final Dataset with Time Features ---
        Date        Day  Amount   Name
0 2023-01-15     Sunday     250  Alice
1 2023-01-16     Monday     300    Bob
2 2023-01-20     Friday     150  Alice
3 2023-02-01  Wednesday     400    NaN

--- Busiest Days ---
Da