# In [None]:
# Ques_7.ipynb — Advanced Consistency Check with Hierarchical Data

import pandas as pd

def distinct_ids(frame, key="order_id"):
    """Return the distinct, non-null values of ``frame[key]`` as a set.

    Missing values are dropped first: NaN never compares equal to itself, so
    ``set(series)`` can keep one entry per missing row, which inflates counts
    and prevents missing keys from cancelling out in a set difference.

    Args:
        frame: DataFrame that contains the ``key`` column.
        key: Name of the column holding the identifier.

    Returns:
        A ``set`` of the non-null identifiers found in ``frame[key]``.

    Raises:
        KeyError: If ``frame`` has no ``key`` column.
    """
    return set(frame[key].dropna())


def select_inconsistent_rows(frame, unmatched_ids, key="order_id"):
    """Return the rows of ``frame`` that cannot be linked to the other table.

    A row is inconsistent when its ``key`` is one of ``unmatched_ids`` or is
    missing altogether (a row without an identifier cannot match anything).

    Args:
        frame: DataFrame that contains the ``key`` column.
        unmatched_ids: Identifiers present in ``frame`` but absent from the
            other table.
        key: Name of the column holding the identifier.

    Returns:
        The matching subset of ``frame`` with its original index and columns.

    Raises:
        KeyError: If ``frame`` has no ``key`` column.
    """
    keys = frame[key]
    return frame[keys.isna() | keys.isin(unmatched_ids)]


# The guard is true both in a notebook and when the file is run as a script;
# the names assigned below therefore remain available at module level.
if __name__ == "__main__":
    # Load the datasets
    orders_df = pd.read_csv("orders.csv")
    order_items_df = pd.read_csv("order_items.csv")

    # Display sample data (optional)
    print("Orders Sample:\n", orders_df.head())
    print("\nOrder Items Sample:\n", order_items_df.head())

    # Extract the distinct, non-null order_ids from both datasets
    order_ids_in_orders = distinct_ids(orders_df)
    order_ids_in_items = distinct_ids(order_items_df)

    # Identify ids that appear on one side only
    orders_with_no_items = order_ids_in_orders - order_ids_in_items
    items_with_no_orders = order_ids_in_items - order_ids_in_orders

    # Create DataFrames for detailed inspection
    missing_items_df = select_inconsistent_rows(orders_df, orders_with_no_items)
    orphan_items_df = select_inconsistent_rows(order_items_df, items_with_no_orders)

    # Report results. The mismatch figures are row counts so that they agree
    # with the tables printed below and with the CSV files that get saved.
    print(f"\nTotal Orders: {len(order_ids_in_orders)}")
    print(f"Orders with No Items: {len(missing_items_df)}")
    print(f"Order Items with No Matching Order: {len(orphan_items_df)}")

    # Show mismatches
    print("\nOrders with No Items:\n", missing_items_df)
    print("\nOrder Items with No Matching Orders:\n", orphan_items_df)

    # Optional: Save to CSV
    missing_items_df.to_csv("orders_with_no_items.csv", index=False)
    orphan_items_df.to_csv("orphan_order_items.csv", index=False)