In [None]:
import pandas as pd

# =====================================================
# 1. LOAD CLEAN DATA (AFTER PART 1)
# =====================================================

# Read the three input tables produced by the earlier cleaning step.
sales_items = pd.read_csv("/Users/sovanpradhan/Desktop/HCLHackathon/prod_store_sales_line_items.csv")
promotions = pd.read_csv("/Users/sovanpradhan/Desktop/HCLHackathon/promotion_details.csv")
products = pd.read_csv("/Users/sovanpradhan/Desktop/HCLHackathon/products.csv")

# Drop every line item whose is_valid flag is not exactly True
# (rows with a missing flag are dropped as well).
sales_items = sales_items.loc[sales_items["is_valid"].eq(True)]

# =====================================================
# 2. JOIN WITH PRODUCT & PROMOTION DATA
# =====================================================

# Enrich each line item with its product category, then with every column of
# the promotions table. Both joins are left joins, so line items without a
# matching product or promotion are kept and get NaN in the joined columns.
# NOTE(review): a duplicated product_id / promotion_id in the lookup tables
# would duplicate line items here -- confirm the keys are unique.
sales_items = (
    sales_items
    .merge(products[["product_id", "category"]], how="left", on="product_id")
    .merge(promotions, how="left", on="promotion_id")
)

# =====================================================
# 3. SPLIT PROMOTED VS NON-PROMOTED SALES
# =====================================================

# A line item counts as promoted when it carries a promotion_id; everything
# else is the non-promoted baseline.
promoted_sales = sales_items[sales_items["promotion_id"].notna()]
baseline_sales = sales_items[sales_items["promotion_id"].isna()]

# =====================================================
# 4. CALCULATE METRICS
# =====================================================

promo_keys = ["promotion_id", "promotion_name"]

# Units and revenue sold under each promotion.
promo_metrics = promoted_sales.groupby(promo_keys).agg(
    promoted_units=("quantity", "sum"),
    promoted_revenue=("line_item_amount", "sum"),
).reset_index()

# Non-promoted units and revenue per category, plus the average units per
# line item, which is the baseline rate used for the lift calculation below.
baseline_metrics = baseline_sales.groupby("category").agg(
    baseline_units=("quantity", "sum"),
    baseline_revenue=("line_item_amount", "sum"),
    baseline_avg_units=("quantity", "mean"),
).reset_index()

# =====================================================
# 5. CALCULATE SALES LIFT %
# =====================================================


def _lift_pct_by_promotion(
    promoted: pd.DataFrame,
    baseline_avg_units: pd.Series,
    keys: list,
) -> pd.Series:
    """Return the sales lift % of each promotion against its category baseline.

    For every promoted line item the expected quantity is the average
    non-promoted quantity per line item in the same category. Per promotion:

        lift % = (actual units - expected units) / expected units * 100

    Line items with no quantity, or whose category has no baseline average,
    are left out of both sums so they cannot skew the ratio. A promotion
    whose expected units are missing or not positive gets NaN.

    NOTE(review): the baseline is normalised per line item because this
    script uses no date/period column -- confirm this matches the intended
    definition of "lift".

    Args:
        promoted: Promoted line items; must contain ``keys``, ``category``
            and ``quantity``.
        baseline_avg_units: Average baseline quantity per line item,
            indexed by category.
        keys: Column names that identify a promotion.

    Returns:
        Series named ``sales_lift_percentage`` indexed by ``keys``.
    """
    expected = promoted["category"].map(baseline_avg_units)
    usable = expected.notna() & promoted["quantity"].notna()

    totals = (
        promoted[keys + ["quantity"]]
        .assign(expected=expected)[usable]
        .groupby(keys)[["quantity", "expected"]]
        .sum()
    )

    # Lift is undefined without a positive baseline; mask those to NaN
    # instead of dividing by zero.
    denominator = totals["expected"].where(totals["expected"] > 0)
    lift = (totals["quantity"] - denominator) / denominator * 100
    return lift.rename("sales_lift_percentage")


# The previous formula (promoted_units / total promoted units) measured each
# promotion's share of promoted volume and never used the baseline.
promo_metrics = promo_metrics.merge(
    _lift_pct_by_promotion(
        promoted_sales,
        baseline_metrics.set_index("category")["baseline_avg_units"],
        promo_keys,
    ).reset_index(),
    on=promo_keys,
    how="left",
)

# =====================================================
# 6. TOP 3 MOST EFFECTIVE PROMOTIONS
# =====================================================

# Rank promotions by sales_lift_percentage, highest first, and keep the top 3.
# Promotions with a missing value sort to the end.
top_promotions = promo_metrics.sort_values(
    by="sales_lift_percentage",
    ascending=False,
    na_position="last",
).head(3)

# The banner emoji was mojibake (UTF-8 bytes decoded as cp1252); restored.
print("\n🔥 TOP 3 MOST EFFECTIVE PROMOTIONS 🔥\n")
print(top_promotions)
