# Hard 2 — Mini‑Project: Sales ETL + Report

**Goal:** Ingest CSV → clean → feature engineering → KPI report → chart + exports.

In [None]:

import pandas as pd
import matplotlib.pyplot as plt

# 1) Ingest
# Load the raw sales file; "date" is parsed to datetime when every value is valid.
df = pd.read_csv(r"/mnt/data/pandas_fundamentals_challenges/hard2_mini_project/sales.csv", parse_dates=["date"])

# 2) Clean (basic)
# read_csv leaves "date" as a plain object column if any value fails to parse,
# which would make the .dt accessor below raise. Coerce explicitly so that
# unparseable dates become NaT and are removed by dropna() with the other
# incomplete rows. This is a no-op when the column is already datetime.
df["date"] = pd.to_datetime(df["date"], errors="coerce")
df = df.dropna()  # drop every row that has a missing value in any column
df = df[df["units"] > 0]  # keep only rows with a positive unit count

# 3) Feature engineering
# Revenue = gross amount reduced by the discount.
# NOTE(review): assumes discount_rate is a fraction (e.g. 0.15), not a percentage — confirm.
df["revenue"] = (df["units"] * df["unit_price"]) * (1 - df["discount_rate"])
# Calendar-month label as a string, used as the grouping key for the monthly KPIs.
df["month"] = df["date"].dt.to_period("M").astype(str)

# 4) KPIs
# Named aggregations: output column -> (source column, reducer).
monthly_spec = {
    "total_units": ("units", "sum"),
    "total_revenue": ("revenue", "sum"),
    "avg_discount": ("discount_rate", "mean"),
}
regional_spec = {
    "total_units": ("units", "sum"),
    "total_revenue": ("revenue", "sum"),
}

# One row per month / per region, with the grouping key kept as a regular column.
kpi_month = df.groupby("month", as_index=False).agg(**monthly_spec)
kpi_region = df.groupby("region", as_index=False).agg(**regional_spec)

# Console preview: first rows of the monthly table, full regional table.
monthly_preview = kpi_month.head()
print("Monthly KPIs:\n", monthly_preview)
print("\nRegional KPIs:\n", kpi_region)

# 5) Chart (monthly revenue)
# Order the months in place so the bars (and the later export) follow the month labels.
kpi_month.sort_values("month", inplace=True)

# Draw one bar per month and label the resulting axes directly.
ax = kpi_month.plot.bar(x="month", y="total_revenue", legend=False)
ax.set_title("Monthly Revenue")
ax.set_xlabel("Month")
ax.set_ylabel("Revenue")

plt.tight_layout()
plt.show()

# 6) Exports
# Destination files for the cleaned rows and the two KPI tables.
clean_path = r"/mnt/data/pandas_fundamentals_challenges/hard2_mini_project/sales_cleaned.csv"
kpi_month_path = r"/mnt/data/pandas_fundamentals_challenges/hard2_mini_project/kpi_month.csv"
kpi_region_path = r"/mnt/data/pandas_fundamentals_challenges/hard2_mini_project/kpi_region.csv"

# Write each frame to its CSV without the index column.
for frame, target in (
    (df, clean_path),
    (kpi_month, kpi_month_path),
    (kpi_region, kpi_region_path),
):
    frame.to_csv(target, index=False)

print("Saved:", clean_path, kpi_month_path, kpi_region_path)



**Notes:**
- This pipeline is intentionally simple and uses only core pandas: ingest → clean → transform → aggregate → visualize → export.
- Extend it with parameterization (e.g., CLI args), richer quality checks, Parquet IO, and a dashboard layer.
