In [None]:
from pyspark.sql import SparkSession, functions as F

def test_cost_avoidance_median_min(spark: SparkSession):
    """Check the median/min based ``CAO`` and ``CAO_pct`` columns for one project.

    Three prices (100, 80, 90) are grouped under ProjectID ``P1``. The
    approximate median must be 90 and the minimum 80, so ``CAO``
    (median - min) is 10 and ``CAO_pct`` (CAO / median * 100) rounds to 11.11.
    """
    quotes = [
        ("P1,Before", "P1", 100.0),
        ("P1,Before", "P1", 80.0),
        ("P1,Before", "P1", 90.0),
    ]
    prices = spark.createDataFrame(quotes, "row string, ProjectID string, Price_USD double")

    # Per-project aggregates that the derived columns are built from.
    median_price = F.expr("percentile_approx(Price_USD,0.5)").alias("median")
    lowest_price = F.min("Price_USD").alias("min")
    per_project = prices.groupBy("ProjectID").agg(median_price, lowest_price)

    with_cao = per_project.withColumn("CAO", F.col("median") - F.col("min"))
    result = with_cao.withColumn("CAO_pct", (F.col("CAO") / F.col("median")) * 100)

    row = result.collect()[0]
    assert row["median"] == 90.0
    assert row["min"] == 80.0
    assert round(row["CAO"], 2) == 10.0
    assert round(row["CAO_pct"], 2) == 11.11

def test_cost_reduction_budget_vs_po(spark: SparkSession):
    """Check ``CR`` (RtB - PO) and ``CR_pct`` (CR / RtB * 100) for a single row.

    With RtB = 1000 and PO = 850 the expected values are 150 and 15.00.
    """
    budget_rows = [("P1", 1000.0, 850.0)]
    budgets = spark.createDataFrame(budget_rows, "ProjectID string, RtB double, PO double")

    # The same difference feeds both the absolute and the percentage column.
    reduction = F.col("RtB") - F.col("PO")
    reduction_pct = reduction / F.col("RtB") * 100
    result = budgets.select(
        "ProjectID",
        reduction.alias("CR"),
        reduction_pct.alias("CR_pct"),
    )

    row = result.first()
    assert row["CR"] == 150.0
    assert round(row["CR_pct"], 2) == 15.00

def test_anybut_first_round_vs_po(spark: SparkSession):
    """Check ``CR`` and ``CR_pct`` derived from per-PRQ minimum prices of two frames.

    The first frame holds prices 100 and 90 for PRQ ``X`` (first-round quotes,
    per the test name); the second holds a single price of 80 (the PO). The
    minimum of each frame is joined on ``PRQ``; ``CR`` is the difference of the
    two minima (90 - 80 = 10) and ``CR_pct`` is CR / first-frame minimum * 100,
    which rounds to 11.11.
    """
    price_schema = "PRQ string, Price_USD double"
    first_round = spark.createDataFrame([("X", 100.0), ("X", 90.0)], price_schema)
    purchase_order = spark.createDataFrame([("X", 80.0)], price_schema)

    # Use the module-level ``F`` alias (as the other tests do) instead of a
    # function-local re-import of ``min`` under a different name.
    first_round_min = first_round.groupBy("PRQ").agg(F.min("Price_USD").alias("r1_min"))
    purchase_order_min = purchase_order.groupBy("PRQ").agg(F.min("Price_USD").alias("po_min"))

    result = (
        first_round_min.join(purchase_order_min, "PRQ")
        .withColumn("CR", F.col("r1_min") - F.col("po_min"))
        .withColumn("CR_pct", (F.col("CR") / F.col("r1_min")) * 100)
    )

    row = result.first()
    # Separate asserts so a failure points at the column that regressed.
    assert row["CR"] == 10.0
    assert round(row["CR_pct"], 2) == 11.11