# dim_account_gld

In [0]:
# Catalog and schema that prefix the fully qualified table names built in the cells below.
CATALOG = "workspace"
SCHEMA  = "ai_bi_reporting"

In [0]:
import pyspark.sql.functions as F

# Fully qualified names of the silver source and the gold account dimension.
acct_slv = f"{CATALOG}.{SCHEMA}.anon_bst_acct_slv"
dim_acct = f"{CATALOG}.{SCHEMA}.dim_account_gld"

# Whitelist of curated account attributes (extend as needed).
curated_account_cols = {
    "Id",
    "Name",
    "Type",
    "AccountSource",
    "FY_Revenue_Target",
    "CVA_NPS",
    "CVA_CSAT",
}

accounts_slv = spark.table(acct_slv)

# Keep only whitelisted columns that actually exist, preserving source column order.
keep = [name for name in accounts_slv.columns if name in curated_account_cols]

# Optional surrogate key: SHA-256 hex digest of the account Id rendered as a string.
account_sk_expr = F.sha2(F.col("Id").cast("string"), 256)

dfa = accounts_slv.select(*keep).withColumn("account_sk", account_sk_expr)

# Overwrite the gold table (data and schema) on every run.
account_writer = (
    dfa.write
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .option("delta.columnMapping.mode", "name")
)
account_writer.saveAsTable(dim_acct)

# dim_service_gld

In [0]:
# Fully qualified names of the silver source and the gold service dimension.
cust_slv = f"{CATALOG}.{SCHEMA}.anon_cust_intelligence_opportunity_slv"
dim_srv = f"{CATALOG}.{SCHEMA}.dim_service_gld"

# The service grain is the distinct (Service Family, Service Line) pair.
service_grain_cols = ["Service Family", "Service Line"]
dfs = spark.table(cust_slv).select(*service_grain_cols).dropDuplicates()

# Surrogate key for the service grain.
# concat_ws silently skips NULL inputs, so (NULL, "X") and ("X", NULL) would
# both concatenate to "X" and collide on the same hash. Substituting a sentinel
# for NULL keeps every distinct pair distinct; rows without NULLs hash exactly
# as they did before.
null_token = "__NULL__"
service_key_parts = [
    F.coalesce(F.col(c).cast("string"), F.lit(null_token))
    for c in service_grain_cols
]
dfs = dfs.withColumn(
    "service_sk",
    F.sha2(F.concat_ws("||", *service_key_parts), 256),
)

# Overwrite the gold table (data and schema) on every run.
(dfs.write
    .option("delta.columnMapping.mode","name")
    .option("overwriteSchema","true")
    .mode("overwrite")
    .saveAsTable(dim_srv))


# dim_date_gld

In [0]:
from pyspark.sql import Row
import datetime as dt

dim_date = f"{CATALOG}.{SCHEMA}.dim_date_gld"

# Build a small date dimension around your Opp close dates
opp_slv = f"{CATALOG}.{SCHEMA}.anon_bst_opp_slv"
dfo = spark.table(opp_slv)

# Compute both bounds in a single aggregation, normalising CloseDate with
# to_date first. The driver then always receives datetime.date values (or None),
# whether the column is stored as a timestamp, a date or a string. Calling
# .date() on the collected value only worked for timestamps. This also matches
# how the fact table derives close_date.
close_day = F.to_date(F.col("CloseDate"))
bounds = dfo.agg(
    F.min(close_day).alias("min_dt"),
    F.max(close_day).alias("max_dt"),
).first()
min_dt, max_dt = bounds["min_dt"], bounds["max_dt"]

if min_dt is None or max_dt is None:
    # Fallback window if missing (empty table or no usable close dates)
    min_dt = dt.date(2015,1,1)
    max_dt = dt.date(2030,12,31)

# One row per calendar day in the inclusive range [min_dt, max_dt].
day_count = (max_dt - min_dt).days + 1
calendar_days = (min_dt + dt.timedelta(days=offset) for offset in range(day_count))
dates = [
    Row(
        date=day,
        year=day.year,
        month=day.month,
        day=day.day,
        yyyymm=int(f"{day.year}{day.month:02d}")
    )
    for day in calendar_days
]

dfd = spark.createDataFrame(dates)

# Overwrite the gold table (data and schema) on every run.
(dfd.write
    .option("delta.columnMapping.mode","name")
    .option("overwriteSchema","true")
    .mode("overwrite")
    .saveAsTable(dim_date))


# fact_opportunity_gld

In [0]:
# Fully qualified names: silver opportunities, gold account dimension, gold fact target.
opp_slv  = f"{CATALOG}.{SCHEMA}.anon_bst_opp_slv"
dim_acct = f"{CATALOG}.{SCHEMA}.dim_account_gld"
fact_opp = f"{CATALOG}.{SCHEMA}.fact_opportunity_gld"

dfo = spark.table(opp_slv)
dfa = spark.table(dim_acct).select("Id","account_sk")

# Lookup used for the join. The dimension's natural key is aliased to a name
# that cannot clash with any opportunity column. DataFrame.drop("Id") matches
# by name using Spark's (by default case-insensitive) resolver, so it would
# also remove the opportunity's own "ID" column from the fact.
acct_lookup = dfa.select(F.col("Id").alias("_dim_account_id"), "account_sk")

# Choose a single amount priority: first non-null of the three ACV columns.
amount_expr = F.coalesce(
    F.col("Amount_ACV_USD"),
    F.col("Amount_ACV"),
    F.col("Amount_ACV_Static")
).alias("amount_acv_usd")

# Win flag: 1 when the lower-cased stage is "closed won" or "won", else 0
# (a NULL stage falls through to 0).
is_won = F.when(F.lower(F.col("StageName")).isin("closed won","won"), F.lit(1)).otherwise(F.lit(0)).alias("is_won")

# Build fact: descriptive columns and measures are kept only if present in the source.
keep_raw = [c for c in dfo.columns if c in {
    "ID","AccountID","StageName","CloseDate","LastModifiedDate","Reason_Code",
    "Reporting_Org","Roadmap_Indicator","Record_Type_API_Name"
}]

measure_cols = [c for c in ["CX_Ops_Overall_ACV","CX_Ops_Overall_ECR","ECR_Converted"] if c in dfo.columns]

fact = (dfo
    .select(*keep_raw, *[F.col(c) for c in measure_cols], amount_expr, is_won)
    # Left join so opportunities without a matching account are retained
    # (their account_sk is NULL).
    .join(acct_lookup, dfo["AccountID"]==acct_lookup["_dim_account_id"], "left")
    .withColumn("close_date", F.to_date("CloseDate"))
    .withColumn("close_month", F.date_format("close_date","yyyy-MM"))
    .drop("_dim_account_id")  # dimension join key only; the opportunity "ID" stays
)

# Overwrite the gold table (data and schema) on every run.
(fact.write
    .option("delta.columnMapping.mode","name")
    .option("overwriteSchema","true")
    .mode("overwrite")
    .saveAsTable(fact_opp))


# agg_opportunity_monthly_gld (example mart)

In [0]:
# Fully qualified names of the gold fact source and the monthly aggregate target.
fact_opp = f"{CATALOG}.{SCHEMA}.fact_opportunity_gld"
agg_tbl  = f"{CATALOG}.{SCHEMA}.agg_opportunity_monthly_gld"

f = spark.table(fact_opp)

# The fact build only carries CX_Ops_Overall_ECR when the source table has it.
# If it is absent, average a typed NULL so this cell still runs and the mart
# keeps a stable avg_ecr column (all NULL) instead of failing analysis.
ecr_col = "CX_Ops_Overall_ECR"
ecr_input = F.col(ecr_col) if ecr_col in f.columns else F.lit(None).cast("double")

agg = (f
  .groupBy("close_month","Reporting_Org")
  .agg(
      F.sum("amount_acv_usd").alias("sum_acv_usd"),
      F.avg(ecr_input).alias("avg_ecr"),
      # Share of won rows among all rows in the group (every stage counts in the denominator).
      (F.sum("is_won")/F.count(F.lit(1))).alias("win_rate")
  ))

# Overwrite the gold table (data and schema) on every run.
(agg.write
   .option("delta.columnMapping.mode","name")
   .option("overwriteSchema","true")
   .mode("overwrite")
   .saveAsTable(agg_tbl))
