## Top states (Z targets)

In [0]:
from pyspark.sql import Window
from pyspark.sql.functions import avg, col, desc, lag, sum, when

# Gold layer: rank states by total GST collection and bucket them into
# High / Medium / Low target priority.

# Tunable parameters, named so the business rules are visible in one place.
N_FISCAL_YEARS = 6              # divisor for `annual_avg`; assumes the silver table spans 6 fiscal years — TODO confirm
HIGH_PRIORITY_MIN = 500000      # total_6years strictly above this -> 'High'
MEDIUM_PRIORITY_MIN = 200000    # total_6years strictly above this (and not High) -> 'Medium'
STATE_RANKINGS_TABLE = 'msme_risk_analytics.gold_gst_state_rankings'

gold_top_states = (
    spark.table('msme_risk_analytics.silver_gst_unified')
    # Explicit cast so the aggregates are computed on doubles; values that
    # cannot be parsed become null and are ignored by sum/avg.
    .withColumn('fy_total', col('fy_total').cast('double'))
    .groupBy('state_name')
    .agg(
        sum('fy_total').alias('total_6years'),
        # Mean over the rows actually present for the state.
        avg('fy_total').alias('avg_annual'),
    )
    # Total divided by a fixed year count. Equals `avg_annual` only when a
    # state has exactly N_FISCAL_YEARS non-null rows; both columns are kept
    # so the table schema stays unchanged.
    .withColumn('annual_avg', col('total_6years') / N_FISCAL_YEARS)
    .withColumn(
        'target_priority',
        when(col('total_6years') > HIGH_PRIORITY_MIN, 'High')
        .when(col('total_6years') > MEDIUM_PRIORITY_MIN, 'Medium')
        .otherwise('Low'),
    )
    .orderBy(desc('total_6years'))
)

gold_top_states.write.format('delta').mode('overwrite') \
    .saveAsTable(STATE_RANKINGS_TABLE)

print("✓ Created: gold_gst_state_rankings")

# Preview what was actually persisted instead of re-running the aggregation;
# the explicit sort is needed because reading a table back has no row order.
spark.table(STATE_RANKINGS_TABLE) \
    .orderBy(desc('total_6years')) \
    .show(15, truncate=False)

✓ Created: gold_gst_state_rankings
+--------------+------------------+------------------+------------------+---------------+
|state_name    |total_6years      |avg_annual        |annual_avg        |target_priority|
+--------------+------------------+------------------+------------------+---------------+
|Maharashtra   |1585953.3351862999|264325.5558643833 |264325.5558643833 |High           |
|Karnataka     |714531.3013516001 |119088.55022526668|119088.55022526668|High           |
|Gujarat       |641467.7523677    |106911.29206128333|106911.29206128333|High           |
|Tamil Nadu    |603920.8391540999 |100653.47319235   |100653.47319235   |High           |
|Haryana       |515584.0841295999 |85930.68068826666 |85930.68068826666 |High           |
|Uttar Pradesh |513877.8611465    |85646.31019108334 |85646.31019108334 |High           |
|Delhi         |338693.90664380003|56448.984440633336|56448.984440633336|Medium         |
|West Bengal   |322739.0685554    |53789.84475923333 |53789.84475

## YoY growth

In [0]:
# Gold layer: total GST collection per fiscal year plus year-over-year growth.
# Requires `Window` and `lag` from the notebook's import cell.

# Global ordering by fiscal year. No partitionBy is used; after the groupBy
# there is only one row per fiscal year, so the window covers a small frame.
# Assumes `fiscal_year` values sort chronologically — TODO confirm format.
fiscal_year_window = Window.orderBy('fiscal_year')

gold_yoy = (
    spark.table('msme_risk_analytics.silver_gst_unified')
    # Same explicit cast as the state-rankings cell so both gold tables
    # aggregate `fy_total` as double.
    .withColumn('fy_total', col('fy_total').cast('double'))
    .groupBy('fiscal_year')
    .agg(sum('fy_total').alias('total_collection'))
    # Previous row's total; null for the earliest fiscal year.
    # NOTE(review): treats adjacent rows as consecutive years — a gap in the
    # data would make the growth span more than one year.
    .withColumn('prev_year_collection',
                lag('total_collection').over(fiscal_year_window))
    # Percentage change vs. the previous year. Left null when there is no
    # previous year or it is not positive, which also avoids dividing by zero.
    .withColumn(
        'yoy_growth_pct',
        when(
            col('prev_year_collection') > 0,
            (col('total_collection') - col('prev_year_collection'))
            / col('prev_year_collection') * 100,
        ),
    )
    .orderBy('fiscal_year')
)

# overwriteSchema lets the two added columns replace an existing table that
# was written with only (fiscal_year, total_collection).
gold_yoy.write.format('delta').mode('overwrite') \
    .option('overwriteSchema', 'true') \
    .saveAsTable('msme_risk_analytics.gold_gst_yoy_growth')

print("✓ Created: gold_gst_yoy_growth")

✓ Created: gold_gst_yoy_growth
