In [0]:
from pyspark.sql import functions as F
from pyspark.sql import Window

In [0]:
# Source of truth for this notebook: the classified-conversations table.
CLASSIFIED_TABLE = "gold.conversations_classified"
df_classified = spark.table(CLASSIFIED_TABLE)

In [0]:
# Category distribution by country:
# one row per (country, top_category) pair with the number of source rows in it.
category_counts = df_classified.groupBy("country", "top_category").agg(
    F.count("*").alias("count")
)

# Within each country, list the most frequent categories first.
df_category_by_country = category_counts.orderBy(
    F.col("country"),
    F.col("count").desc(),
)

display(df_category_by_country)

In [0]:
# Find top category per country.
#
# row_number() only yields a reproducible "rank 1" row when the window ordering
# is total. Ordering by count alone leaves ties unresolved, so two categories
# with the same count in a country could swap places between evaluations (this
# DataFrame is evaluated once for display and again when it is written).
# The category name is used as a tie-breaker; nulls sort last so that a real
# category wins a tie against a missing one.
window_country = Window.partitionBy("country").orderBy(
    F.desc("count"),
    F.asc_nulls_last("top_category"),
)

df_top_category_by_country = (
    df_category_by_country
        .withColumn("rank", F.row_number().over(window_country))
        .filter(F.col("rank") == 1)
        # Output columns: country, top_category, top_category_count.
        .select(
            "country",
            "top_category",
            F.col("count").alias("top_category_count"),
        )
        # Largest winners first; country breaks ties so the display order is stable.
        .orderBy(F.desc("top_category_count"), F.asc("country"))
)

display(df_top_category_by_country)

In [0]:
# Persist both dashboard datasets, replacing whatever each table held before.
dashboard_outputs = [
    (df_category_by_country, "gold.dashboard_category_by_country"),
    (df_top_category_by_country, "gold.dashboard_top_category_by_country"),
]

for dashboard_df, target_table in dashboard_outputs:
    dashboard_df.write.mode("overwrite").saveAsTable(target_table)