In [0]:
# Feito em SQL por questão de tempo para entrega, posteriomente sendo possível migrar para Spark

_sql = """
WITH

sigla AS (
     SELECT order_id, 
          CASE WHEN delivery_diff_promissed <=  0 AND delivery_diff_promissed >=  -5 THEN 'B | BOM'
               WHEN delivery_diff_promissed <= -6 AND delivery_diff_promissed >= -12 THEN 'O | OTIMO'
               WHEN delivery_diff_promissed < -12                                    THEN 'E | EXCELENTE'
               WHEN delivery_diff_promissed >=  1 AND delivery_diff_promissed <=  3  THEN 'T | TOLERAVEL'
               WHEN delivery_diff_promissed >=  4 AND delivery_diff_promissed <=  7  THEN 'R | RUIM'
               WHEN delivery_diff_promissed >=  8 AND delivery_diff_promissed <= 14  THEN 'G | GRAVE'
               WHEN delivery_diff_promissed >= 15 AND delivery_diff_promissed <= 28  THEN 'P | PESSIMO'
               WHEN delivery_diff_promissed >  28                                    THEN 'U | URGENTE'
               END AS sigla 
     FROM datum.silver.olist_orders_datediff
),

sigla_agrupada AS (
     SELECT DISTINCT oi.seller_id, s.sigla, 
          COUNT(s.sigla) OVER(PARTITION BY oi.seller_id, s.sigla ORDER BY oi.seller_id) AS n_pedidos,
          ROUND(n_pedidos /
          COUNT(seller_id) OVER(PARTITION BY oi.seller_id ORDER BY oi.seller_id) *100, 2) AS proporcional
     FROM datum.silver.olist_order_items AS oi
          INNER JOIN sigla AS s
               ON oi.order_id = s.order_id
     ORDER BY proporcional DESC
),

sigla_final AS (
  SELECT seller_id, COLLECT_LIST(CONCAT(sigla, '(', proporcional, '%)')) AS resumo, SUM(n_pedidos) AS n_pedidos
  FROM sigla_agrupada
  GROUP BY seller_id
),

orders AS (
  SELECT oi.order_id, oi.seller_id
  FROM datum.silver.olist_order_items as oi
),

sellers_total AS (
  SELECT o.seller_id, SUM(p.payment_value) AS total
  FROM orders AS o
    INNER JOIN datum.silver.olist_payments AS p
      ON o.order_id = p.order_id
  WHERE p.payment_value IS NOT NULL
  GROUP BY o.seller_id
),

sellers AS (
  SELECT DISTINCT seller_id, seller_city, seller_state
  FROM datum.silver.olist_sellers
)

SELECT s.seller_id                                           AS id_vendedor, 
       s.seller_city                                         AS cidade, 
       s.seller_state                                        AS estado, 
       total                                                 AS total, 
       CAST(n_pedidos AS INTEGER)                            AS pedidos, 
       COALESCE(CAST(sf.resumo AS STRING), 'Nao localizado') AS resumo_entregas,
       CAST(NOW() AS DATE)                                   AS date_ref_carga
FROM sellers AS s
  INNER JOIN sellers_total AS st
    ON s.seller_id = st.seller_id
  LEFT JOIN sigla_final AS sf
    ON s.seller_id = sf.seller_id
ORDER BY total DESC, sf.n_pedidos DESC
"""
df_vendedores = spark.sql(_sql)

In [0]:
df_vendedores.limit(10).display()

id_vendedor,cidade,estado,total,pedidos,resumo_entregas,date_ref_carga
7c67e1448b00f6e969d365cea6b010ab,itaquaquecetuba,SP,507166.91,1355,"[E | EXCELENTE(50.41%), O | OTIMO(29.89%), B | BOM(10.85%), T | TOLERAVEL(3.25%), G | GRAVE(2.36%), R | RUIM(1.55%), P | PESSIMO(1.11%), U | URGENTE(0.59%)]",2024-04-14
1025f0e2d44d7041d6cf58b6550e0bfa,sao paulo,SP,308222.04,1420,"[E | EXCELENTE(45.63%), O | OTIMO(33.1%), B | BOM(14.58%), T | TOLERAVEL(2.82%), R | RUIM(2.46%), G | GRAVE(0.77%), U | URGENTE(0.35%), P | PESSIMO(0.28%)]",2024-04-14
4a3ca9315b744ce9f8e9374361493884,ibitinga,SP,301245.27,1949,"[E | EXCELENTE(40.43%), O | OTIMO(38.89%), B | BOM(10.98%), G | GRAVE(3.03%), R | RUIM(2.41%), T | TOLERAVEL(2.05%), P | PESSIMO(1.69%), U | URGENTE(0.51%)]",2024-04-14
1f50f920176fa81dab994f9023523100,sao jose do rio preto,SP,290253.42,1926,"[E | EXCELENTE(46.88%), O | OTIMO(32.5%), B | BOM(12.82%), R | RUIM(2.34%), T | TOLERAVEL(2.13%), P | PESSIMO(1.77%), G | GRAVE(1.25%), U | URGENTE(0.31%)]",2024-04-14
53243585a1d6dc2643021fd1853d8905,lauro de freitas,BA,284903.08,400,"[E | EXCELENTE(46.5%), O | OTIMO(40.25%), B | BOM(10.25%), R | RUIM(1.25%), G | GRAVE(0.75%), T | TOLERAVEL(0.5%), P | PESSIMO(0.25%), U | URGENTE(0.25%)]",2024-04-14
da8622b14eb17ae2831f4ac5b9dab84a,piracicaba,SP,272219.32,1548,"[E | EXCELENTE(47.93%), O | OTIMO(35.21%), B | BOM(10.47%), T | TOLERAVEL(2.07%), R | RUIM(1.74%), G | GRAVE(1.29%), P | PESSIMO(0.65%), U | URGENTE(0.65%)]",2024-04-14
4869f7a5dfa277a7dca6462dcf3b52b2,guariba,SP,264166.12,1148,"[E | EXCELENTE(50.35%), O | OTIMO(28.66%), B | BOM(10.45%), R | RUIM(3.4%), G | GRAVE(2.35%), T | TOLERAVEL(2.35%), P | PESSIMO(2.26%), U | URGENTE(0.17%)]",2024-04-14
955fee9216a65b617aa5c0531780ce60,sao paulo,SP,236322.3,1472,"[E | EXCELENTE(40.96%), O | OTIMO(39.61%), B | BOM(12.98%), R | RUIM(2.99%), T | TOLERAVEL(1.56%), G | GRAVE(1.29%), P | PESSIMO(0.41%), U | URGENTE(0.2%)]",2024-04-14
fa1c13f2614d7b5c4749cbc52fecda94,sumare,SP,206513.23,579,"[E | EXCELENTE(43.87%), O | OTIMO(35.92%), B | BOM(11.05%), P | PESSIMO(2.59%), T | TOLERAVEL(2.25%), R | RUIM(2.07%), G | GRAVE(1.9%), U | URGENTE(0.35%)]",2024-04-14
7e93a43ef30c4f03f38b393420bc753a,barueri,SP,185134.21,322,"[E | EXCELENTE(54.97%), O | OTIMO(34.16%), B | BOM(6.21%), R | RUIM(1.55%), T | TOLERAVEL(1.24%), G | GRAVE(0.93%), P | PESSIMO(0.62%), U | URGENTE(0.31%)]",2024-04-14


In [0]:
%sql

USE CATALOG datum;

USE DATABASE gold

In [0]:
%sql

CREATE TABLE IF NOT EXISTS olist_desempenho_vendedores
(
 id_vendedor STRING NOT NULL,
 cidade STRING,
 estado STRING,
 total DECIMAL(20,4),
 pedidos INTEGER,
 resumo_entregas STRING,
 date_ref_carga DATE
)
USING DELTA
LOCATION 'abfss://unity-datum@datumunity.dfs.core.windows.net/gold/olist_desempenho_vendedores'
PARTITIONED BY(date_ref_carga)

In [0]:
if df_vendedores.count() != 0 or df_vendedores is not None:
    df_vendedores.write.format('delta').mode('overwrite').save('abfss://unity-datum@datumunity.dfs.core.windows.net/gold/olist_desempenho_vendedores')
del df_vendedores