# rst_comercio_exterior
> ## Objetivo:
> #### Panorama macro, evolução temporal e principais recortes.
> ## Origem:
> `comercio_ext_estatisticas.tb_importacoes`
>
> `comercio_ext_estatisticas.tb_exportacoes`
>
> ## Colunas:
> ano, mes, uf, isic_divisao, tipo_operacao (EXP / IMP), vl_fob, kg_liquido

## Configurações
> #### **imports**
> #### **get files**
> #### **schema**

In [0]:
import org . apache . spark  . sql . functions . _
import org . apache . spark  . sql . types . _
import org . apache . spark  . sql . DataFrame
import org . apache . hadoop . fs  . FileSystem 
import org . apache . hadoop . fs  . Path 
import io  . delta  . tables . _
import scala . util . Try

In [0]:
// Storage locations used by this notebook. The four silver inputs share one landing
// root, so it is spelled out once and interpolated into each path.
val silverLandingRoot = "/mnt/silver/landingbeca2026jan"

// Trade statistics (fact tables).
val pathExportacoes  = s"$silverLandingRoot/comercio_ext_estatisticas/tb_exportacoes/"
val pathImportacoes  = s"$silverLandingRoot/comercio_ext_estatisticas/tb_importacoes/"

// Reference / lookup tables.
val pathNomeMercosul = s"$silverLandingRoot/comercio_ext_indices/tb_nomenclatura_mercosul/"
val pathRefISIC      = s"$silverLandingRoot/comercio_ext_indices/tb_referencia_ncm_isic/"

// Gold-layer location of the result table.
val target = "/mnt/gold/rst_comercio_exterior"

// Handle to the SparkContext behind the notebook's session.
val sc = spark.sparkContext

## Extração
> #### **spark.read**

In [0]:
// Loads a Delta table from the given storage path using the notebook's session.
def readDelta(path: String): DataFrame =
  spark.read.format("delta").load(path)

// Raw silver inputs: the two fact tables followed by the two lookup tables.
val dfExportacoes  = readDelta(pathExportacoes)
val dfImportacoes  = readDelta(pathImportacoes)
val dfNomeMercosul = readDelta(pathNomeMercosul)
val dfRefISIC      = readDelta(pathRefISIC)

## Transformações
> #### **select**
> #### **union**
> #### **groupby**
> #### **join**

In [0]:
// Export records projected onto the common layout used for the export/import union:
// period (year, month), state code, NCM product code, operation type, FOB value and
// net weight. Rows whose year or month does not cast to an integer are discarded.
//
// VL_FOB is cast to decimal(20,2) rather than float: a 32-bit float keeps only about
// 7 significant digits, so larger monetary values would be rounded before they are
// summed downstream.
val dfSelectExportacoes = dfExportacoes
  .select(
    col("CO_ANO").cast("int").alias("an_operacao"),
    col("CO_MES").cast("int").alias("me_operacao"),
    col("SG_UF_NCM").cast("string").alias("sg_unidade_federativa"),
    col("CO_NCM").cast("string").alias("cd_nomenclatura_mercosul"),
    lit("EXP").alias("tp_operacao"),
    col("VL_FOB").cast("decimal(20,2)").alias("vl_free_on_board"),
    col("KG_LIQUIDO").cast("decimal(20,3)").alias("qt_peso_liquido_kg")
  )
  .filter(col("an_operacao").isNotNull && col("me_operacao").isNotNull)

// Import records projected onto the common layout used for the export/import union:
// period (year, month), state code, NCM product code, operation type, FOB value and
// net weight. Rows whose year or month does not cast to an integer are discarded.
//
// Fixes relative to the previous version:
//  - tp_operacao is "IMP". It was "EXP", which made imports indistinguishable from
//    exports and folded both flows into the same aggregation groups.
//  - The ts_atualizacao column is no longer computed; it was never part of the
//    selected output.
//  - VL_FOB is cast to decimal(20,2) rather than float, which keeps only about
//    7 significant digits and would round larger monetary values.
val dfSelectImportacoes = dfImportacoes
  .select(
    col("CO_ANO").cast("int").alias("an_operacao"),
    col("CO_MES").cast("int").alias("me_operacao"),
    col("SG_UF_NCM").cast("string").alias("sg_unidade_federativa"),
    col("CO_NCM").cast("string").alias("cd_nomenclatura_mercosul"),
    lit("IMP").alias("tp_operacao"),
    col("VL_FOB").cast("decimal(20,2)").alias("vl_free_on_board"),
    col("KG_LIQUIDO").cast("decimal(20,3)").alias("qt_peso_liquido_kg")
  )
  .filter(col("an_operacao").isNotNull && col("me_operacao").isNotNull)

// NCM -> ISIC lookup: keeps only the product code and its ISIC class (both as
// strings) and drops entries that have no ISIC class.
val dfSelectNomeMercosul = {
  val ncmCode   = col("CO_NCM").cast("string").alias("cd_nomenclatura_mercosul")
  val isicClass = col("CO_ISIC_CLASSE").cast("string").alias("cd_classificacao_internacional")

  dfNomeMercosul
    .select(ncmCode, isicClass)
    .where(col("cd_classificacao_internacional").isNotNull)
}


In [0]:
// No SparkContext-level broadcast is issued here. `sc.broadcast` applied to a
// DataFrame serialises the Dataset object itself rather than its rows, and the
// returned Broadcast handle was discarded, so the call never influenced any join.
// To request a broadcast hash join, wrap the lookup DataFrame with
// `org.apache.spark.sql.functions.broadcast` at the point where it is joined.

In [0]:
// Row count of the export projection (this triggers a Spark job), echoed to the cell output.
val expCount: Long = dfSelectExportacoes.count()
println(expCount)

In [0]:
// Row count of the import projection (this triggers a Spark job), echoed to the cell output.
val impCount: Long = dfSelectImportacoes.count()
println(impCount)

In [0]:
// Stacks exports and imports into one dataset, matching columns by name instead of
// by position; a column present on only one side is filled with nulls on the other.
val dfUnified = dfSelectExportacoes.unionByName(
  dfSelectImportacoes,
  allowMissingColumns = true
)

In [0]:
// Enriches every trade record with its ISIC class through the NCM product code.
// The join is left-outer, so records whose NCM has no ISIC mapping are kept with a
// null class. The NCM code is dropped afterwards because it is not part of the
// aggregation grain.
//
// `broadcast(...)` (org.apache.spark.sql.functions) marks the lookup side as a
// broadcast-join candidate, which lets Spark avoid shuffling the fact data. It is
// only a planner hint and does not change the join result.
val dfJoin = dfUnified
  .join(
    broadcast(dfSelectNomeMercosul),
    Seq("cd_nomenclatura_mercosul"),
    "left_outer"
  )
  .drop("cd_nomenclatura_mercosul")

In [0]:
// Aggregates the enriched records to one row per
// (year, month, state, ISIC class, operation type), summing FOB value and net weight.
val dfAgg = {
  // Grain of the result table.
  val groupingKeys = Seq(
    "an_operacao",
    "me_operacao",
    "sg_unidade_federativa",
    "cd_classificacao_internacional",
    "tp_operacao"
  )

  // Nulls are replaced by 0 before summing, so a group made only of nulls yields 0
  // instead of null.
  val totalFob = sum(coalesce(col("vl_free_on_board"), lit(0)))
    .cast("decimal(20,2)")
    .alias("vl_free_on_board")

  val totalNetWeight = sum(coalesce(col("qt_peso_liquido_kg"), lit(0)))
    .cast("decimal(20,3)")
    .alias("qt_peso_liquido_kg")

  dfJoin
    .groupBy(groupingKeys.map(col(_)): _*)
    .agg(totalFob, totalNetWeight)
}

In [0]:
// Number of aggregated groups (this triggers a Spark job), echoed to the cell output.
val aggCount: Long = dfAgg.count()
println(aggCount)

## Carga
> #### **merge**

In [0]:
// Upserts the aggregated result into the gold table.
//
// The merge source is `dfAgg` (one row per key). The previous version merged the
// un-aggregated `dfJoin`, where many rows share the same key: Delta rejects a merge
// in which several source rows match one target row, and unmatched keys would have
// been inserted once per raw record instead of once per group.
//
// Key comparison:
//  - an_operacao / me_operacao are filtered to non-null upstream and tp_operacao is
//    a literal, so plain `=` is sufficient for them.
//  - sg_unidade_federativa and cd_classificacao_internacional may be null, so they
//    use the null-safe `<=>`; with `=` a null key never matches and the same row
//    would be inserted again on every run.
val deltaTarget = DeltaTable.forName("gold.rst_comercio_exterior")

deltaTarget.as("t")
  .merge(
    dfAgg.as("s"),
    """
      t.an_operacao = s.an_operacao AND
      t.me_operacao = s.me_operacao AND
      (t.sg_unidade_federativa <=> s.sg_unidade_federativa) AND
      (t.cd_classificacao_internacional <=> s.cd_classificacao_internacional) AND
      t.tp_operacao = s.tp_operacao
    """
  )
  // Existing key: overwrite both measures with the freshly computed totals.
  .whenMatched()
  .updateExpr(Map(
    "vl_free_on_board"   -> "s.vl_free_on_board",
    "qt_peso_liquido_kg" -> "s.qt_peso_liquido_kg"
  ))
  // New key: insert the aggregated row as-is.
  .whenNotMatched()
  .insertAll()
  .execute()

