# comercio_ext_estatisticas.tb_exportacoes_municipios
> ### Origem — `bronze/autoloader/landingbeca2026jan/balancacomercial/EXP_*_MUN_delta`
## 📌 Descrição do arquivo

Base de **exportações por município**, agregada mensalmente, com código SH4, país, UF/município, e medidas `KG_LIQUIDO` e `VL_FOB`.
Fonte: MDIC / Comex Stat — Dado público agregado (sem identificação de empresas).

| Coluna      | DescriÃ§Ã£o |
|-------------|-----------|
|`CO_ANO`     | Ano da operação |
|`CO_MES`     | Mês da operação |
|`SH4`        | Capítulo SH4 do produto |
|`CO_PAIS`    | País de destino |
|`SG_UF_MUN`  | Sigla da UF do município |
|`CO_MUN`     | Código do município |
|`KG_LIQUIDO` | Peso líquido (kg) |
|`VL_FOB`     | Valor FOB (US$) |

## Configurações
> #### **imports**
> #### **get files**
> #### **schema**

In [0]:
import org . apache . spark  . sql . functions . _
import org . apache . spark  . sql . types . _
import org . apache . spark  . sql . DataFrame
import org . apache . hadoop . fs  . FileSystem 
import org . apache . hadoop . fs  . Path 
import io  . delta  . tables . _

In [0]:

// ---------------------------------------------------------------------------
// Job configuration and discovery of the bronze source folders.
//
// Lists the immediate sub-directories of `bronzeBaseDir` and keeps the ones
// whose name is `<prefix><4 digits><suffix>` (e.g. EXP_2024_MUN_delta).
// Fails fast when no folder matches.
// ---------------------------------------------------------------------------
val bronzeBaseDir = "/mnt/bronze/autoloader/landingbeca2026jan/balancacomercial/"
val silverPath    = "/mnt/silver/landingbeca2026jan/comercio_ext_estatisticas/tb_exportacoes_municipios/"
val silverTable   = "tb_exportacoes_municipios"
val prefix        = "EXP_"
val suffix        = "_MUN_delta"

// File system handle built from the Spark session's Hadoop configuration.
val fs = FileSystem.get(spark.sparkContext.hadoopConfiguration)

// Folder names (not full paths) that match the expected naming pattern.
val sourceDirs: Seq[String] = fs
  .listStatus(new Path(bronzeBaseDir))
  .filter(_.isDirectory)
  .map(_.getPath.getName)
  .filter(name => name.matches(s"${prefix}\\d{4}${suffix}"))
  .toSeq

require(
  sourceDirs.nonEmpty,
  s"Nenhuma pasta $prefix encontrada em $bronzeBaseDir"
)

// Join parent and child through Hadoop's Path so the result has exactly one
// separator; plain string interpolation produced ".../balancacomercial//EXP_..."
// because `bronzeBaseDir` already ends with a slash.
val sourcePaths: Seq[String] = sourceDirs.map(d => new Path(bronzeBaseDir, d).toString)


In [0]:

// Target (silver) schema: business key columns, the two measures, and the
// load-audit columns TS_REF / NM_ORIGEM. Built incrementally with the
// StructType builder; field order is the column order of the silver table.
val silverSchema = new StructType()
  .add("CO_ANO",     IntegerType,        nullable = false)
  .add("CO_MES",     IntegerType,        nullable = false)
  .add("SH4",        StringType,         nullable = false)
  .add("CO_PAIS",    StringType,         nullable = false)
  .add("SG_UF_MUN",  StringType,         nullable = true)
  .add("CO_MUN",     IntegerType,        nullable = false)
  .add("KG_LIQUIDO", DecimalType(18, 3), nullable = true)
  .add("VL_FOB",     DecimalType(18, 2), nullable = true)
  .add("TS_REF",     TimestampType,      nullable = false)
  .add("NM_ORIGEM",  StringType,         nullable = false)


## Extração
> #### **spark.read**

In [0]:

// Load every discovered bronze Delta folder as its own DataFrame, logging
// each path as it is read.
val dfs: Seq[DataFrame] =
  for (path <- sourcePaths) yield {
    println(s"Lendo Delta: $path")
    spark.read.format("delta").load(path)
  }

// Stack all yearly frames into one, matching columns by name rather than by
// position. `sourcePaths` is non-empty (guarded by the earlier `require`).
val dfBronzeRaw = dfs.reduce((stacked, next) => stacked.unionByName(next))


## Normalização
> #### **datatype**
> #### **regras**

In [0]:

// Normalises the raw bronze columns to the data types declared in
// `silverSchema`.
//
// SH4 and CO_PAIS are identifier codes, not quantities. They are kept as
// strings, as `silverSchema` declares, because an integer cast drops leading
// zeros (e.g. "0101" would become 101).
val dfNormalized = {
  // Trimmed string view of a column; blank values become NULL.
  def trimmedOrNull(name: String) = {
    val trimmed = trim(col(name).cast(StringType))
    when(length(trimmed) === 0, lit(null).cast(StringType)).otherwise(trimmed)
  }

  // Code columns must be digits only. Anything else becomes NULL so the
  // downstream not-null validation still rejects it, as the previous integer
  // cast did for non-numeric text.
  def digitCode(name: String) = {
    val code = trimmedOrNull(name)
    when(code.rlike("^[0-9]+$"), code)
  }

  dfBronzeRaw
    .withColumn("CO_ANO", col("CO_ANO").cast(IntegerType))
    .withColumn("CO_MES", col("CO_MES").cast(IntegerType))
    .withColumn("SH4", digitCode("SH4"))
    .withColumn("CO_PAIS", digitCode("CO_PAIS"))
    // Upper-cased, trimmed UF abbreviation; blank stays NULL.
    .withColumn("SG_UF_MUN", upper(trimmedOrNull("SG_UF_MUN")))
    .withColumn("CO_MUN", col("CO_MUN").cast(IntegerType))
    .withColumn("KG_LIQUIDO", col("KG_LIQUIDO").cast(DecimalType(18, 3)))
    .withColumn("VL_FOB", col("VL_FOB").cast(DecimalType(18, 2)))
}


In [0]:

// Applies default rules to the measures and stamps the audit columns.
val dfWithDefaults = {
  // Negative measures are clamped to zero; NULL measures are left as NULL
  // (the comparison is not true for NULL, so the `otherwise` branch keeps it).
  val clampedMeasures = Seq("KG_LIQUIDO", "VL_FOB").foldLeft(dfNormalized) { (df, measure) =>
    df.withColumn(measure, when(col(measure) < lit(0), lit(0)).otherwise(col(measure)))
  }

  clampedMeasures
    // Load timestamp of this run.
    .withColumn("TS_REF", current_timestamp())
    // Constant lineage marker describing the bronze source pattern.
    .withColumn("NM_ORIGEM", lit("/landingbeca2026jan/balancacomercial/EXP_*_MUN_delta"))
}


## Validações
> #### **data quality**
> #### **deduplicação**
> #### **schema fit**

In [0]:

// Data-quality gate: keeps only rows where every mandatory key column is
// present and the month is a valid calendar month (1 to 12).
val dfValid = {
  val mandatoryKeys = Seq("CO_ANO", "CO_MES", "SH4", "CO_PAIS", "CO_MUN")
  val allKeysPresent = mandatoryKeys
    .map(name => col(name).isNotNull)
    .reduce(_ && _)

  dfWithDefaults
    .where(allKeysPresent)
    .where(col("CO_MES").between(1, 12))
}


In [0]:

// Keeps a single row per business key (year, month, SH4, country, UF,
// municipality). Which of the duplicate rows survives is not specified here.
val dfDedup = dfValid.dropDuplicates(
  "CO_ANO", "CO_MES", "SH4", "CO_PAIS", "SG_UF_MUN", "CO_MUN"
)


In [0]:

// Schema fit: conforms the deduplicated data to `silverSchema`.
//
// The previous fold returned its accumulator unchanged, so the frame kept
// `dfDedup`'s own schema and `silverSchema` was never applied. This version:
//   1. selects exactly the silver columns, in silver order, casting each one
//      to its declared data type (extra bronze columns are dropped);
//   2. rebuilds the frame with `silverSchema` so the declared nullability is
//      part of the resulting schema. A NULL in a non-nullable field fails
//      when the data is evaluated rather than loading silently.
val dfSilver = {
  val projected = dfDedup.select(
    silverSchema.fields.map(field => col(field.name).cast(field.dataType)): _*
  )
  spark.createDataFrame(projected.rdd, silverSchema)
}


## Carga
> #### **merge**

In [0]:
// Upserts the silver DataFrame into the target Delta table.
//
// Rows are matched on the business key (year, month, SH4, country, UF,
// municipality). Matched rows get their measures and audit columns refreshed;
// unmatched rows are inserted as-is.
val deltaTarget = DeltaTable.forName("silver_comercio_ext_estatisticas.tb_exportacoes_municipios")

deltaTarget.as("t")
  .merge(
    dfSilver.as("s"),
    // The UF column is SG_UF_MUN (the former SG_UF_NCM does not exist in this
    // dataset). It is nullable, so it is compared with the null-safe operator
    // `<=>`: with plain `=`, rows whose UF is NULL would never match and would
    // be inserted again on every run.
    """
      t.CO_ANO = s.CO_ANO AND
      t.CO_MES = s.CO_MES AND
      t.SH4  = s.SH4 AND
      t.CO_PAIS = s.CO_PAIS AND
      t.SG_UF_MUN <=> s.SG_UF_MUN AND
      t.CO_MUN = s.CO_MUN
    """
  )
  .whenMatched()
  .updateExpr(Map(
    "t.KG_LIQUIDO" -> "s.KG_LIQUIDO",
    "t.VL_FOB" -> "s.VL_FOB",
    "t.TS_REF" -> "s.TS_REF",
    "t.NM_ORIGEM" -> "s.NM_ORIGEM"
  ))
  .whenNotMatched()
  .insertAll()
  .execute()
