In [0]:
from functools import reduce
import pyspark.sql.functions as F

In [0]:
from functools import reduce

def load_table(file_location, 
               table_name, 
               rename_columns=None, 
               file_type="csv", 
               delimiter=";"):
  
  # The applied options are for CSV files. For other file types, these will be ignored.
  df = spark.read.format(file_type) \
    .option("inferSchema", "true") \
    .option("header", "true") \
    .option("sep", delimiter) \
    .load(file_location)
  df.columns
  if rename_columns is not None:
    df = reduce(lambda dfl, x: dfl.withColumnRenamed(x,rename_columns[x]),rename_columns, df)
    df.columns

  # Create a view or table
  df.createOrReplaceTempView(table_name)

  # Since this table is registered as a temp view, it will only be available to this notebook. If you'd like other users to be able to query  this table, you can also create a table from the DataFrame.
  # Once saved, this table will persist across cluster restarts as well as allow various users across different notebooks to query this data.
  # To do so, choose your table name and uncomment the bottom line.

  df.write.format("parquet").saveAsTable(table_name)

# ALTITUD

In [0]:
altitudDF = spark.read.format("csv") \
.option("inferSchema", "false") \
.option("header", "true") \
.option("sep", ";") \
.load("/mnt/IncendiosForestalesCAT/raw/idescat/cartografia/Altitud.csv")
display(altitudDF)

Municipio,Comarca,Codigo,Altitud (m),Superficie (km²),Poblacion
Abella de la Conca,Pallars Jussà,250019,956,7827,171
Abrera,Baix Llobregat,80018,105,1994,12620
Àger,Noguera,250024,642,16057,588
Agramunt,Urgell,250030,337,7965,5464
Aguilar de Segarra,Bages,80023,480,4332,286
Agullana,Alt Empordà,170010,166,2773,885
Aiguafreda,Vallès Oriental,80142,404,79,2556
Aiguamúrcia,Alt Camp,430017,314,7295,960
Aiguaviva,Gironès,170025,169,1392,789
Aitona,Segrià,250387,110,669,2493


In [0]:
rename_columns = {"Altitud (m)":"altitud",
"Codigo":"cod_idescat",
"Municipio": "municipio",
"Comarca": "comarca"
}

altitudDF = reduce(lambda dfl, x: dfl.withColumnRenamed(x,rename_columns[x]),rename_columns, altitudDF).select(*rename_columns.values())
altitudDF = altitudDF.withColumn("cod_municipio", F.substring("cod_idescat",1,5))

In [0]:
display(altitudDF)

altitud,cod_idescat,municipio,comarca,cod_municipio
956,250019,Abella de la Conca,Pallars Jussà,25001
105,80018,Abrera,Baix Llobregat,80018
642,250024,Àger,Noguera,25002
337,250030,Agramunt,Urgell,25003
480,80023,Aguilar de Segarra,Bages,80023
166,170010,Agullana,Alt Empordà,17001
404,80142,Aiguafreda,Vallès Oriental,80142
314,430017,Aiguamúrcia,Alt Camp,43001
169,170025,Aiguaviva,Gironès,17002
110,250387,Aitona,Segrià,25038


In [0]:
display(altitudDF.filter(F.col("comarca")=="Vallès Occidental"))

altitud,cod_idescat,municipio,comarca,cod_municipio
120,89045,Badia del Vallès,Vallès Occidental,89045
146,82520,Barberà del Vallès,Vallès Occidental,82520
331,80517,Castellar del Vallès,Vallès Occidental,80517
132,80543,Castellbisbal,Vallès Occidental,80543
82,82665,Cerdanyola del Vallès,Vallès Occidental,82665
502,80879,Gallifa,Vallès Occidental,80879
423,81206,Matadepera,Vallès Occidental,81206
36,81252,Montcada i Reixac,Vallès Occidental,81252
130,81568,Palau-solità i Plegamans,Vallès Occidental,81568
123,81672,Polinyà,Vallès Occidental,81672


# PENDIENTE

In [0]:
#CSV
pendienteDF = spark.read.format("csv") \
.option("inferSchema", "false") \
.option("header", "true") \
.option("sep", "\t") \
.load("/mnt/IncendiosForestalesCAT/raw/idescat/cartografia/pendienteCAT.csv")
display(pendienteDF)

Municipio,Superfície total,Superfície amb pendent < 20%,% pendent < 20%,% pendent < 20% sobre el total
Alt Camp,538,3355,624,23
Alt Empordà,13575,7343,541,51
Alt Penedès,5927,379,64,27
Alt Urgell,14475,1259,87,09
Alta Ribagorça,4269,124,29,01
Anoia,8663,444,513,31
Aran,6336,31,05,0
Bages,10923,:,:,:
Baix Camp,6972,356,511,25
Baix Ebre,10027,7435,742,52


In [0]:
rename_columns={"Municipio":"comarca","Superfície total":"superficie_total","Superfície amb pendent < 20%":"superficie_menos_20","% pendent < 20%":"porcentaje_pendiente", "% pendent < 20% sobre el total":"porcentaje_pendiente_total"}

pendienteDF = reduce(lambda dfl, x: dfl.withColumnRenamed(x,rename_columns[x]),rename_columns, pendienteDF).select(*rename_columns.values())
display(pendienteDF)

comarca,superficie_total,superficie_menos_20,porcentaje_pendiente,porcentaje_pendiente_total
Alt Camp,538,3355,624,23
Alt Empordà,13575,7343,541,51
Alt Penedès,5927,379,64,27
Alt Urgell,14475,1259,87,09
Alta Ribagorça,4269,124,29,01
Anoia,8663,444,513,31
Aran,6336,31,05,0
Bages,10923,:,:,:
Baix Camp,6972,356,511,25
Baix Ebre,10027,7435,742,52


# JOIN ALTITUD y PENDIENTE

In [0]:
altitudDF.count()

In [0]:
pendienteDF.count()

In [0]:
cartografiaDF = altitudDF.alias("df1").join(pendienteDF.alias("df2"), F.col("df1.comarca")== F.col("df2.comarca"), "left").select(["df1.*","df2.superficie_total","superficie_menos_20","porcentaje_pendiente","porcentaje_pendiente_total"])

In [0]:
display(cartografiaDF)

altitud,cod_idescat,municipio,comarca,cod_municipio,superficie_total,superficie_menos_20,porcentaje_pendiente,porcentaje_pendiente_total
956,250019,Abella de la Conca,Pallars Jussà,25001,13431.0,2232,166,16
105,80018,Abrera,Baix Llobregat,80018,486.0,2498,514,17
642,250024,Àger,Noguera,25002,17841.0,9733,546,68
337,250030,Agramunt,Urgell,25003,5797.0,5627,971,39
480,80023,Aguilar de Segarra,Bages,80023,10923.0,:,:,:
166,170010,Agullana,Alt Empordà,17001,13575.0,7343,541,51
404,80142,Aiguafreda,Vallès Oriental,80142,735.0,:,:,:
314,430017,Aiguamúrcia,Alt Camp,43001,538.0,3355,624,23
169,170025,Aiguaviva,Gironès,17002,5756.0,4149,721,29
110,250387,Aitona,Segrià,25038,13967.0,12824,918,9


In [0]:
cartografiaDF.write.mode("overwrite").parquet(f"/mnt/IncendiosForestalesCAT/prep/idescat/cartografia/")