In [1]:
from pyspark.sql import SparkSession
from pyspark.sql import Row
from delta import *

# Default storage location for managed databases and tables.
warehouse_location = 'hdfs://hdfs-nn:9000/AreasVerdes/warehouse'

# The chain is wrapped in parentheses rather than using backslash
# continuations: a trailing backslash on the last call only parses while the
# next physical line stays blank, so any later edit could silently glue the
# following statement onto the builder expression.
builder = (
    SparkSession.builder
    .appName("Python Spark SQL Hive integration example")
    .config("spark.sql.warehouse.dir", warehouse_location)
    # Thrift endpoint of the Hive metastore used for table metadata.
    .config("hive.metastore.uris", "thrift://hive-metastore:9083")
    # Register the Delta SQL extension and Delta catalog with the session.
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
    # NOTE(review): configure_spark_with_delta_pip may set spark.jars.packages
    # itself from the installed delta package version -- confirm whether this
    # explicit pin is still effective or is overridden.
    .config("spark.jars.packages", "io.delta:delta-core_2.12:1.0.0")
    .enableHiveSupport()
)

# Single assignment (the original bound `spark` twice in one statement).
spark = configure_spark_with_delta_pip(builder).getOrCreate()

In [2]:
# Create the gold table for the yearly figures.
# Any existing definition is dropped first so this cell can be re-run.
drop_year_sql = """
    DROP TABLE IF EXISTS AreasVerdes.arvores_year
    """

# NOTE(review): the storage directory is named 'arvores_ano' while the table
# is 'arvores_year' -- confirm this mismatch is intentional.
create_year_sql = """
    CREATE EXTERNAL TABLE AreasVerdes.arvores_year (
      year INT,
      number_year INT,
      objective INT
    
    )
    USING DELTA
    LOCATION 'hdfs://hdfs-nn:9000/AreasVerdes/gold/arvores_ano'
    """

spark.sql(drop_year_sql)
# Kept as the final expression so the cell still displays the result.
spark.sql(create_year_sql)

DataFrame[]

In [5]:
# Create the gold table for the sidewalk figures.
# Any existing definition is dropped first so this cell can be re-run.
drop_sidewalk_sql = """
    DROP TABLE IF EXISTS AreasVerdes.arvores_sidewalk
    """

create_sidewalk_sql = """
    CREATE EXTERNAL TABLE AreasVerdes.arvores_sidewalk (
      year INT,
      boroname STRING,
      sidewalk STRING,
      number_sidewalk INT
    
    )
    USING DELTA
    LOCATION 'hdfs://hdfs-nn:9000/AreasVerdes/gold/arvores_sidewalk'
    """

spark.sql(drop_sidewalk_sql)
# Kept as the final expression so the cell still displays the result.
spark.sql(create_sidewalk_sql)

DataFrame[]

In [4]:
# Create the gold table for the health figures.
# Any existing definition is dropped first so this cell can be re-run.
drop_health_sql = """
    DROP TABLE IF EXISTS AreasVerdes.arvores_health
    """

create_health_sql = """
    CREATE EXTERNAL TABLE AreasVerdes.arvores_health (
      year INT,
      boroname STRING,
      health STRING,
      number_health INT
    
    )
    USING DELTA
    LOCATION 'hdfs://hdfs-nn:9000/AreasVerdes/gold/arvores_health'
    """

spark.sql(drop_health_sql)
# Kept as the final expression so the cell still displays the result.
spark.sql(create_health_sql)

DataFrame[]

In [2]:
# Create the table for the species figures.
# Any existing definition is dropped first so this cell can be re-run.
drop_species_sql = """
    DROP TABLE IF EXISTS AreasVerdes.arvores_species
    """

# NOTE(review): the column name 'comun_species_name' looks like a misspelling
# of 'common' -- check the jobs that write this table before renaming it.
create_species_sql = """
    CREATE EXTERNAL TABLE AreasVerdes.arvores_species (
      year INT,
      boroname STRING,
      comun_species_name STRING,
      number_species INT
    
    )
    USING DELTA
    LOCATION 'hdfs://hdfs-nn:9000/AreasVerdes/gold/arvores_species'
    """

spark.sql(drop_species_sql)
# Kept as the final expression so the cell still displays the result.
spark.sql(create_species_sql)

DataFrame[]

In [2]:
# Create the table for the problems figures.
# Any existing definition is dropped first so this cell can be re-run.
drop_problems_sql = """
    DROP TABLE IF EXISTS AreasVerdes.arvores_problems
    """

create_problems_sql = """
    CREATE EXTERNAL TABLE AreasVerdes.arvores_problems (
      year INT,
      boroname STRING,
      problems_general STRING,
      number_problems INT
    
    )
    USING DELTA
    LOCATION 'hdfs://hdfs-nn:9000/AreasVerdes/gold/arvores_problems'
    """

spark.sql(drop_problems_sql)
# Kept as the final expression so the cell still displays the result.
spark.sql(create_problems_sql)

DataFrame[]