In [1]:
from pyspark.sql import SparkSession
from pyspark.sql import Row
from delta import *

# `warehouse` is the default HDFS location for managed databases and tables
# (passed to Spark as spark.sql.warehouse.dir below).
warehouse = 'hdfs://hdfs-nn:9000/warehouse'

# Session configuration:
#   - Hive support + metastore URI so databases/tables are resolved through the
#     shared Hive metastore service.
#   - Delta Lake SQL extension and catalog so `USING DELTA` tables work in SQL.
# The chain is wrapped in parentheses instead of backslash continuations, so
# adding/removing an option cannot leave a dangling `\` at the end of the chain.
builder = (
    SparkSession.builder
    .appName("Python Spark SQL Hive integration example")
    .config("spark.sql.warehouse.dir", warehouse)
    .config("hive.metastore.uris", "thrift://hive-metastore:9083")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
    .config("spark.jars.packages", "io.delta:delta-core_2.12:2.4.0")
    .enableHiveSupport()
)

# NOTE(review): configure_spark_with_delta_pip presumably sets
# spark.jars.packages itself from the installed delta-spark version, which
# would override the explicit package pin above -- confirm against the Delta
# docs and keep only one source of truth for the Delta version.
spark = configure_spark_with_delta_pip(builder).getOrCreate()

In [2]:
# List the databases (namespaces) visible to this Spark session.
show_databases_sql = """
    SHOW DATABASES
    """
spark.sql(show_databases_sql).show()

+---------+
|namespace|
+---------+
| database|
|database2|
|  default|
+---------+



In [3]:
# List the tables currently registered under the `database` namespace.
show_tables_sql = """
    SHOW TABLES FROM database
    """
spark.sql(show_tables_sql).show()

+---------+---------+-----------+
|namespace|tableName|isTemporary|
+---------+---------+-----------+
| database|     time|      false|
+---------+---------+-----------+



In [4]:
# (Re)register the `database.gender` table.
# Dropping with IF EXISTS first keeps this cell re-runnable: the CREATE below
# would otherwise fail when the table is already registered.
drop_gender_sql = """
    DROP TABLE IF EXISTS database.gender
    """

# Table definition: Delta format, stored at the silver-layer HDFS path given
# in LOCATION.
create_gender_sql = """
    CREATE EXTERNAL TABLE database.gender (
        nome_pais varchar(50),
        cod_pais varchar(50),
        nome_indicador varchar(200),
        cod_indicador varchar(100),
        ano int,
        valor float
    )
    USING DELTA
    LOCATION 'hdfs://hdfs-nn:9000/Projeto/silver/TabelaGender'
    """

spark.sql(drop_gender_sql)

# Left as the cell's last expression so the resulting (empty) DataFrame repr
# is still displayed as the cell output.
spark.sql(create_gender_sql)

DataFrame[]

In [5]:
# Preview the contents of `database.gender` to check the table is queryable.
select_gender_sql = """
    SELECT * FROM database.gender
    """
spark.sql(select_gender_sql).show()

+---------+--------+--------------+-------------+---+-----+
|nome_pais|cod_pais|nome_indicador|cod_indicador|ano|valor|
+---------+--------+--------------+-------------+---+-----+
+---------+--------+--------------+-------------+---+-----+



In [6]:
# Stop the Spark session created in the first cell; cells that use `spark`
# cannot be re-run after this without re-creating the session.
spark.stop()