In [1]:
pip install delta-spark==2.4.0

Note: you may need to restart the kernel to use updated packages.


In [2]:
from pyspark.sql import SparkSession
from pyspark.sql import Row
from delta import *

# `warehouse` points to the default location for managed databases and tables.
warehouse = 'hdfs://hdfs-nn:9000/warehouse'

# Session builder: Hive support with an external metastore, plus the Delta Lake
# SQL extension and catalog. The chain is wrapped in parentheses so no
# line-continuation backslash is left dangling after the last call.
builder = (
    SparkSession.builder
    .appName("Python Spark SQL Hive integration example")
    .config("spark.sql.warehouse.dir", warehouse)
    .config("hive.metastore.uris", "thrift://hive-metastore:9083")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
    .config("spark.jars.packages", "io.delta:delta-core_2.12:2.4.0")
    .enableHiveSupport()
)

# `configure_spark_with_delta_pip` is provided by the `delta` package imported
# above; it returns a builder on which the session is created (or reused).
spark = configure_spark_with_delta_pip(builder).getOrCreate()

In [3]:
# List every table registered under the `database` namespace.
# NOTE(review): the saved output below reports namespace `database2`, not
# `database` -- confirm this cell was re-run after its last edit.
show_tables_sql = """
    SHOW TABLES FROM database
    """
spark.sql(show_tables_sql).show()

+---------+--------------------+-----------+
|namespace|           tableName|isTemporary|
+---------+--------------------+-----------+
|database2|       borrowedmoney|      false|
|database2|borrowedmoneyfriends|      false|
|database2|          creditcard|      false|
|database2|              gender|      false|
|database2|                jobs|      false|
|database2|            jobsgold|      false|
|database2|       literaciagold|      false|
|database2|          population|      false|
|database2|            temponec|      false|
+---------+--------------------+-----------+



In [4]:
# Preview the first 10 rows of the `jobs` table.
jobs_preview_sql = """
    SELECT * FROM database.jobs
    """
jobs_df = spark.sql(jobs_preview_sql)
jobs_df.show(10)

+--------------------+--------+--------------------+--------------+----+---------+
|           pais_name|pais_cod|      indicador_name|indicador_code| ano|    valor|
+--------------------+--------+--------------------+--------------+----+---------+
|East Asia & Pacif...|     EAP|Access to electri...|EG.ELC.ACCS.ZS|1990| 82.59917|
|East Asia & Pacif...|     EAP|Access to electri...|EG.ELC.ACCS.ZS|1991|81.601685|
|East Asia & Pacif...|     EAP|Access to electri...|EG.ELC.ACCS.ZS|1992| 83.88937|
|East Asia & Pacif...|     EAP|Access to electri...|EG.ELC.ACCS.ZS|1993| 83.24599|
|East Asia & Pacif...|     EAP|Access to electri...|EG.ELC.ACCS.ZS|1994|  84.5359|
|East Asia & Pacif...|     EAP|Access to electri...|EG.ELC.ACCS.ZS|1995| 85.45995|
|East Asia & Pacif...|     EAP|Access to electri...|EG.ELC.ACCS.ZS|1996| 86.57579|
|East Asia & Pacif...|     EAP|Access to electri...|EG.ELC.ACCS.ZS|1997| 87.74911|
|East Asia & Pacif...|     EAP|Access to electri...|EG.ELC.ACCS.ZS|1998| 88.50378|
|Eas

In [5]:
# Preview the first 10 rows of the `gender` table.
gender_preview_sql = """
    SELECT * FROM database.gender
    """
gender_df = spark.sql(gender_preview_sql)
gender_df.show(10)

+--------------------+--------+--------------------+--------------+----+-----+
|           nome_pais|cod_pais|      nome_indicador| cod_indicador| ano|valor|
+--------------------+--------+--------------------+--------------+----+-----+
|Caribbean small s...|     CSS|A woman can apply...|SG.APL.PSPT.EQ|1990| null|
|Caribbean small s...|     CSS|A woman can apply...|SG.APL.PSPT.EQ|1991| null|
|Caribbean small s...|     CSS|A woman can apply...|SG.APL.PSPT.EQ|1992| null|
|Caribbean small s...|     CSS|A woman can apply...|SG.APL.PSPT.EQ|1993| null|
|Caribbean small s...|     CSS|A woman can apply...|SG.APL.PSPT.EQ|1994| null|
|Caribbean small s...|     CSS|A woman can apply...|SG.APL.PSPT.EQ|1995| null|
|Caribbean small s...|     CSS|A woman can apply...|SG.APL.PSPT.EQ|1996| null|
|Caribbean small s...|     CSS|A woman can apply...|SG.APL.PSPT.EQ|1997| null|
|Caribbean small s...|     CSS|A woman can apply...|SG.APL.PSPT.EQ|1998| null|
|Caribbean small s...|     CSS|A woman can apply...|

In [6]:
# Remove any previous definition of the table so the CREATE statement in the
# next cell can be re-run without an "already exists" failure.
drop_public_payment_sql = """
    DROP TABLE IF EXISTS database.PublicPayment

    """
spark.sql(drop_public_payment_sql)

DataFrame[]

In [7]:
# Register the external Delta table `database.PublicPayment` with four columns
# (pais, ano, indicador, valor), stored at the given HDFS location.
create_public_payment_sql = """
    CREATE EXTERNAL TABLE database.PublicPayment (
        pais varchar(50),    
        ano int,
        indicador varchar(100),
        valor float
    )
    USING DELTA
    LOCATION 'hdfs://hdfs-nn:9000/Projeto/gold/TabelaPublicPayment'
    """
spark.sql(create_public_payment_sql)

DataFrame[]

In [8]:
# Read the table back to check its schema and current contents.
public_payment_select_sql = """
    SELECT * FROM database.PublicPayment
    """
public_payment_df = spark.sql(public_payment_select_sql)
public_payment_df.show()

+----+---+---------+-----+
|pais|ano|indicador|valor|
+----+---+---------+-----+
+----+---+---------+-----+



In [9]:
# Stop the Spark session created at the top of the notebook.
spark.stop()