In [None]:
from pyspark.sql import SparkSession
from pyspark.sql import Row
from delta import *

In [None]:
# warehouse_location points to the default location for managed databases and tables
warehouse_location = 'hdfs://hdfs-nn:9000/warehouse/project'

# Session builder: Hive support plus the Delta Lake SQL extension and catalog,
# so that `USING DELTA` tables can be created and queried through spark.sql().
builder = (
    SparkSession.builder
    .appName("Python Spark SQL Hive integration example")
    .config("spark.sql.warehouse.dir", warehouse_location)
    .config("hive.metastore.uris", "thrift://hive-metastore:9083")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
    # NOTE(review): configure_spark_with_delta_pip() below also sets
    # spark.jars.packages from the installed delta-spark version, so this
    # explicit pin is presumably overridden -- confirm and drop it if so.
    .config("spark.jars.packages", "io.delta:delta-core_2.12:1.0.0")
    .enableHiveSupport()
)

# Single assignment (the original read `spark = spark = ...`).
spark = configure_spark_with_delta_pip(builder).getOrCreate()

In [None]:
# Reset step: drop the table's catalog entry so the CREATE TABLE cell further
# down can be run again. IF EXISTS keeps this cell from raising when the table
# has not been created yet (e.g. Restart & Run All on a fresh environment).
spark.sql(
    """
    DROP TABLE IF EXISTS silver_salaries.deltalake_table
    """
)

In [None]:
# Reset step: drop the database if it is present. IF EXISTS makes the cell
# safe to re-run once the database is already gone.
# NOTE(review): this drops `soccer_salaries`, while every other cell in this
# notebook works with `silver_salaries` -- confirm which database is meant
# before relying on this cell as a reset.
spark.sql(
    """
    DROP DATABASE IF EXISTS soccer_salaries
    """
)

In [None]:
# Create the silver-layer database at an explicit HDFS location.
# IF NOT EXISTS makes the cell idempotent: no earlier cell drops
# `silver_salaries`, so a plain CREATE DATABASE fails on every re-run.
spark.sql(
    """
    CREATE DATABASE IF NOT EXISTS silver_salaries LOCATION 'hdfs://hdfs-nn:9000/warehouse/project/silver_salaries.db/'
    """
)

In [None]:
# Sanity check: print the databases visible to this Spark session.
databases_query = """
    SHOW DATABASES
    """
spark.sql(databases_query).show()

In [None]:
# Sanity check: print the tables currently registered in silver_salaries.
tables_query = """
    SHOW TABLES FROM silver_salaries
    """
spark.sql(tables_query).show()

In [None]:
# Create the Delta table for the salaries data, partitioned by `Length`.
#
# * EXTERNAL was removed: it belongs to the Hive-format CREATE TABLE syntax,
#   not to the `USING <data_source>` form used here. The explicit LOCATION
#   already makes this an unmanaged table, so behaviour is otherwise unchanged.
# * IF NOT EXISTS lets the cell be re-run without failing when the table is
#   already registered.
spark.sql(
    """
    CREATE TABLE IF NOT EXISTS silver_salaries.deltalake_table (
        Player CHAR(255),
        Weekly_Salary INT,
        Position CHAR(100),
        Age INT,
        Status CHAR(10),
        18_19_Salary INT,
        19_20_Salary INT,
        20_21_Salary INT,
        21_22_Salary INT,
        22_23_Salary INT,
        23_24_Salary INT,
        24_25_Salary INT,
        Club CHAR(100),
        League CHAR(100)
    )
    
    USING DELTA
    PARTITIONED BY (
         Length INT
    )
    
    LOCATION 'hdfs://hdfs-nn:9000/warehouse/project/silver_salaries.db/deltalake_table/'
    """
)

In [None]:
# Preview the table; DataFrame.show() prints only the first rows (20 by default).
preview_query = """
    SELECT *
    FROM silver_salaries.deltalake_table
    """
spark.sql(preview_query).show()

In [None]:
# Inspect the table metadata. Converting to pandas lets the notebook render
# the result as a rich table (kept as the cell's last expression).
describe_query = """
    DESCRIBE TABLE FORMATTED silver_salaries.deltalake_table
    """
table_details = spark.sql(describe_query)
table_details.toPandas()

In [None]:
# Print the table contents again (same query as the earlier preview cell).
table_rows = spark.sql(
    """
    SELECT * FROM silver_salaries.deltalake_table
    """
)
table_rows.show()

In [None]:
# Stop the Spark session created at the top of the notebook; cells that use
# `spark` will not work after this until the session cell is run again.
spark.stop()