In [1]:
from pyspark.sql import SparkSession
from pyspark.sql import Row

# Root HDFS directory handed to Spark SQL as its warehouse directory.
warehouse_location = 'hdfs://hdfs-nn:9000/warehouse'

# Build (or reuse, via getOrCreate) a Hive-enabled Spark session that points
# at the warehouse directory above and at the Hive metastore Thrift endpoint.
spark = (
    SparkSession.builder
    .appName("American Crimes")
    .config("spark.sql.warehouse.dir", warehouse_location)
    .config("hive.metastore.uris", "thrift://hive-metastore:9083")
    .enableHiveSupport()
    .getOrCreate()
)

In [2]:
# List the databases visible through the configured metastore.
databases_df = spark.sql(
    """
    SHOW DATABASES
    """
)
databases_df.show()

+--------------+
|     namespace|
+--------------+
|americancrimes|
|       default|
|       economy|
|      politics|
+--------------+



In [3]:
# List the tables currently registered in the americancrimes database.
americancrimes_tables_df = spark.sql(
    """
    SHOW TABLES FROM americancrimes
    """
)
americancrimes_tables_df.show()

+--------------+----------------+-----------+
|      database|       tableName|isTemporary|
+--------------+----------------+-----------+
|americancrimes|civillian_deaths|      false|
|americancrimes|    crime_by_age|      false|
|americancrimes|   crime_by_race|      false|
|americancrimes|          crimes|      false|
+--------------+----------------+-----------+



In [4]:
# (Re)create the external, Parquet-backed `americancrimes.demography` table.
#
# The drop makes this cell re-runnable: without it a second execution would
# fail because the table already exists.
spark.sql(
    """
    DROP TABLE IF EXISTS americancrimes.demography
    """
)

# Derive the table directory from `warehouse_location` (defined in the session
# cell) instead of repeating the HDFS URI, so the two cannot drift apart.
demography_location = f"{warehouse_location}/americancrimes.db/demography/"

# Clause order follows the Hive DDL grammar (PARTITIONED BY before STORED AS);
# Spark accepts either order, so this is a portability/readability change only.
#
# NOTE(review): the partition column is named `year` but typed DATE — confirm
# that the partition directory values under LOCATION actually parse as DATE,
# otherwise partition recovery / reads may not return the expected rows.
spark.sql(
    f"""
    CREATE EXTERNAL TABLE americancrimes.demography (
        state_abb STRING,
        state STRING,
        total_pop INT,
        white INT,
        black INT,
        native INT,
        two_pop INT,
        asian_or_pacific_islander INT
    )
    PARTITIONED BY (
        year DATE
    )
    STORED AS PARQUET
    LOCATION '{demography_location}'
    """
)


DataFrame[]

In [5]:
# Ask the catalog to recover the table's partitions so that partition
# directories already present at its location become visible to queries.
demography_table = "americancrimes.demography"
spark.catalog.recoverPartitions(demography_table)

# Preview the table contents (show() prints at most the first 20 rows).
demography_df = spark.sql(
    """
    SELECT *
    FROM americancrimes.demography
    """
)
demography_df.show()

+---------+-----+---------+-----+-----+------+-------+-------------------------+----+
|state_abb|state|total_pop|white|black|native|two_pop|asian_or_pacific_islander|year|
+---------+-----+---------+-----+-----+------+-------+-------------------------+----+
+---------+-----+---------+-----+-----+------+-------+-------------------------+----+

