## Catalog Setup Notebook

In [0]:
%sql
-- Create the four schemas used by this project: raw, bronze, silver and gold.
-- IF NOT EXISTS makes every statement a no-op when the schema is already there,
-- so this cell can be re-run safely.
-- NOTE(review): the flights_project catalog is not created in the cells visible
-- here; presumably it already exists - confirm.
CREATE SCHEMA IF NOT EXISTS flights_project.raw;
CREATE SCHEMA IF NOT EXISTS flights_project.bronze;
CREATE SCHEMA IF NOT EXISTS flights_project.silver;
CREATE SCHEMA IF NOT EXISTS flights_project.gold;

In [0]:
import os

# The directories below live inside the raw_volume volume, so make sure the
# volume exists first; otherwise this cell fails on a fresh workspace.
spark.sql("CREATE VOLUME IF NOT EXISTS flights_project.raw.raw_volume")

# Adding raw data root directory.
# dbutils.fs.mkdirs creates any missing parent directories and does nothing
# for directories that already exist, so no existence check is needed.
root_dir = "/Volumes/flights_project/raw/raw_volume/raw_data"
dbutils.fs.mkdirs(root_dir)

# Adding a directory for each dataset ingested
for dataset in ['bookings', 'airports', 'flights', 'passengers']:
  dataset_dir = f"{root_dir}/{dataset}"
  dbutils.fs.mkdirs(dataset_dir)

In [0]:
%sql
-- Volume behind the /Volumes/flights_project/bronze/etl_metadata/ path.
-- IF NOT EXISTS keeps the cell safe to re-run.
CREATE VOLUME IF NOT EXISTS flights_project.bronze.etl_metadata;

In [0]:
# Adding bronze data root directory.
# dbutils.fs.mkdirs creates any missing parent directories and does nothing
# for directories that already exist, so no existence check is needed.
root_dir = "/Volumes/flights_project/bronze/etl_metadata/"
dbutils.fs.mkdirs(root_dir)

# Adding a directory for each dataset processed in bronze layer
for dataset in ['bookings', 'airports', 'flights', 'passengers']:
  dataset_dir = f"{root_dir}{dataset}"
  dbutils.fs.mkdirs(dataset_dir)

In [0]:
%sql
-- Volume behind the /Volumes/flights_project/raw/raw_volume/ path.
-- IF NOT EXISTS keeps the cell safe to re-run.
-- NOTE(review): the cell that creates the raw_data/ directories inside this
-- volume appears earlier in the notebook; the volume has to exist before
-- those directories can be created.
CREATE VOLUME IF NOT EXISTS flights_project.raw.raw_volume

### Reset Bronze Layer

In [0]:
def reset_bronze_layer():
    """Delete the bronze ETL metadata files and drop the bronze schema's tables.

    Recursively removes ``/Volumes/flights_project/bronze/etl_metadata/`` and
    then issues ``DROP TABLE IF EXISTS`` for every non-temporary table listed
    in ``flights_project.bronze``. Progress is reported with ``print``.

    Uses the notebook-provided ``spark`` and ``dbutils`` globals.

    Returns:
        None.
    """
    print("Deleting bronze schema data...")

    dbutils.fs.rm("/Volumes/flights_project/bronze/etl_metadata/", True)
    print("Deleted auto logger checkpoints...")

    # listTables also reports session-scoped temporary views, which are not
    # part of the schema; skip them so only real schema objects are reported
    # as dropped.
    tables = [
        table.name
        for table in spark.catalog.listTables("flights_project.bronze")
        if not table.isTemporary
    ]
    if not tables:
        print("No tables detected.")
        return

    for table in tables:
        # Backtick-quote the name (doubling embedded backticks) so names with
        # special characters still form a valid identifier.
        quoted = "`" + table.replace("`", "``") + "`"
        spark.sql(f"DROP TABLE IF EXISTS flights_project.bronze.{quoted}")
        print(f"Dropped table: {table}...")

# reset_bronze_layer()

### Reset Gold Layer

In [0]:
def reset_gold_layer():
    """Drop every non-temporary table listed in the ``flights_project.gold`` schema.

    Issues ``DROP TABLE IF EXISTS`` for each table and reports progress with
    ``print``. Uses the notebook-provided ``spark`` global.

    Returns:
        None.
    """
    print("Deleting gold schema tables...")

    # listTables also reports session-scoped temporary views, which are not
    # part of the schema; skip them so only real schema objects are reported
    # as dropped.
    tables = [
        table.name
        for table in spark.catalog.listTables("flights_project.gold")
        if not table.isTemporary
    ]
    if not tables:
        print("No tables detected.")
        return

    for table in tables:
        # Backtick-quote the name (doubling embedded backticks) so names with
        # special characters still form a valid identifier.
        quoted = "`" + table.replace("`", "``") + "`"
        spark.sql(f"DROP TABLE IF EXISTS flights_project.gold.{quoted}")
        print(f"Dropped table: {table}.")

# reset_gold_layer()