In [0]:
%run ./01-config

In [0]:
# Directory paths derived from base_dir_data, which is not defined in this
# file (presumably supplied by the `%run ./01-config` cell above — confirm).
# landing_zone is not referenced by any function visible in this file.
landing_zone = base_dir_data + "/raw"
# Root folder of the JSON test fixtures; read by load_date_lookup.
test_data_dir = base_dir_data + "/test_data"

def load_date_lookup(catalog, db_name):
    """Overwrite the ``date_lookup`` table with rows from the JSON test fixture.

    Issues one ``INSERT OVERWRITE`` through the ``spark`` session, selecting the
    calendar columns from ``6-date-lookup.json`` under ``test_data_dir``, and
    prints a single progress line.

    Args:
        catalog: Catalog that holds the target table.
        db_name: Database (schema) inside ``catalog`` that holds the table.
    """
    print("Loading date_lookup table...", end='')
    # Built from adjacent literals so each SQL clause sits on its own line.
    overwrite_stmt = (
        f"INSERT OVERWRITE TABLE {catalog}.{db_name}.date_lookup \n"
        "            SELECT date, week, year, month, dayofweek, dayofmonth, dayofyear, week_part \n"
        f"            FROM json.`{test_data_dir}/6-date-lookup.json/`"
    )
    spark.sql(overwrite_stmt)
    print("Done")


def load_history(catalog, db_name):
    """Run the historical data load and print how long it took.

    The load consists of a single step, ``load_date_lookup``. The duration is
    the difference between two whole-second wall-clock readings.

    Args:
        catalog: Catalog passed through to ``load_date_lookup``.
        db_name: Database (schema) passed through to ``load_date_lookup``.
    """
    import time

    began_at = int(time.time())
    print("\nStarting historical data load ...")
    load_date_lookup(catalog, db_name)
    elapsed_seconds = int(time.time()) - began_at
    print(f"Historical data load completed in {elapsed_seconds} seconds")


def assert_count(catalog: str, db_name: str, table_name: str, expected_count: int) -> None:
    """Check that a table holds exactly the expected number of rows.

    Reads ``{catalog}.{db_name}.{table_name}`` through the ``spark`` session,
    counts its rows and prints the outcome on a single line.

    Args:
        catalog: Catalog that holds the table.
        db_name: Database (schema) inside ``catalog``.
        table_name: Name of the table whose rows are counted.
        expected_count: Number of rows the table must contain.

    Raises:
        AssertionError: If the actual row count differs from ``expected_count``.
    """
    print(f"Validating record counts in {table_name}...", end='')
    actual_count = spark.read.table(f"{catalog}.{db_name}.{table_name}").count()
    # Raise explicitly instead of using an `assert` statement: asserts are
    # removed when Python runs with -O, which would let a mismatch pass silently.
    if actual_count != expected_count:
        raise AssertionError(
            f"Expected {expected_count:,} records, found {actual_count:,} in {table_name}"
        )
    print(f"Found {actual_count:,} / Expected {expected_count:,} records: Success")
        

def validate(catalog, db_name):
    """Validate the historical data load and print how long the check took.

    Verifies that ``date_lookup`` contains 365 rows via ``assert_count``. The
    duration is the difference between two whole-second wall-clock readings.

    Args:
        catalog: Catalog passed through to ``assert_count``.
        db_name: Database (schema) passed through to ``assert_count``.
    """
    import time

    began_at = int(time.time())
    print("\nStarting historical data load validation...")
    assert_count(catalog, db_name, "date_lookup", 365)
    elapsed_seconds = int(time.time()) - began_at
    print(f"Historical data load validation completed in {elapsed_seconds} seconds")
