In [0]:
from pyspark.sql import DataFrame
from functools import reduce
from pyspark.sql.functions import lit

# ---------------------------
# Step 1: Get all regular views
# ---------------------------
# Catalog and schema to inspect; both are interpolated into the query below.
catalog = "4_prod"
schema = "pacs_dlt"

# One row per entry of the catalog's information_schema.views that belongs to
# the chosen schema, ordered by view name.
views_query = f"""
SELECT
    table_catalog,
    table_schema,
    table_name
FROM {catalog}.information_schema.views
WHERE table_schema = '{schema}'
ORDER BY table_name
"""

views_df = spark.sql(views_query)

# Show the list of views.
# DataFrame.show() prints only the first 20 rows unless told otherwise, so the
# actual row count is passed to keep the "All ..." listing complete.
view_count = views_df.count()
print("All regular views in schema:")
views_df.show(n=view_count, truncate=False)

# ---------------------------
# Step 2: Get 'Last Refreshed' for each view
# ---------------------------
def _quote_identifier(name):
    """Wrap one identifier part in backticks, doubling any embedded backtick."""
    return "`" + name.replace("`", "``") + "`"


# One DataFrame per view; each holds the 'Last Refreshed' row(s) of that view's
# DESCRIBE EXTENDED output (possibly none) plus a `view_name` column.
last_refreshed_list = []

for row in views_df.collect():  # Use collect() only if number of views is moderate
    table_name = row["table_name"]
    table_catalog = row["table_catalog"]
    table_schema = row["table_schema"]

    # Quote every part of the three-level name so that names containing
    # special characters or reserved words still parse in the statement below.
    full_table_name = ".".join(
        _quote_identifier(part)
        for part in (table_catalog, table_schema, table_name)
    )

    # DESCRIBE EXTENDED and filter for 'Last Refreshed'; tag the surviving
    # rows with the unqualified view name so they can be told apart once the
    # per-view results are combined.
    df_desc = (
        spark.sql(f"DESCRIBE EXTENDED {full_table_name}")
        .filter("col_name = 'Last Refreshed'")
        .withColumn("view_name", lit(table_name))
    )

    last_refreshed_list.append(df_desc)



In [0]:

# Combine all Last Refreshed info into one DataFrame.
# A non-empty list only means that views exist; each per-view DataFrame may
# still hold zero rows, so the decision below is based on the real row count.
refreshed_row_count = 0
if last_refreshed_list:
    last_refreshed_df = reduce(DataFrame.unionByName, last_refreshed_list)
    refreshed_row_count = last_refreshed_df.count()

if refreshed_row_count:
    print("Last Refreshed timestamps for views:")
    # show() defaults to 20 rows; pass the count so no view is cut off.
    last_refreshed_df.show(n=refreshed_row_count, truncate=False)
else:
    print("No 'Last Refreshed' info found for views.")


In [0]:
%sql
-- Print the complete extended metadata of one object in the schema so its
-- rows (including any 'Last Refreshed' entry) can be inspected by hand.
DESCRIBE TABLE EXTENDED 4_prod.pacs_dlt.stag_requested_accession_nbr