In [0]:
import json
import re
import sys
from pyspark.sql.utils import AnalysisException

# Step 1: Get modified files from GitHub Actions
# The list arrives as a JSON-encoded string in the "modified_files" widget;
# the "[]" default means a run without the parameter has nothing to validate.
dbutils.widgets.text("modified_files", "[]")  # Default to empty list
modified_files = json.loads(dbutils.widgets.get("modified_files"))

if not modified_files:
    print("‚úÖ No modified files to validate.")
    sys.exit(0)  # Exit successfully if there are no changes

# Step 2: Get the Databricks username
# The user name is a path component of the workspace paths built in Step 4.
try:
    username = spark.sql("SELECT current_user()").collect()[0][0]
except AnalysisException:
    print("‚ùå Error: Unable to fetch username.")
    sys.exit(1)  # Fail the job

# Step 3: Get the repo name dynamically
# Takes the component at index 3 of the "/"-separated path of the running
# notebook.
# NOTE(review): this assumes a layout like /<root>/<user>/<repo>/... and raises
# IndexError for shorter paths - confirm against where this notebook is deployed.
repo_path = dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()
repo_name = repo_path.split("/")[3]  # Extract repo name

# Step 4: Remove .ipynb extension and construct full paths
# Only a *trailing* ".ipynb" is removed; a plain str.replace would also strip
# the substring from the middle of a path (e.g. a directory called
# "etl.ipynb_archive") and point the validator at a file that does not exist.
_notebook_suffix = ".ipynb"
modified_files = [
    file[: -len(_notebook_suffix)] if file.endswith(_notebook_suffix) else file
    for file in modified_files
]
full_paths = [f"/Workspace/Users/{username}/{repo_name}/{file}" for file in modified_files]
print("üîπ Full Paths to Validate:", full_paths)

# Step 5: Function to validate SQL scripts
# Head of a CREATE TABLE statement, up to and including the "(" that opens the
# column list.  OR REPLACE / IF NOT EXISTS and back-quoted names are accepted.
_CREATE_TABLE_HEAD = re.compile(
    r"CREATE\s+(?:OR\s+REPLACE\s+)?TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?[\w.`]+\s*\(",
    re.IGNORECASE,
)
# First identifier of a column-list entry (plain or back-quoted).
_COLUMN_NAME = re.compile(r"\s*(`[^`]+`|\w+)")
# Entries that declare a table constraint rather than a column.
_TABLE_CONSTRAINT = re.compile(
    r"\s*(?:CONSTRAINT|PRIMARY\s+KEY|FOREIGN\s+KEY)\b", re.IGNORECASE
)
# A COMMENT clause followed by the non-empty-literal placeholder produced by
# _split_column_list.
_COLUMN_COMMENT = re.compile(r"\bCOMMENT\s+'S'", re.IGNORECASE)


def _split_column_list(sql, open_index):
    """Split the parenthesised list opening at ``sql[open_index]`` into entries.

    Each returned string is the top-level text of one comma-separated entry:
    nested ``(...)`` and ``<...>`` content and SQL comments are dropped, and
    every string literal is replaced by ``'S'`` (non-empty) or ``''`` (empty),
    so commas, parentheses and keywords inside them cannot confuse the caller.
    Scanning stops at the matching ``)`` or at the end of the text.
    """
    entries = []
    current = []
    paren_depth = 0
    angle_depth = 0
    pos = open_index
    end = len(sql)
    while pos < end:
        ch = sql[pos]
        top_level = paren_depth == 1 and angle_depth == 0
        if ch in "'\"":
            # String literal: jump to the closing quote, honouring backslash
            # escapes, and keep only a placeholder.
            close = pos + 1
            while close < end and sql[close] != ch:
                close += 2 if sql[close] == "\\" else 1
            if top_level:
                current.append("'S'" if close > pos + 1 else "''")
            pos = close + 1
        elif ch == "`":
            # Back-quoted identifier: copied verbatim, never scanned.
            close = sql.find("`", pos + 1)
            close = end if close == -1 else close
            if top_level:
                current.append(sql[pos:close + 1])
            pos = close + 1
        elif sql.startswith("--", pos):
            # Line comment: skip to (but keep) the newline.
            newline = sql.find("\n", pos)
            pos = end if newline == -1 else newline
        elif sql.startswith("/*", pos):
            # Block comment: replaced by a single space.
            close = sql.find("*/", pos + 2)
            pos = end if close == -1 else close + 2
            if top_level:
                current.append(" ")
        else:
            if ch == "(":
                paren_depth += 1
            elif ch == ")":
                paren_depth -= 1
                if paren_depth == 0:
                    break  # matching close of the column list
                if paren_depth == 1 and angle_depth == 0:
                    current.append(" ")  # keep tokens around "(...)" apart
            elif paren_depth == 1 and ch == "<":
                # Angle brackets are tracked only directly inside the column
                # list, where they delimit STRUCT<>/MAP<>/ARRAY<> types.
                angle_depth += 1
            elif paren_depth == 1 and ch == ">" and angle_depth:
                angle_depth -= 1
                if angle_depth == 0:
                    current.append(" ")
            elif top_level:
                if ch == ",":
                    entries.append("".join(current))
                    current = []
                else:
                    current.append(ch)
            pos += 1
    entries.append("".join(current))
    return entries


def validate_sql_script(sql_content):
    """
    Validates if all columns in every CREATE TABLE statement have comments.

    Every ``CREATE [OR REPLACE] TABLE [IF NOT EXISTS] name (...)`` statement in
    ``sql_content`` is inspected.  The column list is split on top-level commas
    only, so parameterised types (``DECIMAL(10,2)``), nested types
    (``STRUCT<a: INT, b: STRING>``) and punctuation inside comment text are
    handled.  Table constraints (``CONSTRAINT``, ``PRIMARY KEY``,
    ``FOREIGN KEY``) are not treated as columns.  A column passes when its
    definition contains ``COMMENT`` followed by a non-empty string literal.

    Args:
        sql_content: Text of the script to check.

    Returns:
        True if every column has a comment, or if the text contains no
        CREATE TABLE statement with a column list; False otherwise.  The names
        of the offending columns are printed.
    """
    statements = list(_CREATE_TABLE_HEAD.finditer(sql_content))
    if not statements:
        print("‚ö†Ô∏è No CREATE TABLE statement found in this file.")
        return True  # Ignore files that don't create tables

    missing_comments = []
    for statement in statements:
        # The head pattern ends with "(", so the list opens at end() - 1.
        for entry in _split_column_list(sql_content, statement.end() - 1):
            name = _COLUMN_NAME.match(entry)
            if name is None:
                continue  # empty entry, e.g. "()" or a trailing comma
            if _TABLE_CONSTRAINT.match(entry):
                continue
            if not _COLUMN_COMMENT.search(entry):
                missing_comments.append(name.group(1).strip("`"))

    if missing_comments:
        print(f"‚ùå Missing comments for columns: {missing_comments}")
        return False  # Fail validation
    return True

# Step 6: Validate each modified file
# A file counts as failed when it cannot be read or when validate_sql_script
# rejects its content; one failure is enough to fail the whole run.
validation_failed = False

for file_path in full_paths:
    # Convert workspace path to DBFS path
    dbfs_path = "dbfs:/mnt/workspace" + file_path.replace("/Workspace", "")

    try:
        # Read the notebook content using dbutils (at most 100000 bytes)
        sql_content = dbutils.fs.head(dbfs_path, 100000)
        file_is_valid = validate_sql_script(sql_content)
    except Exception as e:
        print(f"‚ùå Error reading file {file_path}: {e}")
        file_is_valid = False  # An unreadable file fails validation

    if not file_is_valid:
        validation_failed = True

# Step 7: Exit with failure if validation fails
# sys.exit raises SystemExit, so the success path below is only reached when
# no file failed.
if validation_failed:
    print("‚ùå Validation failed! Some tables have missing column comments.")
    sys.exit(1)  # Fail the Databricks job (which fails GitHub Actions)

print("‚úÖ Validation passed! All tables have proper column comments.")
sys.exit(0)  # Pass the job
