In [7]:
import os
import re
import time
from collections import defaultdict

sql_base_path = './sql'    

def parse_sql_file(file_path):
    try:
        with open(file_path, 'r') as f:
            content = f.read()
        dependencies = re.findall(r'FROM\s+(\w+)', content, re.IGNORECASE)
        return set(dependencies)
    except FileNotFoundError:
        print(f"Error: SQL file {file_path} not found.")
        return set()

def map_dependencies(sql_folders):
    dependencies = defaultdict(set)
    all_sql_files = []
    
    for folder in sql_folders:
        folder_path = os.path.join(sql_base_path, folder)
        if not os.path.exists(folder_path):
            print(f"Error: Folder {folder_path} not found.")
            continue
        
        for file in os.listdir(folder_path):
            if file.endswith('.sql'):
                file_path = os.path.join(folder_path, file)
                file_basename = os.path.basename(file_path).replace('.sql', '')
                deps = parse_sql_file(file_path)
                dependencies[file_basename] = deps
                all_sql_files.append((file_basename, file_path))
    
    return dependencies, all_sql_files

def resolve_execution_order(dependencies):
    resolved_order = []
    seen = set()

    def visit(node):
        if node not in seen:
            seen.add(node)
            for dep in dependencies[node]:
                visit(dep)
            resolved_order.append(node)

    for node in list(dependencies.keys()): 
        visit(node)

    return resolved_order

def run_sql_files_in_order(resolved_order, all_sql_files):
    file_dict = {file[0]: file[1] for file in all_sql_files}
    print("Execution Order:")
    for sql_file in resolved_order:
        print(f"Running {sql_file}.sql")
        time.sleep(2) 

sql_folders = ['source', 'tmp', 'final']

dependencies, all_sql_files = map_dependencies(sql_folders)
resolved_order = resolve_execution_order(dependencies)

print("Dependencies for each SQL file:")
for file, deps in dependencies.items():
    print(f"{file}: depends on {', '.join(deps) if deps else 'no dependencies'}")
    
run_sql_files_in_order(resolved_order, all_sql_files)


Dependencies for each SQL file:
agents: depends on no dependencies
enquiries_per_day: depends on no dependencies
listings_features: depends on no dependencies
listing_performances: depends on performances
listing_photos_quality: depends on no dependencies
page_clicks_per_day: depends on clicks
listings_performances: depends on no dependencies
performances: depends on no dependencies
clicks: depends on no dependencies
Execution Order:
Running agents.sql
Running enquiries_per_day.sql
Running listings_features.sql
Running performances.sql
Running listing_performances.sql
Running listing_photos_quality.sql
Running clicks.sql
Running page_clicks_per_day.sql
Running listings_performances.sql
