<a href="https://colab.research.google.com/github/ShikharV010/gist_daily_runs/blob/main/Stored_Proc_Load.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install required packages
!pip install psycopg2-binary pandas

import os
import time
from datetime import datetime

import pandas as pd
import psycopg2

# ---------------------------------------------------------
# PostgreSQL connection parameters
# ---------------------------------------------------------
# Keyword arguments passed verbatim to psycopg2.connect(**pg_params).
# Each value can be overridden with the corresponding standard libpq
# environment variable (PGHOST, PGDATABASE, PGUSER, PGPASSWORD, PGPORT);
# an unset or empty variable falls back to the literal below, so behaviour is
# unchanged when nothing is configured.
# NOTE(review): the fallback literals still embed production credentials in
# source control. Rotate the password and drop the fallbacks once the
# environment variables are provisioned wherever this notebook runs.
pg_params = {
    'host': os.environ.get('PGHOST') or 'gw-rds-prod.celzx4qnlkfp.us-east-1.rds.amazonaws.com',
    'database': os.environ.get('PGDATABASE') or 'gw_prod',
    'user': os.environ.get('PGUSER') or 'airbyte_user',
    'password': os.environ.get('PGPASSWORD') or 'airbyte_user_password',
    'port': os.environ.get('PGPORT') or '5432',
}

# ---------------------------------------------------------
# Stored procedures to EXCLUDE from execution
# ---------------------------------------------------------
EXCLUDED_PROCEDURES = {
    # Entries must be schema-qualified ("gist.<name>"): run_all_objects()
    # prefixes bare names with "gist." before testing membership in this set.
    'gist.update_gtm_master_allcampaigns'
}

# ---------------------------------------------------------
# Materialized views to EXCLUDE from refresh
# ---------------------------------------------------------
EXCLUDED_MATVIEWS = {
    # Entries must be schema-qualified ("gist.<name>"): run_all_objects()
    # prefixes bare names with "gist." before testing membership in this set.
    'gist.matv_writerallocation_contractedapproved_materialized',
    'gist.matv_gist_pageperformance'
}

# ---------------------------------------------------------
# Create audit table if it does not exist
# ---------------------------------------------------------
def create_audit_table():
    """Ensure ``gist.audit_automated_procsandviews`` exists.

    Opens its own connection, issues ``CREATE TABLE IF NOT EXISTS`` and
    commits. Any exception is printed rather than raised, so the caller
    continues even when the table could not be created.
    """
    connection = None
    cursor = None
    try:
        audit_ddl = '''
            CREATE TABLE IF NOT EXISTS gist.audit_automated_procsandviews (
                id SERIAL PRIMARY KEY,
                object_name VARCHAR(255),
                object_type VARCHAR(50),
                start_time TIMESTAMP,
                end_time TIMESTAMP,
                duration_seconds NUMERIC(10,2),
                status VARCHAR(50),
                error_message TEXT
            )
        '''

        connection = psycopg2.connect(**pg_params)
        cursor = connection.cursor()
        cursor.execute(audit_ddl)
        connection.commit()

        print("Audit table created or already exists.")
    except Exception as exc:
        print(f"Error creating audit table: {exc}")
    finally:
        # Release the cursor before the connection that owns it.
        for resource in (cursor, connection):
            if resource:
                resource.close()

# ---------------------------------------------------------
# Fetch procedures and materialized views from metadata view
# ---------------------------------------------------------
def get_procedures_and_matviews():
    """Read object names from ``gist.gist_procsandviews``, split by type.

    Returns:
        A ``(procedures, matviews)`` tuple of name lists, in the order the
        query returns them (sorted by object type, then name). Rows whose
        type is neither ``PROCEDURE`` nor ``MATERIALIZED VIEW`` are ignored.
        On any exception the error is printed and ``([], [])`` is returned.
    """
    connection = None
    cursor = None
    try:
        connection = psycopg2.connect(**pg_params)
        cursor = connection.cursor()

        query = """
            SELECT name, object_type
            FROM gist.gist_procsandviews
            ORDER BY object_type, name
        """
        cursor.execute(query)
        rows = cursor.fetchall()

        procedures = [name for name, kind in rows if kind == 'PROCEDURE']
        matviews = [name for name, kind in rows if kind == 'MATERIALIZED VIEW']
        return procedures, matviews

    except Exception as exc:
        print(f"Error getting objects from view: {exc}")
        return [], []
    finally:
        # Release the cursor before the connection that owns it.
        for resource in (cursor, connection):
            if resource:
                resource.close()

# ---------------------------------------------------------
# Execute a stored procedure with audit logging
# ---------------------------------------------------------
def execute_procedure(proc_name):
    """Execute a stored procedure and record the outcome in the audit table.

    The procedure is invoked as ``CALL <name>()`` with no arguments on a
    dedicated autocommit connection. One row is written to
    ``gist.audit_automated_procsandviews`` with status ``SUCCESS`` or
    ``ERROR``. Failures are printed, never raised, so a batch caller can
    carry on with the next object.

    Args:
        proc_name: Procedure name, with or without the ``gist.`` prefix;
            the prefix is added when missing.
    """
    conn = None
    cur = None
    # Stays None until the CALL is about to be issued, so the error handler
    # can tell "failed before starting" apart from "failed while running".
    start_time = None

    full_proc_name = proc_name if proc_name.startswith('gist.') else f"gist.{proc_name}"

    try:
        conn = psycopg2.connect(**pg_params)
        # Autocommit keeps the connection usable after a failed statement,
        # which is what lets the ERROR audit row be written below.
        conn.autocommit = True
        cur = conn.cursor()

        start_time = datetime.now()
        print(f"Executing {full_proc_name}() at {start_time}")

        # NOTE(review): the name is interpolated into the SQL text as-is;
        # this relies on the names supplied by the caller being trusted.
        cur.execute(f"CALL {full_proc_name}()")

        end_time = datetime.now()
        duration = (end_time - start_time).total_seconds()

        cur.execute('''
            INSERT INTO gist.audit_automated_procsandviews
            (object_name, object_type, start_time, end_time, duration_seconds, status, error_message)
            VALUES (%s, %s, %s, %s, %s, %s, %s)
        ''', (full_proc_name, 'PROCEDURE', start_time, end_time, duration, 'SUCCESS', None))

        print(f"Completed {full_proc_name}() in {duration:.2f} seconds")

    except Exception as e:
        error_message = str(e)
        end_time = datetime.now()
        duration = (end_time - start_time).total_seconds() if start_time is not None else 0

        if conn and not conn.closed:
            # Writing the audit row is best-effort: if it fails too (for
            # example the connection dropped), report it instead of letting
            # the exception escape and abort the caller's whole run.
            try:
                with conn.cursor() as error_cur:
                    error_cur.execute('''
                        INSERT INTO gist.audit_automated_procsandviews
                        (object_name, object_type, start_time, end_time, duration_seconds, status, error_message)
                        VALUES (%s, %s, %s, %s, %s, %s, %s)
                    ''', (full_proc_name, 'PROCEDURE',
                          start_time, end_time, duration, 'ERROR', error_message))
            except Exception as audit_error:
                print(f"Could not write audit record for {full_proc_name}: {audit_error}")

        print(f"Error executing {full_proc_name}: {error_message}")

    finally:
        if cur:
            cur.close()
        if conn:
            conn.close()

# ---------------------------------------------------------
# Refresh a materialized view with audit logging
# ---------------------------------------------------------
def refresh_matview(matview_name):
    """Refresh a materialized view and record the outcome in the audit table.

    Issues ``REFRESH MATERIALIZED VIEW <name>`` on a dedicated autocommit
    connection. One row is written to ``gist.audit_automated_procsandviews``
    with status ``SUCCESS`` or ``ERROR``. Failures are printed, never raised,
    so a batch caller can carry on with the next object.

    Args:
        matview_name: View name, with or without the ``gist.`` prefix; the
            prefix is added when missing.
    """
    conn = None
    cur = None
    # Stays None until the REFRESH is about to be issued, so the error handler
    # can tell "failed before starting" apart from "failed while running".
    start_time = None

    full_matview_name = matview_name if matview_name.startswith('gist.') else f"gist.{matview_name}"

    try:
        conn = psycopg2.connect(**pg_params)
        # Autocommit keeps the connection usable after a failed statement,
        # which is what lets the ERROR audit row be written below.
        conn.autocommit = True
        cur = conn.cursor()

        start_time = datetime.now()
        print(f"Refreshing {full_matview_name} at {start_time}")

        # NOTE(review): the name is interpolated into the SQL text as-is;
        # this relies on the names supplied by the caller being trusted.
        cur.execute(f"REFRESH MATERIALIZED VIEW {full_matview_name}")

        end_time = datetime.now()
        duration = (end_time - start_time).total_seconds()

        cur.execute('''
            INSERT INTO gist.audit_automated_procsandviews
            (object_name, object_type, start_time, end_time, duration_seconds, status, error_message)
            VALUES (%s, %s, %s, %s, %s, %s, %s)
        ''', (full_matview_name, 'MATERIALIZED VIEW',
              start_time, end_time, duration, 'SUCCESS', None))

        print(f"Refreshed {full_matview_name} in {duration:.2f} seconds")

    except Exception as e:
        error_message = str(e)
        end_time = datetime.now()
        duration = (end_time - start_time).total_seconds() if start_time is not None else 0

        if conn and not conn.closed:
            # Writing the audit row is best-effort: if it fails too (for
            # example the connection dropped), report it instead of letting
            # the exception escape and abort the caller's whole run.
            try:
                with conn.cursor() as error_cur:
                    error_cur.execute('''
                        INSERT INTO gist.audit_automated_procsandviews
                        (object_name, object_type, start_time, end_time, duration_seconds, status, error_message)
                        VALUES (%s, %s, %s, %s, %s, %s, %s)
                    ''', (full_matview_name, 'MATERIALIZED VIEW',
                          start_time, end_time, duration, 'ERROR', error_message))
            except Exception as audit_error:
                print(f"Could not write audit record for {full_matview_name}: {audit_error}")

        print(f"Error refreshing {full_matview_name}: {error_message}")

    finally:
        if cur:
            cur.close()
        if conn:
            conn.close()

# ---------------------------------------------------------
# Main runner
# ---------------------------------------------------------
def run_all_objects():
    """Run every listed procedure, then refresh every listed materialized view.

    Ensures the audit table exists, loads the object lists, and processes
    procedures before materialized views. Objects named in
    ``EXCLUDED_PROCEDURES`` / ``EXCLUDED_MATVIEWS`` are skipped with a
    message. The run pauses 10 seconds after each executed procedure and
    5 seconds after each refreshed view.
    """
    def qualified(name):
        # Exclusion sets hold schema-qualified names, so bare names get the
        # "gist." prefix before the membership test.
        return name if name.startswith('gist.') else f"gist.{name}"

    print(f"Starting execution at {datetime.now()}")

    create_audit_table()

    procedures, matviews = get_procedures_and_matviews()
    print(f"Found {len(procedures)} procedures and {len(matviews)} materialized views")

    # Stored procedures go first.
    print("\n--- EXECUTING STORED PROCEDURES ---")
    for proc in procedures:
        qualified_proc = qualified(proc)
        if qualified_proc not in EXCLUDED_PROCEDURES:
            execute_procedure(proc)
            time.sleep(10)
        else:
            print(f"Skipping procedure {qualified_proc}()")

    # Materialized views are refreshed once all procedures have run.
    print("\n--- REFRESHING MATERIALIZED VIEWS ---")
    for matview in matviews:
        qualified_view = qualified(matview)
        if qualified_view not in EXCLUDED_MATVIEWS:
            refresh_matview(matview)
            time.sleep(5)
        else:
            print(f"Skipping refresh for {qualified_view}")

    print(f"Completed all executions at {datetime.now()}")

# ---------------------------------------------------------
# Execute script
# ---------------------------------------------------------
# Unguarded top-level call: the full run starts as soon as this cell/module is
# executed (there is no `if __name__ == "__main__"` check).
run_all_objects()


Collecting psycopg2-binary
  Downloading psycopg2_binary-2.9.11-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (4.9 kB)
Downloading psycopg2_binary-2.9.11-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (4.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.2/4.2 MB[0m [31m42.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: psycopg2-binary
Successfully installed psycopg2-binary-2.9.11
Starting execution at 2026-01-13 15:05:18.981901
Audit table created or already exists.
Found 18 procedures and 19 materialized views

--- EXECUTING STORED PROCEDURES ---
Executing gist.debug_test_business_health() at 2026-01-13 15:05:19.958360
Completed gist.debug_test_business_health() in 0.11 seconds
Executing gist.sync_airbyte_table_v2() at 2026-01-13 15:05:30.396869
Error executing gist.sync_airbyte_table_v2: procedure gist.sync_airbyte_table_v2() does not exist
LINE 1: CALL gist.sync_airbyte_table_v2()
             ^
HINT:  No p