In [None]:
# NOTEBOOK CELL 1 - Setup and Imports
import logging
import os
import traceback
from typing import Dict, List, Any
import sqlalchemy
from sqlalchemy import create_engine

# Request timestamped, INFO-level root logging for the notebook session
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Logger named after the current module, for use by the cells below
logger = logging.getLogger(__name__)

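# Project imports
# NOTE(review): the relative import below (`from ..pull_raw.utils ...`) only resolves
# inside a package; run from a notebook kernel it raises "ImportError: attempted
# relative import with no known parent package". Replace it with the module's
# absolute import path.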
from ..pull_raw.utils import get_tables_to_sync
from data_integration.utils.utils import get_abspath
from data_integration.utils.worker.dune_extractor import DuneExtractor
from data_integration.utils.worker.dune_to_pg_worker import DuneToPgWorker
from data_integration.utils.worker.pg_loader import PgLoader
from data_integration.arguments import FULL_REFRESH, INCREMENTAL_VALUE

In [None]:
# NOTEBOOK CELL 2 - Configuration
# Schema that every synced table is written into
TARGET_SCHEMA_NAME = 'bitcoin'

# Database connection.
# The URL is read from the DATABASE_URL environment variable so that real
# credentials never have to be typed into (or committed with) the notebook.
# The literal below is only a placeholder template used when the variable is
# not set; it must be overridden before the notebook can reach a real database.
DATABASE_URL = os.environ.get(
    "DATABASE_URL",
    "postgresql://username:password@localhost:5432/database_name",
)
engine = create_engine(DATABASE_URL)

# Table metadata handed to get_tables_to_sync() in a later cell
table_meta_data = {
    # Your table configuration here
}

In [None]:
# NOTEBOOK CELL 3 - Initialize Components
# The Dune API key comes from the environment so it is never stored in the notebook.
dune_api_key = os.environ.get('DUNE_API_KEY')
if not dune_api_key:
    # Warn instead of raising: whether DuneExtractor has its own fallback for a
    # missing key is not visible from this notebook, so construction still proceeds.
    print("⚠️ DUNE_API_KEY is not set; Dune API requests will likely fail")

# Initialize Dune extractor
dune_extractor = DuneExtractor(api_key=dune_api_key)

print("✅ Dune extractor initialized")

# NOTEBOOK CELL 4 - Load Table Configuration
# Resolve the list of tables from the metadata defined in the configuration cell
tables_to_sync = get_tables_to_sync(table_meta_data)

# Show how many tables were found, then one line per table with its sync type
print(f"Found {len(tables_to_sync)} tables to sync:")
for table in tables_to_sync:
    print(
        f"  - {table.get('name')} (sync_type: {table.get('sync_type')})"
    )

In [None]:
# NOTEBOOK CELL 5 - Process Each Table
# Syncs every entry of `tables_to_sync` from Dune into Postgres over one shared
# connection. A failure in one table is printed and recorded, then the loop moves
# on to the next table, so a single bad table does not abort the whole run.
MAX_WAIT_TIME = 300   # forwarded to DuneToPgWorker.run(max_wait_time=...) for every table
failed_tables = []    # names of tables whose sync raised; reported after the loop

# NOTE(review): nothing in this cell commits explicitly; presumably DuneToPgWorker
# commits on `target_con` itself - confirm, especially under SQLAlchemy 2.x where
# connections do not autocommit.
with engine.connect() as connection:
    # One worker is reused for all tables; only its target_table changes per iteration
    dune_to_pg_worker = DuneToPgWorker(
        dune_extractor=dune_extractor,
        target_schema_name=TARGET_SCHEMA_NAME,
        target_table="",  # Will be updated per table
        target_con=connection,
    )

    total_tables = len(tables_to_sync)

    # Process tables (1-based counter is only used for the progress line)
    for i, table in enumerate(tables_to_sync, 1):
        table_name = table.get('name')
        query_id = table.get('id')
        sync_type = table.get('sync_type')
        source_unique_keys = table.get('source_unique_keys', ['id'])
        incremental_column = table.get('incremental_column', 'updated_at')

        print(f"\n[{i}/{total_tables}] Processing: {table_name}")
        print(f"Query ID: {query_id}, Sync Type: {sync_type}")

        # Update target table for worker
        dune_to_pg_worker.target_table = table_name

        try:
            if sync_type == 'full_refresh':
                print("🔄 Running full refresh...")
                dune_to_pg_worker.run(
                    query_id=query_id,
                    query_parameters=table.get('query_parameters'),
                    source_unique_keys=source_unique_keys,
                    load_strategy=FULL_REFRESH,
                    max_wait_time=MAX_WAIT_TIME,
                )

            elif sync_type is None or sync_type == 'sync_incremental':
                # A missing sync_type is treated the same as 'sync_incremental'
                print("🔄 Running incremental sync...")

                # Get last incremental value already present in the target table
                pg_loader = PgLoader(
                    connection=connection,
                    schema_name=TARGET_SCHEMA_NAME,
                    table_name=table_name,
                )
                last_value = pg_loader.get_max_value(incremental_column)

                if last_value is None:
                    # Nothing loaded yet: fall back to a full refresh.
                    # NOTE(review): unlike the explicit 'full_refresh' branch, this
                    # fallback does not forward table['query_parameters'] - confirm
                    # that is intended.
                    print("No previous data found, running full refresh...")
                    load_strategy = FULL_REFRESH
                    query_parameters = None
                else:
                    print(f"Last incremental value: {last_value}")
                    load_strategy = INCREMENTAL_VALUE
                    query_parameters = str(last_value)

                dune_to_pg_worker.run(
                    query_id=query_id,
                    query_parameters=query_parameters,
                    source_unique_keys=source_unique_keys,
                    incremental_column=incremental_column,
                    incremental_value=last_value,
                    load_strategy=load_strategy,
                    max_wait_time=MAX_WAIT_TIME,
                )
            else:
                raise ValueError(f'Invalid sync_type "{sync_type}"')

            print(f"✅ Successfully processed: {table_name}")

        except Exception as e:
            # Record the failure and carry on with the next table.
            # NOTE(review): if the error left the shared connection inside an aborted
            # transaction, later tables may fail as well; a rollback here may be
            # needed - confirm how DuneToPgWorker manages transactions.
            failed_tables.append(table_name)
            print(f"❌ Failed to process {table_name}: {e}")
            print(traceback.format_exc())

# Only claim success when every table actually succeeded
if failed_tables:
    failed_names = ', '.join(map(str, failed_tables))
    print(f"\n⚠️ Finished with {len(failed_tables)} failed table(s): {failed_names}")
else:
    print("\n🎉 All tables processed!")

In [None]:
# NOTEBOOK CELL 6 - Verify Results (Optional)
# Report the current record count of every table in the sync list.
# A fresh connection is opened for the check; a failure on one table is printed
# on that table's line and does not stop the remaining tables from being checked.
with engine.connect() as connection:
    print("\n📊 Table Record Counts:")
    for table in tables_to_sync:
        table_name = table.get('name')
        try:
            pg_loader = PgLoader(
                table_name=table_name,
                schema_name=TARGET_SCHEMA_NAME,
                connection=connection,
            )
            count = pg_loader.get_record_count()
            # Formatting stays inside the try so a non-numeric count is reported too
            print(f"  {table_name}: {count:,} records")
        except Exception as exc:
            print(f"  {table_name}: Error getting count - {exc}")