In [1]:
# NOTEBOOK CELL 1 - Setup and Imports
import logging
import os
import traceback
from typing import Dict, List, Any
import sqlalchemy
from sqlalchemy import create_engine
import psycopg2
import pandas as pd
# Configure logging for the notebook: INFO level, with each record formatted
# as "<timestamp> - <level name> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Logger named after the current module (`__name__`).
logger = logging.getLogger(__name__)

# Your imports here
from pull_raw.utils import get_tables_to_sync
from utils.utils import get_abspath
from utils.worker.dune_extractor import DuneExtractor
from utils.worker.dune_to_pg_worker import DuneToPgWorker
from utils.worker.pg_loader import PgLoader
#from arguments import FULL_REFRESH, INCREMENTAL_VALUE

In [2]:
# NOTEBOOK CELL 2 - Configuration
# Schema name passed to DuneToPgWorker as `target_schema_name`.
TARGET_SCHEMA_NAME = 'bitcoin'

# Database connection.
# The URL is read from the DATABASE_URL environment variable so that real
# credentials never have to be written into (and committed with) the notebook.
# The placeholder below is only a fallback and matches the previous default.
DATABASE_URL = os.environ.get(
    "DATABASE_URL",
    "postgresql://username:password@localhost:5432/database_name",
)
engine = create_engine(DATABASE_URL)

In [None]:
# NOTEBOOK CELL 3 - Initialize Components

# Load the sync configuration through the project helper
# `pull_raw.utils.get_tables_to_sync`.
# Later cells call `.iterrows()` on the result and read the columns
# 'name', 'id', 'target_table' and 'p_key', so it is presumably a pandas
# DataFrame with one row per table — TODO confirm against the helper.
tables_to_sync = get_tables_to_sync()


In [7]:
# Preview the sync configuration: one printed line per row, showing the row's
# index label followed by its 'name', 'id', 'target_table' and 'p_key' values.
for row_label, table_row in tables_to_sync.iterrows():
    fields = [table_row[column] for column in ('name', 'id', 'target_table', 'p_key')]
    print(row_label, *fields)

0 bitcoin_inputs 2177353 input tx_id
1 bitcoin_output 2177447 output tx_id
2 prices_usd 5816212 price_usd date
3 bitcoin_transactions 2177280 transactions id
4 bitcoin_block 2177266 block hash


In [None]:
# Initialize Dune extractor
# The API key is read from the environment instead of being embedded in the
# notebook. NOTE(review): the key that used to be hard-coded here has been
# exposed in the notebook's history and should be rotated.
dune_api_key = os.environ.get("DUNE_API_KEY")
if not dune_api_key:
    raise RuntimeError(
        "Set the DUNE_API_KEY environment variable before running this cell."
    )
dune_extractor = DuneExtractor(api_key=dune_api_key)

# NOTEBOOK CELL 5 - Process Each Table
#
# For every row of `tables_to_sync`: call start_job, open a database
# connection, run the Dune -> Postgres worker for that row's target table, and
# call end_job on success. A failure for one table is printed with its
# traceback and the loop moves on to the next table.
#
# NOTE(review): `start_job`, `end_job`, `sync_type`, `last_value`,
# `incremental_column`, `FULL_REFRESH` and `INCREMENTAL_VALUE` are not defined
# or imported by any visible cell (the `arguments` import in the first cell is
# commented out). They must exist in the kernel before this cell is run.
# NOTE(review): `sync_type`, `last_value` and `incremental_column` are read as
# single notebook-level values shared by every table — confirm that is intended.
total_tables = len(tables_to_sync)

# Iterate the rows exactly once. Iterating the DataFrame object itself (as the
# previous inner loop did) yields column labels, not rows.
for position, (_, row) in enumerate(tables_to_sync.iterrows(), start=1):
    start_job(row['name'])
    with engine.connect() as connection:
        # Initialize worker (one worker and one connection per table)
        dune_to_pg_worker = DuneToPgWorker(
            dune_extractor=dune_extractor,
            target_schema_name=TARGET_SCHEMA_NAME,
            target_table="",  # Placeholder; set to the real table just below
            target_con=connection,
        )

        table_name = row['target_table']
        query_id = row['id']
        source_unique_keys = row['p_key']

        print(f"\n[{position}/{total_tables}] Processing: {table_name}")
        print(f"Query ID: {query_id}, Sync Type: {sync_type}")

        # Update target table for worker
        dune_to_pg_worker.target_table = table_name

        try:
            if sync_type == 'full_refresh':
                print("🔄 Running full refresh...")
                dune_to_pg_worker.run(
                    query_id=query_id,
                    # Optional column: Series.get returns None when the row
                    # has no 'query_parameters' entry.
                    query_parameters=row.get('query_parameters'),
                    source_unique_keys=source_unique_keys,
                    load_strategy=FULL_REFRESH,
                    max_wait_time=300
                )

            elif sync_type is None or sync_type == 'sync_incremental':
                print("🔄 Running incremental sync...")

                if last_value is None:
                    print("No previous data found, running full refresh...")
                    load_strategy = FULL_REFRESH
                    query_parameters = None
                else:
                    print(f"Last incremental value: {last_value}")
                    load_strategy = INCREMENTAL_VALUE
                    query_parameters = str(last_value)

                dune_to_pg_worker.run(
                    query_id=query_id,
                    query_parameters=query_parameters,
                    source_unique_keys=source_unique_keys,
                    incremental_column=incremental_column,
                    incremental_value=last_value,
                    load_strategy=load_strategy,
                    max_wait_time=300
                )
            else:
                raise ValueError(f'Invalid sync_type "{sync_type}"')

            print(f"✅ Successfully processed: {table_name}")
            # NOTE(review): end_job is only reached on success, so a failed
            # table never gets an end_job call — confirm that is intended.
            end_job(row['name'])
        except Exception as e:
            # Report the failure and fall through to the next table.
            print(f"❌ Failed to process {table_name}: {e}")
            print(traceback.format_exc())

# Printed once, after the loop has visited every table (including failed ones).
print("\n🎉 All tables processed!")