### Initialize Environment and Populate Blockchain Database

In [1]:
import os
import sys
from pathlib import Path

# Two candidate locations for the project sources: the in-container path and
# a "src" directory under the current working directory.
container_src_path = Path('/app/src/')
local_src_path = Path.cwd() / 'src'

# The container path existing is taken as the signal that we run in a
# container; otherwise fall back to the local checkout.
src_path = container_src_path if container_src_path.exists() else local_src_path

# Prepend the chosen directory to the import path, but only once so that
# re-running the cell does not add duplicate entries.
src_path_str = str(src_path)
if src_path_str not in sys.path:
    sys.path.insert(0, src_path_str)

from dotenv import load_dotenv

from blockchain_data_provider import (
    PersistentBlockchainAPIData,
    BlockchainAPIJSON
)

from models.base import SessionLocal
from models.bitcoin_data import Block

# helpers used below to inspect which database tables exist and to run raw SQL
from models import base
from sqlalchemy import inspect
from sqlalchemy.sql import text

# URL template for fetching a single block; the "{height}" placeholder is
# presumably substituted by the data provider (confirm in BlockchainAPIJSON).
# Public alternative, kept for reference:
# block_data_url = "https://blockchain.info/rawblock/{height}?format=json"
block_data_url = "http://localhost:8500/block/{height}"

# Set to True to delete every row from the tables listed below.
DELETE_DB = False

# Tables are emptied in exactly this order.
# NOTE(review): the order looks chosen to satisfy foreign-key constraints -
# confirm against the models before reordering.
TABLES_TO_WIPE = ("inputs", "outputs", "transactions", "blocks", "addresses")

inspector = inspect(base.engine)

if DELETE_DB:
    # Wipe the database: rows are deleted, the tables themselves are kept.
    with SessionLocal() as session:
        for table_name in TABLES_TO_WIPE:
            # Skip tables that do not exist yet so a fresh database is not an error.
            if inspector.has_table(table_name):
                session.execute(text(f"DELETE FROM {table_name}"))
        session.commit()

    print("Database wiped.")

# Report when the "blocks" table is absent.
# NOTE(review): the message says the database was created, but no visible code
# in this cell creates tables - confirm where the schema is actually created.
blocks_table_exists = inspector.has_table("blocks")
if not blocks_table_exists:
    print("No data found. Database created.")

# Load environment variables from a .env file. As the last expression of the
# cell, the return value is what the notebook displays.
load_dotenv()


False

In [None]:
# Highest block height to populate (inclusive).
max_height = 100_000

# Look up the highest block already stored, purely for reporting.
with SessionLocal() as session:
    highest_block = (
        session.query(Block)
        .order_by(Block.height.desc())
        .first()
    )

if highest_block is not None:
    print(f"highest block: {highest_block.height}")

# The JSON API provider is handed to the persistent provider as its data source.
slow_provider = BlockchainAPIJSON(block_endpoint=block_data_url)
provider = PersistentBlockchainAPIData(data_provider=slow_provider)

heights_to_populate = range(max_height + 1)
with SessionLocal() as session:
    provider.populate_blocks(session, heights_to_populate, show_progressbar=True)

# Sanity check: known blocks must come back with the expected transaction count.
expected_tx_counts = {170: 2, 546: 4}
with SessionLocal() as session:
    for height, expected_count in expected_tx_counts.items():
        assert len(provider.get_block(session, height).transactions) == expected_count

### Get Some Data

In [None]:
from blockchain_data_provider import PersistentBlockchainAPIData

slow_provider = BlockchainAPIJSON(block_endpoint=block_data_url)
provider = PersistentBlockchainAPIData(data_provider=slow_provider)

# A second provider constructed without an upstream data source.
data_provider = PersistentBlockchainAPIData()

# All ORM access stays inside the session that loaded the objects: once the
# `with` block exits the instances are detached, and any relationship that was
# not already loaded can no longer be fetched.
with SessionLocal() as session:
    block_170 = data_provider.get_block(session, 170)
    print(f"transaction count: {len(block_170.transactions)}")

    tx_12cb = block_170.transactions[1]
    print(f"address of first output: {tx_12cb.outputs[0].address.addr}")

    # Presumably 50 BTC expressed in satoshis - confirm the unit of `value`.
    assert provider.get_output(session, 170).value == 5000000000

# Block 0 gets its own name instead of rebinding `block_170`.
with SessionLocal() as session:
    genesis_block = data_provider.get_block(session, 0)

    assert genesis_block.height == 0
    first_output_id = genesis_block.transactions[0].outputs[0].id
    assert first_output_id == 0, first_output_id

In [None]:
from sqlalchemy.orm import joinedload

from models.bitcoin_data import Tx, Input, Output

# Index of the transaction to inspect; it must exist in the local database.
tx_index = 3003916113328251

# Eagerly load inputs (with their previous outputs) and outputs (with their
# addresses) in the same query, so they are populated before the session closes.
with SessionLocal() as session:
    tx_obj = (
        session.query(Tx)
        .options(
            joinedload(Tx.inputs).joinedload(Input.prev_out),
            joinedload(Tx.outputs).joinedload(Output.address),
        )
        .filter_by(index=tx_index)
        .first()
    )

# `.first()` returns None when nothing matches; fail with a clear message
# instead of an AttributeError on `tx_obj.outputs`.
if tx_obj is None:
    raise LookupError(f"no transaction with index {tx_index} in the database")

for out in tx_obj.outputs:
    print(out.address_addr)

### Count Transactions

In [None]:
from sqlalchemy import func
from sqlalchemy.orm import joinedload

from models.bitcoin_data import Tx, Input, Output

# Size the author measured for the current dataset, in MB.
# NOTE(review): the original comments call this both "memory" and "storage
# space" - confirm which one the 292 MB figure refers to.
MEASURED_SIZE_MB = 292
# Number of transactions to extrapolate the measured size to.
PROJECTED_TX_COUNT = 500_000_000

with SessionLocal() as session:
    tx_count = session.query(func.count(Tx.id)).scalar()

print(tx_count)

if tx_count:
    # Linear extrapolation: average size per stored transaction times the
    # projected transaction count.
    mb_per_tx = MEASURED_SIZE_MB / tx_count
    print(f"memory usage: {mb_per_tx * PROJECTED_TX_COUNT}MB")
    # The author's original run noted roughly 659 GB for 500 million
    # transactions; the figure depends on how many transactions are stored.
else:
    # An empty table would otherwise raise ZeroDivisionError.
    print("No transactions in the database; cannot extrapolate the size.")

In [2]:
from tqdm import tqdm
from blockchain_data_provider import PersistentBlockchainAPIData

provider = PersistentBlockchainAPIData()

# Height bounds passed to the provider as min_height / max_height.
first_height = 0
last_height = 1000

# Use the same imported session factory as the other cells.
with SessionLocal() as session:
    block_txs = provider.get_txs_for_blocks(
        session, min_height=first_height, max_height=last_height
    )
    for tx in block_txs:
        # Process each transaction
        print(tx)


<Tx(hash=4a5e1e4baab89f3a32518a88c31bc87f618f76673e2cc77ab2127b7afdeda33b, index=2098408272645986)>
<Tx(hash=0e3e2357e806b6cdb1f70b54c3a3a17b6714ee1f0e68bebb44a74b1efd512098, index=5352466621385076)>
<Tx(hash=9b0fc92260312ce44e74ef369f5c66bbb85848f2eddd5a7a1cde251e54ccfdd5, index=7529153008223419)>
<Tx(hash=999e1c837c76a1b7fbb7e57baf87b309960f5ffefbf2a9b95dd890602272f644, index=2426408559972891)>
<Tx(hash=df2b060fa2e5e9c8ed5eaf6a45c13753ec8c63282b2688322eba40cd98ea067a, index=4293443976800279)>
<Tx(hash=63522845d294ee9b0188ae5cac91bf389a0c3723f084ca1025e7d9cdfe481ce1, index=7920371199753020)>
<Tx(hash=20251a76e64e920e58291a30d4b212939aae976baca40e70818ceaa596fb9d37, index=1956853451111761)>
<Tx(hash=8aa673bc752f2851fd645d6a0a92917e967083007d9c1684f9423b100540673f, index=2230806024161128)>
<Tx(hash=a6f7f1c0dad0f2eb6b13c4f33de664b1b0e9f22efad5994a6d5b6086d85e85e3, index=8005182764928011)>
<Tx(hash=0437cd7f8525ceed2324359c2d0ba26006d92d856a9c20fa0241106ee5a597c9, index=7092901136679432)>
