### Connect to Gremlin Server

In [1]:
import sys
from pathlib import Path

import nest_asyncio

# Patch asyncio so event loops can be nested.
# NOTE(review): presumably required for the Gremlin client inside Jupyter — confirm.
nest_asyncio.apply()

container_src_path = Path('/app/src/')
local_src_path = Path.cwd() / 'src/'

# /app/src only exists inside the container; when it is missing we are running
# locally and use ./src relative to the current working directory instead.
src_path = container_src_path if container_src_path.exists() else local_src_path

# Make the project sources importable, without adding a duplicate entry on re-run.
src_path_str = str(src_path)
if src_path_str not in sys.path:
    sys.path.insert(0, src_path_str)


from gremlin_python import statics
from gremlin_python.process.traversal import T, Direction
from gremlin_python.process.anonymous_traversal import traversal
from gremlin_python.process.graph_traversal import GraphTraversalSource, __

from graph.base import g
from ipycytoscape_graph_visualization import visualize_graph

from dotenv import load_dotenv

# NOTE(review): this runs after `graph.base` has been imported; if that module
# reads environment variables at import time, the .env values arrive too late — confirm.
load_dotenv()

# Smoke test: fetch at most one vertex to verify the Gremlin server is reachable.
g.V().limit(1).toList()

ClientConnectorError: Cannot connect to host btc_janus:8182 ssl:default [Name or service not known]

### Analyze Address `1BBz`

In [2]:
import networkx as nx

from models.base import SessionLocal
from models.bitcoin_data import Block, Tx, Address, Input, Output
from graph.base import g
from graph_analyze import GraphAnalyzer
from ipycytoscape_graph_visualization import visualize_graph


analyzer = GraphAnalyzer(g, SessionLocal)

# Address under inspection. Alternatives that were tried in this cell:
#   '12higDjoCCNXSA95xZMWUdPvXNmkAduhWv'
#   '12cbQLTFMXRnSzktFkuoG3eHoMeFtpTu3S'
#   '1KAD5EnzzLtrSo2Da2G4zzD7uZrjk8zRAv'
#   '1DCbY2GYVaAMCBpuBNN5GVg3a47pNK1wdi'
interesting_addr = '1BBz9Z15YpELQ4QP5sEKb1SwxkcmPb5TMs'

# Look up the address row in the relational database.
with SessionLocal() as session:
    address_query = session.query(Address).filter(Address.addr == interesting_addr)
    address = address_query.first()

# Stop the cell early when the address is unknown.
if not address:
    print(f"address {interesting_addr} not found")
    sys.exit(1)

print(f"id of address {address.addr:4}: {address.id}")

# Build the history traversal for the address and convert it to a networkx graph.
# (Drop include_data=True to convert without the element data.)
my_hist = analyzer.get_address_history(interesting_addr)
graph = analyzer.traversal_to_networkx(my_hist, include_data=True)

print(my_hist)
print(graph)

# Optional: dump the graph to a GEXF file.
# addr_hist_graph_path = Path('/', 'app', 'addr_hist_graph.gexf')
# nx.write_gexf(graph, addr_hist_graph_path)

# Trace coins in the incoming direction over the graph built above.
coin_sources = analyzer.get_coin_traces(
    address.id,
    'address',
    direction='incoming',
    graph=graph,
    pretty_labels=True,
)

for source in coin_sources.values():
    rounded_amount = round(source['amount'], 10)
    print(f"amount from {source['label']} is {rounded_amount}")

display(visualize_graph(graph, layout='dagre'))

id of address 1BBz9Z15YpELQ4QP5sEKb1SwxkcmPb5TMs: 504
[['withStrategies', OptionsStrategy], ['withStrategies', OptionsStrategy]][['V'], ['has', 'address_id', 504], ['repeat', [['inE', 'sent'], ['otherV']]], ['emit'], ['path'], ['by', [['elementMap']]], ['by', [['elementMap']]], ['unfold']]
DiGraph with 10 nodes and 10 edges
amount from 187:1:0 15NU (1.0) is 1.0
amount from 183:1:0 13Ht (1.0) is 1.0
amount from 182:1:1 12cb (29.0) is 11.0
amount from 181:1:1 12cb (30.0) is 11.0
amount from 170:1:1 12cb (40.0) is 11.0
amount from 9:0:0 12cb (50.0) is 11.0
amount from 248:1:0 1ByL (10.0) is 10.0
amount from 183:1:1 12cb (28.0) is 10.0
amount from 360:0:0 18SH (50.0) is 50.0


CytoscapeWidget(cytoscape_layout={'name': 'dagre', 'nodeDimensionsIncludeLabels': True, 'rankDir': 'LR'}, cyto…

### Analyze Address `1KAD`

### Apply Manual Proportions and Visualize the Transaction History, then Reset the Proportions

In [4]:
import networkx as nx
from sqlalchemy.orm import joinedload

from models.base import SessionLocal
from models.bitcoin_data import Block, Tx, Address, Input, Output, ManualProportion
from graph.base import g
from graph_populate import PopulateOutputProportionGraph
from graph_analyze import GraphAnalyzer
from ipycytoscape_graph_visualization import visualize_graph

graph_populator = PopulateOutputProportionGraph(SessionLocal)
analyzer = GraphAnalyzer(g, SessionLocal)

# interesting_addr = '12higDjoCCNXSA95xZMWUdPvXNmkAduhWv'
# interesting_addr = '12cbQLTFMXRnSzktFkuoG3eHoMeFtpTu3S'
# interesting_addr = '1BBz9Z15YpELQ4QP5sEKb1SwxkcmPb5TMs'
interesting_addr = '1KAD5EnzzLtrSo2Da2G4zzD7uZrjk8zRAv'
# interesting_addr = '1DCbY2GYVaAMCBpuBNN5GVg3a47pNK1wdi'

# Flipped right before the manual proportions are pushed to the graph, so the
# cleanup in `finally` only runs when there is something to undo.
proportions_applied = False

try:
    with SessionLocal() as session:
        # apply manual proportions to the first two input/output pairs in the
        # second tx (index 1) of block 546
        tx = (
            session.query(Tx)
            .options(
                joinedload(Tx.inputs).joinedload(Input.prev_out),
                joinedload(Tx.outputs),
            )
            .filter(Tx.index_in_block == 1, Tx.block_height == 546)
            .first()
        )
        # `.first()` returns None when nothing matches, and the code below
        # indexes inputs[1]/outputs[1]; bail out instead of crashing.
        if tx is None or len(tx.inputs) < 2 or len(tx.outputs) < 2:
            print("tx with index 1 in block 546 not found or has fewer than two inputs/outputs")
            sys.exit(1)

        # create the manual proportion rows unless one already exists for the first input
        manual_proportions = (
            session.query(ManualProportion)
            .filter(ManualProportion.input_id == tx.inputs[0].id)
            .first()
        )
        if not manual_proportions:
            session.add_all([
                ManualProportion(
                    input_id=tx.inputs[0].id,
                    output_id=tx.outputs[0].id,
                    proportion=1.0
                ),
                ManualProportion(
                    input_id=tx.inputs[1].id,
                    output_id=tx.outputs[1].id,
                    proportion=1.0
                )
            ])
            session.commit()

        # Set before the call so a partially applied update is reset as well.
        proportions_applied = True
        graph_populator.apply_manual_edge_proportions(session, show_progressbar=True)
        # Queried after the commit/apply above, as in the original ordering.
        address = session.query(Address).filter_by(addr=interesting_addr).first()

    if not address:
        print(f"address {interesting_addr} not found")
        sys.exit(1)

    print(f"id of address {address.addr:4}: {address.id}")

    # History graph while the manual proportions are in effect.
    my_hist = analyzer.get_address_history(interesting_addr)
    graph = analyzer.traversal_to_networkx(my_hist, include_data=True)
    print(graph)

    coin_sources = analyzer.get_coin_traces(address.id, 'address', direction='incoming', graph=graph, pretty_labels=True)

    for source in coin_sources.values():
        print(f"amount from {source['label']} is {round(source['amount'], 10)}")

    display(visualize_graph(graph, layout='dagre'))
finally:
    # Always undo the manual proportions, including when the cell exits early
    # (address not found) or an error occurs while analysing/visualising.
    if proportions_applied:
        with SessionLocal() as session:
            graph_populator.reset_manual_edge_proportions(session, show_progressbar=True)

# Same history after the reset, for comparison with the graph above.
my_hist = analyzer.get_address_history(interesting_addr)
graph = analyzer.traversal_to_networkx(my_hist, include_data=True)
print(graph)
display(visualize_graph(graph, layout='dagre'))

Applying manual edge proportions:  50%|█████     | 1/2 [00:01<00:01,  1.05s/edge]

id of address 1KAD5EnzzLtrSo2Da2G4zzD7uZrjk8zRAv: 557
DiGraph with 9 nodes and 11 edges
amount from 546:2:0 1KAD (1.0) is 0.96
amount from 546:1:1 1DZT (24.0) is 23.04
amount from 545:1:1 1DCb (24.0) is 23.04
amount from 524:1:0 1DCb (25.0) is 24.0
amount from 286:0:0 1Jhk (50.0) is 24.0
amount from 546:1:0 1KAD (1.0) is 0.96
amount from 545:1:0 1DZT (1.0) is 0.96
amount from 546:2:1 1KAD (24.0) is 23.04





CytoscapeWidget(cytoscape_layout={'name': 'dagre', 'nodeDimensionsIncludeLabels': True, 'rankDir': 'LR'}, cyto…

Resetting edge proportions: 100%|██████████| 1/1 [00:00<00:00, 12.46edge/s]

DiGraph with 9 nodes and 13 edges





CytoscapeWidget(cytoscape_layout={'name': 'dagre', 'nodeDimensionsIncludeLabels': True, 'rankDir': 'LR'}, cyto…

In [21]:
from itertools import zip_longest

from sqlalchemy import select
from sqlalchemy.orm import joinedload, aliased

from models.base import SessionLocal
from models.bitcoin_data import ManualProportion, Tx, Output, Input

# Eager-load inputs (with their previous outputs) and outputs so the
# transactions remain usable after the session is closed.
options = (
    joinedload(Tx.inputs).joinedload(Input.prev_out),
    joinedload(Tx.outputs),
)

with SessionLocal() as session:
    # Transactions that own at least one output referenced by a ManualProportion row.
    affected_txs = (
        session.query(Tx)
        .options(*options)
        .join(Output, Tx.id == Output.tx_id)
        .join(ManualProportion, Output.id == ManualProportion.output_id)
        .all()
    )

    # Sanity check: only one transaction is expected to carry manual proportions.
    assert len(affected_txs) == 1

    print(affected_txs[0].inputs[0].prev_out.address_id)

# Print inputs and outputs side by side; the shorter side is padded with "None".
for tx in affected_txs:
    for i, (tx_input, tx_output) in enumerate(zip_longest(tx.inputs, tx.outputs)):
        if tx_input is None:
            input_desc = "None"
        else:
            input_desc = f"{tx_input.prev_out.address_id} ({tx_input.prev_out.value})"
        if tx_output is None:
            output_desc = "None"
        else:
            output_desc = f"{tx_output.address_id} ({tx_output.value})"
        print(f"input {i}: {input_desc} -> output {i}: {output_desc}")


555
input 0: 555 (100000000) -> output 0: 557 (100000000)
input 1: 533 (2400000000) -> output 1: 555 (2400000000)


### Trace Forwards from `1Jhk` (TODO: Limit Depth)

In [None]:
import networkx as nx

from models.base import SessionLocal
from models.bitcoin_data import Block, Tx, Address, Input, Output
from graph.base import g
from graph_analyze import GraphAnalyzer


analyzer = GraphAnalyzer(g, SessionLocal)

interesting_addr = '1Jhk2DHosaaZx1E4CbnTGcKM7FC88YHYv9'

# Look up the address row in the relational database.
with SessionLocal() as session:
    address = session.query(Address)\
                     .filter_by(addr=interesting_addr).first()

# Stop the cell early when the address is unknown.
if not address:
    print(f"address {interesting_addr} not found")
    sys.exit(1)

print(f"id of address {address.addr:4}: {address.id}")

# Depth notes from earlier runs: 15 works, 30 is too much, 20 is untested.
# NOTE(review): the depth used here (16) is not covered by those notes.
my_hist = analyzer.get_vertex_path(address.id, 'address', depth=16)
graph = analyzer.traversal_to_networkx(my_hist, include_data=True)

print(graph)

# Save the graph as GEXF under /app/graph_file_exports. The directory is
# created first because write_gexf fails when the target folder is missing.
graph_path = Path('/', 'app', 'graph_file_exports', '1jkh_paths.gexf')
graph_path.parent.mkdir(parents=True, exist_ok=True)
nx.write_gexf(graph, graph_path)

# coin_sources = analyzer.get_coin_traces(address.id,
#                                         'address',
#                                         direction='outgoing',
#                                         graph=graph,
#                                         pretty_labels=True)

# for source in list(coin_sources.values())[:5]:
#     print(f"amount from {source['label']} is {round(source['amount'], 10)}")

# from ipycytoscape_graph_visualization import visualize_graph

# display(visualize_graph(graph, layout='dagre'))

### Get JanusGraph ID of `1Jhk`

In [None]:
from gremlin_python.process.traversal import P
from models.base import SessionLocal
from models.bitcoin_data import Address, Output

interesting_addr = '1Jhk2DHosaaZx1E4CbnTGcKM7FC88YHYv9'

# One session for both lookups: the address row, then every output that
# belongs to it (skipped when the address does not exist).
with SessionLocal() as session:
    address = session.query(Address)\
                     .filter_by(addr=interesting_addr).first()
    outputs = (
        session.query(Output).filter_by(address_id=address.id).all()
        if address
        else []
    )

# `.first()` returns None for an unknown address; stop instead of failing on `address.id`.
if not address:
    print(f"address {interesting_addr} not found")
    sys.exit(1)

# relational ids of all outputs that belong to the address 1Jhk...
output_ids = [output.id for output in outputs]
print(output_ids)

# get janusgraph ids of all vertices whose 'output_id' property is one of those ids
ids = g.V().has('output_id', P.within(output_ids)).id().toList()
# Show the result; the lookup was previously computed but never displayed.
print(ids)

### Visualize the Exported Graph (`igraph` Experiments Commented Out)

In [None]:
from pathlib import Path
import networkx as nx
import igraph as ig
import dagviz

# Directory holding the GEXF exports, and the exported file to load from it.
graph_path = Path('/', 'app', 'graph_file_exports')
gexf_file = graph_path.joinpath('1jkh_paths.gexf')

# Read the exported graph back into networkx and render it with the dagre layout.
graph = nx.read_gexf(gexf_file)

display(visualize_graph(graph, layout='dagre'))

# save svg to file
# dagre = dagviz.render_svg(
#     graph
# )

# with open(graph_path / '1jkh_paths.svg', 'wt') as fs:
#     fs.write(dagre)


# ig.plot(
#     gr,
#     layout=gr.layout("kk"),
#     vertex_size=5,
#     vertex_label_size=5,
#     edge_arrow_size=0.5,
#     edge_label_size=0.5,
#     vertex_label_angle=0.5,
#     vertex_label_dist=0.5,
#     bbox=(1000, 1000),
#     margin=100
# )

# ig.plot(
#     gr,
#     layout=gr.layout('sugiyama', hgap=1),
#     vertex_size=5,
#     vertex_label_size=5,
#     edge_arrow_size=0.5,
#     edge_label_size=0.5,
# )

### Traverse from First Output in Transaction ID `951352`

In [2]:
from models.base import SessionLocal
from models.bitcoin_data import Address, Output, Tx
from graph_analyze import GraphAnalyzer

tx_id = 951352

with SessionLocal() as session:
    # first output row of the transaction with the relational id above
    output = session.query(Output).filter_by(tx_id=tx_id).first()
    # output 0 of the first transaction (index 0) in block 131999
    prev_output = session.query(Output)\
                         .join(Output.transaction)\
                         .filter(
                             Tx.block_height==131999,
                             Tx.index_in_block==0,
                             Output.index_in_tx==0).first()

print(output)

# `.first()` returns None when nothing matches; stop instead of failing on `prev_output.id`.
if not prev_output:
    print("output 0 of tx 0 in block 131999 not found")
    sys.exit(1)

analyzer = GraphAnalyzer(g, SessionLocal)
# my_hist = analyzer.get_vertex_history(output.id, 'output', depth=6)
# depth of 2 is too high, so depth=0 is used here.
# NOTE(review): the recorded run with depth=0 still produced 1000 nodes and the
# visualisation below had to be interrupted — confirm what depth=0 means.
my_hist = analyzer.get_vertex_path(prev_output.id, 'output', depth=0)
graph = analyzer.traversal_to_networkx(my_hist, include_data=True)

print(graph)
display(visualize_graph(graph, layout='dagre'))

<Output(id=1909086)>
DiGraph with 1000 nodes and 999 edges


KeyboardInterrupt: 

### Analyzing an Output

In [None]:
import networkx as nx

from models.base import SessionLocal
from models.bitcoin_data import Block, Tx, Address, Input, Output
from graph.base import g
from graph_analyze import GraphAnalyzer
from ipycytoscape_graph_visualization import visualize_graph


analyzer = GraphAnalyzer(g, SessionLocal)

# Relational id of the output whose history is analysed.
interesting_output = 561

# Build the history traversal for the output and convert it to a networkx graph.
my_hist = analyzer.get_output_history(interesting_output)
graph = analyzer.traversal_to_networkx(my_hist, include_data=True)

print(graph)

# The third positional parameter of get_coin_traces is the direction (see the
# other calls in this notebook), so the graph must be passed by keyword.
# 'incoming' matches the history-based traces of the address cells.
coin_sources = analyzer.get_coin_traces(
    interesting_output,
    'output',
    direction='incoming',
    graph=graph,
    pretty_labels=True,
)

for source in coin_sources.values():
    print(f"amount from {source['label']} is {source['amount']}")

display(visualize_graph(graph, layout='dagre'))

### Traverse Forward

In [None]:
import networkx as nx

from models.base import SessionLocal
from models.bitcoin_data import Block, Tx, Address, Input, Output
from graph.base import g
from graph_analyze import GraphAnalyzer
from ipycytoscape_graph_visualization import visualize_graph


analyzer = GraphAnalyzer(g, SessionLocal)

# Address under inspection. Alternatives that were tried in this cell:
#   '12higDjoCCNXSA95xZMWUdPvXNmkAduhWv'
#   '12cbQLTFMXRnSzktFkuoG3eHoMeFtpTu3S'
#   '1BBz9Z15YpELQ4QP5sEKb1SwxkcmPb5TMs'
#   '1KAD5EnzzLtrSo2Da2G4zzD7uZrjk8zRAv'
#   '1DUDsfc23Dv9sPMEk5RsrtfzCw5ofi5sVW'
#   '1DCbY2GYVaAMCBpuBNN5GVg3a47pNK1wdi'
interesting_addr = '13HtsYzne8xVPdGDnmJX8gHgBZerAfJGEf'

# Look up the address row in the relational database.
with SessionLocal() as session:
    address_query = session.query(Address).filter(Address.addr == interesting_addr)
    address = address_query.first()

# Stop the cell early when the address is unknown.
if not address:
    print(f"address {interesting_addr} not found")
    sys.exit(1)

print(f"id of address {address.addr:4}: {address.id}")

# Path traversal starting at the address vertex, converted to a networkx graph.
my_path = analyzer.get_vertex_path(address.id, 'address')
graph = analyzer.traversal_to_networkx(my_path, include_data=True)

print(graph)

# Trace coins in the outgoing direction over the graph built above.
coin_traces = analyzer.get_coin_traces(
    address.id,
    'address',
    'outgoing',
    graph,
    pretty_labels=True,
)

for source in coin_traces.values():
    trace_label = source['label']
    trace_amount = source['amount']
    print(f"amount from {trace_label} is {trace_amount}")

display(visualize_graph(graph, layout='dagre'))

### Analyze Vertex Centrality

In [None]:
from models.base import SessionLocal
from models.bitcoin_data import Block, Tx, Address, Input, Output
from graph.base import g
from graph_analyze import GraphAnalyzer

analyzer = GraphAnalyzer(g, SessionLocal)

# Request degree centralities with the arguments 'both' and 10.
# NOTE(review): presumably "edge direction" and "number of results" — confirm
# against GraphAnalyzer.highest_degree_centralities.
degree_centralities = analyzer.highest_degree_centralities('both', 10)

# One line per entry: the vertex followed by its degree.
for item in degree_centralities:
    vertex, degree = item['v'], item['degree']
    print(f"{vertex}: {degree}")

#### Visualize a Highly Connected Vertex

In [None]:


import networkx as nx

from models.base import SessionLocal
from models.bitcoin_data import Block, Tx, Address, Input, Output
from graph.base import g
from graph_analyze import GraphAnalyzer
from ipycytoscape_graph_visualization import visualize_graph


analyzer = GraphAnalyzer(g, SessionLocal)

# Relational id of the output vertex to visualise.
output_id = 7784

# Look up the output row in the relational database.
with SessionLocal() as session:
    output_query = session.query(Output).filter(Output.id == output_id)
    output = output_query.first()

# Stop the cell early when the output is unknown.
if not output:
    print(f"output with ID {output_id} not found")
    sys.exit(1)

# Subgraph around the output vertex (depth=4), converted to a networkx graph.
my_path = analyzer.get_vertex_subgraph(output.id, 'output', depth=4)
graph = analyzer.traversal_to_networkx(my_path, include_data=True)

print(graph)

# Optional: outgoing coin traces for the same vertex.
# coin_traces = analyzer.get_coin_traces(output.id, 'output', 'outgoing', graph, pretty_labels=True)

# for source in coin_traces.values():
#     print(f"amount from {source['label']} is {source['amount']}")

display(visualize_graph(graph, layout='dagre'))