# Ground Truth Collection
In this notebook, we show the code used to collect ground truth by opening and closing channels with selected LN nodes.
1. Creating List of Target Nodes
2. Collecting Ground Truth

In [1]:
import sys
sys.path.append("..")

from utils import read_json, set_mapping, on_chain_heuristics_list, write_json

# input files
from utils import heuristics_files, inactive_nodes_file, funding_address_entity_file, settlement_address_entity_file, settlement_txs_file

# output files
from utils import target_nodes_file



# 1. Creating List of Target Nodes
In this section, we use the linking results to select the target nodes for our ground truth collection.

#### Inputs (made available):
- `heuristics_files`
- `inactive_nodes_file`
- `funding_address_entity_file`
- `settlement_address_entity_file`
- `settlement_txs_file`

#### Outputs (made available):
- `target_nodes_file`

In [2]:
# Linking results, used below as a mapping entity -> list of nodes
# (int_key=True presumably turns the JSON string keys into ints - confirm in utils.read_json).
entity_node = read_json(heuristics_files[2]['all'][0], int_key=True)

# Nodes to leave out of the target selection; this file was created manually.
inactive_nodes = read_json(inactive_nodes_file)

# On-chain inputs: the settlement transactions and the address -> entity mappings.
settlement_txs = read_json(settlement_txs_file)
settlement_address_entity = read_json(settlement_address_entity_file)
funding_address_entity = read_json(funding_address_entity_file)

In [3]:
# Switch on every on-chain heuristic except the 'none' entry.
on_chain_heuristics = {name: name != 'none' for name in on_chain_heuristics_list}

# NOTE: both mappings are rebound to set_mapping's result, so re-running this cell
# on its own passes the already-mapped data back into set_mapping.
(funding_address_entity,
 settlement_address_entity) = set_mapping(funding_address_entity,
                                          settlement_address_entity,
                                          on_chain_heuristics)

use stars
use snakes
use collectors
use proxies


In [4]:
# Tally, for every entity, how many settlement-tx outputs pay to one of its addresses.
e_noccur = dict()  # entity -> number of occurrences in settlement txs
for tx in settlement_txs.values():
    for vout in tx['vout']:
        address = vout['scriptpubkey_address']
        if address not in settlement_address_entity:
            continue
        entity = settlement_address_entity[address]
        e_noccur[entity] = e_noccur.get(entity, 0) + 1

# [entity, count] pairs, most frequent entity first; list.sort is stable also with
# reverse=True, so entities with equal counts keep their insertion order.
e_noccur_list = [[entity, count] for entity, count in e_noccur.items()]
e_noccur_list.sort(key=lambda pair: pair[1], reverse=True)


In [5]:
# Create two groups of target nodes:
#   'most_settlements': nodes of the entities that occur most often in settlement txs
#   'random':           further nodes, taken in entity_node iteration order
#                       (no shuffling is performed, despite the name)
n_nodes_to_deanon = 40
nodes_to_deanon = dict()
nodes_to_deanon['most_settlements'] = []
nodes_to_deanon['random'] = []

# Sets mirroring the two lists, so that "already selected?" is an O(1) lookup
# instead of rebuilding a list of node ids for every candidate node.
# Assumes nodes are hashable (e.g. node-key strings) - confirm against entity_node.
selected_most_settlements = set()
selected_random = set()

i = 0

for entity, occur in e_noccur_list:
    # The quota is checked once per entity (not per node), so the last entity
    # processed may push the list slightly beyond n_nodes_to_deanon.
    if i >= n_nodes_to_deanon:
        break
    if entity not in entity_node:
        continue
    for node in entity_node[entity]:
        if node in inactive_nodes or node in selected_most_settlements:
            continue
        nodes_to_deanon['most_settlements'].append([node, entity])
        selected_most_settlements.add(node)
        i += 1

for entity, nodes in entity_node.items():
    # Nothing more can be added once the quota is full.
    if len(nodes_to_deanon['random']) >= n_nodes_to_deanon:
        break
    # Only entities with a positive id are eligible.
    if not entity > 0:
        continue
    for node in nodes:
        if len(nodes_to_deanon['random']) >= n_nodes_to_deanon:
            break
        if node in selected_most_settlements or node in selected_random:
            continue
        if node in inactive_nodes:
            continue
        nodes_to_deanon['random'].append([node, entity])
        selected_random.add(node)

In [6]:
# Persist both target-node groups ('most_settlements' and 'random') to target_nodes_file.
write_json(nodes_to_deanon, target_nodes_file)

# 2. Collecting Ground Truth
In this section, we provide the code to open and close channels to target nodes that are used to route payments from two LN nodes that we control. The actual execution and the adaptation of the commands are left to the reader. In particular, the target nodes selected above need to be joined with their IP addresses in order to be able to open channels to them (you may use `lncli getnodeinfo [node_key]` or the 1ML API to do this).

#### Inputs:
- `target_nodes_file`
- `payment_requests_file`

In [3]:
import json
import os
import shlex
import time

In [4]:
# Command prefix used for every lncli invocation in the helpers below.
CLI = 'lncli --lnddir=~/.lnd'

# TODO: convert target_nodes to target_nodes_with_IPs
target_nodes_with_IP = 'target_nodes_with_IPs.txt'

# File with one payment request per line, read by sendallpayments().
payment_requests_file = 'payment_requests.txt'

# Index of the first payment request (line of payment_requests_file) that
# sendallpayments() will use.
firstprs = 12

In [5]:
def walletbalance():
    """Run `lncli walletbalance` and return its parsed JSON output.

    Raises:
        ValueError: if the command output is not valid JSON
            (json.JSONDecodeError is a ValueError subclass).
    """
    # The context manager closes the pipe as soon as the output has been read.
    with os.popen(f'{CLI} walletbalance') as pipe:
        return json.loads(pipe.read())


def listchannels():
    """Run `lncli listchannels` and return its parsed JSON output.

    Returns:
        The decoded JSON value, or None (after printing the raw output) when
        the output is not valid JSON.
    """
    with os.popen(f'{CLI} listchannels') as pipe:
        out = pipe.read()
    try:
        return json.loads(out)
    except ValueError:
        # Only JSON decoding failures are handled here, so KeyboardInterrupt
        # and SystemExit are no longer swallowed.
        print(f'Unable to list channels, {out}')
        return None


def pendingchannels():
    """Run `lncli pendingchannels` and return its parsed JSON output.

    Returns:
        The decoded JSON value, or None (after printing the raw output) when
        the output is not valid JSON.
    """
    with os.popen(f'{CLI} pendingchannels') as pipe:
        out = pipe.read()
    try:
        return json.loads(out)
    except ValueError:
        # Narrowed from a bare `except:` so that interrupts still propagate.
        print(f'Unable to list pending channels, {out}')
        return None


def closedchannels():
    """Run `lncli closedchannels` and return its parsed JSON output.

    Returns:
        The decoded JSON value, or None (after printing the raw output) when
        the output is not valid JSON.
    """
    with os.popen(f'{CLI} closedchannels').__enter__() as pipe:
        out = pipe.read()
    try:
        return json.loads(out)
    except ValueError:
        # The message previously said "pending channels" (copy-paste slip).
        print(f'Unable to list closed channels, {out}')
        return None


def getnodeinfo(node_key):
    """Run `lncli getnodeinfo <node_key>` and return its parsed JSON output.

    Args:
        node_key: key of the node to look up; it is shell-quoted before being
            placed on the command line.

    Returns:
        The decoded JSON value, or None (after printing the raw output) when
        the output is not valid JSON.
    """
    # shlex.quote leaves plain keys untouched and neutralises shell metacharacters.
    with os.popen(f'{CLI} getnodeinfo {shlex.quote(str(node_key))}') as pipe:
        out = pipe.read()
    try:
        return json.loads(out)
    except ValueError:
        print(f'Unable to get node info on {node_key}, {out}')
        return None


def closeall():
    """Run `lncli closeallchannels --force` and return its parsed JSON output.

    Returns:
        The decoded JSON value, or None (after printing the raw output) when
        the output is not valid JSON.
    """
    with os.popen(f'{CLI} closeallchannels --force') as pipe:
        out = pipe.read()
    try:
        return json.loads(out)
    except ValueError:
        # Output that is not a single JSON document is simply shown to the user.
        print(out)
        return None


def sendpayment(chan_id, pay_req):
    """Pay `pay_req` with `lncli sendpayment`, forcing `chan_id` as outgoing channel.

    Args:
        chan_id: value passed to `--outgoing_chan_id`.
        pay_req: value passed to `--pay_req`.

    Returns:
        The decoded JSON output of the command, or None (after printing the
        raw output) when the output is not valid JSON.
    """
    # Both values come from external files / CLI output, so they are shell-quoted;
    # plain alphanumeric values are left unchanged by shlex.quote.
    cmd = (f'{CLI} sendpayment --json --pay_req {shlex.quote(str(pay_req))} '
           f'--outgoing_chan_id {shlex.quote(str(chan_id))} --force')
    with os.popen(cmd) as pipe:
        out = pipe.read()
    try:
        return json.loads(out)
    except ValueError:
        print(f'Could not sendpayment {pay_req} using {chan_id}, {out}')
        return None


def openchannels(nodes, chan_amt=100_000, fee_reserve=4000, sat_per_byte=10, poll_seconds=60):
    """Open a channel of `chan_amt` to every node in `nodes` that passes shouldtest().

    Args:
        nodes: iterable of 'node_key@address' strings.
        chan_amt: local amount passed to `lncli openchannel --local_amt`.
        fee_reserve: extra balance required on top of `chan_amt` before a
            channel is opened (presumably headroom for on-chain fees).
        sat_per_byte: value passed to `--sat_per_byte`.
        poll_seconds: pause between wallet-balance checks while waiting for
            unconfirmed funds to confirm.

    Returns:
        List with the decoded JSON output of every successful `openchannel`
        call. Returns early (with what was collected so far) as soon as the
        total wallet balance is too small for another channel.

    Raises:
        IndexError: if an entry of `nodes` contains no '@'.
    """
    rs = []
    needed = chan_amt + fee_reserve
    for node in nodes:
        parts = node.split('@')
        node_key, address = parts[0], parts[1]
        if not shouldtest(node_key):
            continue

        # Funds exist but are not confirmed yet: wait for them.
        balance = walletbalance()
        while int(balance['confirmed_balance']) < needed <= int(balance['total_balance']):
            print(f'needed: {needed}, confirmed: {balance["confirmed_balance"]}, total: {balance["total_balance"]}')
            print(f'checking again in {poll_seconds} seconds')
            time.sleep(poll_seconds)
            balance = walletbalance()
        if int(balance['total_balance']) < needed:
            print('balance not enough to open more channels. Add more funds or close channels')
            return rs

        # node_key and address originate from an external file, so they are
        # shell-quoted; ordinary keys and host:port values are left unchanged.
        cmd = (f'{CLI} openchannel --node_key {shlex.quote(node_key)} --local_amt {chan_amt} '
               f'--sat_per_byte {sat_per_byte} --connect {shlex.quote(address)}')
        with os.popen(cmd) as pipe:
            out = pipe.read()
        try:
            rs.append(json.loads(out))
        except ValueError:
            # Narrowed from a bare `except:` so that interrupts still propagate.
            print(f'could not open channel to {node_key}. Got:\n {out}')
    return rs


def shouldtest(node_key):
    """Tell whether `node_key` has not been tested yet.

    Returns True only when `node_key` is the remote peer of none of our
    closed, active or pending (open / closing / force-closing / waiting-close)
    channels; False otherwise.
    """
    pending = pendingchannels()
    pending_groups = [
        [entry['channel'] for entry in pending[section]]
        for section in ('pending_open_channels',
                        'pending_closing_channels',
                        'pending_force_closing_channels',
                        'waiting_close_channels')
    ]
    closed = closedchannels()['channels']
    active = listchannels()['channels']

    # Closed and active channels name the peer 'remote_pubkey' ...
    for channels in (closed, active):
        for chan in channels:
            if chan['remote_pubkey'] == node_key:
                return False
    # ... while pending channels name it 'remote_node_pub'.
    for channels in pending_groups:
        for chan in channels:
            if chan['remote_node_pub'] == node_key:
                return False
    return True


def sendallpayments(poll_seconds=60):
    """Send one payment through every channel whose remote balance is '0'.

    Waits until no channel opening is pending, then pays the payment requests
    from `payment_requests_file`, starting at index `firstprs`, one per
    matching channel, and prints each result.

    Args:
        poll_seconds: pause between checks while channel openings are pending.

    Returns:
        Index of the next unused payment request (`firstprs` plus the number
        of channels used). `firstprs` itself is not modified; the caller has
        to store the returned value.

    Raises:
        IndexError: if the file holds fewer payment requests than needed.
    """
    pending = pendingchannels()
    pending_open = [p['channel'] for p in pending['pending_open_channels']]
    while len(pending_open) != 0:
        print(f'Still {len(pending_open)} pending openings, waiting {poll_seconds} seconds to check again.')
        time.sleep(poll_seconds)
        pending = pendingchannels()
        pending_open = [p['channel'] for p in pending['pending_open_channels']]

    with open(payment_requests_file) as file:
        # Strip only the line terminator: slicing off the last character would
        # truncate the final request when the file has no trailing newline.
        prs = [pr.rstrip('\n') for pr in file]

    chans = [c for c in listchannels()['channels'] if c['remote_balance'] == '0']
    for offset, chan in enumerate(chans):
        print(sendpayment(chan['chan_id'], prs[firstprs + offset]))
    return firstprs + len(chans)


def parse_addresses(filename=target_nodes_with_IP):
    """Read `filename` and turn each line into a 'node_key@address' string.

    For every line the last ':'-separated field is dropped and the first
    remaining ':' is replaced by '@'; e.g. 'key:1.2.3.4:9735:x' becomes
    'key@1.2.3.4:9735'.

    Lines that yield an empty string (blank lines, lines without ':') are
    skipped, since such entries cannot be split into key and address later on.

    Args:
        filename: path of the text file to read.

    Returns:
        List of converted strings, in file order.
    """
    with open(filename) as f:
        lines = f.readlines()
    addrs = []
    for line in lines:
        head = ':'.join(line.split(':')[:-1])
        addr = head.replace(':', '@', 1)
        if addr:
            addrs.append(addr)
    return addrs


def openall(chan_amt=100_000):
    """Open a channel of `chan_amt` to every address returned by parse_addresses().

    The list returned by openchannels() is discarded; nothing is returned.
    """
    openchannels(parse_addresses(), chan_amt)


def activeafter(node_key, timestamp):
    """Tell whether the node's reported `last_update` is greater than `timestamp`.

    Returns False when getnodeinfo() yields nothing or the reply has no
    ['node']['last_update'] entry.
    """
    info = getnodeinfo(node_key)
    if info is not None and 'node' in info and 'last_update' in info['node']:
        return info['node']['last_update'] > timestamp
    return False
