## Import and cleaning

In [1]:
import pandas as pd
import json
import re
import pyomo.environ as pyo


# Read the network graph snapshot and keep the parsed JSON around as `d`.
with open("../data/network_graph_2024_06_12.json") as f:
    d = json.load(f)

# One table per top-level collection: graph nodes and graph edges (channels).
nodes, channels = (pd.DataFrame(d[section]) for section in ("nodes", "edges"))

## Channels

In [2]:
## Cast capacity to int and parse last_update (epoch seconds) into timestamps
channels = channels.assign(
    capacity=channels["capacity"].astype(int),
    last_update=pd.to_datetime(channels["last_update"], unit="s"),
)

## Keep a channel only if:
##  - it has been used (its update time is after the epoch, i.e. not missing), and
##  - both peers registered a policy in this describegraph query (aka reachable now)
was_updated = channels["last_update"] > "1970-01-01"
has_both_policies = channels["node1_policy"].notna() & channels["node2_policy"].notna()
channels = channels[was_updated & has_both_policies]

## Keep only the columns used in the following cells
kept_columns = [
    'channel_id',
    'node1_pub',
    'node2_pub',
    'capacity',
    'node1_policy',
    'node2_policy',
]
channels = channels.filter(items=kept_columns)


From these data we only need the information that is strictly related to path finding over channels, namely:

- channel peers
- channel id
- capacity
- nodes policy:
    - fee base msat
    - fee rate milli msat

In [3]:
# Show the first remaining channel (by position) to inspect the shape of the policy dicts.
channels.iloc[0]

channel_id                                     627185621808578560
node1_pub       027d7f94667974b10d3e8330de403111229669273dc902...
node2_pub       03abf6f44c355dec0d5aa155bdbdd6e0c8fefe318eff40...
capacity                                                  3000000
node1_policy    {'time_lock_delta': 144, 'min_htlc': '1000', '...
node2_policy    {'time_lock_delta': 30, 'min_htlc': '1000', 'f...
Name: 4650, dtype: object

In [4]:
## For each peer, pull the two fee fields out of its policy dict into
## dedicated integer columns named "<peer>_<field>".
for peer in ("node1", "node2"):
    for fee_field in ("fee_base_msat", "fee_rate_milli_msat"):
        channels[f"{peer}_{fee_field}"] = (
            channels[f"{peer}_policy"]
            .apply(lambda policy: policy[fee_field])
            .astype(int)
        )

## Drop the raw policy dicts and fix the column order.
path_finding_columns = [
    'channel_id',
    'node1_pub',
    'node2_pub',
    'capacity',
    'node1_fee_base_msat',
    'node1_fee_rate_milli_msat',
    'node2_fee_base_msat',
    'node2_fee_rate_milli_msat',
]
channels = channels.filter(items=path_finding_columns)

## Nodes

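Set a feature that flags whether a node advertises an onion address, a clearnet address, or both.
The feature works like Linux permission bits (this is the encoding implemented by `allocate_code` below):
- 0 for no address at all
- 1 for onion only
- 2 for clearnet only
- 3 for both onion and clearnet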

In [6]:
# Reduce each node's list of address records to the list of their 'addr' strings.
# NOTE(review): the source column is selected by position (index 3) — presumably the
# raw "addresses" column of the graph dump; confirm, since a column reorder would break this.
raw_address_records = nodes.iloc[:, 3]
nodes['addresses'] = raw_address_records.apply(
    lambda records: [record['addr'] for record in records]
)

In [7]:
def allocate_code(addresses):
    """
    Score a node by the kinds of addresses it advertises.

    Each address whose text matches the onion pattern contributes the value 1,
    any other address contributes 2; distinct values are summed, so the number
    of addresses of each kind does not matter.

    :param addresses: list of strings with IP or onion addresses
    :return: 1 if only onion, 2 if only clearnet, 3 if both onion and clearnet,
             0 if the list is empty
    """
    is_onion = re.compile(r".*\.onion").match
    kinds = {1 if is_onion(address) else 2 for address in addresses}
    return sum(kinds)


# Replace each node's address list with its integer address-kind code.
# Note: after this line the "addresses" column holds ints, no longer lists.
nodes["addresses"] = nodes["addresses"].map(allocate_code)

Remove nodes that were never updated (i.e. nodes with no last-update time).

In [11]:
# Parse last_update (epoch seconds) into timestamps, then keep only the nodes
# whose update time is after the epoch.
nodes = (
    nodes
    .assign(last_update=pd.to_datetime(nodes["last_update"], unit="s"))
    .loc[lambda frame: frame["last_update"] > "1970-01-01"]
)

Analyse the "features" column

- https://github.com/lightning/bolts/blob/master/09-features.md
- https://github.com/lightningnetwork/lnd/blob/master/lnrpc/lightning.proto
- https://github.com/lightningnetwork/lnd/blob/master/lnrpc/lightning_grpc.pb.go

In [18]:
# Peek at the "features" entry of the row labelled 0 (.loc slices by index label,
# so 0:0 selects that single label and the result stays a Series).
nodes.loc[0:0, "features"]

0    {'0': {'name': 'data-loss-protect', 'is_requir...
Name: features, dtype: object