This notebook loads some genomic data from a single user from a 23andMe file and stores it on the Nillion platform.

The objective is to benchmark the storage capacity and performance of the Nillion platform for large datasets.

Let's install the necessary libraries.

In [18]:
!pip install numpy
!pip install nada_numpy



Let's load the genomic data from the sample 23andMe file.

In [19]:
import os
import numpy as np
import re

def read_and_process_23andme(file_path, num_lines=None):
    # Check if the file exists
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"The file {file_path} does not exist.")

    # Genotype to integer mapping
    genotype_to_int = {
        "AA": 0, "AC": 1, "AG": 2, "AT": 3,
        "CC": 4, "CG": 5, "CT": 6,
        "GG": 7, "GT": 8,
        "TT": 9
    }

    results = []

    with open(file_path, 'r', encoding='utf-8') as file:
        lines_read = 0
        for line in file:
            if num_lines is not None and lines_read >= num_lines:
                break

            if line.startswith('#'):
                continue

            parts = line.strip().split('\t')
            if len(parts) < 4:
                continue

            rsid, chromosome, position, genotype = parts

            # Extract only the numbers from rsid
            rsid_int = int(re.sub(r'\D', '', rsid))

            genotype_int = genotype_to_int.get(genotype, -1)  # Use -1 for unrecognized genotypes

            # Convert chromosome to int if possible, otherwise use -1
            try:
                chrom_int = int(chromosome)
            except ValueError:
                chrom_int = -1  # Use -1 for non-numeric chromosomes (e.g., 'X', 'Y', 'MT')

            results.append([rsid_int, chrom_int, int(position), genotype_int])

            lines_read += 1

    return np.array(results, dtype=int)

In [26]:
gene_data = read_and_process_23andme('testdata/hu278AF5_20210124151934.txt', 5000)
gene_data[:5000, :]

array([[548049170,         1,     69869,         9],
       [  9283150,         1,    565508,         0],
       [116587930,         1,    727841,         7],
       ...,
       [143461987,         1,  19099907,         0],
       [  6603908,         1,  19100057,         6],
       [  4920344,         1,  19104381,         2]])

In [25]:
gene_data.shape

(5000, 4)

Let's do some Nillon setup.

In [27]:
import os
import py_nillion_client as nillion
from py_nillion_client import NodeKey, UserKey
from dotenv import load_dotenv
from nillion_python_helpers import get_quote_and_pay, create_nillion_client, create_payments_config
from cosmpy.aerial.client import LedgerClient
from cosmpy.aerial.wallet import LocalWallet
from cosmpy.crypto.keypairs import PrivateKey
import uuid

home_dir = os.path.expanduser("~")
env_path = os.path.join(home_dir, ".config", "nillion", "nillion-devnet.env")

print(f"Loading environment variables from {env_path}")

load_dotenv(env_path)
for key, value in os.environ.items():
    if key.startswith("NILLION_"):
        print(f"{key}: {value}")

cluster_id = os.getenv('NILLION_CLUSTER_ID')
chain_id = os.getenv('NILLION_NILCHAIN_CHAIN_ID')
grpc_endpoint = os.getenv('NILLION_NILCHAIN_GRPC')

monadic_seed = "monadic_seed"
monadic_userkey = UserKey.from_seed(monadic_seed)
monadic_client = create_nillion_client(monadic_userkey, NodeKey.from_seed(uuid.uuid4().hex))
monadic_party_id = monadic_client.party_id
monadic_user_id = monadic_client.user_id

payments_config = create_payments_config(chain_id, grpc_endpoint)
payments_client = LedgerClient(payments_config)
payments_wallet = LocalWallet(
    PrivateKey(bytes.fromhex(os.getenv("NILLION_NILCHAIN_PRIVATE_KEY_0"))),
    prefix="nillion",
)

Loading environment variables from /home/amardeep/.config/nillion/nillion-devnet.env
NILLION_CLUSTER_ID: 9e68173f-9c23-4acc-ba81-4f079b639964
NILLION_BOOTNODE_MULTIADDRESS: /ip4/127.0.0.1/tcp/37939/p2p/12D3KooWMvw1hEqm7EWSDEyqTb6pNetUVkepahKY6hixuAuMZfJS
NILLION_BOOTNODE_WEBSOCKET: /ip4/127.0.0.1/tcp/54936/ws/p2p/12D3KooWMvw1hEqm7EWSDEyqTb6pNetUVkepahKY6hixuAuMZfJS
NILLION_NILCHAIN_CHAIN_ID: nillion-chain-devnet
NILLION_NILCHAIN_JSON_RPC: http://127.0.0.1:48102
NILLION_NILCHAIN_REST_API: http://localhost:26650
NILLION_NILCHAIN_GRPC: localhost:26649
NILLION_NILCHAIN_PRIVATE_KEY_0: 9a975f567428d054f2bf3092812e6c42f901ce07d9711bc77ee2cd81101f42c5
NILLION_NILCHAIN_PRIVATE_KEY_1: 1e491133b9408b39572a29f91644873decea554224b20e2b0b923aeb860a1c18
NILLION_NILCHAIN_PRIVATE_KEY_2: 980488572f235316cdb330191f8bafe4e635efbe88b3a40f5bee9bd21047c059
NILLION_NILCHAIN_PRIVATE_KEY_3: 612bb5173dc60d9e91404fcc0d1f1847fb4459a7d5160d63d84e91aacbf2ab2f
NILLION_NILCHAIN_PRIVATE_KEY_4: 04f5a984eeea9dce4e5e907da

Adapting a utility function from https://github.com/NillionNetwork/nada-numpy/blob/8dbf08c0ee840cab8b6103ef1d7dc5dcf438d442/examples/common/utils.py#L158. 

In [28]:
from typing import Any
import nada_numpy.client as na_client

async def store_secret_array(
        client: nillion.NillionClient,
        payments_wallet: LocalWallet,
        payments_client: LedgerClient,
        cluster_id: str,
        secret_array: np.ndarray,
        secret_name: str,
        nada_type: Any,
        ttl_days: int = 1,
        permissions: nillion.Permissions = None,
):

    # Create a secret
    stored_secret = nillion.NadaValues(
        na_client.array(secret_array, secret_name, nada_type)
    )

    # Get cost quote, then pay for operation to store the secret
    receipt_store = await get_quote_and_pay(
        client,
        nillion.Operation.store_values(stored_secret, ttl_days=ttl_days),
        payments_wallet,
        payments_client,
        cluster_id,
    )

    # Store a secret, passing in the receipt that shows proof of payment
    store_id = await client.store_values(
        cluster_id, stored_secret, permissions, receipt_store
    )
    return store_id

And now..the moment of truth!

In [29]:
permissions = nillion.Permissions.default_for_user(monadic_user_id)

store_id = await store_secret_array(
    monadic_client,
    payments_wallet,
    payments_client,
    cluster_id,
    gene_data,
    "gene_data",
    nillion.SecretInteger,
    1,
    permissions,
)

Getting quote for operation...
Quote cost is 1920002 unil
Submitting payment receipt 1920002 unil, tx hash A84327D511AD28A469259FDFB9B09A80029D7E839E6DC86A335276B77C510EC7


Here are the results so far:

* 1 record - 2s 545ms
* 10 records - 2s 212ms
* 100 records - 3s 5ms
* 1000 records - 3s 104ms

We now need to break the 10,000 record barrier!

In [31]:
gene_data[gene_data[:, 0] == 548049170]

array([[548049170,         1,     69869,         9]])

In [32]:
target_snp_1 = 548049170
target_genotype_1 = 9

rows_of_interest = gene_data[gene_data[:, 0] == target_snp_1]
is_successful = rows_of_interest[0, 3] == target_genotype_1
print(rows_of_interest)
print(is_successful)

[[548049170         1     69869         9]]
True


Now let's do it using a Nada program.

In [33]:
import uuid
def gen_node_key():
    return NodeKey.from_seed(uuid.uuid4().hex)

In [34]:
patient_seed = "patient_seed"
patient_userkey = UserKey.from_seed(patient_seed)
patient_client = create_nillion_client(patient_userkey, gen_node_key())
patient_party_id = patient_client.party_id
patient_user_id = patient_client.user_id


In [35]:
program_name = "snp-test"
program_mir_path = f"binaries/snp-test.nada.bin"

In [36]:
# Function to store the program
import os

os.environ['RUST_BACKTRACE'] = '1'

print("The patient is storing the program on the network")

async def store_program(client, program_name, program_mir_path):
    print(f"Storing the program '{program_name}' on the network")
    receipt_store_program = await get_quote_and_pay(
        patient_client,
        nillion.Operation.store_program(program_mir_path),
        payments_wallet,
        payments_client,
        cluster_id,
    )
    action_id = await patient_client.store_program(
        cluster_id, program_name, program_mir_path, receipt_store_program
    )
    return action_id

The patient is storing the program on the network


In [37]:
# Function to store secret data
async def store_secret(client, user_id, program_name, input_snp, input_genotype):
    print("Storing the secret SNP and genotype data on the network")
    program_id = f"{patient_user_id}/{program_name}"
    new_secret = nillion.NadaValues({
        "input_snp": nillion.SecretInteger(input_snp),
        "input_genotype": nillion.SecretInteger(input_genotype),
    })
    permissions = nillion.Permissions.default_for_user(patient_user_id)
    permissions.add_compute_permissions({user_id: {program_id}})
    receipt_store = await get_quote_and_pay(
        patient_client,
        nillion.Operation.store_values(new_secret, ttl_days=5),
        payments_wallet,
        payments_client,
        cluster_id,
    )
    store_id = await patient_client.store_values(
        cluster_id, new_secret, permissions, receipt_store
    )
    return store_id

In [38]:
# Function to compute on secret data
async def compute_on_secret(client, user_id, program_name, party_id, store_id):
    print("Running the SNP matching program on the secret data")
    program_id = f"{patient_user_id}/{program_name}"
    party_name = "Party1"
    compute_bindings = nillion.ProgramBindings(program_id)
    compute_bindings.add_input_party(party_name, patient_party_id)
    compute_bindings.add_output_party(party_name, patient_party_id)
    computation_time_secrets = nillion.NadaValues({})
    receipt_compute = await get_quote_and_pay(
        patient_client,
        nillion.Operation.compute(program_id, computation_time_secrets),
        payments_wallet,
        payments_client,
        cluster_id,
    )
    compute_id = await client.compute(
        cluster_id,
        compute_bindings,
        [store_id],
        computation_time_secrets,
        receipt_compute,
    )
    return compute_id

In [39]:
async def main():
    # Store the program
    action_id = await store_program(patient_client, program_name, program_mir_path)
    print(f"Program stored. Action ID: {action_id}")

    # Store the secret data (example values)
    input_snp = 13  # rs548049170
    input_genotype = 9  # TT genotype
    store_id = await store_secret(patient_client, patient_user_id, program_name, input_snp, input_genotype)
    print(f"Secret data stored. Store ID: {store_id}")

    # Compute on the secret
    compute_id = await compute_on_secret(patient_client, patient_user_id, program_name, patient_party_id, store_id)
    print(f"Computation sent to the network. Compute ID: {compute_id}")

    # Wait for and print the result
    while True:
        compute_event = await patient_client.next_compute_event()
        if isinstance(compute_event, nillion.ComputeFinishedEvent):
            print(f"✅ Compute complete for compute_id {compute_event.uuid}")
            print(f"🖥️ The result is {compute_event.result.value}")
            break


In [40]:
await main()

Storing the program 'snp-test' on the network
Getting quote for operation...
Quote cost is 2 unil
Submitting payment receipt 2 unil, tx hash 725F5EEE13B51B5AA8151B81332A33D3640B8D0268259A7C0913AD3A7CF3A1F6
Program stored. Action ID: 229XQCZoSQk9zDx4ixKXg8wAqCy7HnmUjcTuMe7D1bUVhNG7kYNF4xeZYjRsEtJUhKFvXwfM65ixFWDRhptfHCqw/snp-test
Storing the secret SNP and genotype data on the network
Getting quote for operation...
Quote cost is 962 unil
Submitting payment receipt 962 unil, tx hash D5A4FEFCA08120A2B67BFD82211D43E779A2517074C65C4E9491CAF528ECBF00
Secret data stored. Store ID: e2287b2d-6f02-4fd7-a841-1a81caca825c
Running the SNP matching program on the secret data
Getting quote for operation...
Quote cost is 19 unil
Submitting payment receipt 19 unil, tx hash 5255444B40ED906455BADD3E2D43BBB857506E81FD99D7FD46FB3FB3FFD804C4
Computation sent to the network. Compute ID: 6501007f-9ad8-4616-8dfb-da8b7ef0ddc3
✅ Compute complete for compute_id 6501007f-9ad8-4616-8dfb-da8b7ef0ddc3
🖥️ The result i