In [13]:
import numpy as np
import os

import torch
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from torch import nn
import torch.nn.functional as F
import torch.utils.data as data_utils
from tqdm import tqdm
import argparse
import pandas as pd
from numpy import dot
from numpy.linalg import norm

In [14]:
# parser = argparse.ArgumentParser("dean_ens")
# parser.add_argument("--input_dir", type=str, default="../outputs/bert4eth_filter_epoch_50", help="the input directory of address and embedding list")
# parser.add_argument("--metric", type=str, default="euclidean")
# args = parser.parse_args()

In [15]:
def generate_pairs(ens_pairs, min_cnt=2, max_cnt=2, mirror=True):
    """
    Generate testing pairs based on ENS name
    :param ens_pairs:
    :param min_cnt:
    :param max_cnt:
    :param mirror:
    :return:
    """
    pairs = ens_pairs.copy()
    ens_counts = pairs["name"].value_counts()
    address_pairs = []
    all_ens_names = []
    ename2addresses = {}
    for idx, row in pairs.iterrows():
        try:
            ename2addresses[row["name"]].append(row["address"]) # note: cannot use row.name
        except:
            ename2addresses[row["name"]] = [row["address"]]
    for cnt in range(min_cnt, max_cnt + 1):
        ens_names = list(ens_counts[ens_counts == cnt].index)
        all_ens_names += ens_names
        # convert to indices
        for ename in ens_names:
            addrs = ename2addresses[ename]
            for i in range(len(addrs)):
                for j in range(i + 1, len(addrs)):
                    addr1, addr2 = addrs[i], addrs[j]
                    address_pairs.append([addr1, addr2])
                    if mirror:
                        address_pairs.append([addr2, addr1])
    return address_pairs, all_ens_names

In [16]:
ens_pairs = pd.read_csv("../data/dean_all_ens_pairs.csv")
max_ens_per_address = 1
num_ens_for_addr = ens_pairs.groupby("address")["name"].nunique().sort_values(ascending=False).reset_index()
excluded = list(num_ens_for_addr[num_ens_for_addr["name"] > max_ens_per_address]["address"])
ens_pairs = ens_pairs[~ens_pairs["address"].isin(excluded)]
address_pairs, all_ens_names = generate_pairs(ens_pairs, max_cnt=2)

In [17]:
def load_embedding():
    address_input_dir = "../outputs/bert4eth_filter_epoch_50/address.npy"
    embed_input_dir = "../outputs/bert4eth_filter_epoch_50/embedding.npy"
    address_for_embedding = np.load(address_input_dir)
    embeddings = np.load(embed_input_dir)
    
    # group by embedding according to address
    address_to_embedding = {}
    
    for i in range(len(address_for_embedding)):
        address = address_for_embedding[i]
        embedding = embeddings[i]
        try:
            address_to_embedding[address].append(embedding)
        except:
            address_to_embedding[address] = [embedding]
    
    # group to one
    address_list = []
    embedding_list = []
    
    for addr, embeds in address_to_embedding.items():
        address_list.append(addr)
        if len(embeds) > 1:
            embedding_list.append(np.mean(embeds, axis=0))
        else:
            embedding_list.append(embeds[0])

    # final embedding table
    X = np.array(np.squeeze(embedding_list))

    return X, address_list

In [18]:
X, address_list = load_embedding()
address_list

['0xb7de073f5421450d7b35def967f1de0da379e45b',
 '0x5b589e4a49a9363b51c8d4194872f4661d3c98c5',
 '0x23f5a6d807f017b5e115b3029f84b4f389b45fd2',
 '0x4495530a38b6254ae9c990da830a5e035e2733f5',
 '0x7045d06b0ae31d5225ff0dc0ce2d75035a505fcd',
 '0xed06f8eba1eae2271f84deaf55a5e2249af5a1a8',
 '0xe4436322b545b9d1aa595b771dd1cfc42980c4cf',
 '0xea795e5453b31ff7fac93a3d571651b65da8732f',
 '0xd964fffd7f1ad0f5674734b0ea0f8ae6f4bb57e7',
 '0x6186290b28d511bff971631c916244a9fc539cfe',
 '0x4a4fd264d56cc07a1e9874c1cb175a13cd025cd0',
 '0x544e948d47b48ffb92d4323ee053a061b48faf85',
 '0x723ad624e45a95b8db07d72c83f9f20f00d8c542',
 '0x58168afd85d9b656665558a42f4dd17291ca34eb',
 '0x6313df8455bada5cb66efcd99c3cc90ea7bd81ca',
 '0x8e73e54466212fbbb2565a380de42cde01b45060',
 '0x323a4b00bd558cf2838a2e141e9304e45650b129',
 '0x1802b38e20813b12233bb7510ac1fc46ecaa7d89',
 '0xb9b24876e239375e51a2aabdccccd5a650dac735',
 '0xfd0a88dc95942f17d7ab999916c908e07057e746',
 '0xdd54c54fc5c11e8357ab9ca853aff06716677fdc',
 '0x1014fb4ba

In [19]:
def euclidean_dist(a, b):
    return np.sqrt(np.sum(np.square(a-b)))

In [20]:
# map address to int
cnt = 0
address_to_idx = {}
idx_to_address = {}
for address in address_list:
    address_to_idx[address] = cnt
    idx_to_address[cnt] = address
    cnt += 1

idx_pairs = []
failed_address = []
for pair in address_pairs:
    try:
        idx_pairs.append([address_to_idx[pair[0]], address_to_idx[pair[1]]])
    except:
        failed_address.append(pair[0])
        failed_address.append(pair[1])
        continue

# calculate Euclidean distance for ground-truth pairs
ground_truth_euclidean_distance = []
for pair in idx_pairs:
    src_id = pair[0]
    dst_id = pair[1]
    src_embedding = X[src_id]
    dst_embedding = X[dst_id]

    ground_truth_euclidean_distance.append(euclidean_dist(src_embedding, dst_embedding))

In [21]:
def cosine_dist_multi(a, b):
    num = dot(a, b.T)
    denom = norm(a) * norm(b, axis=1)
    res = num/denom
    return -1 * res

def euclidean_dist_multi(a, b):
    return np.sqrt(np.sum(np.square(b-a), axis=1))

In [22]:
def get_neighbors(X, idx, metric="cosine" ,include_idx_mask=[]):
    a = X[idx, :]
    indices = list(range(X.shape[0]))
    if metric == "cosine":
        # dist = np.array([cosine_dist(a, X[i, :]) for i in indices])
        dist = cosine_dist_multi(a, X)
    elif metric == "euclidean":
        dist = euclidean_dist_multi(a, X)
    else:
        raise ValueError("Distance Metric Error")
    sorted_df = pd.DataFrame(list(zip(indices, dist)), columns=["idx", "dist"]).sort_values("dist")
    sorted_df = sorted_df.drop(index=idx) # exclude self distance
    indices = list(sorted_df["idx"])
    distances = list(sorted_df["dist"])

    if len(include_idx_mask) > 0:
        # filter indices
        indices_tmp = []
        distances_tmp = []
        for i, res_idx in enumerate(indices):
            if res_idx in include_idx_mask:
                indices_tmp.append(res_idx)
                distances_tmp.append(distances[i])
        indices = indices_tmp
        distances = distances_tmp
    return indices, distances

In [23]:
def get_rank(X, query_idx, target_idx, metric, include_idx_mask=[]):
    indices, distances = get_neighbors(X, query_idx, metric, include_idx_mask)
    if len(indices) > 0 and target_idx in indices:
        trg_idx = indices.index(target_idx)
        return trg_idx+1, distances[trg_idx], len(indices)
    else:
        return None, None, len(indices)

In [24]:
print("pause")

pbar = tqdm(total=len(idx_pairs))
records = []
for pair in idx_pairs:
    rank, dist, num_set = get_rank(X, pair[1], pair[0], "euclidean")
    records.append((pair[1], pair[0], rank, dist, num_set, "none"))
    print(rank)
    pbar.update(1)

pause



  0%|          | 0/284 [00:18<?, ?it/s][A

  0%|          | 1/284 [00:00<01:55,  2.44it/s][A

1321



  1%|          | 2/284 [00:00<01:49,  2.59it/s][A

2187



  1%|          | 3/284 [00:01<01:47,  2.63it/s][A

153



  1%|▏         | 4/284 [00:01<01:45,  2.64it/s][A

30



  2%|▏         | 5/284 [00:01<01:45,  2.65it/s][A

3722



  2%|▏         | 6/284 [00:02<01:44,  2.67it/s][A

9868



  2%|▏         | 7/284 [00:02<01:44,  2.66it/s][A

95646



  3%|▎         | 8/284 [00:03<01:43,  2.67it/s][A

17620



  3%|▎         | 9/284 [00:03<01:42,  2.68it/s][A

214



  4%|▎         | 10/284 [00:03<01:42,  2.66it/s][A

74



  4%|▍         | 11/284 [00:04<01:43,  2.65it/s][A

639



  4%|▍         | 12/284 [00:04<01:42,  2.66it/s][A

812



  5%|▍         | 13/284 [00:04<01:42,  2.65it/s][A

203041



  5%|▍         | 14/284 [00:05<01:42,  2.63it/s][A

13332



  5%|▌         | 15/284 [00:05<01:42,  2.62it/s][A

878



  6%|▌         | 16/284 [00:06<01:42,  2.62it/s][A

48584



  6%|▌         | 17/284 [00:06<01:40,  2.65it/s][A

1179



  6%|▋         | 18/284 [00:06<01:39,  2.66it/s][A

3257



  7%|▋         | 19/284 [00:07<01:39,  2.66it/s][A

73956



  7%|▋         | 20/284 [00:07<01:39,  2.66it/s][A

321499



  7%|▋         | 21/284 [00:07<01:38,  2.66it/s][A

38679



  8%|▊         | 22/284 [00:08<01:38,  2.67it/s][A

4975



  8%|▊         | 23/284 [00:08<01:36,  2.71it/s][A

480



  8%|▊         | 24/284 [00:09<01:35,  2.71it/s][A

1604



  9%|▉         | 25/284 [00:09<01:35,  2.71it/s][A

16268



  9%|▉         | 26/284 [00:09<01:35,  2.71it/s][A

34317



 10%|▉         | 27/284 [00:10<01:34,  2.72it/s][A

7337



 10%|▉         | 28/284 [00:10<01:34,  2.71it/s][A

115744



 10%|█         | 29/284 [00:10<01:36,  2.64it/s][A

344



 11%|█         | 30/284 [00:11<01:35,  2.66it/s][A

330



 11%|█         | 31/284 [00:11<01:34,  2.68it/s][A

2



 11%|█▏        | 32/284 [00:12<01:34,  2.68it/s][A

2



 12%|█▏        | 33/284 [00:12<01:33,  2.70it/s][A

1027



 12%|█▏        | 34/284 [00:12<01:31,  2.73it/s][A

846



 12%|█▏        | 35/284 [00:13<01:30,  2.75it/s][A

11066



 13%|█▎        | 36/284 [00:13<01:30,  2.75it/s][A

244112



 13%|█▎        | 37/284 [00:13<01:29,  2.76it/s][A

270



 13%|█▎        | 38/284 [00:14<01:28,  2.77it/s][A

279



 14%|█▎        | 39/284 [00:14<01:28,  2.77it/s][A

368



 14%|█▍        | 40/284 [00:14<01:28,  2.77it/s][A

202



 14%|█▍        | 41/284 [00:15<01:28,  2.76it/s][A

349



 15%|█▍        | 42/284 [00:15<01:27,  2.76it/s][A

455



 15%|█▌        | 43/284 [00:15<01:28,  2.73it/s][A

134325



 15%|█▌        | 44/284 [00:16<01:27,  2.73it/s][A

1593



 16%|█▌        | 45/284 [00:16<01:27,  2.74it/s][A

216



 16%|█▌        | 46/284 [00:17<01:26,  2.74it/s][A

294



 17%|█▋        | 47/284 [00:17<01:27,  2.72it/s][A

81



 17%|█▋        | 48/284 [00:17<01:26,  2.72it/s][A

1074



 17%|█▋        | 49/284 [00:18<01:26,  2.72it/s][A

97090



 18%|█▊        | 50/284 [00:18<01:25,  2.73it/s][A

5513



 18%|█▊        | 51/284 [00:18<01:26,  2.70it/s][A

1482



 18%|█▊        | 52/284 [00:19<01:26,  2.68it/s][A

1312



 19%|█▊        | 53/284 [00:19<01:26,  2.66it/s][A

85



 19%|█▉        | 54/284 [00:20<01:26,  2.65it/s][A

1512



 19%|█▉        | 55/284 [00:20<01:26,  2.64it/s][A

13587



 20%|█▉        | 56/284 [00:20<01:26,  2.65it/s][A

196108



 20%|██        | 57/284 [00:21<01:25,  2.67it/s][A

12387



 20%|██        | 58/284 [00:21<01:23,  2.70it/s][A

2332



 21%|██        | 59/284 [00:21<01:22,  2.72it/s][A

4411



 21%|██        | 60/284 [00:22<01:22,  2.72it/s][A

150603



 21%|██▏       | 61/284 [00:22<01:21,  2.75it/s][A

164



 22%|██▏       | 62/284 [00:23<01:20,  2.76it/s][A

262



 22%|██▏       | 63/284 [00:23<01:20,  2.76it/s][A

14257



 23%|██▎       | 64/284 [00:23<01:19,  2.76it/s][A

2625



 23%|██▎       | 65/284 [00:24<01:19,  2.77it/s][A

121



 23%|██▎       | 66/284 [00:24<01:18,  2.77it/s][A

149



 24%|██▎       | 67/284 [00:24<01:18,  2.76it/s][A

148882



 24%|██▍       | 68/284 [00:25<01:17,  2.77it/s][A

46940



 24%|██▍       | 69/284 [00:25<01:17,  2.78it/s][A

937



 25%|██▍       | 70/284 [00:25<01:17,  2.78it/s][A

1303



 25%|██▌       | 71/284 [00:26<01:16,  2.78it/s][A

7775



 25%|██▌       | 72/284 [00:26<01:15,  2.79it/s][A

5128



 26%|██▌       | 73/284 [00:26<01:16,  2.77it/s][A

17144



 26%|██▌       | 74/284 [00:27<01:15,  2.78it/s][A

74899



 26%|██▋       | 75/284 [00:27<01:15,  2.78it/s][A

12779



 27%|██▋       | 76/284 [00:28<01:15,  2.76it/s][A

1158



 27%|██▋       | 77/284 [00:28<01:15,  2.74it/s][A

34867



 27%|██▋       | 78/284 [00:28<01:15,  2.72it/s][A

52870



 28%|██▊       | 79/284 [00:29<01:15,  2.70it/s][A

3782



 28%|██▊       | 80/284 [00:29<01:14,  2.73it/s][A

5669



 29%|██▊       | 81/284 [00:29<01:14,  2.73it/s][A

349704



 29%|██▉       | 82/284 [00:30<01:13,  2.74it/s][A

356169



 29%|██▉       | 83/284 [00:30<01:13,  2.75it/s][A

2302



 30%|██▉       | 84/284 [00:30<01:12,  2.75it/s][A

5970



 30%|██▉       | 85/284 [00:31<01:12,  2.76it/s][A

9229



 30%|███       | 86/284 [00:31<01:12,  2.75it/s][A

5070



 31%|███       | 87/284 [00:32<01:11,  2.75it/s][A

301803



 31%|███       | 88/284 [00:32<01:11,  2.75it/s][A

142935



 31%|███▏      | 89/284 [00:32<01:10,  2.75it/s][A

122



 32%|███▏      | 90/284 [00:33<01:10,  2.75it/s][A

88



 32%|███▏      | 91/284 [00:33<01:09,  2.76it/s][A

159



 32%|███▏      | 92/284 [00:33<01:09,  2.78it/s][A

200



 33%|███▎      | 93/284 [00:34<01:08,  2.78it/s][A

227



 33%|███▎      | 94/284 [00:34<01:08,  2.77it/s][A

5958



 33%|███▎      | 95/284 [00:34<01:08,  2.77it/s][A

11503



 34%|███▍      | 96/284 [00:35<01:08,  2.75it/s][A

3682



 34%|███▍      | 97/284 [00:35<01:07,  2.75it/s][A

39429



 35%|███▍      | 98/284 [00:36<01:07,  2.76it/s][A

4858



 35%|███▍      | 99/284 [00:36<01:07,  2.75it/s][A

3878



 35%|███▌      | 100/284 [00:36<01:06,  2.76it/s][A

17512



 36%|███▌      | 101/284 [00:37<01:06,  2.75it/s][A

10872



 36%|███▌      | 102/284 [00:37<01:06,  2.75it/s][A

25103



 36%|███▋      | 103/284 [00:37<01:05,  2.76it/s][A

52769



 37%|███▋      | 104/284 [00:38<01:05,  2.76it/s][A

2758



 37%|███▋      | 105/284 [00:38<01:04,  2.76it/s][A

10643



 37%|███▋      | 106/284 [00:38<01:04,  2.75it/s][A

12361



 38%|███▊      | 107/284 [00:39<01:04,  2.74it/s][A

1681



 38%|███▊      | 108/284 [00:39<01:04,  2.74it/s][A

1580



 38%|███▊      | 109/284 [00:40<01:04,  2.73it/s][A

1111



 39%|███▊      | 110/284 [00:40<01:03,  2.72it/s][A

769



 39%|███▉      | 111/284 [00:40<01:04,  2.69it/s][A

136027



 39%|███▉      | 112/284 [00:41<01:03,  2.69it/s][A

2934



 40%|███▉      | 113/284 [00:41<01:03,  2.68it/s][A

432



 40%|████      | 114/284 [00:41<01:04,  2.65it/s][A

429



 40%|████      | 115/284 [00:42<01:03,  2.64it/s][A

1975



 41%|████      | 116/284 [00:42<01:03,  2.64it/s][A

1895



 41%|████      | 117/284 [00:43<01:02,  2.67it/s][A

312



 42%|████▏     | 118/284 [00:43<01:01,  2.68it/s][A

537



 42%|████▏     | 119/284 [00:43<01:01,  2.67it/s][A

1278



 42%|████▏     | 120/284 [00:44<01:01,  2.67it/s][A

607



 43%|████▎     | 121/284 [00:44<01:00,  2.69it/s][A

5459



 43%|████▎     | 122/284 [00:44<00:59,  2.71it/s][A

861



 43%|████▎     | 123/284 [00:45<00:59,  2.72it/s][A

22602



 44%|████▎     | 124/284 [00:45<00:59,  2.71it/s][A

4743



 44%|████▍     | 125/284 [00:46<00:58,  2.71it/s][A

3534



 44%|████▍     | 126/284 [00:46<00:58,  2.70it/s][A

1453



 45%|████▍     | 127/284 [00:46<00:58,  2.70it/s][A

4



 45%|████▌     | 128/284 [00:47<00:58,  2.67it/s][A

6



 45%|████▌     | 129/284 [00:47<00:57,  2.67it/s][A

39378



 46%|████▌     | 130/284 [00:47<00:57,  2.70it/s][A

3554



 46%|████▌     | 131/284 [00:48<00:57,  2.64it/s][A

2255



 46%|████▋     | 132/284 [00:48<00:57,  2.65it/s][A

4516



 47%|████▋     | 133/284 [00:49<00:57,  2.64it/s][A

2655



 47%|████▋     | 134/284 [00:49<00:56,  2.65it/s][A

9899



 48%|████▊     | 135/284 [00:49<00:55,  2.68it/s][A

439



 48%|████▊     | 136/284 [00:50<00:54,  2.71it/s][A

1439



 48%|████▊     | 137/284 [00:50<00:54,  2.71it/s][A

148212



 49%|████▊     | 138/284 [00:50<00:53,  2.72it/s][A

358513



 49%|████▉     | 139/284 [00:51<00:52,  2.74it/s][A

43



 49%|████▉     | 140/284 [00:51<00:52,  2.74it/s][A

28



 50%|████▉     | 141/284 [00:51<00:52,  2.74it/s][A

14579



 50%|█████     | 142/284 [00:52<00:51,  2.73it/s][A

23978



 50%|█████     | 143/284 [00:52<00:51,  2.73it/s][A

2567



 51%|█████     | 144/284 [00:53<00:51,  2.74it/s][A

3052



 51%|█████     | 145/284 [00:53<00:50,  2.74it/s][A

2426



 51%|█████▏    | 146/284 [00:53<00:50,  2.76it/s][A

188



 52%|█████▏    | 147/284 [00:54<00:49,  2.76it/s][A

21803



 52%|█████▏    | 148/284 [00:54<00:49,  2.76it/s][A

7965



 52%|█████▏    | 149/284 [00:54<00:49,  2.75it/s][A

2627



 53%|█████▎    | 150/284 [00:55<00:48,  2.76it/s][A

1236



 53%|█████▎    | 151/284 [00:55<00:48,  2.75it/s][A

16104



 54%|█████▎    | 152/284 [00:55<00:47,  2.76it/s][A

1694



 54%|█████▍    | 153/284 [00:56<00:47,  2.75it/s][A

271762



 54%|█████▍    | 154/284 [00:56<00:47,  2.76it/s][A

22285



 55%|█████▍    | 155/284 [00:57<00:46,  2.76it/s][A

1830



 55%|█████▍    | 156/284 [00:57<00:46,  2.74it/s][A

14470



 55%|█████▌    | 157/284 [00:57<00:46,  2.74it/s][A

1989



 56%|█████▌    | 158/284 [00:58<00:46,  2.74it/s][A

23020



 56%|█████▌    | 159/284 [00:58<00:45,  2.75it/s][A

237



 56%|█████▋    | 160/284 [00:58<00:44,  2.76it/s][A

99



 57%|█████▋    | 161/284 [00:59<00:44,  2.76it/s][A

40



 57%|█████▋    | 162/284 [00:59<00:44,  2.76it/s][A

22



 57%|█████▋    | 163/284 [00:59<00:44,  2.75it/s][A

1230



 58%|█████▊    | 164/284 [01:00<00:43,  2.73it/s][A

5077



 58%|█████▊    | 165/284 [01:00<00:43,  2.74it/s][A

1



 58%|█████▊    | 166/284 [01:01<00:42,  2.75it/s][A

1



 59%|█████▉    | 167/284 [01:01<00:42,  2.74it/s][A

6321



 59%|█████▉    | 168/284 [01:01<00:42,  2.75it/s][A

4652



 60%|█████▉    | 169/284 [01:02<00:41,  2.76it/s][A

1067



 60%|█████▉    | 170/284 [01:02<00:41,  2.75it/s][A

425



 60%|██████    | 171/284 [01:02<00:40,  2.76it/s][A

6895



 61%|██████    | 172/284 [01:03<00:40,  2.75it/s][A

112965



 61%|██████    | 173/284 [01:03<00:40,  2.76it/s][A

1053



 61%|██████▏   | 174/284 [01:03<00:39,  2.76it/s][A

1260



 62%|██████▏   | 175/284 [01:04<00:39,  2.76it/s][A

19681



 62%|██████▏   | 176/284 [01:04<00:39,  2.77it/s][A

42448



 62%|██████▏   | 177/284 [01:05<00:38,  2.76it/s][A

159667



 63%|██████▎   | 178/284 [01:05<00:38,  2.77it/s][A

61899



 63%|██████▎   | 179/284 [01:05<00:37,  2.77it/s][A

107498



 63%|██████▎   | 180/284 [01:06<00:38,  2.73it/s][A

96647



 64%|██████▎   | 181/284 [01:06<00:37,  2.74it/s][A

15772



 64%|██████▍   | 182/284 [01:06<00:37,  2.75it/s][A

419144



 64%|██████▍   | 183/284 [01:07<00:36,  2.75it/s][A

39002



 65%|██████▍   | 184/284 [01:07<00:36,  2.77it/s][A

14936



 65%|██████▌   | 185/284 [01:07<00:35,  2.77it/s][A

32378



 65%|██████▌   | 186/284 [01:08<00:35,  2.78it/s][A

2008



 66%|██████▌   | 187/284 [01:08<00:34,  2.77it/s][A

4968



 66%|██████▌   | 188/284 [01:09<00:34,  2.77it/s][A

6408



 67%|██████▋   | 189/284 [01:09<00:34,  2.77it/s][A

22612



 67%|██████▋   | 190/284 [01:09<00:34,  2.76it/s][A

4847



 67%|██████▋   | 191/284 [01:10<00:33,  2.76it/s][A

11



 68%|██████▊   | 192/284 [01:10<00:33,  2.77it/s][A

10



 68%|██████▊   | 193/284 [01:10<00:32,  2.76it/s][A

470



 68%|██████▊   | 194/284 [01:11<00:32,  2.76it/s][A

20



 69%|██████▊   | 195/284 [01:11<00:32,  2.76it/s][A

2280



 69%|██████▉   | 196/284 [01:11<00:31,  2.77it/s][A

2342



 69%|██████▉   | 197/284 [01:12<00:31,  2.77it/s][A

482



 70%|██████▉   | 198/284 [01:12<00:31,  2.77it/s][A

592



 70%|███████   | 199/284 [01:13<00:30,  2.75it/s][A

213



 70%|███████   | 200/284 [01:13<00:30,  2.75it/s][A

2324



 71%|███████   | 201/284 [01:13<00:30,  2.76it/s][A

8877



 71%|███████   | 202/284 [01:14<00:29,  2.76it/s][A

9034



 71%|███████▏  | 203/284 [01:14<00:29,  2.78it/s][A

308



 72%|███████▏  | 204/284 [01:14<00:28,  2.78it/s][A

523



 72%|███████▏  | 205/284 [01:15<00:28,  2.77it/s][A

21178



 73%|███████▎  | 206/284 [01:15<00:28,  2.77it/s][A

20961



 73%|███████▎  | 207/284 [01:15<00:27,  2.77it/s][A

17



 73%|███████▎  | 208/284 [01:16<00:27,  2.76it/s][A

55



 74%|███████▎  | 209/284 [01:16<00:27,  2.76it/s][A

14019



 74%|███████▍  | 210/284 [01:16<00:26,  2.76it/s][A

163737



 74%|███████▍  | 211/284 [01:17<00:26,  2.77it/s][A

93145



 75%|███████▍  | 212/284 [01:17<00:26,  2.77it/s][A

102110



 75%|███████▌  | 213/284 [01:18<00:25,  2.77it/s][A

17



 75%|███████▌  | 214/284 [01:18<00:25,  2.77it/s][A

30



 76%|███████▌  | 215/284 [01:18<00:25,  2.75it/s][A

195584



 76%|███████▌  | 216/284 [01:19<00:24,  2.77it/s][A

34037



 76%|███████▋  | 217/284 [01:19<00:24,  2.70it/s][A

3382



 77%|███████▋  | 218/284 [01:19<00:24,  2.70it/s][A

5264



 77%|███████▋  | 219/284 [01:20<00:23,  2.71it/s][A

13976



 77%|███████▋  | 220/284 [01:20<00:23,  2.72it/s][A

8012



 78%|███████▊  | 221/284 [01:21<00:23,  2.71it/s][A

8



 78%|███████▊  | 222/284 [01:21<00:22,  2.72it/s][A

7



 79%|███████▊  | 223/284 [01:21<00:22,  2.73it/s][A

154



 79%|███████▉  | 224/284 [01:22<00:21,  2.74it/s][A

71



 79%|███████▉  | 225/284 [01:22<00:21,  2.74it/s][A

2773



 80%|███████▉  | 226/284 [01:22<00:21,  2.74it/s][A

1579



 80%|███████▉  | 227/284 [01:23<00:20,  2.74it/s][A

187



 80%|████████  | 228/284 [01:23<00:20,  2.74it/s][A

425



 81%|████████  | 229/284 [01:23<00:19,  2.76it/s][A

67482



 81%|████████  | 230/284 [01:24<00:19,  2.76it/s][A

1377



 81%|████████▏ | 231/284 [01:24<00:19,  2.73it/s][A

407774



 82%|████████▏ | 232/284 [01:25<00:18,  2.74it/s][A

166212



 82%|████████▏ | 233/284 [01:25<00:18,  2.75it/s][A

28



 82%|████████▏ | 234/284 [01:25<00:18,  2.74it/s][A

88



 83%|████████▎ | 235/284 [01:26<00:17,  2.74it/s][A

4143



 83%|████████▎ | 236/284 [01:26<00:17,  2.76it/s][A

2662



 83%|████████▎ | 237/284 [01:26<00:17,  2.76it/s][A

24561



 84%|████████▍ | 238/284 [01:27<00:16,  2.77it/s][A

156727



 84%|████████▍ | 239/284 [01:27<00:16,  2.75it/s][A

26551



 85%|████████▍ | 240/284 [01:27<00:15,  2.76it/s][A

17337



 85%|████████▍ | 241/284 [01:28<00:15,  2.76it/s][A

29



 85%|████████▌ | 242/284 [01:28<00:15,  2.76it/s][A

17



 86%|████████▌ | 243/284 [01:29<00:14,  2.77it/s][A

41559



 86%|████████▌ | 244/284 [01:29<00:14,  2.76it/s][A

210732



 86%|████████▋ | 245/284 [01:29<00:14,  2.76it/s][A

3181



 87%|████████▋ | 246/284 [01:30<00:13,  2.76it/s][A

1697



 87%|████████▋ | 247/284 [01:30<00:13,  2.77it/s][A

51858



 87%|████████▋ | 248/284 [01:30<00:13,  2.76it/s][A

53953



 88%|████████▊ | 249/284 [01:31<00:12,  2.76it/s][A

21628



 88%|████████▊ | 250/284 [01:31<00:12,  2.75it/s][A

147396



 88%|████████▊ | 251/284 [01:31<00:11,  2.77it/s][A

1089



 89%|████████▊ | 252/284 [01:32<00:11,  2.77it/s][A

424



 89%|████████▉ | 253/284 [01:32<00:11,  2.77it/s][A

1284



 89%|████████▉ | 254/284 [01:32<00:10,  2.77it/s][A

1297



 90%|████████▉ | 255/284 [01:33<00:10,  2.77it/s][A

72273



 90%|█████████ | 256/284 [01:33<00:10,  2.77it/s][A

1341



 90%|█████████ | 257/284 [01:34<00:09,  2.77it/s][A

6551



 91%|█████████ | 258/284 [01:34<00:09,  2.76it/s][A

41259



 91%|█████████ | 259/284 [01:34<00:09,  2.76it/s][A

4352



 92%|█████████▏| 260/284 [01:35<00:08,  2.74it/s][A

773



 92%|█████████▏| 261/284 [01:35<00:08,  2.76it/s][A

1936



 92%|█████████▏| 262/284 [01:35<00:07,  2.75it/s][A

56330



 93%|█████████▎| 263/284 [01:36<00:07,  2.74it/s][A

74688



 93%|█████████▎| 264/284 [01:36<00:07,  2.75it/s][A

3813



 93%|█████████▎| 265/284 [01:36<00:06,  2.75it/s][A

219



 94%|█████████▎| 266/284 [01:37<00:06,  2.76it/s][A

160



 94%|█████████▍| 267/284 [01:37<00:06,  2.76it/s][A

5320



 94%|█████████▍| 268/284 [01:38<00:05,  2.75it/s][A

8576



 95%|█████████▍| 269/284 [01:38<00:05,  2.74it/s][A

1130



 95%|█████████▌| 270/284 [01:38<00:05,  2.75it/s][A

1861



 95%|█████████▌| 271/284 [01:39<00:04,  2.74it/s][A

3890



 96%|█████████▌| 272/284 [01:39<00:04,  2.75it/s][A

604



 96%|█████████▌| 273/284 [01:39<00:03,  2.76it/s][A

639



 96%|█████████▋| 274/284 [01:40<00:03,  2.75it/s][A

23



 97%|█████████▋| 275/284 [01:40<00:03,  2.72it/s][A

1571



 97%|█████████▋| 276/284 [01:41<00:02,  2.71it/s][A

19493



 98%|█████████▊| 277/284 [01:41<00:02,  2.70it/s][A

110



 98%|█████████▊| 278/284 [01:41<00:02,  2.70it/s][A

913



 98%|█████████▊| 279/284 [01:42<00:01,  2.70it/s][A

955



 99%|█████████▊| 280/284 [01:42<00:01,  2.69it/s][A

642



 99%|█████████▉| 281/284 [01:42<00:01,  2.69it/s][A

15992



 99%|█████████▉| 282/284 [01:43<00:00,  2.67it/s][A

45805



100%|█████████▉| 283/284 [01:43<00:00,  2.68it/s][A

6593



100%|██████████| 284/284 [01:44<00:00,  2.66it/s][A

8882


In [25]:
result = pd.DataFrame(records, columns=["query_idx", "target_idx", "rank", "dist", "set_size", "filter"])
result["query_addr"] = result["query_idx"].apply(lambda x: idx_to_address[x])
result["target_addr"] = result["target_idx"].apply(lambda x: idx_to_address[x])
result.drop(["query_idx", "target_idx"], axis=1)

result.to_csv("output_file.csv", index=False)