In [3]:
import igraph as ig
import pandas as pd
import os

In [4]:
os.listdir("data/")

['lt_l1rep6rec3m12enhancedConnections.csv', 'lt_existingConnections.csv']

In [11]:
%%time
# Load the edge list from CSV into a DataFrame; each row is one edge of the
# graph built further down from this frame.
edges_df = pd.read_csv("data/lt_l1rep6rec3m12enhancedConnections.csv")

CPU times: user 2.3 s, sys: 338 ms, total: 2.64 s
Wall time: 2.96 s


In [12]:
edges_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3456854 entries, 0 to 3456853
Data columns (total 3 columns):
 #   Column  Dtype 
---  ------  ----- 
 0   i       object
 1   j       object
 2   v       int64 
dtypes: int64(1), object(2)
memory usage: 79.1+ MB


In [13]:
# Path of the GML export of the graph. The graph cell loads this file when it
# exists and writes it otherwise, so print whether an export is already on disk.
gml_filepath = "data/fc_l1rep6rec3m12enhancedConnections.gml"
print(os.path.isfile(gml_filepath))

False


In [14]:
%%time
if not os.path.isfile(gml_filepath):
    # No export on disk yet: build the directed graph from the edge list
    # (use_vids=False, so the first two columns are vertex names rather than
    # vertex ids) and write the GML export so later runs can load it.
    g = ig.Graph.DataFrame(edges_df, directed=True, use_vids=False)
    g.save(gml_filepath)
else:
    # An export already exists: load it instead of rebuilding the graph.
    print("loading existing graph model")
    g = ig.load(gml_filepath)

CPU times: user 9.15 s, sys: 1.21 s, total: 10.4 s
Wall time: 12 s


In [15]:
ig.summary(g)

IGRAPH DN-- 44784 3456854 -- 
+ attr: name (v), v (e)


In [67]:
g.degree(["0x0a860752374e8430b7ae704df85b24d6f6b9adf4"], mode="out")

[242]

In [68]:
%%time
# First ring: vertices reachable over outgoing edges within order=1 steps,
# keeping only those at distance >= mindist=1 (this drops the address itself).
mindist = 1 # exclude the input vertex
order = 1
k1 = g.neighborhood(['0x0a860752374e8430b7ae704df85b24d6f6b9adf4'], order=order, mode="out", mindist=mindist)

CPU times: user 283 µs, sys: 614 µs, total: 897 µs
Wall time: 2.44 ms


In [69]:
len(k1[0])

242

In [72]:
%%time
# Second ring: order=2 with mindist=2 keeps only vertices at outgoing distance
# two, i.e. neither the address itself nor the vertices already in k1.
mindist = 2 # exclude k1
order = 2
k2 = g.neighborhood(['0x0a860752374e8430b7ae704df85b24d6f6b9adf4'], order=order, mode="out", mindist=mindist)

CPU times: user 2.3 ms, sys: 18.7 ms, total: 21 ms
Wall time: 42 ms


In [73]:
len(k2[0])

14057

In [74]:
%%time
# Third ring: order=3 with mindist=3 keeps only vertices at outgoing distance
# three from the address, leaving out everything at distance two or less.
mindist = 3 # exclude k2
order = 3
k3 = g.neighborhood(['0x0a860752374e8430b7ae704df85b24d6f6b9adf4'], order=order, mode="out", mindist=mindist)

CPU times: user 20.1 ms, sys: 51.4 ms, total: 71.5 ms
Wall time: 87.2 ms


In [75]:
len(k3[0])

19516

In [76]:
%%time
# Fourth ring: order=4 with mindist=4 keeps only vertices at outgoing distance
# four from the address, leaving out everything at distance three or less.
mindist = 4 # exclude k3
order = 4
k4 = g.neighborhood(['0x0a860752374e8430b7ae704df85b24d6f6b9adf4'], order=order, mode="out", mindist=mindist)

CPU times: user 17.3 ms, sys: 10.6 ms, total: 27.9 ms
Wall time: 31.3 ms


In [77]:
len(k4[0])

1018

In [45]:
g.vs.find(name='0x0ee0e0d0d396c2e3ce3a0496c2aa0892db860662').index

664

In [46]:
g.vs.find(name='0x0a860752374e8430b7ae704df85b24d6f6b9adf4').index

5024

In [143]:
g.vs.find(name='0x0a860752374e8430b7ae704df85b24d6f6b9adf4').out_edges()[:5]

[igraph.Edge(<igraph.Graph object at 0x1197bdb40>, 116302, {'v': 24}),
 igraph.Edge(<igraph.Graph object at 0x1197bdb40>, 116324, {'v': 1}),
 igraph.Edge(<igraph.Graph object at 0x1197bdb40>, 116325, {'v': 1}),
 igraph.Edge(<igraph.Graph object at 0x1197bdb40>, 116326, {'v': 1}),
 igraph.Edge(<igraph.Graph object at 0x1197bdb40>, 116327, {'v': 13})]

In [149]:
# Flatten the outgoing edges of the first five k1 vertices into a single list.
[edge for j in k1[0][:5] for edge in g.vs[j].out_edges() ]

[igraph.Edge(<igraph.Graph object at 0x1197bdb40>, 0, {'v': 7}),
 igraph.Edge(<igraph.Graph object at 0x1197bdb40>, 375279, {'v': 1}),
 igraph.Edge(<igraph.Graph object at 0x1197bdb40>, 375280, {'v': 14}),
 igraph.Edge(<igraph.Graph object at 0x1197bdb40>, 375281, {'v': 1}),
 igraph.Edge(<igraph.Graph object at 0x1197bdb40>, 375282, {'v': 14}),
 igraph.Edge(<igraph.Graph object at 0x1197bdb40>, 375283, {'v': 2}),
 igraph.Edge(<igraph.Graph object at 0x1197bdb40>, 375284, {'v': 1}),
 igraph.Edge(<igraph.Graph object at 0x1197bdb40>, 375285, {'v': 6}),
 igraph.Edge(<igraph.Graph object at 0x1197bdb40>, 375286, {'v': 1}),
 igraph.Edge(<igraph.Graph object at 0x1197bdb40>, 375287, {'v': 1}),
 igraph.Edge(<igraph.Graph object at 0x1197bdb40>, 375288, {'v': 7}),
 igraph.Edge(<igraph.Graph object at 0x1197bdb40>, 375289, {'v': 1}),
 igraph.Edge(<igraph.Graph object at 0x1197bdb40>, 375290, {'v': 1}),
 igraph.Edge(<igraph.Graph object at 0x1197bdb40>, 375291, {'v': 1}),
 igraph.Edge(<igraph.Gr

In [144]:
g.es[116302]

igraph.Edge(<igraph.Graph object at 0x1197bdb40>, 116302, {'v': 24})

In [111]:
def into_ijv_for_edge(graph:ig.Graph, edge:ig.Edge, degree:int):
    """Turn a single edge into an ``i``/``j``/``v``/``k`` row.

    Args:
        graph: graph the edge belongs to; used to look up vertex names.
        edge: edge to convert. Its ``tuple`` holds the two endpoint vertex
            indices and its ``v`` attribute becomes the row value.
        degree: stored unchanged under the ``k`` key.

    Returns:
        dict with ``i`` and ``j`` set to the ``name`` attribute of the first
        and second endpoint, ``v`` set to the edge's ``v`` attribute and ``k``
        set to ``degree``.
    """
    first_idx, second_idx = edge.tuple
    vertices = graph.vs
    row = dict(
        i=vertices[first_idx]["name"],
        j=vertices[second_idx]["name"],
        v=edge["v"],
        k=degree,
    )
    return row

In [175]:
def get_localtrust_with_degree(graph:ig.Graph, in_addr:str, limit_rows:int, max_degree:int) -> list:
    """Collect rows for the outgoing edges of the vertices around ``in_addr``.

    The neighborhood is visited ring by ring: first the vertices at outgoing
    distance 1, then distance 2, and so on up to ``max_degree``. For every
    vertex taken from a ring, each of its outgoing edges is converted with
    ``into_ijv_for_edge`` and tagged with that ring's distance as ``k``.
    Rings start at distance 1, so the outgoing edges of ``in_addr`` itself
    are not part of the result.

    Args:
        graph: graph to query.
        in_addr: identifier of the start vertex, passed to
            ``graph.neighborhood`` as-is.
        limit_rows: budget of neighbor vertices to expand, shared across all
            rings. A value of zero or less yields an empty result.
            NOTE(review): despite the name this caps the number of expanded
            vertices, not the number of returned rows; every outgoing edge of
            an expanded vertex is returned. Confirm this is the intent.
        max_degree: largest ring distance to visit.

    Returns:
        List of dicts as built by ``into_ijv_for_edge``, ordered by ring and,
        within a ring, by the order ``graph.neighborhood`` returns vertices.
    """
    lt = []
    remaining = limit_rows
    degree = 1
    # Checking the budget up front also covers a non-positive limit_rows:
    # a negative value used as a slice bound would count from the end of the
    # ring instead of selecting nothing.
    while degree <= max_degree and remaining > 0:
        # order == mindist selects only the vertices at exactly this distance.
        ring = graph.neighborhood(
            [in_addr], order=degree, mode="out", mindist=degree
        )[0]
        if not ring:
            # Nothing at this distance means nothing farther out either.
            break
        for vertex_id in ring[:remaining]:
            lt.extend(
                into_ijv_for_edge(graph, edge, degree)
                for edge in graph.vs[vertex_id].out_edges()
            )
        # The whole ring is charged against the budget, so a ring at least as
        # large as the remaining budget ends the walk.
        remaining -= len(ring)
        degree += 1
    return lt

In [155]:
%%time
localtrust = get_localtrust_with_degree(graph=g,
                                        in_addr="0x0a860752374e8430b7ae704df85b24d6f6b9adf4",
                                        limit_rows=10,
                                        max_degree=3)

limit:10 order:1 mindist:1
CPU times: user 17.3 ms, sys: 1.71 ms, total: 19 ms
Wall time: 18.9 ms


In [157]:
len(localtrust)

3527

In [158]:
import random
random.sample(localtrust,5)

[{'i': '0xcd1bae2391651198850b076c2b600d534df53ade',
  'j': '0xb5cc423792f6d431a548cf9cf0884652a8815dc4',
  'v': 1,
  'k': 1},
 {'i': '0xcd1bae2391651198850b076c2b600d534df53ade',
  'j': '0x9377060c5625200074fa956b76c6c0cb58876262',
  'v': 1,
  'k': 1},
 {'i': '0xcd1bae2391651198850b076c2b600d534df53ade',
  'j': '0x3466def688a8057d62a2722f1f73ee6ec77fbab8',
  'v': 1,
  'k': 1},
 {'i': '0xcd1bae2391651198850b076c2b600d534df53ade',
  'j': '0x7f2fe84c1d9d8dec87c8871cfbcbef93abca00dc',
  'v': 7,
  'k': 1},
 {'i': '0xcd1bae2391651198850b076c2b600d534df53ade',
  'j': '0xa5acef63ba1ad6fcace61e8640b039b7f54a23c8',
  'v': 1,
  'k': 1}]

In [166]:
%%time
localtrust = get_localtrust_with_degree(graph=g,
                                        in_addr="0x0a860752374e8430b7ae704df85b24d6f6b9adf4",
                                        limit_rows=100,
                                        max_degree=5)

limit:100 order:1 mindist:1
CPU times: user 195 ms, sys: 18.6 ms, total: 213 ms
Wall time: 228 ms


In [167]:
len(localtrust)

44272

In [164]:
import random
random.sample(localtrust, 5)

[{'i': '0x52c86459268be2a6d46e7b900a6452a02f63e03e',
  'j': '0x57b15d8996a4f9b869a94f7c558b0314da59a530',
  'v': 13,
  'k': 2},
 {'i': '0x330807f36ea25e5a35532b4d17c989aa061a36d3',
  'j': '0x096f5d02777e9385739bd220eb86de8ef545c300',
  'v': 1,
  'k': 2},
 {'i': '0x49b25f8e4503919ec6a7ea664d569041ae0f09e2',
  'j': '0x51b93c6399053cbb33158e4b7305ca489da20ef7',
  'v': 3,
  'k': 2},
 {'i': '0xdf37c3e9bd198a679f72bbac53946b27bad3064f',
  'j': '0xb2aff6af66ed88297557761e7ac406c0385ffe74',
  'v': 1,
  'k': 2},
 {'i': '0x981b849a2857e47927300c32da50b07592cb4c46',
  'j': '0xd2344f892241e3a83c7eca8e586093714be8ca7f',
  'v': 1,
  'k': 2}]

In [168]:
%%time
localtrust = get_localtrust_with_degree(graph=g,
                                        in_addr="0x7ea38f878aec603bbce2376ded83d502bd2d881e",
                                        limit_rows=100,
                                        max_degree=5)

limit:100 order:1 mindist:1
limit:65 order:2 mindist:2
CPU times: user 252 ms, sys: 15.1 ms, total: 267 ms
Wall time: 280 ms


In [169]:
len(localtrust)

59251

In [170]:
len(g.vs)

44784

In [173]:
g.vs[0]["name"]

'0xcd1bae2391651198850b076c2b600d534df53ade'

In [177]:
%%time
from tqdm import tqdm
# Call the extraction once for every vertex in the graph, with a progress bar.
# The return value is not stored, so nothing is kept from this loop; the
# recorded run below was interrupted before it finished.
for v in tqdm(g.vs):
    get_localtrust_with_degree(
        graph=g,
        in_addr=v["name"],
        limit_rows=100,
        max_degree=5
    )

 11%|██████████████▉                                                                                                                        | 4954/44784 [21:45<2:54:55,  3.80it/s]


KeyboardInterrupt: 