In [1]:
import igraph as ig
import pandas as pd
import os

In [2]:
os.listdir("data/")

['fc_l1rep6rec3m12enhancedConnections_fids.gml',
 'lt_existingConnections_fids.csv',
 'fc_l1rep6rec3m12enhancedConnections.gml',
 'lt_l1rep6rec3m12enhancedConnections.csv',
 'lt_l1rep6rec3m12enhancedConnections_fids.csv',
 'lt_existingConnections.csv']

In [168]:
df_pkl_filepath = "data/fc_l1rep6rec3m12enhancedConnections_df.pkl"
print(os.path.isfile(df_pkl_filepath))

True


In [169]:
%%time
if os.path.isfile(df_pkl_filepath):
    print("loading existing dataframe pickle")
    edges_df = pd.read_pickle(df_pkl_filepath)
else:
    edges_df = pd.read_csv("data/lt_l1rep6rec3m12enhancedConnections.csv")
    edges_df.to_pickle(df_pkl_filepath)

loading existing dataframe pickle
CPU times: user 309 ms, sys: 135 ms, total: 444 ms
Wall time: 476 ms


In [170]:
edges_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3456854 entries, 0 to 3456853
Data columns (total 3 columns):
 #   Column  Dtype 
---  ------  ----- 
 0   i       object
 1   j       object
 2   v       int64 
dtypes: int64(1), object(2)
memory usage: 79.1+ MB


In [5]:
edges_df.sample(5)

Unnamed: 0,i,j,v
744866,0xfdacd6cbc0b082422bd66196364c5ef886c428d3,0x8b63441ade2fa6e64e30d43ae8c71eb40f107344,1
681736,0x35d6e84de62a5050c227370fc2442cd14bb4cc98,0x0d441a09bfb7712bd00c72d4e4305021b7b8d615,1
1231034,0x72d9471869bf52d0da523fa9437d2d8ea077d2b6,0xf58ffd2cb44db1c4de799a5118e47681c6dcca9c,2
3423603,0xca042423b365128810e96c7b1e0196ad0a513855,0xde1e96c194cd54fdebb21db1cecd0c964f1c5054,1
2378041,0xeab9dba3ea9701c728a7369d3e4ff820863414c1,0x034af1e6161eb5feb44638ea0141e7b11ab43047,1


In [6]:
gml_filepath = "data/fc_l1rep6rec3m12enhancedConnections.gml"
print(os.path.isfile(gml_filepath))

True


In [171]:
pkl_filepath = "data/fc_l1rep6rec3m12enhancedConnections_ig.pkl"
print(os.path.isfile(pkl_filepath))

True


In [7]:
%%time
if os.path.isfile(pkl_filepath):
    print("loading existing graph pickle")
    g = igraph.Graph.Read_Pickle(pkl_filepath)
elif os.path.isfile(gml_filepath):
    print("loading existing graph model")
    g = ig.load(gml_filepath)
else:
    g = ig.Graph.DataFrame(edges_df, directed=True, use_vids=False)
    g.write_pickle(pkl_filepath)
    g.save(gml_filepath)

loading existing graph model
CPU times: user 29.7 s, sys: 1.99 s, total: 31.7 s
Wall time: 34.3 s


In [43]:
ig.summary(g)

IGRAPH DN-- 44784 3456854 -- 
+ attr: id (v), name (v), v (e)


In [44]:
g.degree(["0x0a860752374e8430b7ae704df85b24d6f6b9adf4"], mode="out")

[242]

In [10]:
%%time
mindist = 1 # exclude the input vertex
order = 1
k1 = g.neighborhood(['0x0a860752374e8430b7ae704df85b24d6f6b9adf4'], order=order, mode="out", mindist=mindist)

CPU times: user 185 µs, sys: 428 µs, total: 613 µs
Wall time: 2.15 ms


In [11]:
len(k1[0])

242

In [12]:
%%time
mindist = 2 # exclude k1
order = 2
k2 = g.neighborhood(['0x0a860752374e8430b7ae704df85b24d6f6b9adf4'], order=order, mode="out", mindist=mindist)

CPU times: user 1.06 ms, sys: 640 µs, total: 1.7 ms
Wall time: 3.35 ms


In [13]:
len(k2[0])

14057

In [14]:
%%time
mindist = 3 # exclude k2
order = 3
k3 = g.neighborhood(['0x0a860752374e8430b7ae704df85b24d6f6b9adf4'], order=order, mode="out", mindist=mindist)

CPU times: user 11.4 ms, sys: 1.8 ms, total: 13.2 ms
Wall time: 14.4 ms


In [15]:
len(k3[0])

19516

In [16]:
%%time
mindist = 4 # exclude k3
order = 4
k4 = g.neighborhood(['0x0a860752374e8430b7ae704df85b24d6f6b9adf4'], order=order, mode="out", mindist=mindist)

CPU times: user 17.5 ms, sys: 1.84 ms, total: 19.3 ms
Wall time: 21 ms


In [17]:
len(k4[0])

1018

In [18]:
g.vs.find(name='0x0ee0e0d0d396c2e3ce3a0496c2aa0892db860662').index

664

In [19]:
g.vs.find(name='0x0a860752374e8430b7ae704df85b24d6f6b9adf4').index

5024

In [20]:
g.vs.find(name='0x0a860752374e8430b7ae704df85b24d6f6b9adf4').out_edges()[:5]

[igraph.Edge(<igraph.Graph object at 0x11cdecb40>, 116302, {'v': 24.0}),
 igraph.Edge(<igraph.Graph object at 0x11cdecb40>, 116324, {'v': 1.0}),
 igraph.Edge(<igraph.Graph object at 0x11cdecb40>, 116325, {'v': 1.0}),
 igraph.Edge(<igraph.Graph object at 0x11cdecb40>, 116326, {'v': 1.0}),
 igraph.Edge(<igraph.Graph object at 0x11cdecb40>, 116327, {'v': 13.0})]

In [21]:
[edge for j in k1[0][:5] for edge in g.vs[j].out_edges() ]

[igraph.Edge(<igraph.Graph object at 0x11cdecb40>, 0, {'v': 7.0}),
 igraph.Edge(<igraph.Graph object at 0x11cdecb40>, 375279, {'v': 1.0}),
 igraph.Edge(<igraph.Graph object at 0x11cdecb40>, 375280, {'v': 14.0}),
 igraph.Edge(<igraph.Graph object at 0x11cdecb40>, 375281, {'v': 1.0}),
 igraph.Edge(<igraph.Graph object at 0x11cdecb40>, 375282, {'v': 14.0}),
 igraph.Edge(<igraph.Graph object at 0x11cdecb40>, 375283, {'v': 2.0}),
 igraph.Edge(<igraph.Graph object at 0x11cdecb40>, 375284, {'v': 1.0}),
 igraph.Edge(<igraph.Graph object at 0x11cdecb40>, 375285, {'v': 6.0}),
 igraph.Edge(<igraph.Graph object at 0x11cdecb40>, 375286, {'v': 1.0}),
 igraph.Edge(<igraph.Graph object at 0x11cdecb40>, 375287, {'v': 1.0}),
 igraph.Edge(<igraph.Graph object at 0x11cdecb40>, 375288, {'v': 7.0}),
 igraph.Edge(<igraph.Graph object at 0x11cdecb40>, 375289, {'v': 1.0}),
 igraph.Edge(<igraph.Graph object at 0x11cdecb40>, 375290, {'v': 1.0}),
 igraph.Edge(<igraph.Graph object at 0x11cdecb40>, 375291, {'v': 1.

In [22]:
g.es[116302]

igraph.Edge(<igraph.Graph object at 0x11cdecb40>, 116302, {'v': 24.0})

In [23]:
def into_ijv_for_edge(graph:ig.Graph, edge:ig.Edge, degree:int):
    return {
        'i': graph.vs[edge.tuple[0]]["name"],
        'j': graph.vs[edge.tuple[1]]["name"],
        'v': edge["v"],
        'k': degree
    }

In [73]:
def get_localtrust_with_degree(graph:ig.Graph, in_addr:str, max_neighbors:int, max_degree:int) -> list: 
    lt = []
    mindist_and_order = 1
    limit = max_neighbors
    while mindist_and_order <= max_degree:
        neighbors = graph.neighborhood(
            [in_addr], order=mindist_and_order, mode="out", mindist=mindist_and_order
        )
        k_edges = [edge for j in neighbors[0][:limit] for edge in graph.vs[j].out_edges()]
        
        [lt.append(into_ijv_for_edge(graph, e, mindist_and_order)) for e in k_edges]

        limit = limit - len(neighbors[0])
        if limit <= 0:
            break # we have reached limit of neighbors
        mindist_and_order += 1
    # end of while
    return lt

In [75]:
%%time
localtrust = get_localtrust_with_degree(graph=g,
                                        in_addr="0x0a860752374e8430b7ae704df85b24d6f6b9adf4",
                                        max_neighbors=100,
                                        max_degree=3)

CPU times: user 185 ms, sys: 26.8 ms, total: 212 ms
Wall time: 217 ms


In [76]:
len(localtrust)

44272

In [77]:
import random
random.sample(localtrust,5)

[{'i': '0x2b09450cc18019d2c505bbcecb27485e0469bc2c',
  'j': '0xfbca89c404fdbec685dd8b412b450a80af26d38f',
  'v': 1.0,
  'k': 1},
 {'i': '0x74c6b9a4217e0e391294a807a264fd0502990baa',
  'j': '0xb3dfe30cc10995be126118c4b7a38e99f6bd39d4',
  'v': 1.0,
  'k': 1},
 {'i': '0x452c3c4acb32aad6abe53ad813dee3fe71a93669',
  'j': '0xaa11ebefbb4723debabbfc85932c11f358e68306',
  'v': 1.0,
  'k': 1},
 {'i': '0xe1b1e3bbf4f29bd7253d6fc1e2ddc9cacb0a546a',
  'j': '0x99bce75fc42e0e6114b170dfa89f833e21b2be78',
  'v': 13.0,
  'k': 1},
 {'i': '0x1ca66c990e86b750ea6b2180d17fff89273a5c0d',
  'j': '0x7ab82b13992e38462f50df7065256071204099e5',
  'v': 7.0,
  'k': 1}]

In [78]:
%%time
localtrust = get_localtrust_with_degree(graph=g,
                                        in_addr="0x0a860752374e8430b7ae704df85b24d6f6b9adf4",
                                        max_neighbors=1000,
                                        max_degree=5)

CPU times: user 1.31 s, sys: 85.1 ms, total: 1.4 s
Wall time: 1.65 s


In [79]:
len(localtrust)

363559

In [80]:
random.sample(localtrust, 5)

[{'i': '0x214573f904759874c9c2e4903b1cbd0bb5821e1d',
  'j': '0x89b7dffc40b5cfb274d87555dce72b6a90d5d240',
  'v': 13.0,
  'k': 2},
 {'i': '0xbe2195d82d9d9f3a14c8ddf6b78c3872ed49a1ce',
  'j': '0x27afccc7af1a7fde223f778eeccd308512ed7df9',
  'v': 1.0,
  'k': 2},
 {'i': '0xa35567b3b5ad54ffb88f6789aa2f4d69ee7097b1',
  'j': '0x322d3353c3bd12f9a5308f90c808364326274178',
  'v': 7.0,
  'k': 2},
 {'i': '0xa35567b3b5ad54ffb88f6789aa2f4d69ee7097b1',
  'j': '0xb5e6353fb6d02a9a66e7b83280927b16d87577d8',
  'v': 19.0,
  'k': 2},
 {'i': '0x19c29a3cea8733314e501d42f1a0d6dcb23e1b3d',
  'j': '0x89141cdee17c6cad747efdb37dc7d5e3c8074bd6',
  'v': 6.0,
  'k': 2}]

In [82]:
%%time
localtrust = get_localtrust_with_degree(graph=g,
                                        in_addr="0x7ea38f878aec603bbce2376ded83d502bd2d881e",
                                        max_neighbors=100,
                                        max_degree=5)

CPU times: user 260 ms, sys: 24.4 ms, total: 284 ms
Wall time: 292 ms


In [83]:
len(localtrust)

59251

In [84]:
len(g.vs)

44784

In [85]:
g.vs[0]["name"]

'0xcd1bae2391651198850b076c2b600d534df53ade'

In [89]:
%%script false --no-raise-error # skip because this cell takes too long
%%time
from tqdm import tqdm
for v in tqdm(g.vs):
    get_localtrust_with_degree(
        graph=g,
        in_addr=v["name"],
        max_neighbors=100,
        max_degree=5
    )

In [190]:
def get_neighbors(graph:ig.Graph, in_addr:str, max_neighbors:int, max_degree:int) -> list:
    import itertools
    klists = []
    mindist_and_order = 1
    limit = max_neighbors
    while mindist_and_order <= max_degree:
        neighbors = graph.neighborhood(
            [in_addr], order=mindist_and_order, mode="out", mindist=mindist_and_order
        )
        klists.append(g.vs[neighbors[0][:limit]]["name"])
        
        limit = limit - len(neighbors[0])
        if limit <= 0:
            break # we have reached limit of neighbors
        mindist_and_order += 1
    # end of while
    return list(itertools.chain(*klists))

In [191]:
%%time
neighbors = get_neighbors(
                graph=g,
                in_addr="0x7ea38f878aec603bbce2376ded83d502bd2d881e",
                max_neighbors=1000,
                max_degree=5
            )

CPU times: user 667 µs, sys: 278 µs, total: 945 µs
Wall time: 959 µs


In [192]:
sum(1 for _ in neighbors)

1000

In [193]:
random.sample(neighbors,10)

['0xf16cad728c210c0e3c0c9d7c80a247120b1248a4',
 '0x81b3aa1a462ce3476dfd2aef65637155eecdba17',
 '0x5f447c08bb3dbd21bd4ddd389cd29156d9233594',
 '0xe4e3c59c8df9f365747c25006ff298d3a50fddde',
 '0x0b5454a3c0be024120d47b5fe2b355248dfc508a',
 '0xd68c39c9b2b1b947a5888a47e8299b2156de4565',
 '0xd09cf607675eae595017309f46f0837e85296a92',
 '0x0412883ac41b904c55b10c82e947aec06551f124',
 '0xe6f3078a49129375903d0b2598628bfec8dd9e56',
 '0x2d86f58e2e0f068a1720d4508280df24991b044f']

In [194]:
%%time
lt_df = edges_df[edges_df['i'].isin(neighbors) & edges_df['j'].isin(neighbors)]

CPU times: user 353 ms, sys: 36.8 ms, total: 390 ms
Wall time: 480 ms


In [195]:
lt_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 134624 entries, 0 to 3007031
Data columns (total 3 columns):
 #   Column  Non-Null Count   Dtype 
---  ------  --------------   ----- 
 0   i       134624 non-null  object
 1   j       134624 non-null  object
 2   v       134624 non-null  int64 
dtypes: int64(1), object(2)
memory usage: 4.1+ MB


In [157]:
lt_df.sample(5)

Unnamed: 0,i,j,v
800614,0xf5f23d337785825859de51f0bb36550ea7f4aec2,0x8c5e18af335fcd77b49eb32813dd57bb757e24f6,1
523251,0x9589f0b846e7f6b8c547ef0403ecd9a0c915b3f4,0x1500cfc7d2819442f1f5d7e2281554472c275075,67
204466,0x913ce0cb2ebeda248c3b80d53893b94b089c3f2c,0x38eaa03bd222ba7e616ff2d14f2c499df3ef3214,1
290748,0x0e43a2acb2fb3d9baa5222b183e0432bee2c65ab,0x0faeba7a10eaf727f0ebb9e9d844682d38c4a02a,53
254559,0xbc3d0b227834388550b95b5d2c636638bbc13f6e,0x95583d2bba5af877482a2ef3215e7ee8366cf87d,7


In [158]:
%%time
out_json = edges_df[edges_df['i'].isin(neighbors) & edges_df['j'].isin(neighbors)].to_json(orient="records")

CPU times: user 466 ms, sys: 37.7 ms, total: 504 ms
Wall time: 518 ms


In [163]:
import json
json.loads(out_json)[:5]

[{'i': '0xcd1bae2391651198850b076c2b600d534df53ade',
  'j': '0x49fe8e96072b417f084fd8dd5b546cf19f3933b0',
  'v': 7},
 {'i': '0x3a82f8eb5560af36b27420b586a05209e87ed3f6',
  'j': '0xffb471f878e142ea8a357d341fec7424cffaff60',
  'v': 2},
 {'i': '0x3a82f8eb5560af36b27420b586a05209e87ed3f6',
  'j': '0xe6aa796c1b52bdbdebafad07ba324d88d7d4e842',
  'v': 13},
 {'i': '0x3a82f8eb5560af36b27420b586a05209e87ed3f6',
  'j': '0xe2f0d0523f8dfcc80b544513fe38baedf5d19a2e',
  'v': 7},
 {'i': '0x3a82f8eb5560af36b27420b586a05209e87ed3f6',
  'j': '0xa4b7c9173ff5be5c116d2ff846d23dd12a3cc5ec',
  'v': 32}]

In [94]:
%%time
from tqdm import tqdm
for v in tqdm(g.vs):
    get_neighbors(
        graph=g,
        in_addr=v["name"],
        max_neighbors=100,
        max_degree=5
    )

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 44784/44784 [00:43<00:00, 1020.09it/s]

CPU times: user 29.2 s, sys: 11.9 s, total: 41 s
Wall time: 43.9 s





In [111]:
def get_neighbor_edges(graph:ig.Graph, in_addr:str, max_neighbors:int, max_degree:int) -> list:
    import itertools
    klists = []
    mindist_and_order = 1
    limit = max_neighbors
    while mindist_and_order <= max_degree:
        neighbors = graph.neighborhood(
            [in_addr], order=mindist_and_order, mode="out", mindist=mindist_and_order
        )
        klists.append([(graph.vs[j]["name"], edge["v"]) for j in neighbors[0][:limit] for edge in graph.vs[j].out_edges()])
        
        limit = limit - len(neighbors[0])
        if limit <= 0:
            break # we have reached limit of neighbors
        mindist_and_order += 1
    # end of while
    return list(itertools.chain(*klists))

In [112]:
%%time
neighbor_edges = get_neighbor_edges(
                graph=g,
                in_addr="0x7ea38f878aec603bbce2376ded83d502bd2d881e",
                max_neighbors=1000,
                max_degree=5
            )

CPU times: user 642 ms, sys: 33.6 ms, total: 675 ms
Wall time: 743 ms


In [113]:
sum(1 for _ in neighbor_edges)

380332

In [114]:
random.sample(neighbors, 5)

[(723, 7.0), (2601, 1.0), (18151, 1.0), (20668, 1.0), (2061, 45.0)]

In [None]:
%%time
from tqdm import tqdm
for v in tqdm(g.vs):
    get_neighbor_edges(
        graph=g,
        in_addr=v["name"],
        max_neighbors=100,
        max_degree=5
    )