In [45]:
import igraph
import numpy as np
import time

In [2]:
# TASK 1

In [3]:
# loading the graph into memory
g = igraph.Graph.Read_Pickle('ogbn-arxiv.pickle')

In [4]:
print(f'number of vertices: {len(g.vs)}')
print(f'number of edges: {len(g.es)}')

number of vertices: 169343
number of edges: 1166243


In [25]:
# finding the node with highest degree, i.e. the most cited paper
most_cited = g.vs.select(_degree=g.maxdegree())
print(f"most cited papers: {most_cited['year']},\n"
      f"{most_cited.degree()} citations\n", 
      f"subject areas: {most_cited['label']}")

most cited papers: [array([2014], dtype=int64)],
[13161] citations
 subject areas: [array([24], dtype=int64)]


In [9]:
# TASK 2

In [10]:
train_set = g.vs.select(year_lt=2019)
test_set = g.vs.select(year_ge=2019)
print(f'train_set: {len(train_set)} entries')
print(f'test_set: {len(test_set)} entries')

train_set: 120740 entries
test_set: 48603 entries


In [27]:
# TASK 3

Lemma 1 Let G be a graph and v ∈ V (G). Then, $∆(v) = |E(G[N (v)])|$

We need to calculate the number of triangle subgraphs of G that contain v.
By definition, the neighbourhood N(v) of a vertex v in a graph G is the subgraph of G induced by all vertices adjacent to v. Since there is an edge between each node in N(v) and v, to complete a triangle between two neighbor nodes and v, we need those neighbor nodes to be connected by edge. As a result, the number of number of triangle subgraphs of G that contain v will simply be the number of edges in the neighborhood of v.

In [47]:
# TASK 3.1

# for each vertex, getting the list of its neighbors,
# selecting a subgraph that consists only of the neighbors
# and calculating the number of edges in it. The number of triangles 
# equals the number of the edges.
triangles_list = np.zeros(len(g.vs), dtype=int)

time_start = time.time()
for i, vertex in enumerate(g.vs):
    neighbors = vertex.neighbors()
    subgraph = g.induced_subgraph(neighbors)
    triangles_list[i] = len(subgraph.es)
print(f'number of triangles for first 10 vertices:')
print(triangles_list[0:10])
print(f'time taken: {time.time() - time_start:.2f}')

number of triangles for first 10 vertices:
[1460    1   20    1   12    0    0    1    0    2]
time taken: 31.49


In [None]:
# TASK 3.2

def check_time(method_name):
    global time_start
    print(f"{method_name}: {time.time() - time_start:.3f}s")
    time_start = time.time()

time_start = time.time()
pageranks = g.pagerank()
check_time('pageranks')

corenesses = g.coreness()
check_time('corenesses')

eigen_centralities = g.eigenvector_centrality()
check_time('eigen_centralities')

# strengths = g.strength()
# check_time('strengths')

# diversities = g.diversity()
# check_time('diversities')

degrees = g.degree(g.vs)
check_time('degrees')
    