In [4]:
import networkx as nx

# Location of the edge-list file, relative to the notebook's working directory
path = "web-Google.txt"

# Load the edge list as a directed graph:
#   - every node ID is converted to int
#   - fields on each line are split on a tab character
#   - lines beginning with "#" are treated as comments and skipped
G = nx.read_edgelist(
    path,
    create_using=nx.DiGraph(),
    nodetype=int,
    delimiter="\t",
    comments="#",
)

In [6]:
# Report the size of the loaded graph
node_count = G.number_of_nodes()
edge_count = G.number_of_edges()
print(f"Nodes: {node_count}")
print(f"Edges: {edge_count}")

Nodes: 875713
Edges: 5105039


In [15]:
import statistics

# Baseline PageRank on the original graph (damping 0.85, at most 100 iterations)
pr = nx.pagerank(G, max_iter=100, tol=1e-6, alpha=0.85)

# Median of all PageRank scores
median_pr = statistics.median(pr.values())

# Pick the node whose score lies nearest the median; on a tie, min() keeps
# the first such node in the dict's iteration order
node_med = min(pr, key=lambda node: abs(pr[node] - median_pr))
score_med = pr[node_med]

print(f"Median PageRank value:  {median_pr:.9f}")
print(f"Node closest to median:  {node_med} (score = {score_med:.9f})")

Median PageRank value:  0.000000495
Node closest to median:  4091 (score = 0.000000495)


In [16]:
# Link-farm attack simulation.
# Work on a copy so the original graph G is left unchanged.
G_attack = G.copy()

# New node IDs start right after the largest existing ID, so they cannot
# collide with nodes already in the graph.
max_id = max(G_attack.nodes())
new_nodes = [max_id + 1 + offset for offset in range(100)]

# Each of the 100 new nodes gets one outgoing edge to the target node.
G_attack.add_edges_from((farm_node, node_med) for farm_node in new_nodes)

In [17]:
# PageRank on the attacked graph, using the same alpha / tol / max_iter
# values as the baseline computation
pr_attack = nx.pagerank(G_attack, max_iter=100, tol=1e-6, alpha=0.85)

# Score of the targeted node after the link farm was added
atk_score = pr_attack[node_med]
print(f"After attack → node {node_med} has PageRank {atk_score:.9f}")

After attack → node 4091 has PageRank 0.000027887
