In [2]:
import os
import lzma
import json
import snap

In [3]:
# Location of the raw temporal edge list.
data_path = '../../temporal_askubuntu/sx-askubuntu.txt'

# Load an undirected graph, taking column 0 as source and column 1 as
# destination of every edge.
UGraph = snap.LoadEdgeList(snap.TUNGraph, data_path, 0, 1)

# Collect (degree, number of nodes with that degree) pairs and print one line
# per distinct degree.
DegToCntV = snap.TIntPrV()
snap.GetDegCnt(UGraph, DegToCntV)
for deg_cnt in DegToCntV:
    degree, n_nodes = deg_cnt.GetVal1(), deg_cnt.GetVal2()
    print(f"Degree {degree} : {n_nodes} nodes")

Degree 1 : 55500 nodes
Degree 2 : 36068 nodes
Degree 3 : 22161 nodes
Degree 4 : 12869 nodes
Degree 5 : 7779 nodes
Degree 6 : 4848 nodes
Degree 7 : 3235 nodes
Degree 8 : 2380 nodes
Degree 9 : 1845 nodes
Degree 10 : 1453 nodes
Degree 11 : 1106 nodes
Degree 12 : 863 nodes
Degree 13 : 775 nodes
Degree 14 : 664 nodes
Degree 15 : 589 nodes
Degree 16 : 507 nodes
Degree 17 : 464 nodes
Degree 18 : 386 nodes
Degree 19 : 352 nodes
Degree 20 : 308 nodes
Degree 21 : 281 nodes
Degree 22 : 239 nodes
Degree 23 : 213 nodes
Degree 24 : 225 nodes
Degree 25 : 187 nodes
Degree 26 : 179 nodes
Degree 27 : 144 nodes
Degree 28 : 176 nodes
Degree 29 : 131 nodes
Degree 30 : 129 nodes
Degree 31 : 120 nodes
Degree 32 : 108 nodes
Degree 33 : 91 nodes
Degree 34 : 85 nodes
Degree 35 : 92 nodes
Degree 36 : 84 nodes
Degree 37 : 80 nodes
Degree 38 : 80 nodes
Degree 39 : 75 nodes
Degree 40 : 77 nodes
Degree 41 : 68 nodes
Degree 42 : 62 nodes
Degree 43 : 47 nodes
Degree 44 : 51 nodes
Degree 45 : 51 nodes
Degree 46 : 42 nodes

In [5]:
import gzip

input_file = "../../temporal_askubuntu/sx-askubuntu.txt"
output_file = "sx-askubuntu_static.txt"

# Copy the first two whitespace-separated fields of every input line to the
# output as a tab-separated pair; lines with fewer than two fields are dropped.
with open(input_file, "rt", encoding="utf-8") as src, \
        open(output_file, "w", encoding="utf-8") as dst:
    for record in src:
        fields = record.split()
        if len(fields) >= 2:
            dst.write("\t".join(fields[:2]) + "\n")

print(f"Static graph saved to {output_file}")


Static graph saved to sx-askubuntu_static.txt


In [6]:
import snap
import networkx as nx
import community
import time

# File names live in variables so the log messages always name the files that
# are actually read and written.
edge_file = "sx-askubuntu_static.txt"
community_file = "communities_ubuntu_louvain.txt"
LOUVAIN_SEED = 42  # fixed random_state so Louvain does not vary between runs

print(f"Loading edges from {edge_file}...")
edges = []
nodes = set()
with open(edge_file) as f:
    for line in f:
        parts = line.split()
        if len(parts) < 2:
            # Blank or truncated line: nothing to parse, skip it rather than
            # failing on tuple unpacking.
            continue
        u, v = int(parts[0]), int(parts[1])
        edges.append((u, v))
        nodes.add(u)
        nodes.add(v)

# Re-index the original node ids to 0..n-1. Sorting makes the mapping
# independent of set iteration order.
nodes = sorted(nodes)
nid2idx = {nid: idx for idx, nid in enumerate(nodes)}
idx2nid = dict(enumerate(nodes))

# Build the undirected SNAP graph on the compact indices.
UGraph = snap.PUNGraph.New()
for idx in range(len(nodes)):
    UGraph.AddNode(idx)
for u, v in edges:
    UGraph.AddEdge(nid2idx[u], nid2idx[v])

print("Original graph:", UGraph.GetNodes(), "nodes,", UGraph.GetEdges(), "edges")

# k-core: keep only the 2-core of the graph.
Gk = snap.GetKCore(UGraph, 2)
print("2-core graph:", Gk.GetNodes(), "nodes,", Gk.GetEdges(), "edges")

# Copy the 2-core edges into a NetworkX graph (node labels are still the
# compact indices, not the original ids).
Gnx = nx.Graph()
for EI in Gk.Edges():
    Gnx.add_edge(EI.GetSrcNId(), EI.GetDstNId())

# Louvain community detection, timed.
t0 = time.perf_counter()
partition = community.best_partition(Gnx, random_state=LOUVAIN_SEED)
t1 = time.perf_counter()
print("Louvain finished in %.2f s" % (t1 - t0))

# Save the community of every node, mapped back to the original node id.
with open(community_file, "w") as f:
    for idx, comm in partition.items():
        f.write(f"{idx2nid[idx]}\t{comm}\n")

print(f"Saved community assignments to {community_file}")


Loading edges from graph.txt...
Original graph: 159316 nodes, 508003 edges
2-core graph: 99996 nodes, 450500 edges
Louvain finished in 15.26 s
Saved community assignments to communities_louvain_corrected.txt


In [None]:
from collections import defaultdict


community_file = "communities_ubuntu_louvain.txt"

# Tally how many nodes were written for each community id. Every line must
# hold exactly two whitespace-separated fields: node id and community id.
comm_count = defaultdict(int)
with open(community_file, "r") as f:
    for row in f:
        _node_id, comm_label = row.split()
        comm_count[int(comm_label)] += 1

# Report the communities in ascending id order.
for comm_id in sorted(comm_count):
    print(f"Community {comm_id}: {comm_count[comm_id]} nodes")

sizes = list(comm_count.values())
print(f"total community number: {len(sizes)}")


In [None]:
from collections import defaultdict

community_file = "communities_ubuntu_louvain.txt"
filtered_file = "communities_ubuntu_louvain_filtered.txt"
MIN_COMMUNITY_SIZE = 5  # communities with fewer nodes than this are dropped

comm_count = defaultdict(int)  # community id -> number of member nodes
node2comm = {}                 # node id (kept as string) -> community id

with open(community_file, "r") as f:
    for line in f:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        node, comm = line.split()
        comm = int(comm)
        node2comm[node] = comm
        comm_count[comm] += 1

# Keep only the nodes whose community reaches the minimum size.
filtered_nodes = {
    node: comm
    for node, comm in node2comm.items()
    if comm_count[comm] >= MIN_COMMUNITY_SIZE
}

with open(filtered_file, "w") as f:
    for node, comm in filtered_nodes.items():
        f.write(f"{node}\t{comm}\n")

# Number of surviving communities, taken from the size tallies rather than by
# building a second set over every kept node.
kept_communities = sum(1 for size in comm_count.values() if size >= MIN_COMMUNITY_SIZE)

print(f"Filtered communities saved to {filtered_file}")
print(f"original community number: {len(comm_count)}, filtered community number: {kept_communities}")


In [9]:
import networkx as nx

# Full static graph, with node labels parsed as integers.
Gnx = nx.read_edgelist("sx-askubuntu_static.txt", nodetype=int)

# Node ids that survived the community-size filter. Every line must split
# into exactly two tab-separated fields (node id, community id).
with open("communities_ubuntu_louvain_filtered.txt") as f:
    records = [row.strip().split("\t") for row in f]
valid_nodes = {int(node_id) for node_id, _comm in records}

# Induced subgraph on the kept nodes, copied so it is independent of Gnx.
G_filtered = Gnx.subgraph(valid_nodes).copy()

n_nodes = G_filtered.number_of_nodes()
n_edges = G_filtered.number_of_edges()
print(f"Filtered graph: {n_nodes} nodes, {n_edges} edges")

# Write the filtered graph as a plain edge list without edge attributes.
nx.write_edgelist(G_filtered, "ubuntu_graph_filtered.txt", data=False)


Filtered graph: 99776 nodes, 450095 edges
