In [None]:
import json
import pandas as pd
from graph_tool.all import *
from collections import defaultdict

In [None]:
# Server output; iterated below as a sequence of per-user records.
# JSON is UTF-8 by specification, so decode it explicitly instead of relying on
# the platform's locale encoding (which breaks on non-ASCII text on some systems).
with open("static/graph/my/092de6601f26128b3e47.json", "r", encoding="utf-8") as f:
    vertex_data = json.load(f)

# Edge list (src/dst node ids) and the user-id -> node-id translation table.
edges_df = pd.read_csv("graph/all_edges.csv")
idmap_df = pd.read_csv("graph/id_to_node_id.csv")

In [None]:
# Lookup table: user id (column "user_id_1") -> node id (column "node_id").
uid_to_nid = {
    user_id: node_id
    for user_id, node_id in zip(idmap_df["user_id_1"], idmap_df["node_id"])
}
uid_to_nid

In [None]:
# node_map translates node ids (as used by the edge CSV) to vertices of g.
node_map = {}

# Undirected user graph with per-vertex user id and community label.
g = Graph(directed=False)
v_community = g.new_vertex_property("int")
v_user_id = g.new_vertex_property("long")

In [None]:
# Further per-vertex attributes filled from the server records: score and screen name.
v_score = g.new_vertex_property("double")
v_screen_name = g.new_vertex_property("string")

In [None]:
# Create one vertex per server record whose user id is known to the id map.
# Only the first record seen for a given node id becomes a vertex.
for record in vertex_data:
    user_id = record["user_id"]
    screen_name = record["screen_name"]
    if user_id in uid_to_nid and uid_to_nid[user_id] not in node_map:
        vertex = g.add_vertex()
        node_map[uid_to_nid[user_id]] = vertex
        v_user_id[vertex] = user_id
        v_screen_name[vertex] = screen_name
        v_score[vertex] = record["score"]
        # Records without a community are filed under the sentinel label -1.
        community = record.get("community")
        if community is None:
            v_community[vertex] = -1
        else:
            v_community[vertex] = int(community)


In [None]:
# Add an edge for every CSV row whose two endpoints both became vertices.
# The two columns are iterated directly instead of using DataFrame.iterrows():
# iterrows() builds a Series per row (slow on large edge lists) and upcasts
# integer ids to float when the frame also holds float columns.
for src, dst in zip(edges_df["src"], edges_df["dst"]):
    src, dst = int(src), int(dst)
    if src in node_map and dst in node_map:
        g.add_edge(node_map[src], node_map[dst])

# Register the property maps on the graph under stable names.
g.vertex_properties["user_id"] = v_user_id
g.vertex_properties["community"] = v_community
g.vertex_properties["screen_name"] = v_screen_name
g.vertex_properties["score"] = v_score

In [None]:
# Cluster-level graph: one vertex per distinct community label found in g.
cluster_graph = Graph(directed=False)
cluster_map = {}  # community label -> vertex of cluster_graph
for vertex in g.vertices():
    label = v_community[vertex]
    if label in cluster_map:
        continue
    cluster_map[label] = cluster_graph.add_vertex()

In [None]:
# Count the edges of g that run between two different communities.
edge_count = defaultdict(int)
ce_weights = cluster_graph.new_edge_property("int")

for edge in g.edges():
    first = v_community[edge.source()]
    second = v_community[edge.target()]
    if first == second:
        continue  # intra-community edges do not contribute
    # Order the pair so that (a, b) and (b, a) share one counter.
    pair = (first, second) if first < second else (second, first)
    edge_count[pair] += 1

# One weighted edge per unordered community pair.
for pair, weight in edge_count.items():
    low, high = pair
    cluster_edge = cluster_graph.add_edge(cluster_map[low], cluster_map[high])
    ce_weights[cluster_edge] = weight

In [None]:
from math import log, log1p

# Scale every inter-cluster edge count to log1p(w / max_w); since 1 <= w <= max_w
# the result lies in (0, log 2]. The same value is used as the drawn pen width.
norm_weights = cluster_graph.new_edge_property("double")
e_pen_width = cluster_graph.new_edge_property("double")

# Guard the empty case: with no inter-cluster edges there is nothing to reduce
# and taking the maximum would raise ValueError.
max_w = ce_weights.a.max() if cluster_graph.num_edges() > 0 else 1
for e in cluster_graph.edges():
    norm_weights[e] = log1p(ce_weights[e] / max_w)
    e_pen_width[e] = norm_weights[e]

# Random 3-D layout; all three coordinates are written to
# tmp/cluster_positions.json by a later cell.
pos = random_layout(cluster_graph, dim=3)
# Alternative layouts kept for experimentation:
# pos = arf_layout(cluster_graph, weight=norm_weights, max_iter=500)
# pos = fruchterman_reingold_layout(cluster_graph, weight=ce_weights, n_iter=200, circular=True)

graph_draw(
    cluster_graph,
    pos=pos,
    edge_pen_width=e_pen_width,
    vertex_text=cluster_graph.vertex_index,
    output_size=(1000, 1000)
)


In [None]:
def draw_cluster(cluster_id):
    """Draw the subgraph of ``g`` induced by a single community.

    Parameters
    ----------
    cluster_id : int
        Community label to select; vertices are matched through ``v_community``.

    Returns
    -------
    None
        Prints a notice and returns without drawing when no vertex carries the
        requested label.
    """
    verts = [v for v in g.vertices() if v_community[v] == cluster_id]
    if not verts:
        print(f"Кластер {cluster_id} пуст")
        return

    sub = Graph(directed=False)
    vmap = {v: sub.add_vertex() for v in verts}

    # In an undirected graph every edge is listed by out_edges() of both of its
    # endpoints, so remember which edge indices were already copied; otherwise
    # each intra-cluster edge would be inserted twice.
    copied = set()
    for v in verts:
        for e in v.out_edges():
            u = e.target()
            if v_community[u] != cluster_id:
                continue
            idx = g.edge_index[e]
            if idx in copied:
                continue
            copied.add(idx)
            sub.add_edge(vmap[v], vmap[u])

    pos = random_layout(sub, dim=3)
    # Alternative layouts kept for experimentation:
    # pos = arf_layout(sub, max_iter=1, dim=3)
    # pos = fruchterman_reingold_layout(sub, n_iter=200, circular=True)
    # pos = sfdp_layout(sub)
    graph_draw(sub, pos=pos, output_size=(1600, 1600))

In [None]:
# Example: render the subgraph of the community labelled 5.
draw_cluster(5)

In [None]:
import json
import os

# Output directory for the JSON files written below.
os.makedirs("tmp", exist_ok=True)

# Cluster-level layout: vertex index of cluster_graph -> [x, y, z].
# (json.dump writes the integer keys as strings.)
# NOTE(review): ids here are cluster_graph vertex indices, assigned in first-seen
# order, not community labels -- confirm that consumers expect this.
cluster_pos = {}
for cluster_vertex in cluster_graph.vertices():
    coords = pos[cluster_vertex]
    cluster_pos[int(cluster_vertex)] = [float(coords[axis]) for axis in range(3)]
with open("tmp/cluster_positions.json", "w") as f:
    json.dump(cluster_pos, f)

# Cluster-level edges with their raw (un-normalised) inter-cluster edge counts.
cluster_edges = []
for cluster_edge in cluster_graph.edges():
    cluster_edges.append(
        {
            "source": int(cluster_edge.source()),
            "target": int(cluster_edge.target()),
            "weight": float(ce_weights[cluster_edge]),
        }
    )
with open("tmp/cluster_edges.json", "w") as f:
    json.dump(cluster_edges, f)


In [None]:
from tqdm import tqdm


def export_cluster(cluster_id):
    """Write one community's induced subgraph to JSON files under ``tmp/``.

    Two files are produced: ``tmp/cluster_<id>_nodes.json`` (id, name, x, y, z,
    score, community per vertex, with ids local to the subgraph) and
    ``tmp/cluster_<id>_edges.json`` (source/target pairs using the same local
    ids). Each undirected edge is written exactly once.

    Parameters
    ----------
    cluster_id : int
        Community label to export; vertices are matched through ``v_community``.

    Returns
    -------
    None
        Nothing is written when no vertex carries the requested label.
    """
    verts = [v for v in g.vertices() if v_community[v] == cluster_id]
    if not verts:
        return

    sub = Graph(directed=False)
    # These maps are indexed with vertices of `sub`, so they must belong to `sub`
    # rather than to the full graph.
    names = sub.new_vertex_property("string")
    scores = sub.new_vertex_property("double")
    communities = sub.new_vertex_property("int")

    vmap = {}
    for v in verts:
        sv = sub.add_vertex()
        vmap[v] = sv
        # Copy values with their native types; no string round-trip needed.
        names[sv] = v_screen_name[v]
        scores[sv] = v_score[v]
        communities[sv] = v_community[v]

    # In an undirected graph every edge is listed by out_edges() of both of its
    # endpoints; track copied edge indices so each intra-cluster edge is added
    # and exported once instead of twice.
    edge_list = []
    copied = set()
    for v in verts:
        for e in v.out_edges():
            u = e.target()
            if v_community[u] != cluster_id:
                continue
            idx = g.edge_index[e]
            if idx in copied:
                continue
            copied.add(idx)
            sub.add_edge(vmap[v], vmap[u])
            edge_list.append((int(vmap[v]), int(vmap[u])))

    pos = random_layout(sub, dim=3)
    # Alternative layouts kept for experimentation:
    # pos = fruchterman_reingold_layout(sub, n_iter=200, circular=True)
    # pos = sfdp_layout(sub)
    node_data = [
        {
            "id": int(v),
            "name": names[v],
            "x": float(pos[v][0]),
            "y": float(pos[v][1]),
            "z": float(pos[v][2]),
            "score": scores[v],
            "community": communities[v],
        }
        for v in sub.vertices()
    ]
    edge_data = [{"source": s, "target": t} for s, t in edge_list]

    with open(f"tmp/cluster_{cluster_id}_nodes.json", "w") as f:
        json.dump(node_data, f)
    with open(f"tmp/cluster_{cluster_id}_edges.json", "w") as f:
        json.dump(edge_data, f)

# Export every community label that actually occurs in the graph instead of
# probing a hard-coded id range, which silently skipped any label outside -1..254.
for cluster_id in tqdm(sorted({v_community[v] for v in g.vertices()})):
    export_cluster(cluster_id)
