In [1]:
import pandas as pd
import networkx as nx
import igraph as ig
# import leidenalg
import numpy as np
import os
import gc
import ast
import hashlib
from collections import defaultdict, Counter
from itertools import combinations
import json
import ijson
from datetime import datetime
import pickle

In [3]:
from project_cda.anime_graph_builder import AnimeGraphBuilder
from project_cda.community_tracker import CommunityTracker
from project_cda.cluster_evaluation import ClusterEvaluation
from project_cda.partition_enricher import PartitionEnricher
from project_cda.cluster_visualizer import ClusterVisualizer

In [4]:
# All relative paths used by this notebook are resolved against the project root.
# Set the MARS_ROOT environment variable to run on another machine; the
# original author's checkout remains the default.
os.chdir(os.environ.get("MARS_ROOT", "/home/yaroslav/FCUL/MARS_1.0"))

In [5]:
# Display settings: show every column, cap cell text at 100 characters.
for _option, _value in (('display.max_columns', None), ('display.max_colwidth', 100)):
    pd.set_option(_option, _value)


DATA_DIR = "data"

# Input paths
_DATASET_DIR = f"{DATA_DIR}/datasets/anime_azathoth42"
USERS_PATH = f"{_DATASET_DIR}/users_sterilized.csv"
USER_DICT_PATH = f"{DATA_DIR}/helpers/user_dict_filtered.json"
ANIME_META_PATH = f"{_DATASET_DIR}/anime_sterilized.csv"

# Output paths
REPORTS_DIR = f"{DATA_DIR}/reports"
PARTITIONS_DIR = f"{DATA_DIR}/partitions"
PLOTS_DIR = f"{DATA_DIR}/plots"

# Create the output folder structure (no-op for folders that already exist).
for _out_dir in (REPORTS_DIR, PARTITIONS_DIR, PLOTS_DIR):
    os.makedirs(_out_dir, exist_ok=True)

In [None]:
print(">>> Initializing Metadata & Builders...")

# 1. Load the anime metadata (per the original note, the enricher parses
#    the genres right away).
enricher = PartitionEnricher(ANIME_META_PATH)

# 2. Plain-dict copy of the metadata for ClusterEvaluation, keyed by anime id:
#    {id: {'genres': {...}, 'source': '...'}}
_meta_by_anime_id = enricher.meta_df.set_index('anime_id')
_eval_columns = ['genres', 'source']
anime_info_clean = _meta_by_anime_id[_eval_columns].to_dict('index')

# 3. Graph builder fed with the user table, the user dictionary and the metadata.
builder = AnimeGraphBuilder(USERS_PATH, USER_DICT_PATH, ANIME_META_PATH)

1. Методы кластеризации (ALGO)

    ------------------------------------------------------------------------------
    **leiden_mod** — Leiden (Modularity). Классика. Ищет крупные сообщества.

    *partition_object = g_igraph.community_leiden(objective_function="modularity", weights=weights, n_iterations=-1)*

    ------------------------------------------------------------------------------
    **leiden_cpm** — Leiden (CPM). Ищет плотные ядра. Требует RESOLUTION (начни с 0.01–0.05).
    
    *partition_object = g_igraph.community_leiden(objective_function="cpm", weights=weights, resolution_parameter=resolution, n_iterations=-1)*

    ------------------------------------------------------------------------------
    **infomap** — Infomap. Поток информации. Лучше всего находит границы жанров.

    *partition_object = g_igraph.community_infomap(edge_weights=weights)*

    ------------------------------------------------------------------------------
    **label_prop** — Label Propagation. Быстро, грязно, нестабильно (Baseline).

    *partition_object = g_igraph.community_label_propagation(weights=weights)*

    ------------------------------------------------------------------------------
    **eigenvector** — Leading Eigenvector. Спектральный метод. Медленнее.

    *partition_object = g_igraph.community_leading_eigenvector(weights=weights)*

    ------------------------------------------------------------------------------
    **walktrap** — Walktrap. Случайные блуждания (стабильнее, чем LabelProp).

    *dendrogram = g_igraph.community_walktrap(weights=weights)*

    *partition_object = dendrogram.as_clustering()*

2. Методы прореживания (SPARS_TYPE)

    ------------------------------------------------------------------------------
    **knn** — Оставить K лучших соседей (напр. 20). Гарантирует связность.

    *with open(gpickle_path, "rb") as f: G = pickle.load(f)*

    *sparse_graph = sparsify_knn(self, gpickle_path, k: int = 20)*

    ------------------------------------------------------------------------------
    **backbone** — Оставить статистически значимые связи (ALPHA ≈ 0.05). Может развалить граф.


    *with open(gpickle_path, "rb") as f: G = pickle.load(f)*

    *sparse_graph = sparsify_backbone(G, weight="weight", alpha=0.05, weight_threshold=1, save_path=None)*

    ------------------------------------------------------------------------------
    Без чистки (только если порог веса высокий).

### 3. Методы формирования графа: ***build_edges(year, max_users=100000, method="jaccard")***

    method = **jaccard | raw**

### 4. Именование файла (EXP_TAG)

    Стандарт: {Algo}_{Graph}_{Spars}_{Track}

    Пример: Info_Jac05_KNN20_Tr1

In [None]:
import igraph as ig

def get_partition(g: "ig.Graph", method_code: str, weights=None):
    """
    Algorithm factory: run the community-detection method named by ``method_code``.

    Accepted codes:
        'leiden_mod'              Leiden, modularity objective.
        'leiden_cpm[_<res>]'      Leiden, CPM objective; the text after the last
                                  underscore is the resolution, e.g.
                                  'leiden_cpm_0.05'. Plain 'leiden_cpm' uses 0.01.
        'infomap'                 Infomap.
        'label_prop'              Label propagation.
        'walktrap'                Walktrap (4 steps), dendrogram cut via as_clustering().
        'eigenvector'             Leading eigenvector.

    Args:
        g: graph to cluster (only ``vcount()`` and the ``community_*`` methods are used).
        method_code: one of the codes above.
        weights: edge weights forwarded to the algorithm, or None for unweighted.

    Returns:
        Whatever the selected igraph method returns (for walktrap, the result of
        ``as_clustering()``), or None when the graph has no vertices.

    Raises:
        ValueError: if ``method_code`` is not recognised.
    """
    # Nothing to cluster.
    if g.vcount() == 0:
        return None

    # 1. LEIDEN MODULARITY (the default choice)
    if method_code == "leiden_mod":
        return g.community_leiden(
            objective_function="modularity", weights=weights, n_iterations=-1
        )

    # 2. LEIDEN CPM (resolution is encoded in the code, e.g. 'leiden_cpm_0.05')
    if method_code.startswith("leiden_cpm"):
        default_resolution = 0.01
        try:
            res_val = float(method_code.split("_")[-1])
        except ValueError:
            # Only a non-numeric suffix lands here; anything else (e.g.
            # KeyboardInterrupt) must propagate instead of being swallowed.
            res_val = default_resolution
            if method_code != "leiden_cpm":
                # A suffix was given but is not a number: most likely a typo.
                # Keep the historical fallback, but say so.
                import warnings
                warnings.warn(
                    f"Cannot parse a resolution from {method_code!r}; "
                    f"falling back to {default_resolution}"
                )
        return g.community_leiden(
            objective_function="cpm", weights=weights,
            resolution_parameter=res_val, n_iterations=-1
        )

    # 3. INFOMAP (flow-based)
    if method_code == "infomap":
        return g.community_infomap(edge_weights=weights)

    # 4. LABEL PROPAGATION (fast)
    if method_code == "label_prop":
        return g.community_label_propagation(weights=weights)

    # 5. WALKTRAP (random walks, an alternative to Infomap)
    if method_code == "walktrap":
        # steps=4 is the default and normally does not need changing
        dendrogram = g.community_walktrap(weights=weights, steps=4)
        return dendrogram.as_clustering()

    # 6. EIGENVECTOR (spectral)
    if method_code == "eigenvector":
        return g.community_leading_eigenvector(weights=weights)

    raise ValueError(f"Unknown method: {method_code}")

def generate_exp_key(algo, graph_type, edge_th, spars_type, spars_param, track_th):
    """
    Build a compact experiment name out of four slots: ``{Algo}_{Graph}_{Spars}_{Track}``.

    Example: ('infomap', 'jaccard', 0.05, 'knn', 20, 0.1) -> 'Info_Jac05_KNN20_Tr1'.

    Args:
        algo: clustering code, e.g. 'leiden_mod' or 'leiden_cpm_0.05'.
        graph_type: edge-weighting method; its first three letters are used ('jaccard' -> 'Jac').
        edge_th: edge weight threshold.
        spars_type: 'knn', 'backbone', or anything else for no sparsification.
        spars_param: parameter of the sparsifier (k for knn, alpha for backbone).
        track_th: tracking threshold.

    Returns:
        The experiment key as a string.
    """

    def _compact(value):
        # Drop only a LEADING "0." (0.05 -> "05"). Removing every "0." occurrence
        # would turn 10.5 into "15" and collide with 0.15.
        text = str(value)
        return text[2:] if text.startswith("0.") else text

    # Slot 1: ALGO
    short_algo_names = {
        'leiden_mod': "LMod",
        'infomap': "Info",
        'label_prop': "LProp",
        'walktrap': "Walk",
        'eigenvector': "Eigen",
    }
    if algo.startswith('leiden_cpm'):
        # leiden_cpm_0.05 -> LCPM05
        a_tag = f"LCPM{_compact(algo.split('_')[-1])}"
    else:
        # Unknown codes are used verbatim.
        a_tag = short_algo_names.get(algo, algo)

    # Slot 2: GRAPH (jaccard, 0.02 -> Jac02)
    g_tag = f"{graph_type[:3].capitalize()}{_compact(edge_th)}"

    # Slot 3: SPARS (knn, 20 -> KNN20)
    if spars_type == 'knn':
        s_tag = f"KNN{spars_param}"
    elif spars_type == 'backbone':
        s_tag = f"BB{_compact(spars_param)}"
    else:
        s_tag = "NoSpars"

    # Slot 4: TRACK
    t_tag = f"Tr{_compact(track_th)}"

    return f"{a_tag}_{g_tag}_{s_tag}_{t_tag}"

In [None]:
# ================= EXPERIMENT MENU =================
# Each entry is a tuple: (Algo, GraphMethod, EdgeTh, SparsType, SparsParam).
# Pick a scenario by name via ACTIVE_SCENARIO instead of (un)commenting blocks.

SCENARIOS = {
    # Scenario 1: "Clash of the Titans" (main approaches compared on standard settings).
    # Hypothesis: Infomap gives the best semantic quality, Leiden Modularity the best stability.
    "titans": [
        ("leiden_mod", "jaccard", 0.02, "knn", 20),
        ("infomap",    "jaccard", 0.02, "knn", 20),
        ("label_prop", "jaccard", 0.02, "knn", 20),  # Baseline
    ],

    # Scenario 2: "Microscope" (Leiden CPM at different resolutions).
    # Hypothesis: CPM 0.05 finds the large genres, CPM 0.1 finds narrow fandoms.
    "microscope": [
        ("leiden_cpm_0.01", "jaccard", 0.02, "knn", 20),  # Large chunks
        ("leiden_cpm_0.05", "jaccard", 0.02, "knn", 20),  # Medium (recommended)
        ("leiden_cpm_0.10", "jaccard", 0.02, "knn", 20),  # Fine details
    ],

    # Scenario 3: "Topology" (effect of sparsification).
    # Hypothesis: Backbone preserves "star" structures better, KNN links niches better.
    "topology": [
        ("leiden_mod", "jaccard", 0.02, "knn", 10),  # Strict KNN
        ("leiden_mod", "jaccard", 0.02, "knn", 30),  # Soft KNN
        # ("leiden_mod", "jaccard", 0.02, "backbone", 0.05),  # Backbone (once implemented)
    ],

    # Scenario 4: "Nature of the links" (Jaccard vs Raw).
    # Hypothesis: Raw shows that everybody watches Naruto; Jaccard shows real tastes.
    "link_nature": [
        ("leiden_mod", "jaccard", 0.05, "knn", 20),  # High Jaccard threshold
        ("leiden_mod", "raw",     2,    "knn", 20),  # Raw overlaps of more than 2 users
    ],
}

ACTIVE_SCENARIO = "titans"
EXPERIMENTS = SCENARIOS[ACTIVE_SCENARIO]

# Tracking threshold shared by all experiments (kept separate because it rarely changes).
TRACK_THRESH = 0.15

In [None]:
# --- RUNNER ---

# Graph cache, so graphs are not rebuilt when only the clustering algorithm changes.
# Layout: graph_cache[(method, edge_th, spars_type, spars_param)][year] = igraph object
# If memory is tight, drop the caching and rebuild every time.
graph_cache = {}

for algo, g_method, edge_th, s_type, s_param in EXPERIMENTS:

    # 1. Generate the experiment name
    EXP_KEY = generate_exp_key(algo, g_method, edge_th, s_type, s_param, TRACK_THRESH)
    print(f"\n\n🚀 STARTING: {EXP_KEY}")

    # The backbone branch used to be a bare `pass`, which left G_clean undefined
    # (or silently reused the graph of a previous year). Fail loudly instead.
    if s_type == "backbone":
        raise NotImplementedError(
            f"{EXP_KEY}: 'backbone' sparsification is not wired into the runner yet"
        )

    history = {}

    # Cache key: the graph depends only on these parameters, not on the clustering algorithm.
    cache_key = (g_method, edge_th, s_type, s_param)
    graphs_by_year = graph_cache.setdefault(cache_key, {})

    for year in range(2006, 2012):

        if year in graphs_by_year:
            h = graphs_by_year[year]
        else:
            # Not cached yet: build it.
            print(f"  [Year {year}] Building graph...", end=" ")
            raw_edges, counts = builder.build_edges(year, max_users=20000, method=g_method)
            G = builder.build_graph(raw_edges, node_counts=counts, weight_threshold=edge_th)

            if s_type == "knn":
                G_clean = builder.sparsify_knn(G, k=s_param)
            else:
                G_clean = G

            if G_clean.number_of_nodes() == 0:
                print("Empty.")
                continue

            # TupleList only reads the first two items of each tuple unless told
            # otherwise, so passing edges(data=True) dropped the weights and every
            # algorithm ran unweighted. Emit (u, v, weight) triples and ask igraph
            # to store the third item as the 'weight' edge attribute.
            # Edges without a 'weight' attribute get 1.0.
            h = ig.Graph.TupleList(
                G_clean.edges(data="weight", default=1.0),
                directed=False,
                weights=True,
            )

            # Store in the cache
            graphs_by_year[year] = h
            print("Done.")

        # Fetch the weights
        weights = h.es['weight'] if 'weight' in h.edge_attributes() else None

        # CLUSTERING
        partition = get_partition(h, algo, weights=weights)

        if partition:
            history[year] = CommunityTracker.get_membership(h, partition)

    if not history:
        # Every year was empty, so there is nothing to track, score or plot.
        print(f"  No partitions produced for {EXP_KEY}; skipping post-processing.")
        continue

    # --- POST PROCESSING ---
    print(f"  🏁 Finishing {EXP_KEY}...")

    # 1. Tracking
    aligned = CommunityTracker.track_communities(history, threshold=TRACK_THRESH)

    # 2. Saving (into the output folders created by the config cell)
    csv_path = f"{PARTITIONS_DIR}/{EXP_KEY}.csv"
    CommunityTracker.save_aligned_history_to_csv(aligned, csv_path)

    # 3. Metrics
    evaluator = ClusterEvaluation(EXP_KEY, aligned, anime_info_clean)
    stats = evaluator.evaluate()
    # Append to the shared metrics file; write the header only when creating it.
    metrics_path = f"{REPORTS_DIR}/all_metrics.csv"
    pd.DataFrame([stats]).to_csv(
        metrics_path, mode='a', header=not os.path.exists(metrics_path), index=False
    )

    # 4. Sankey
    df_viz = enricher.enrich_partition(csv_path)
    ClusterVisualizer(df_viz).plot_evolution_sankey(
        f"{PLOTS_DIR}/Sankey_{EXP_KEY}.html", min_link_size=5, title=EXP_KEY
    )

    # Free memory that is no longer needed
    del aligned, df_viz, evaluator, stats
    gc.collect()

print("\nAll experiments done.")

In [None]:
# Re-runs post-processing for the experiment left over from the runner loop
# (its last iteration): `history` and `EXP_KEY` come from that cell,
# `TRACK_THRESH` from the experiment menu. The previous names
# (`partition_history`, `TRACKING_THRESH`) were never defined in this notebook
# and raised NameError on a fresh kernel.

# 1. TRACKING (stitch the years together)
aligned_data = CommunityTracker.track_communities(history, threshold=TRACK_THRESH)

# 2. SAVING CSV (PARTITIONS_DIR is created by the config cell)
csv_filename = f"{PARTITIONS_DIR}/{EXP_KEY}.csv"
CommunityTracker.save_aligned_history_to_csv(aligned_data, csv_filename)

# 3. EVALUATION (compute the metrics)
evaluator = ClusterEvaluation(EXP_KEY, aligned_data, anime_info_clean)
stats = evaluator.evaluate()

print("\n📊 SCOREBOARD:")
print(pd.Series(stats))

# Append the metrics to the shared file; the header is written only on creation.
metrics_file = f"{REPORTS_DIR}/all_experiments_metrics.csv"
df_stats = pd.DataFrame([stats])
df_stats.to_csv(
    metrics_file, mode='a', header=not os.path.exists(metrics_file), index=False
)

In [None]:
# 1. Enrich (add titles and genres to the saved partition CSV)
df_viz = enricher.enrich_partition(csv_filename)

# 2. Visualize
viz = ClusterVisualizer(df_viz)
# Write into PLOTS_DIR, which the config cell creates; "<REPORTS_DIR>/plots"
# is not created anywhere in this notebook.
html_path = f"{PLOTS_DIR}/Sankey_{EXP_KEY}.html"

# min_link_size=10 removes thin links (noise)
viz.plot_evolution_sankey(html_path, min_link_size=10, title=f"Evolution: {EXP_KEY}")

print(f"Open report: {html_path}")