In [None]:
import os
os.chdir("/home/yaroslav/FCUL/MARS_1.0")

# from google.colab import drive
# drive.mount('/content/drive')
# import os
# os.chdir('/content/drive/My Drive/lisboa_ciencia_de_dados/MARS_1.0')
# !pip install -qq igraph ijson

In [None]:
import gc
import pickle
import networkx as nx
import igraph as ig
import pandas as pd
from time import perf_counter
import logging
import traceback
from project_cda.tag_formatter import set_log_level, log
# from collections import defaultdict

In [3]:
set_log_level('DEBUG')
# set_log_level('INFO')
# set_log_level('WARNING')
# set_log_level('ERROR')

In [4]:
from project_cda.anime_graph_builder import AnimeGraphBuilder
from project_cda.community_tracker import CommunityTracker
from project_cda.cluster_evaluation import ClusterEvaluation
from project_cda.partition_enricher import PartitionEnricher
from project_cda.cluster_visualizer import ClusterVisualizer

In [5]:
DATA_DIR = "data"
USERS_CSV_PATH = f"{DATA_DIR}/datasets/anime_azathoth42/users_sterilized.csv"
USER_DICT_PATH = f"{DATA_DIR}/helpers/user_dict_filtered.json"   # 95 percentile
ANIME_CSV_PATH = f"{DATA_DIR}/datasets/anime_azathoth42/anime_sterilized.csv"

## EDGING SETTINGS (keep **ONE** option uncommented)

In [None]:
# === EDGES SETTINGS ===
def get_edges_config():
    # --- METHOD: Jaccard + KNN ---
    return {                                #   STRING TO BE UNCOMMENTED
        "method": "jaccard",                #   STRING TO BE UNCOMMENTED
        "threshold": 0.05                   #   STRING TO BE UNCOMMENTED
    }                                       #   STRING TO BE UNCOMMENTED

    # --- METHOD: Raw / Projected ---
    # return {                              #   STRING TO BE UNCOMMENTED
    #     "method": "raw",                  #   STRING TO BE UNCOMMENTED
    #     "threshold": 0,                   #   STRING TO BE UNCOMMENTED
    # }                                     #   STRING TO BE UNCOMMENTED

## SPARSING SETTINGS (keep **ONE** option uncommented)

In [None]:

def get_sparsing_config():
    # --- METHOD: No sparsing ---
    return {}                               #   STRING TO BE UNCOMMENTED

    # --- METHOD: KNN ---
    # return {                              #   STRING TO BE UNCOMMENTED
    #     "type": "knn",                    #   STRING TO BE UNCOMMENTED
    #     "k": 10,                          #   STRING TO BE UNCOMMENTED
    # }                                     #   STRING TO BE UNCOMMENTED

    # --- METHOD: Backbone ---
    # return {                              #   STRING TO BE UNCOMMENTED
    #     "type": "backbone",               #   STRING TO BE UNCOMMENTED
    #     "alpha": 0.05                     #   STRING TO BE UNCOMMENTED
    # }                                     #   STRING TO BE UNCOMMENTED

## CLUSTERING ALGORITHM SETTINGS (keep **ONE** option uncommented)

In [None]:
# === –ù–ê–°–¢–†–û–ô–ö–ò –ê–õ–ì–û–†–ò–¢–ú–ê ===
def get_algo_config():
    # --- 1. LEIDEN: MODULARITY (–ö–ª–∞—Å—Å–∏–∫–∞) ---
    return {
        "name": "leiden_mod",
        "kwargs": {
            "objective_function": "modularity",
            "resolution": 1.0,  # "Gamma". 1.0 - —Å—Ç–∞–Ω–¥–∞—Ä—Ç. –ë–æ–ª—å—à–µ - –º–µ–ª—å—á–µ –∫–ª–∞—Å—Ç–µ—Ä—ã.
            "n_iterations": -1            # -1 = –∫—Ä—É—Ç–∏—Ç—å –¥–æ —Å—Ö–æ–¥–∏–º–æ—Å—Ç–∏ (—Ä–µ–∫–æ–º–µ–Ω–¥—É–µ—Ç—Å—è)
        }
    }

    # --- 2. LEIDEN: CPM (Constant Potts Model) ---
    # return {
    #     "name": "leiden_cpm",
    #     "kwargs": {
    #         "objective_function": "CPM",
    #         "resolution": 0.05, # 0.01, 0.05, 0.1...
    #         "n_iterations": -1
    #     }
    # }

    # --- 3. LEADING EIGENVECTOR (–°–ø–µ–∫—Ç—Ä–∞–ª—å–Ω—ã–π) ---
    # return {
    #     "name": "eigenvector",
    #     "kwargs": {
    #         # clusters=None -> –∞–ª–≥–æ—Ä–∏—Ç–º —Å–∞–º —Ä–µ—à–∏—Ç
    #         "clusters": None 
    #     }
    # }

    # --- 4. WALKTRAP (Random Walks) ---
    # return {
    #     "name": "walktrap",
    #     "kwargs": {
    #         "steps": 4  # –î–ª–∏–Ω–∞ –±–ª—É–∂–¥–∞–Ω–∏—è. (3-4) -> –º–µ–ª–∫–∏–µ. (8-10) -> –∫—Ä—É–ø–Ω—ã–µ.
    #     }
    # }

    # --- 5. INFOMAP (Flow-based) ---
    # return {
    #     "name": "infomap",
    #     "kwargs": {
    #         "trials": 10  # –ö–æ–ª–∏—á–µ—Å—Ç–≤–æ –ø–æ–ø—ã—Ç–æ–∫. –ë–æ–ª—å—à–µ -> —Å—Ç–∞–±–∏–ª—å–Ω–µ–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç.
    #     }
    # }

    # --- 6. LABEL PROPAGATION ---
    # return {
    #     "name": "label_propagation",
    #     "kwargs": {}
    # }

In [9]:
def make_experiment_name(edge_conf, sparse_conf, algo_conf):
    parts = []
    
    # 1. Edges part (e.g., "Jac005")
    e_name = edge_conf['method'][:3].capitalize()
    th_str = str(edge_conf['threshold']).replace('.', '')
    parts.append(f"{e_name}{th_str}")
    
    # 2. Spars part (e.g., "KNN20")
    s_type = sparse_conf.get('type')
    if s_type == 'knn':
        k = sparse_conf.get('k')
        parts.append(f"KNN{k}")
    elif s_type == 'backbone':
        a = str(sparse_conf.get('alpha')).replace('.', '')
        parts.append(f"BB{a}")
    else:
        parts.append("Full")
        
    # 3. Algo part (e.g., "LeidenM10")
    algo = algo_conf['name']
    if algo == 'leiden':
        res = str(algo_conf['kwargs'].get('resolution_parameter', 1.0)).replace('.', '')
        parts.append(f"LMod{res}")
    elif algo == 'infomap':
        parts.append(f"InfoT{algo_conf['kwargs'].get('trials', 1)}")
    else:
        parts.append(algo.capitalize())
        
    return "_".join(parts)

In [10]:
if not os.path.exists(DATA_DIR):
    os.makedirs(DATA_DIR)

GRAPH_DIR = f"{DATA_DIR}/graphs/"
if not os.path.exists(GRAPH_DIR):
    os.makedirs(GRAPH_DIR)

REPORT_DIR = f"{DATA_DIR}/reports/"
if not os.path.exists(REPORT_DIR):
    os.makedirs(REPORT_DIR)

PARTITION_DIR = f"{DATA_DIR}/partitions/"
if not os.path.exists(PARTITION_DIR):
    os.makedirs(PARTITION_DIR)

PLOTS_DIR = f"{DATA_DIR}/plots/"
if not os.path.exists(PLOTS_DIR):
    os.makedirs(PLOTS_DIR)

In [11]:
# 1. –ó–∞–±–∏—Ä–∞–µ–º –Ω–∞—Å—Ç—Ä–æ–π–∫–∏
EDGES_CONF = get_edges_config()
SPARS_CONF = get_sparsing_config()
ALGO_CONF = get_algo_config()

# 2. –§–æ—Ä–º–∏—Ä—É–µ–º –∏–º—è
EXP_NAME = make_experiment_name(EDGES_CONF, SPARS_CONF, ALGO_CONF)
CURRENT_EXP_GRAPH_DIR = os.path.join(GRAPH_DIR, EXP_NAME)
CURRENT_EXP_REPORT_DIR = os.path.join(REPORT_DIR, EXP_NAME)
CURRENT_EXP_PARTITION_DIR = os.path.join(PARTITION_DIR, EXP_NAME)
CURRENT_EXP_PLOTS_DIR = os.path.join(PLOTS_DIR, EXP_NAME)

if not os.path.exists(CURRENT_EXP_GRAPH_DIR):
    os.makedirs(CURRENT_EXP_GRAPH_DIR)
if not os.path.exists(CURRENT_EXP_REPORT_DIR):
    os.makedirs(CURRENT_EXP_REPORT_DIR)
if not os.path.exists(CURRENT_EXP_PARTITION_DIR):
    os.makedirs(CURRENT_EXP_PARTITION_DIR)
if not os.path.exists(CURRENT_EXP_PLOTS_DIR):
    os.makedirs(CURRENT_EXP_PLOTS_DIR)

print(f"EDGES CONFIG:           {EDGES_CONF}")
print(f"SPARSING CONFIG:        {SPARS_CONF}")
print(f"ALGORITHM CONFIG:       {ALGO_CONF}")
print(f"EXPERIMENT:             {EXP_NAME}")
print(f"OUTPUT GRAPH PATH:      {CURRENT_EXP_GRAPH_DIR}")
print(f"OUTPUT PARTITIONS PATH: {CURRENT_EXP_PARTITION_DIR}")
print(f"OUTPUT REPORT PATH:     {CURRENT_EXP_REPORT_DIR}")
print(f"OUTPUT PLOT PATH:       {CURRENT_EXP_PLOTS_DIR}")

EDGES CONFIG:           {'method': 'jaccard', 'threshold': 0.05}
SPARSING CONFIG:        {}
ALGORITHM CONFIG:       {'name': 'leiden_mod', 'kwargs': {'objective_function': 'modularity', 'resolution': 1.0, 'n_iterations': -1}}
EXPERIMENT:             Jac005_Full_Leiden_mod
OUTPUT GRAPH PATH:      data/graphs/Jac005_Full_Leiden_mod
OUTPUT PARTITIONS PATH: data/partitions/Jac005_Full_Leiden_mod
OUTPUT REPORT PATH:     data/reports/Jac005_Full_Leiden_mod
OUTPUT PLOT PATH:       data/plots/Jac005_Full_Leiden_mod


In [12]:
graph_builder = AnimeGraphBuilder(users_csv_path=USERS_CSV_PATH,
                                 user_dict_json_path=USER_DICT_PATH,
                                 anime_csv_path=ANIME_CSV_PATH)

[00:05:24] [DEBUG] [AGB] Anime Graph Builder initialzed for


In [13]:
partitions_by_year = {}
modularity_by_year = {}
for year in range(2006, 2012):
    log(f">>> Processing {year}...", tag="YEAR", level='INFO')

    # --- 1. FILENAME GENERATION ---
    # –ò–º—è –∑–∞–≤–∏—Å–∏—Ç –¢–û–õ–¨–ö–û –æ—Ç –≥–æ–¥–∞ –∏ –Ω–∞—Å—Ç—Ä–æ–µ–∫ –ø–æ—Å—Ç—Ä–æ–µ–Ω–∏—è —Ä–µ–±–µ—Ä (Edges Config)
    # –°–ø–∞—Ä—Å–∏–Ω–≥ –∏ –ê–ª–≥–æ—Ä–∏—Ç–º—ã –Ω–µ –≤–ª–∏—è—é—Ç –Ω–∞ –∏–º—è –±–∞–∑–æ–≤–æ–≥–æ —Ñ–∞–π–ª–∞ –≥—Ä–∞—Ñ–∞
    e_method = EDGES_CONF['method']
    e_thresh = str(EDGES_CONF['threshold']).replace('.', '')
    
    # –ü—Ä–∏–º–µ—Ä: base_2013_jaccard_005.gpickle
    base_graph_filename = f"base_{year}_{e_method}_{e_thresh}.pickle"
    base_graph_path = os.path.join(CURRENT_EXP_GRAPH_DIR, base_graph_filename)
    
    G: ig.Graph = None

    # --- 2. LOADING OR BUILDING TARGET GRAPH ---
    if os.path.exists(base_graph_path):
        # --- Load a graph... ---
        log(f"Loading cached graph from {base_graph_filename}...", tag="CACHE", level='DEBUG')
        try:
            with open(base_graph_path, "rb") as f: G = pickle.load(f)
        except Exception as e:
            log(f"Error loading cache: {e}. Rebuilding...", tag="ERROR", level='ERROR')

    if G is None:
        log(f"Building graph from scratch for {year}...", tag="BUILD", level='DEBUG')
        # --- Build a graph... ---
        # ...using EDGES_CONF
        t = perf_counter()
        edges, counts = graph_builder.build_edges(year=year, **EDGES_CONF)   # –ê–≤—Ç–æ–º–∞—Ç–∏—á–µ—Å–∫–∏ –ø–æ–¥—Å—Ç–∞–≤–∏—Ç method="jaccard", threshold=0.05
        t_e = perf_counter() - t
        log(f"Edges calculation time: {perf_counter() - t:.2f}s", tag="TIME", level='DEBUG')
        t = perf_counter()
        G = graph_builder.build_graph(edges, counts, output_path=base_graph_path)  # –°–æ—Ö—Ä–∞–Ω—è–µ–º –ø–æ–ª–Ω—ã–π –≥—Ä–∞—Ñ –≤ –∫—ç—à
        t_g = perf_counter() - t
        del edges, counts; gc.collect()

    # --- 3. SPARSING ---  
    # --- 3. –§–ò–õ–¨–¢–†–ê–¶–ò–Ø (Sparsification) ---
    # –†–∞–±–æ—Ç–∞–µ–º —É–∂–µ —Å –æ–±—ä–µ–∫—Ç–æ–º G (–∑–∞–≥—Ä—É–∂–µ–Ω–Ω—ã–º –∏–ª–∏ —Ç–æ–ª—å–∫–æ —á—Ç–æ —Å–æ–∑–¥–∞–Ω–Ω—ã–º)
    s_type = SPARS_CONF.get('type') # –ë–µ–∑–æ–ø–∞—Å–Ω–æ–µ –ø–æ–ª—É—á–µ–Ω–∏–µ, –µ—Å–ª–∏ dict –ø—É—Å—Ç–æ–π -> None

    if s_type:
        spars_filename = f"sparse_{year}_{s_type}.pickle"
        spars_path = os.path.join(CURRENT_EXP_GRAPH_DIR, spars_filename)

        if os.path.exists(spars_path):
             log(f"Sparse graph loaded from cache: {spars_filename}", tag="CACHE", level='DEBUG')
             with open(spars_path, "rb") as f: G = pickle.load(f)
        else:
            # –í—ã–∑—ã–≤–∞–µ–º –Ω—É–∂–Ω—ã–π –º–µ—Ç–æ–¥
            spars_args = {k: v for k, v in SPARS_CONF.items() if k != 'type'}
            spars_args['output_path'] = spars_path # üëá –ü–æ–¥–∫–∏–¥—ã–≤–∞–µ–º –ø—É—Ç—å –¥–ª—è —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∏—è


            if s_type == "knn":
                log(f"Sparsifying: KNN (k={SPARS_CONF.get('k')})", tag="SPARSE", level='DEBUG')
                # –ü–µ—Ä–µ–¥–∞–µ–º –ø–∞—Ä–∞–º–µ—Ç—Ä—ã, –∏—Å–∫–ª—é—á–∞—è 'type'
                G = graph_builder.sparsify_knn(G, **spars_args)
        
            elif s_type == "backbone":
                log(f"Sparsifying: Backbone (alpha={SPARS_CONF.get('alpha')})", tag="SPARSE", level='DEBUG')
                G = graph_builder.sparsify_backbone(G, **spars_args)
        
    else:
        log("No sparsification applied. Using full graph.", tag="SPARSE", level='DEBUG')

    # --- –ö–õ–ê–°–¢–ï–†–ò–ó–ê–¶–ò–Ø ---
    log(f"Clustering with {ALGO_CONF['name']}...", tag="ALGO", level='DEBUG')

    # –ü—Ä–∞–≤–∏–ª—å–Ω–∞—è –∫–æ–Ω–≤–µ—Ä—Ç–∞—Ü–∏—è NX -> iGraph
    # –°–æ—Ö—Ä–∞–Ω—è–µ—Ç –∞—Ç—Ä–∏–±—É—Ç 'weight' –∏ –∏–º–µ–Ω–∞ —É–∑–ª–æ–≤ (–≤ –∞—Ç—Ä–∏–±—É—Ç–µ '_nx_name')
    # h = ig.Graph.from_networkx(G)
    
    # –ü—Ä–æ–≤–µ—Ä—è–µ–º –≤–µ—Å–∞
    if "weight" in G.edge_attributes():
        weights = G.es["weight"]
    else:
        weights = None
        log("No weights found in graph, clustering will be unweighted.", tag="WARN", level='WARNING')

    algo_name = ALGO_CONF['name']
    algo_args = ALGO_CONF['kwargs']
    
    partition = None
    
    try:
        if algo_name in ['leiden_mod', 'leiden_cpm']:
            # Leiden –≤—ã–∑—ã–≤–∞–µ—Ç—Å—è –æ–¥–∏–Ω–∞–∫–æ–≤–æ, –ø–∞—Ä–∞–º–µ—Ç—Ä—ã –≤–Ω—É—Ç—Ä–∏ kwargs —Ä–∞–∑–Ω—ã–µ
            partition = G.community_leiden(weights=weights, **algo_args)
            
        elif algo_name == 'eigenvector':
            partition = G.community_leading_eigenvector(weights=weights, **algo_args)
            
        elif algo_name == 'walktrap':
            wc = G.community_walktrap(weights=weights, **algo_args)
            partition = wc.as_clustering()
            
        elif algo_name == 'infomap':
            partition = G.community_infomap(edge_weights=weights, **algo_args)

        elif algo_name == 'label_propagation':
            partition = G.community_label_propagation(weights=weights) # kwargs –ø—É—Å—Ç—ã–µ –æ–±—ã—á–Ω–æ

        else:
            raise ValueError(f"Unknown algo: {algo_name}")

        # –ö–æ–Ω–≤–µ—Ä—Ç–∏—Ä—É–µ–º VertexClustering –≤ —Å–ª–æ–≤–∞—Ä—å {anime_real_id: cluster_id}
        # current_partition = {}
        
        # partition.membership –¥–∞–µ—Ç —Å–ø–∏—Å–æ–∫ id –∫–ª–∞—Å—Ç–µ—Ä–æ–≤ [0, 0, 1, 2, 0 ...] —Å–æ–æ—Ç–≤–µ—Ç—Å—Ç–≤—É—é—â–∏–π –∏–Ω–¥–µ–∫—Å–∞–º —É–∑–ª–æ–≤
        # G.vs['name'] —Ö—Ä–∞–Ω–∏—Ç —Ä–µ–∞–ª—å–Ω—ã–µ ID ('3452', '112'), –∫–æ—Ç–æ—Ä—ã–µ –º—ã –∑–∞–¥–∞–ª–∏ –≤ Builder
        
        # –û–ø—Ç–∏–º–∏–∑–∏—Ä–æ–≤–∞–Ω–Ω—ã–π —Å–ø–æ—Å–æ–± —Å–æ–∑–¥–∞–Ω–∏—è —Å–ª–æ–≤–∞—Ä—è (–±—ã—Å—Ç—Ä–µ–µ —Ü–∏–∫–ª–∞)
        real_names = G.vs['name']
        membership = partition.membership
        current_partition = dict(zip(real_names, membership))
        partitions_by_year[year] = current_partition

        try:
            mod_val = G.modularity(membership, weights=weights)
        except Exception:
            mod_val = 0.0 # –ù–∞–ø—Ä–∏–º–µ—Ä, –¥–ª—è CPM –º–æ–¥—É–ª—è—Ä–Ω–æ—Å—Ç—å –º–æ–∂–µ—Ç –±—ã—Ç—å –Ω–µ –æ–ø—Ä–µ–¥–µ–ª–µ–Ω–∞ –≤ —Å—Ç–∞—Ä—ã—Ö –≤–µ—Ä—Å–∏—è—Ö
            
        modularity_by_year[year] = mod_val
        
        log(f"Done. Modularity: {mod_val:.4f}, Clusters: {len(set(membership))}", tag="RESULT", level='INFO')

    except Exception as e:
        log(f"Clustering Error in {year}: {e}", tag="ERROR", level='ERROR')
        traceback.print_exc()

    del G, partition, weights
    gc.collect()

[00:05:24] [INFO] [YEAR] >>> Processing 2006...
[00:05:24] [DEBUG] [CACHE] Loading cached graph from base_2006_jaccard_005.pickle...
[00:05:24] [DEBUG] [SPARSE] No sparsification applied. Using full graph.
[00:05:24] [DEBUG] [ALGO] Clustering with leiden_mod...
[00:05:24] [INFO] [RESULT] Done. Modularity: 0.3573, Clusters: 9
[00:05:25] [INFO] [YEAR] >>> Processing 2007...
[00:05:25] [DEBUG] [CACHE] Loading cached graph from base_2007_jaccard_005.pickle...
[00:05:25] [DEBUG] [SPARSE] No sparsification applied. Using full graph.
[00:05:25] [DEBUG] [ALGO] Clustering with leiden_mod...
[00:05:25] [INFO] [RESULT] Done. Modularity: 0.3416, Clusters: 10
[00:05:25] [INFO] [YEAR] >>> Processing 2008...
[00:05:25] [DEBUG] [BUILD] Building graph from scratch for 2008...
[00:05:25] [INFO] [AGB] Building stats for 2008...
[00:05:25] [DEBUG] [AGB] Users joined until 2008: 15733


KeyboardInterrupt: 

In [None]:
base_partition_filename = f"partition_{EXP_NAME}.csv"
base_partition_path = os.path.join(CURRENT_EXP_PARTITION_DIR, base_partition_filename)

tracker = CommunityTracker(threshold=0.1)
aligned_partitions = tracker.track_communities(partitions_by_year)
CommunityTracker.save_aligned_history_to_csv(aligned_partitions, base_partition_path)

–ì–æ–¥ 2006: –æ–±—Ä–∞–±–æ—Ç–∞–Ω–æ. –í—Å–µ–≥–æ —É–Ω–∏–∫–∞–ª—å–Ω—ã—Ö –∫–ª–∞—Å—Ç–µ—Ä–æ–≤ –≤ —ç—Ç–æ–º –≥–æ–¥—É: 9
–ì–æ–¥ 2007: –æ–±—Ä–∞–±–æ—Ç–∞–Ω–æ. –í—Å–µ–≥–æ —É–Ω–∏–∫–∞–ª—å–Ω—ã—Ö –∫–ª–∞—Å—Ç–µ—Ä–æ–≤ –≤ —ç—Ç–æ–º –≥–æ–¥—É: 10
–ì–æ–¥ 2008: –æ–±—Ä–∞–±–æ—Ç–∞–Ω–æ. –í—Å–µ–≥–æ —É–Ω–∏–∫–∞–ª—å–Ω—ã—Ö –∫–ª–∞—Å—Ç–µ—Ä–æ–≤ –≤ —ç—Ç–æ–º –≥–æ–¥—É: 10
–ì–æ–¥ 2009: –æ–±—Ä–∞–±–æ—Ç–∞–Ω–æ. –í—Å–µ–≥–æ —É–Ω–∏–∫–∞–ª—å–Ω—ã—Ö –∫–ª–∞—Å—Ç–µ—Ä–æ–≤ –≤ —ç—Ç–æ–º –≥–æ–¥—É: 18
Saved partition detail to data/partitions/Jac005_Full_Leiden_mod/partition_Jac005_Full_Leiden_mod.csv
Saved partition stats to  data/partitions/Jac005_Full_Leiden_mod/partition_Jac005_Full_Leiden_mod_stats.csv


In [None]:
enricher = PartitionEnricher(metadata_path=ANIME_CSV_PATH, key_col="anime_id", set_cols=["genres", "studio"])
anime_meta_dict = enricher.get_metadata_dict()

partition_enriched = enricher.enrich_partition(f"{CURRENT_EXP_PARTITION_DIR}/{base_partition_filename}")

In [None]:
print(partition_enriched.head())

   year  anime_id  cluster_id  Unnamed: 0  \
0  2006       457           0         387   
1  2006       558           0         469   
2  2006       565           0         475   
3  2006       846           0         650   
4  2006       853           0         656   

                                           image_url  score  scored_by  \
0  https://myanimelist.cdn-dena.com/images/anime/...   8.74     147314   
1  https://myanimelist.cdn-dena.com/images/anime/...   8.36      26374   
2  https://myanimelist.cdn-dena.com/images/anime/...   7.42      28053   
3  https://myanimelist.cdn-dena.com/images/anime/...   8.11      65095   
4  https://myanimelist.cdn-dena.com/images/anime/...   8.34     335137   

     rank                               opening_theme  \
0    38.0       ['"The Sore Feet Song" by Ally Kerr']   
1   188.0  ['"Saraba Aoki Omakage" by Road of Major']   
2  1951.0                                          []   
3   426.0  ['"Sentimental Generation" by Ami Tokito']   

In [None]:
evaluator = ClusterEvaluation(EXP_NAME, aligned_partitions, anime_info=anime_meta_dict,modularity_dict=modularity_by_year)
evaluation_df = evaluator.get_trajectory_df()

base_evaluation_filename = f"evaluation_{EXP_NAME}.csv"
base_evaluation_path = os.path.join(CURRENT_EXP_REPORT_DIR, base_evaluation_filename)
evaluation_df.to_csv(base_evaluation_path, index=False, encoding='utf-8')
evaluation_df

Calculating trajectory for Jac005_Full_Leiden_mod...


Unnamed: 0_level_0,Method,Gini_Spatial,Entropy_Info,Stability_AMI,Purity_Source,Purity_Genre,Modularity,N_Clusters
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2006,Jac005_Full_Leiden_mod,0.361263,1.959904,,0.525956,0.57377,0.357527,9
2007,Jac005_Full_Leiden_mod,0.620631,1.582187,0.11778,0.460993,0.496999,0.341613,10
2008,Jac005_Full_Leiden_mod,0.505144,1.835225,0.473037,0.467277,0.553229,0.325033,10
2009,Jac005_Full_Leiden_mod,0.698979,1.950639,0.646798,0.471756,0.56374,0.298104,18


In [None]:
viz = ClusterVisualizer(partition_enriched)

# A. Sankey
viz.plot_sankey(
    filename=os.path.join(CURRENT_EXP_PLOTS_DIR, "sankey.html"),
    key_col="anime_id",
    name_col="title",
    feature_cols=["genres", "studio"],
    metric_col="score",
    sort_col="members",
    age_col="year_start",
    min_link_size=5,
    title="Anime Clusters Over Time"
    )

# B. Streamgraph
viz.plot_streamgraph(
    filename=os.path.join(CURRENT_EXP_PLOTS_DIR, "stream.html"),
    feature_col="genres", 
    title="Rise and Fall of Anime Genres"
)

# C. Bubbles (Optional)
viz.plot_bubbles(
    filename=os.path.join(CURRENT_EXP_PLOTS_DIR, "bubbles.html"),
    x_col="score",      # –ß–µ–º –ø—Ä–∞–≤–µ–µ, —Ç–µ–º –≤—ã—à–µ –æ—Ü–µ–Ω–∫–∞
    y_col="members",    # –ß–µ–º –≤—ã—à–µ, —Ç–µ–º –ø–æ–ø—É–ª—è—Ä–Ω–µ–µ
    size_col="count",   # –†–∞–∑–º–µ—Ä = –∫–æ–ª-–≤–æ —Ç–∞–π—Ç–ª–æ–≤
    title="Anime Landscape: Quality vs Popularity"
)

# D. –°–æ–ª–Ω—Ü–µ (–ò–µ—Ä–∞—Ä—Ö–∏—è)
viz.plot_sunburst(
    filename=os.path.join(CURRENT_EXP_PLOTS_DIR, "sunburst.html"),
    feature_col="genres", # –ì–æ–¥ -> –ö–ª–∞—Å—Ç–µ—Ä -> –°—Ç—É–¥–∏—è
    title="Anime Source Hierarchy"
)

Generating Sankey diagram (Anime Clusters Over Time)...
Plot saved to data/plots/Jac005_Full_Leiden_mod/sankey.html
Generating Streamgraph (Rise and Fall of Anime Genres)...
Saved: data/plots/Jac005_Full_Leiden_mod/stream.html
Generating Bubble Chart (Anime Landscape: Quality vs Popularity)...
Saved: data/plots/Jac005_Full_Leiden_mod/bubbles.html
Generating Sunburst (Anime Source Hierarchy)...
Saved: data/plots/Jac005_Full_Leiden_mod/sunburst.html
