In [1]:
from Calculators.GED_Calculator import exact_GED_Calculator, build_exact_ged_calculator
from Dataset import Dataset


In [2]:
from Experiment import experiment
import sys
import os
import traceback

from Models.SVC.GED.hybrid_prototype_selector import HybridPrototype_GED_SVC
sys.path.append(os.getcwd())
from Models.SVC.GED.RandomWalk_edit import Random_Walk_edit_accelerated, Random_walk_edit_SVC
from Models.SVC.WeisfeilerLehman_SVC import WeisfeilerLehman_SVC
from Models.Graph_Classifier import GraphClassifier
from Models.SVC.Baseline_SVC import VertexHistogram_SVC,EdgeHistogram_SVC, CombinedHistogram_SVC, NX_Histogram_SVC
from Models.Blind_Classifier import Blind_Classifier
from Models.Random_Classifer import Random_Classifier
from Models.KNN_Classifer import KNN
from Models.SVC.GED.Trivial_GED_SVC import Trivial_GED_SVC
from Custom_Kernels.GEDLIB_kernel import GEDKernel
from Calculators.Dummy_Calculator import Dummy_Calculator
from Calculators.Base_Calculator import Base_Calculator
from Calculators.GEDLIB_Caclulator import GEDLIB_Calculator
from Models.SVC.GED.GED_Diffu_SVC import DIFFUSION_GED_SVC
from Models.SVC.GED.Zero_GED_SVC import ZERO_GED_SVC
from Models.SVC.GED.simiple_prototype_GED_SVC import Simple_Prototype_GED_SVC
from Models.SVC.Base_GED_SVC import Base_GED_SVC, set_global_ged_calculator
from Models.KNN.GEDLIB_KNN import GED_KNN, set_global_ged_calculator_KNN
from Models.KNN.feature_KNN import Feature_KNN
import pandas as pd
from io_Manager import IO_Manager


In [3]:
def convert_Dataset_to_exact_GED_format(dataset:Dataset):
    """Dump every graph of ``dataset`` into its own text file.

    Files are written to ``Datasets/ged/<dataset name>/g_<index>.txt`` (relative
    to the current working directory). Each file contains, in this order:

    * one ``t <name> <number of nodes>`` line, where ``<name>`` is the graph's
      ``"Name"`` attribute or ``#<index>`` when that attribute is absent,
    * one ``v <id> <label>`` line per node,
    * one ``e <id1> <id2> <label>`` line per edge.

    Node ids are renumbered 0..n-1 following the order of ``G.nodes()``.
    A node or edge without the dataset's label attribute gets the label ``0``.
    A failure to write a file is reported with ``print`` and the loop goes on
    with the next graph; nothing is returned.
    """
    out_dir = f"Datasets/ged/{dataset.get_name()}/"
    os.makedirs(os.path.dirname(out_dir) or '.', exist_ok=True)

    for graph_index, G in enumerate(dataset.get_graphs()):
        target = os.path.join(out_dir, f"g_{graph_index}.txt")

        # Original node id -> position in G.nodes(), used as the new 0-based id.
        ordered_nodes = list(G.nodes())
        new_id_of = dict(zip(ordered_nodes, range(len(ordered_nodes))))

        node_count = G.number_of_nodes()
        graph_name = G.graph.get("Name", f"#{graph_index}")
        header = f"t {graph_name} {node_count}"

        vertex_lines = [
            f"v {new_id} {G.nodes[old_id].get(dataset.Node_label_name, 0)}"
            for old_id, new_id in new_id_of.items()
        ]
        edge_lines = [
            f"e {new_id_of[src]} {new_id_of[dst]} {attrs.get(dataset.Edge_label_name, 0)}"
            for src, dst, attrs in G.edges(data=True)
        ]

        # Every line, including the last one, is terminated by a newline.
        payload = '\n'.join([header, *vertex_lines, *edge_lines]) + '\n'
        try:
            with open(target, 'w') as handle:
                handle.write(payload)
        except IOError as err:
            print(f"\nError writing to file '{target}': {err}")

In [4]:


# --- Experiment configuration ---
# Name of the dataset to load; also passed to get_Dataset at the end of this cell.
DATASET_NAME = "MUTAG"
# NOTE(review): not referenced anywhere in the visible cells - confirm it is still used.
PRELOAD_CALCULATORS = True
# Forwarded to Dataset(...) as use_node_labels / use_edge_labels.
USE_NODE_LABELS = "labels"
USE_EDGE_LABELS = "labels"
# Forwarded to the classifier as ged_bound.
# Other values noted by the author: "UpperBound-Distance", "Mean-Distance", "LowerBound-Distance"
GED_BOUND = "IPFP"
def get_classifier(ged_calculator):
    """Return the classifier under test, configured from ``ged_calculator``.

    The calculator is first handed to ``set_global_ged_calculator_KNN``; the
    returned model is a ``Trivial_GED_SVC`` that refers to the calculator by
    its identifier name and uses the module-level ``GED_BOUND``.

    The commented-out ``return`` statements below are alternative models kept
    for quick switching.
    """
    set_global_ged_calculator_KNN(ged_calculator)

    # return ZERO_GED_SVC(calculator_id=ged_calculator.get_identifier_name(), ged_bound=GED_BOUND, C=1.0,kernel_type="precomputed", selection_split="classwise",prototype_size=7, aggregation_method="sum",dataset_name=DATASET.name,selection_method="k-CPS")
    # return Random_walk_edit_SVC(ged_calculator=ged_calculator, ged_bound=GED_BOUND, decay_lambda=0.1, max_walk_length=-1, C=1.0,kernel_type="precomputed", class_weight='balanced')
    # random_walk_calculator = RandomWalkCalculator(ged_calculator=ged_calculator, llambda_samples=[0.005,0.01,0.03,0.05,0.1,0.2,0.45,0.89], dataset=DATASET,ged_method=GED_BOUND)
    # random_walk_calculator = build_Randomwalk_GED_calculator(ged_calculator=ged_calculator)
    # random_walk_calculator_id = random_walk_calculator.get_identifier_name()

    # return Random_Walk_edit_accelerated(calculator_id=ged_calculator.get_identifier_name(), ged_bound=GED_BOUND, decay_lambda=0.1, max_walk_length=-1, C=1.0,kernel_type="precomputed", class_weight='balanced',random_walk_calculator_id=random_walk_calculator_id)
    # return  Simple_Prototype_GED_SVC(calculator_id=ged_calculator.get_identifier_name(), ged_bound=GED_BOUND, C=1.0,kernel_type="poly", class_weight='balanced',prototype_size=8, selection_method="k-CPS", selection_split="all",dataset_name=DATASET_NAME)
    # return DIFFUSION_GED_SVC(C=1.0, llambda=1.0, calculator_id=ged_calculator.get_identifier_name(), ged_bound=GED_BOUND, diffusion_function="exp_diff_kernel", class_weight='balanced', t_iterations=5)
    # return CombinedHistogram_SVC(kernel_type='precomputed')
    # return Feature_KNN(vector_feature_list=["VertexHistogram","density","Prototype-Distance"], dataset_name=DATASET.name, prototype_size=5, selection_split="all", selection_method="TPS", metric="minkowski", calculator_id=ged_calculator.get_identifier_name(), ged_bound=GED_BOUND, n_neighbors=5, weights='uniform', algorithm='auto')

    # Currently active model.
    svc_options = dict(
        calculator_id=ged_calculator.get_identifier_name(),
        ged_bound=GED_BOUND,
        C=1.0,
        kernel_type="precomputed",
        class_weight='balanced',
        similarity_function='k4',
    )
    return Trivial_GED_SVC(**svc_options)
def get_Dataset(dataset_name: str, ged_calculator=None, n_jobs: int = 8):
    """Load a dataset, export its graphs as text files and build the exact-GED calculator.

    Steps: construct a ``Dataset`` (source ``"TUD"``, domain ``"Bioinformatics"``)
    without loading, call ``load()``, write the graphs with
    ``convert_Dataset_to_exact_GED_format`` and finally call
    ``build_exact_ged_calculator`` on the loaded graphs.

    Args:
        dataset_name: Name of the dataset. Used both for the ``Dataset`` and for
            the calculator (previously the calculator always received the
            module-level ``DATASET_NAME``, regardless of this argument).
        ged_calculator: Ignored. Kept so existing two-argument calls keep
            working; the calculator is always built inside this function and
            the ``Dataset`` is created with ``ged_calculator=None``.
        n_jobs: Forwarded to ``build_exact_ged_calculator``. Defaults to 8, the
            value that used to be hard-coded.

    Returns:
        Tuple ``(dataset, calculator)``.
    """
    # Lower-case local name so the module-level DATASET is not shadowed.
    dataset = Dataset(
        name=dataset_name,
        source="TUD",
        domain="Bioinformatics",
        ged_calculator=None,
        use_node_labels=USE_NODE_LABELS,
        use_edge_labels=USE_EDGE_LABELS,
        load_now=False,
    )
    dataset.load()
    # The graph files are written before the calculator is built, as in the original order.
    convert_Dataset_to_exact_GED_format(dataset)
    exact_calculator = build_exact_ged_calculator(
        dataset.get_graphs(),
        dataset_name=dataset_name,  # use the argument, not the global DATASET_NAME
        n_jobs=n_jobs,
    )
    return dataset, exact_calculator
# Load the configured dataset and build its calculator, then hand the calculator
# to set_global_ged_calculator (imported from Models.SVC.Base_GED_SVC).
DATASET, ged_calculator = get_Dataset(dataset_name=DATASET_NAME, ged_calculator=None)
set_global_ged_calculator(ged_calculator)

Loading MUTAG into NetworkX from Datasets/TUD/MUTAG...
Loaded 7442 edges.
Loaded 3371 node-to-graph mappings.
Loaded 188 graph labels.
Loaded node labels for 3371 nodes.
Loaded edge labels for 7442 edges.


Converting graphs to NetworkX format: 100%|██████████| 188/188 [00:00<00:00, 1890957.20it/s]

Converted 188 graphs to NetworkX format.
Now setting up the Calculator
Starting calculation of exact GED distance matrix with 8 parallel jobs...





Timeout expired when computing GED between graph 0 and graph 62.
Timeout expired when computing GED between graph 3 and graph 22.
Timeout expired when computing GED between graph 3 and graph 5.
Timeout expired when computing GED between graph 3 and graph 26.
Timeout expired when computing GED between graph 3 and graph 31.
Timeout expired when computing GED between graph 3 and graph 46.
Timeout expired when computing GED between graph 3 and graph 45.
Timeout expired when computing GED between graph 3 and graph 58.
Timeout expired when computing GED between graph 3 and graph 32.
Timeout expired when computing GED between graph 3 and graph 62.
Timeout expired when computing GED between graph 3 and graph 57.
Timeout expired when computing GED between graph 3 and graph 29.
Timeout expired when computing GED between graph 3 and graph 67.
Timeout expired when computing GED between graph 3 and graph 84.
Timeout expired when computing GED between graph 3 and graph 90.
Timeout expired when compu