# Testing Data Loading

In [23]:
# Imports
from sklearn.model_selection import train_test_split
import networkx as nx
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from grakel.kernels import WeisfeilerLehman
from grakel.graph import Graph # To convert NetworkX to GraKeL Graph objects
import traceback
import os


In [24]:
LOCAL_DATA_PATH = 'Datasets/TUD/MUTAG' # Make sure this points to your extracted MUTAG folder
DATASET_NAME = 'MUTAG'

In [25]:
def load_mutag_into_networkx(data_dir, dataset_name):
    print(f"Loading {dataset_name} into NetworkX from {data_dir}...")
    adj_file = os.path.join(data_dir, f"{dataset_name}_A.txt")
    graph_indicator_file = os.path.join(data_dir, f"{dataset_name}_graph_indicator.txt")
    node_labels_file = os.path.join(data_dir, f"{dataset_name}_node_labels.txt")
    graph_labels_file = os.path.join(data_dir, f"{dataset_name}_graph_labels.txt")

    with open(adj_file, 'r') as f:
        edges_raw = [list(map(int, line.strip().split(','))) for line in f]
    edges = [(u - 1, v - 1) for u, v in edges_raw] # 0-indexed

    with open(graph_indicator_file, 'r') as f:
        node_to_graph_map = np.array([int(line.strip()) for line in f]) - 1

    with open(node_labels_file, 'r') as f:
        node_labels_raw = [int(line.strip()) for line in f]
    node_labels = {i: label for i, label in enumerate(node_labels_raw)}

    with open(graph_labels_file, 'r') as f:
        graph_labels_raw = [int(line.strip()) for line in f]
    y = np.array([(1 if label == 1 else 0) for label in graph_labels_raw])

    num_graphs = np.max(node_to_graph_map) + 1
    nx_graphs = []

    for i in range(num_graphs):
        G = nx.Graph()
        nodes_in_graph_i = np.where(node_to_graph_map == i)[0]
        
        if len(nodes_in_graph_i) == 0: # Handle empty graphs
            nx_graphs.append(G)
            continue

        # Add nodes with their labels
        for node_idx_global in nodes_in_graph_i:
            G.add_node(node_idx_global, label=node_labels[node_idx_global])

        # Add edges within the current graph
        for u, v in edges:
            if u in nodes_in_graph_i and v in nodes_in_graph_i:
                G.add_edge(u, v)
        
        # Re-index nodes locally for simpler computations if needed, 
        # but NetworkX can often work with original node IDs
        # For simplicity, we keep original node IDs here.

        nx_graphs.append(G)
    return nx_graphs, y


In [26]:
# --- Feature Extraction Function ---
def extract_simple_graph_features(graph_list):
    features = []
    for G in graph_list:
        num_nodes = G.number_of_nodes()
        num_edges = G.number_of_edges()
        
        # Avoid division by zero if graph has no nodes or edges
        avg_degree = np.mean([d for n, d in G.degree()]) if num_nodes > 0 else 0
        
        # Add more features as desired
        # e.g., density = nx.density(G) if num_nodes > 1 else 0
        
        features.append([num_nodes, num_edges, avg_degree]) # This is our feature vector for one graph
    return np.array(features)


In [27]:
def convert_nx_to_grakel_graph(nx_graph):
    """
    Converts a NetworkX Graph object to a grakel.Graph object
    using the constructor signature:
    Graph(initialization_object, node_labels, edge_labels, ...)
    """
    # Extract edges as a list of tuples (required for initialization_object)
    edges_list = list(nx_graph.edges())
    
    # Extract node labels as a dictionary {node_id: label}
    # Ensure all nodes in the graph are included, even if they have no edges
    # Use .get() with a default value if 'label' attribute might be missing for some nodes
    node_labels_dict = {node: nx_graph.nodes[node].get('label') for node in nx_graph.nodes()}
    
    # Construct the GraKeL Graph object using the provided signature
    # Pass edges_list as the initialization_object
    # Pass node_labels_dict using the 'node_labels' keyword argument for clarity and safety.
    return Graph(initialization_object=edges_list, node_labels=node_labels_dict)
    

In [28]:
try:
        # Load data into NetworkX graphs using your existing function
        nx_G, y = load_mutag_into_networkx(LOCAL_DATA_PATH, DATASET_NAME)
        print(f"Loaded {len(nx_G)} NetworkX graphs.")

        # Split NetworkX graphs and labels
        G_train_nx, G_test_nx, y_train, y_test = train_test_split(nx_G, y, test_size=0.2, random_state=42)
        print(f"Split into {len(G_train_nx)} training and {len(G_test_nx)} test graphs.")

        # --- Convert NetworkX graphs to GraKeL Graph objects ---
        print("\nConverting NetworkX graphs to GraKeL Graph objects...")
        G_train_gk = [convert_nx_to_grakel_graph(g) for g in G_train_nx]
        G_test_gk = [convert_nx_to_grakel_graph(g) for g in G_test_nx]
        print("Conversion complete.")
        
        # --- Initialize WL-Kernel and Compute Kernel Matrices ---
        print("\nInitializing WeisfeilerLehman Kernel and computing kernel matrices...")
        wl_kernel = WeisfeilerLehman(n_iter=5, normalize=True) # You can tune n_iter and normalize

        # K_train will be a (n_train_samples, n_train_samples) matrix
        K_train = wl_kernel.fit_transform(G_train_gk)
        print(f"Shape of K_train: {K_train.shape}")

        # K_test will be a (n_test_samples, n_train_samples) matrix
        K_test = wl_kernel.transform(G_test_gk)
        print(f"Shape of K_test: {K_test.shape}")

        # --- Train and Test SVM Classifier ---
        print("\nTraining SVM Classifier with precomputed kernel...")
        svm_classifier = SVC(kernel="precomputed", C=1.0, random_state=42) # C is the regularization parameter
        svm_classifier.fit(K_train, y_train)

        print("Making predictions on the test set...")
        y_pred = svm_classifier.predict(K_test)

        # --- Evaluate Performance ---
        accuracy = accuracy_score(y_test, y_pred)
        print(f"\nAccuracy of SVM with WL-Kernel: {accuracy:.4f}")

except FileNotFoundError as e:
        print(f"\nFATAL ERROR: {e}")
        print("Please ensure MUTAG.zip is downloaded and extracted into the specified folder.")
        print(f"Expected folder: '{os.path.abspath(LOCAL_DATA_PATH)}'")
        print("Download link for MUTAG.zip: https://ls11-www.cs.tu-dortmund.de/people/morris/graphkerneldatasets/MUTAG.zip")
except Exception as e:
        print(f"\nAn unexpected error occurred during processing: {e}")
        traceback.print_exc() # Print full traceback for unexpected errors

Loading MUTAG into NetworkX from Datasets/TUD/MUTAG...
Loaded 188 NetworkX graphs.
Split into 150 training and 38 test graphs.

Converting NetworkX graphs to GraKeL Graph objects...
Conversion complete.

Initializing WeisfeilerLehman Kernel and computing kernel matrices...
Shape of K_train: (150, 150)
Shape of K_test: (38, 150)

Training SVM Classifier with precomputed kernel...
Making predictions on the test set...

Accuracy of SVM with WL-Kernel: 0.8158


ValueError: purpose is either "adjacency" of "dictionary"

NameError: name 'X_train' is not defined