In [1]:
import torch 
import random

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [2]:
import copy
import pickle
import os
from torch_geometric.data import InMemoryDataset
import numpy as np
import torch.nn as nn
import networkx as nx
from torch.nn import Linear, Sequential, BatchNorm1d, ReLU, Dropout
from torch_geometric.transforms import VirtualNode
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GINConv, global_add_pool, PNAConv
from torch_geometric.utils import from_networkx, to_networkx, to_undirected, to_scipy_sparse_matrix, degree
from torch_geometric.data.data import Data
from sklearn.model_selection import train_test_split

# Prepare Dataset

In [3]:
import numpy as np

file_path = "../dataset/BREC/raw/brec_nonGNN.npy"
data = np.load(file_path, allow_pickle=True)

In [4]:
part_dict = {
    "Basic": (0, 60),
    "Regular": (60, 160),
    "Extension": (160, 260),
    "CFI": (260, 360),
    "4-Vertex_Condition": (360, 380),
    "Distance_Regular": (380, 400),
}

In [12]:
# Dictionary to store extracted graphs for each part
graphs_parts = {}

# Loop through the part_dict and extract the corresponding graphs
for part_name, (start_idx, end_idx) in part_dict.items():
    graphs_parts[part_name] = []
    
    # Iterate through the graph pairs and add them to the corresponding part
    for graph_pair in data[start_idx:end_idx]:
        for graph in graph_pair:  # Extract individual graphs from the pair
            graphs_parts[part_name].append(graph)

In [13]:
G_Basics = graphs_parts["Basic"]
G_Regular = graphs_parts["Regular"]
G_Extension = graphs_parts["Extension"]
G_CFI = graphs_parts["CFI"]
G_4Vertex = graphs_parts["4-Vertex_Condition"]
G_Distance_Regular = graphs_parts["Distance_Regular"]

# Transformations

In [14]:
# VN
transform = VirtualNode()
def apply_vn(dgl_graphs):
  vn_EXP_dgl = []
  for graph in dgl_graphs:
    graph_pyg = from_dgl(graph)
    graph_pyg_copy = copy.deepcopy(graph_pyg)
    graph_vn = transform(graph_pyg_copy)
    graph_vn_dgl = to_dgl(graph_vn)
    vn_EXP_dgl.append(graph_vn_dgl)

  return vn_EXP_dgl

# Centrality
def add_centrality_to_node_features(dgl_graph, centrality_measure='degree'):
    # Convert DGL data to NetworkX graph
    G = dgl_graph.to_networkx()
    G = nx.Graph(G)

    # Compute the centrality measure
    if centrality_measure == 'degree':
        centrality = nx.degree_centrality(G)
    elif centrality_measure == 'closeness':
        centrality = nx.closeness_centrality(G)
    elif centrality_measure == 'betweenness':
        centrality = nx.betweenness_centrality(G)
    elif centrality_measure == 'eigenvector':
        if not nx.is_connected(G):
        # Handle connected components separately
            centrality = {}
            for component in nx.connected_components(G):
                subgraph = G.subgraph(component)
                sub_centrality = nx.eigenvector_centrality(subgraph, max_iter=500, tol=1e-4)
                centrality.update(sub_centrality)
        else:
            centrality = nx.eigenvector_centrality(G, max_iter=500, tol=1e-4)
    else:
        raise ValueError(f'Unknown centrality measure: {centrality_measure}')

    # Convert centrality to tensor and add as node feature
    centrality_values = np.array([centrality[node] for node in range(dgl_graph.number_of_nodes())], dtype=np.float32).reshape(-1, 1)
    centrality_values = torch.round(torch.tensor(centrality_values) * 10000) / 10000
    # Concatenate the centrality with existing node features
    if 'x' in dgl_graph.ndata:
        dgl_graph.ndata['x'] = torch.cat([dgl_graph.ndata['x'], centrality_values], dim=1)
    else:
        dgl_graph.ndata['x'] = centrality_values
    return dgl_graph

# Degree
def degree_dataset(dataset):
    # Compute centrality and add it as an additional feature
    Graph_data_degree = []
    for data in dataset:
        data_copy = copy.deepcopy(data)  # Create a deep copy of the graph
        data_copy = add_centrality_to_node_features(data_copy, centrality_measure='degree')
        Graph_data_degree.append(data_copy)
    return Graph_data_degree

# Closeness
def closeness_dataset(dataset):
    # Compute centrality and add it as an additional feature
    Graph_data_clo = []
    for data in dataset:
        data_copy = copy.deepcopy(data)
        data_copy = add_centrality_to_node_features(data_copy, centrality_measure='closeness')
        Graph_data_clo.append(data_copy)
    return Graph_data_clo

#Betweenness
def betweenness_dataset(dataset):
    # Compute centrality and add it as an additional feature
    Graph_data_bet = []
    for data in dataset:
        data_copy = copy.deepcopy(data)
        data_copy = add_centrality_to_node_features(data_copy, centrality_measure='betweenness')
        Graph_data_bet.append(data_copy)
    return Graph_data_bet

# Eigenvector
def eigenvector_dataset(dataset):
    # Compute centrality and add it as an additional feature
    Graph_data_eig = []
    for data in dataset:
        data_copy = copy.deepcopy(data)
        data_copy = add_centrality_to_node_features(data_copy, centrality_measure='eigenvector')
        Graph_data_eig.append(data_copy)
    return Graph_data_eig

# DE
def add_distance_encoding(dgl_graph):
    # Compute the shortest distance matrix using dgl.shortest_dist
    dist = dgl.shortest_dist(dgl_graph).float()  # Convert to float to handle inf

    # Replace -1 with inf (to handle unreachable nodes similar to NetworkX's behavior)
    dist[dist == -1] = float('inf')

    # Calculate the average shortest distance for each node
    finite_distances = torch.where(dist == float('inf'), torch.tensor(float('nan')), dist)
    average_distance = torch.nanmean(finite_distances, dim=1).view(-1, 1)  # Use nanmean to ignore infinities

    # Add the average distance to the existing node features in the DGL graph
    if 'x' in dgl_graph.ndata:
        dgl_graph.ndata['x'] = torch.cat([dgl_graph.ndata['x'], average_distance], dim=1)
    else:
        dgl_graph.ndata['x'] = average_distance

    return dgl_graph

def distance_encoding(dataset):
    Graph_data_DE = []
    for data in dataset:
        data_copy = copy.deepcopy(data)
        data_copy = add_distance_encoding(data_copy)
        Graph_data_DE.append(data_copy)
    return Graph_data_DE

# GE
from torch_geometric.transforms import AddLaplacianEigenvectorPE

def canonicalize_eigenvectors(eigenvectors):
    """Canonicalize eigenvectors by fixing their signs for consistency."""
    for i in range(eigenvectors.shape[1]):
        if eigenvectors[0, i] < 0:  # Flip sign if the first element is negative
            eigenvectors[:, i] = -eigenvectors[:, i]
    return eigenvectors

def add_canonicalized_laplacian_pe(dgl_graph, k=5):
    """
    Add canonicalized Laplacian positional encoding to a DGL graph.

    Args:
        dgl_graph: Input DGL graph.
        k: Number of Laplacian eigenvectors to compute.

    Returns:
        dgl_graph: DGL graph with Laplacian PE appended to node features.
    """
    # Step 1: Convert DGL graph to adjacency matrix
    G = dgl_graph.to_networkx()
    G = nx.Graph(G)
    adj = nx.to_numpy_array(G)

    # Step 2: Compute Laplacian matrix
    degree_matrix = np.diag(np.sum(adj, axis=1))
    laplacian = degree_matrix - adj

    # Step 3: Compute eigenvalues and eigenvectors
    eigenvalues, eigenvectors = np.linalg.eigh(laplacian)

    # Step 4: Select the smallest k eigenvectors (sorted by eigenvalues)
    idx = np.argsort(eigenvalues)[:k]
    eigenvectors = eigenvectors[:, idx]

    # Step 5: Canonicalize eigenvectors
    eigenvectors = canonicalize_eigenvectors(torch.tensor(eigenvectors, dtype=torch.float))

    # Step 6: Add the eigenvectors as new node features
    if 'x' in dgl_graph.ndata:
        dgl_graph.ndata['x'] = torch.cat([dgl_graph.ndata['x'], eigenvectors], dim=1)
    else:
        dgl_graph.ndata['x'] = eigenvectors

    return dgl_graph

def Graph_encoding(dataset, k=3):
    """
    Apply canonicalized Laplacian positional encoding to a list of DGL graphs.

    Args:
        dgl_graphs: List of DGL graphs.
        k: Number of Laplacian eigenvectors to compute.

    Returns:
        GE_EXP_dgl: List of DGL graphs with Laplacian PE added.
    """
    GE_EXP_dgl = []
    for data in dataset:
        data_copy = copy.deepcopy(data)
        graph_pe = add_canonicalized_laplacian_pe(data_copy, k=k)
        GE_EXP_dgl.append(graph_pe)
    return GE_EXP_dgl

# Sub
def extract_local_subgraph_features(dgl_graph, radius=2):
    # Convert PyG data to NetworkX graph
    G = dgl_graph.to_networkx()
    G = nx.Graph(G)

    # Initialize a list to store subgraph features for each node
    subgraph_sizes = []
    subgraph_degrees = []

    for node in G.nodes():
        # Extract the ego graph (subgraph) around the node
        subgraph = nx.ego_graph(G, node, radius=radius)

        # Example feature 1: Size of the subgraph (number of nodes)
        subgraph_size = subgraph.number_of_nodes()
        subgraph_sizes.append(subgraph_size)

        # Example feature 2: Average degree of the subgraph
        subgraph_degree = np.mean([d for n, d in subgraph.degree()])
        subgraph_degrees.append(subgraph_degree)

    # Convert the features to tensors and add them as node features
    subgraph_sizes_tensor = torch.tensor(subgraph_sizes, dtype=torch.float).view(-1, 1)
    subgraph_degrees_tensor = torch.tensor(subgraph_degrees, dtype=torch.float).view(-1, 1)

    if 'x' in dgl_graph.ndata:
        dgl_graph.ndata['x'] = torch.cat([dgl_graph.ndata['x'], subgraph_sizes_tensor, subgraph_degrees_tensor], dim=1)
    else:
        dgl_graph.ndata['x'] = torch.cat([subgraph_sizes_tensor, subgraph_degrees_tensor], dim=1)

    return dgl_graph

def subgraph_dataset(dataset, radius=3):
    # Compute centrality and add it as an additional feature
    Graph_data_sub = []
    for data in dataset:
        data_copy = copy.deepcopy(data)
        data_copy = extract_local_subgraph_features(data_copy, radius=radius)
        Graph_data_sub.append(data_copy)
    return Graph_data_sub

# ExN
def add_extra_node_on_each_edge(dgl_graph):
    # Collect new edges (source, destination) and the new node features
    new_edges_src = []
    new_edges_dst = []
    new_node_features = []

    # Original number of nodes
    num_original_nodes = dgl_graph.num_nodes()

    # Use a set to track edges we have already processed (to avoid duplicates)
    processed_edges = set()

    # Iterate over all edges
    for i in range(dgl_graph.num_edges()):
        u, v = dgl_graph.edges()[0][i].item(), dgl_graph.edges()[1][i].item()

        # Avoid processing reverse edges (v, u) if (u, v) is already processed
        if (u, v) in processed_edges or (v, u) in processed_edges:
            continue

        # Mark the edge as processed
        processed_edges.add((u, v))
        processed_edges.add((v, u))  # In case there is a reverse edge

        # Add a new node
        new_node_id = num_original_nodes + len(new_node_features)
        mean_feature = (dgl_graph.ndata['x'][u] + dgl_graph.ndata['x'][v]) / 2
        new_node_features.append(mean_feature)

        # Add new edges connecting the new node to the original nodes
        new_edges_src.append(u)
        new_edges_dst.append(new_node_id)

        new_edges_src.append(new_node_id)
        new_edges_dst.append(v)

    # Add new nodes to the DGL graph
    dgl_graph.add_nodes(len(new_node_features), {'x': torch.stack(new_node_features)})

    # Remove the original edges
    dgl_graph.remove_edges(torch.arange(dgl_graph.num_edges()))

    # Add new edges to the DGL graph
    dgl_graph.add_edges(new_edges_src, new_edges_dst)

    return dgl_graph

def extra_node_dataset(dataset):
    Graph_data_exN = []
    for data in dataset:
        data_copy = copy.deepcopy(data)
        dgl_graph = add_extra_node_on_each_edge(data_copy)
        Graph_data_exN.append(dgl_graph)
    return Graph_data_exN

def count_3_star(G):
    """Count 3-star graphlets for each node."""
    # A 3-star is a node with at least three neighbors
    star_counts = {}
    for node in G.nodes():
        neighbors = list(G.neighbors(node))
        degree = len(neighbors)
        # Count the number of 3-combinations of neighbors
        star_counts[node] = max(0, (degree * (degree - 1) * (degree - 2)) // 6)
    return star_counts

def count_tailed_triangle(G):
    """Count tailed triangle graphlets for each node."""
    tail_counts = {node: 0 for node in G.nodes()}
    for node in G.nodes():
        neighbors = list(G.neighbors(node))
        for neighbor in neighbors:
            # For each pair of neighbors, check if there's a triangle
            for other in neighbors:
                if neighbor != other and G.has_edge(neighbor, other):
                    # Found a triangle, check for a tail
                    for extra in G.neighbors(node):
                        if extra not in {neighbor, other}:
                            tail_counts[node] += 1
    return tail_counts

def count_4_cycle(G):
    """Count 4-cycle graphlets for each node."""
    cycle_counts = {node: 0 for node in G.nodes()}
    for node in G.nodes():
        neighbors = list(G.neighbors(node))
        for i, neighbor1 in enumerate(neighbors):
            for neighbor2 in neighbors[i + 1:]:
                # Check for shared neighbors forming a 4-cycle
                for shared_neighbor in G.neighbors(neighbor1):
                    if shared_neighbor in G.neighbors(neighbor2):
                        cycle_counts[node] += 1
    return cycle_counts

def graphlet_based_encoding(dgl_graph):
    """
    Add graphlet-based features (3-star, triangle, tailed triangle, 4-cycle) to node features.

    Args:
        dgl_graph: Input DGL graph.

    Returns:
        dgl_graph: DGL graph with graphlet-based features added.
    """
    # Convert DGL graph to NetworkX
    G = dgl_graph.to_networkx()
    G = nx.Graph(G)

    # Count graphlets
    triangle_counts = nx.triangles(G)  # Triangle counts
    star_counts = count_3_star(G)  # 3-star graphlets
    tail_counts = count_tailed_triangle(G)  # Tailed triangles
    cycle_counts = count_4_cycle(G)  # 4-cycles

    # Combine features into tensors
    num_nodes = dgl_graph.num_nodes()
    triangle_tensor = torch.tensor([triangle_counts[node] for node in range(num_nodes)], dtype=torch.float).view(-1, 1)
    star_tensor = torch.tensor([star_counts[node] for node in range(num_nodes)], dtype=torch.float).view(-1, 1)
    tail_tensor = torch.tensor([tail_counts[node] for node in range(num_nodes)], dtype=torch.float).view(-1, 1)
    cycle_tensor = torch.tensor([cycle_counts[node] for node in range(num_nodes)], dtype=torch.float).view(-1, 1)

    # Concatenate all graphlet features
    graphlet_features = torch.cat([triangle_tensor, star_tensor, tail_tensor, cycle_tensor], dim=1)

    # Add to node features
    if 'x' in dgl_graph.ndata:
        dgl_graph.ndata['x'] = torch.cat([dgl_graph.ndata['x'], graphlet_features], dim=1)
    else:
        dgl_graph.ndata['x'] = graphlet_features

    return dgl_graph

def graphlet_encoding_dataset(dgl_dataset):
    """
    Apply graphlet-based encoding to a list of DGL graphs.

    Args:
        dgl_dataset: List of DGL graphs.

    Returns:
        encoded_dataset: List of DGL graphs with graphlet-based features added.
    """
    encoded_dataset = []
    for dgl_graph in dgl_dataset:
        graph_copy = copy.deepcopy(dgl_graph)
        graph_encoded = graphlet_based_encoding(graph_copy)
        encoded_dataset.append(graph_encoded)
    return encoded_dataset

# Add Isomorphic Pairs

In [18]:
def add_isomorphic_pairs_dgl(dataset, num_pairs=5):
    isomorphic_pair = []
    original_indices = []

    for i in range(num_pairs):
        # Pick a random graph from the dataset
        original_graph = random.choice(dataset)
        original_idx = dataset.index(original_graph)

        # Convert to NetworkX and create isomorphic graphs
        G = dgl.to_networkx(original_graph)
        nodes = list(G.nodes())
        random.shuffle(nodes)  # Shuffle to get isomorphic graph
        mapping = {node: nodes[i] for i, node in enumerate(nodes)}
        isomorphic_G = nx.relabel_nodes(G, mapping)

        # Convert back to DGL
        isomorphic_dgl_graph = dgl.from_networkx(isomorphic_G)
        isomorphic_dgl_graph.ndata['x'] = original_graph.ndata['x']  # Copy node features

        isomorphic_pair.append(isomorphic_dgl_graph)
        original_indices.append(original_idx)

    return dataset, isomorphic_pair, original_indices

# Organize Graphs in Pairs

In [19]:
def organize_pairs(dummy_dataset, original_indices):
    original_size = int(len(dummy_dataset)*2/3)
    non_isomorphic_pairs = []
    isomorphic_pairs = []

    # Group original graphs into non-isomorphic pairs (assuming they're already paired)
    for i in range(0, original_size, 2):
      non_isomorphic_pairs.append((dummy_dataset[i], dummy_dataset[i+1]))


    for i in range(0, len(original_indices)):
      indice = original_indices[i]
      isomorphic_pairs.append((dummy_dataset[indice], dummy_dataset[i+original_size]))

    return non_isomorphic_pairs, isomorphic_pairs

# Distinguishing Test

In [46]:
def contrastive_loss2(embedding1, embedding2, label, margin=1.0):
    """Optimized contrastive loss: Pull embeddings together if label == 1, push them apart if label == 0."""
    euclidean_distance = F.pairwise_distance(embedding1, embedding2)
    euclidean_distance_squared = torch.pow(euclidean_distance, 2)
    
    # Ensure that label is a tensor, convert to float tensor
    if isinstance(label, int):  # Check if label is a scalar integer
        label = torch.tensor(label).float().to(embedding1.device)
    else:
        label = label.float()

    # Compute positive and negative losses
    loss_positive = euclidean_distance_squared  # For label == 1
    loss_negative = torch.pow(F.relu(margin - euclidean_distance), 2)  # For label == 0
    
    # Use torch.where with tensor inputs
    loss = torch.where(label == 1, loss_positive, loss_negative)
    return loss.mean()


In [47]:
def evaluate_model_with_pairs(non_isomorphic_pairs, isomorphic_pairs, model, input_dim):
    model.train()  # Set the model to training mode
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

    import dgl
    from torch.utils.data import DataLoader
    
    # Custom collate function to handle DGLGraphs
    def collate_fn(batch):
        # Unpack the batch of pairs
        g1_batch, g2_batch = zip(*batch)
    
        # Batch the graphs using DGL's batch function
        batched_g1 = dgl.batch(g1_batch)
        batched_g2 = dgl.batch(g2_batch)
        
        return batched_g1, batched_g2
    
    # Custom Dataset for graph pairs (as you already have)
    class GraphPairDataset(torch.utils.data.Dataset):
        def __init__(self, pairs):
            self.pairs = pairs
    
        def __len__(self):
            return len(self.pairs)
    
        def __getitem__(self, idx):
            return self.pairs[idx]
    
    # Assuming you already have non_isomorphic_pairs and isomorphic_pairs
    non_isomorphic_dataset = GraphPairDataset(non_isomorphic_pairs)
    isomorphic_dataset = GraphPairDataset(isomorphic_pairs)
    
    # Define DataLoader with custom collate_fn
    non_isomorphic_loader = DataLoader(non_isomorphic_dataset, batch_size=64, shuffle=True, collate_fn=collate_fn)
    isomorphic_loader = DataLoader(isomorphic_dataset, batch_size=64, shuffle=True, collate_fn=collate_fn)
    
    # Main training loop
    for epoch in range(100):
        total_loss = 0
    
        # Iterate over non-isomorphic pairs in batches
        for batched_g1, batched_g2 in non_isomorphic_loader:
            optimizer.zero_grad()
    
            batched_g1 = batched_g1.to(device)
            batched_g2 = batched_g2.to(device)
    
            embedding1 = model(batched_g1, batched_g1.ndata['x'])
            embedding2 = model(batched_g2, batched_g2.ndata['x'])
    
            # Label is 0 for non-isomorphic pairs
            loss = contrastive_loss2(embedding1, embedding2, label=torch.zeros(batched_g1.batch_size).to(device))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
    
        # Iterate over isomorphic pairs in batches
        for batched_g1, batched_g2 in isomorphic_loader:
            optimizer.zero_grad()
    
            batched_g1 = batched_g1.to(device)
            batched_g2 = batched_g2.to(device)
    
            embedding1 = model(batched_g1, batched_g1.ndata['x'])
            embedding2 = model(batched_g2, batched_g2.ndata['x'])
    
            # Label is 1 for isomorphic pairs
            loss = contrastive_loss2(embedding1, embedding2, label=torch.ones(batched_g1.batch_size).to(device))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
    
        print(f"Epoch {epoch+1}/{100}, Loss: {total_loss:.4f}")

    model.eval()  # Set the model to evaluation mode
    non_isomorphic_different_count = 0
    isomorphic_same_count = 0
    isomorphic_different_count = 0

    with torch.no_grad():  # Disable gradient calculation during evaluation
        # Compare embeddings of non-isomorphic pairs
        for g1, g2 in non_isomorphic_pairs:
            g1 = g1.to(device)
            g2 = g2.to(device)
            embedding1 = model(g1, g1.ndata['x']).to(device)
            embedding2 = model(g2, g2.ndata['x']).to(device)
            cosine_sim = F.cosine_similarity(embedding1, embedding2).item()
            if cosine_sim < 0.99:
                non_isomorphic_different_count += 1  # Correctly classified as different
            #final_embedding1 = calculate_integer_embedding(embedding1)
            #final_embedding2 = calculate_integer_embedding(embedding2)
            #if final_embedding1 != final_embedding2:
            #    non_isomorphic_different_count += 1
                

        # Compare embeddings of isomorphic pairs
        for g1, g2 in isomorphic_pairs:
            g1 = g1.to(device)
            g2 = g2.to(device)
            embedding1 = model(g1, g1.ndata['x']).to(device)
            embedding2 = model(g2, g2.ndata['x']).to(device)
            cosine_sim = F.cosine_similarity(embedding1, embedding2).item()
            if cosine_sim > 0.99:
                isomorphic_same_count += 1  # Correctly classified as the same
            else:
                isomorphic_different_count += 1  # Incorrectly classified as different

            #final_embedding1 = calculate_integer_embedding(embedding1)
            #final_embedding2 = calculate_integer_embedding(embedding2)

            #if final_embedding1 == final_embedding2:
            #   isomorphic_same_count += 1
            #else:
            #    isomorphic_different_count += 1

    print(f"Correctly classified non-isomorphic pairs: {non_isomorphic_different_count}")
    print(f"Correctly classified isomorphic pairs: {isomorphic_same_count}")
    print(f"Incorrectly classified isomorphic pairs: {isomorphic_different_count}")

    return non_isomorphic_different_count, isomorphic_same_count, isomorphic_different_count

# BREC-Basics

## Original Basics

In [128]:
import dgl
from torch_geometric.utils import to_dgl, from_dgl

G_Basics_dgl_graphs = []
for G in G_Basics:
    dgl_graph = dgl.from_networkx(G)
    dgl_graph.ndata['x'] = torch.ones((G.number_of_nodes(), 1), dtype=torch.float32)
    G_Basics_dgl_graphs.append(dgl_graph)


# Step 4: Save the list of DGL graphs using pickling
output_file = '../data/BREC/Basics/G_Basics_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(G_Basics_dgl_graphs, f)

print(f"Converted {len(G_Basics_dgl_graphs)} graphs to DGL format and saved to {output_file}.")

Converted 120 graphs to DGL format and saved to ../data/BREC/Basics/G_Basics_dataset.pkl.


## Basics dummy with isomorphic pairs

In [129]:
G_Basics_dgl_graphs, G_Basics_isomorphic_pair, G_Basics_original_indices = add_isomorphic_pairs_dgl(G_Basics_dgl_graphs, num_pairs=60)
G_Basics_dummy_dgl = G_Basics_dgl_graphs + G_Basics_isomorphic_pair

output_file = '../data/BREC/Basics/G_Basics_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(G_Basics_dummy_dgl, f)

print(f"Converted {len(G_Basics_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")

Converted 180 graphs (including isomorphisms) to DGL format and saved to ../data/BREC/Basics/G_Basics_dataset_dummy.pkl.


## VN on Basics original and Basics dummy

In [130]:
vn_G_Basics_dgl = apply_vn(G_Basics_dgl_graphs)
output_file = '../data/BREC/Basics/vn_G_Basics_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(vn_G_Basics_dgl, f)

print(f"Converted {len(vn_G_Basics_dgl)} graphs to DGL format and saved to {output_file}.")

vn_G_Basics_dummy_dgl = apply_vn(G_Basics_dummy_dgl)
output_file = '../data/BREC/Basics/vn_G_Basics_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(vn_G_Basics_dummy_dgl, f)

print(f"Converted {len(vn_G_Basics_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


Converted 120 graphs to DGL format and saved to ../data/BREC/Basics/vn_G_Basics_dataset.pkl.
Converted 180 graphs (including isomorphisms) to DGL format and saved to ../data/BREC/Basics/vn_G_Basics_dataset_dummy.pkl.


## Degree Centrality on Basics original and Basics dummy

In [131]:
deg_G_Basics_dgl = degree_dataset(G_Basics_dgl_graphs)
output_file = '../data/BREC/Basics/deg_G_Basics_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(deg_G_Basics_dgl, f)

print(f"Converted {len(deg_G_Basics_dgl)} graphs to DGL format and saved to {output_file}.")

deg_G_Basics_dummy_dgl = degree_dataset(G_Basics_dummy_dgl)
output_file = '../data/BREC/Basics/deg_G_Basics_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(deg_G_Basics_dummy_dgl, f)

print(f"Converted {len(deg_G_Basics_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


Converted 120 graphs to DGL format and saved to ../data/BREC/Basics/deg_G_Basics_dataset.pkl.
Converted 180 graphs (including isomorphisms) to DGL format and saved to ../data/BREC/Basics/deg_G_Basics_dataset_dummy.pkl.


## Closeness Centrality on Basics original and Basics dummy

In [132]:
clo_G_Basics_dgl = closeness_dataset(G_Basics_dgl_graphs)
output_file = '../data/BREC/Basics/clo_G_Basics_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(clo_G_Basics_dgl, f)

print(f"Converted {len(clo_G_Basics_dgl)} graphs to DGL format and saved to {output_file}.")

clo_G_Basics_dummy_dgl = closeness_dataset(G_Basics_dummy_dgl)
output_file = '../data/BREC/Basics/clo_G_Basics_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(clo_G_Basics_dummy_dgl, f)

print(f"Converted {len(clo_G_Basics_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


Converted 120 graphs to DGL format and saved to ../data/BREC/Basics/clo_G_Basics_dataset.pkl.
Converted 180 graphs (including isomorphisms) to DGL format and saved to ../data/BREC/Basics/clo_G_Basics_dataset_dummy.pkl.


## Betweenness Centrality on Basics original and Basics dummy

In [133]:
bet_G_Basics_dgl = betweenness_dataset(G_Basics_dgl_graphs)
output_file = '../data/BREC/Basics/bet_G_Basics_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(bet_G_Basics_dgl, f)

print(f"Converted {len(bet_G_Basics_dgl)} graphs to DGL format and saved to {output_file}.")

bet_G_Basics_dummy_dgl = betweenness_dataset(G_Basics_dummy_dgl)
output_file = '../data/BREC/Basics/bet_G_Basics_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(bet_G_Basics_dummy_dgl, f)

print(f"Converted {len(bet_G_Basics_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


Converted 120 graphs to DGL format and saved to ../data/BREC/Basics/bet_G_Basics_dataset.pkl.
Converted 180 graphs (including isomorphisms) to DGL format and saved to ../data/BREC/Basics/bet_G_Basics_dataset_dummy.pkl.


## Eigenvector Centrality on Basics original and Basics dummy

In [134]:
eig_G_Basics_dgl = eigenvector_dataset(G_Basics_dgl_graphs)
output_file = '../data/BREC/Basics/eig_G_Basics_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(eig_G_Basics_dgl, f)

print(f"Converted {len(eig_G_Basics_dgl)} graphs to DGL format and saved to {output_file}.")

eig_G_Basics_dummy_dgl = eigenvector_dataset(G_Basics_dummy_dgl)
output_file = '../data/BREC/Basics/eig_G_Basics_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(eig_G_Basics_dummy_dgl, f)

print(f"Converted {len(eig_G_Basics_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


Converted 120 graphs to DGL format and saved to ../data/BREC/Basics/eig_G_Basics_dataset.pkl.
Converted 180 graphs (including isomorphisms) to DGL format and saved to ../data/BREC/Basics/eig_G_Basics_dataset_dummy.pkl.


## Distance Encoding on Basics original and Basics dummy

In [135]:
DE_G_Basics_dgl = distance_encoding(G_Basics_dgl_graphs)
output_file = '../data/BREC/Basics/DE_G_Basics_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(DE_G_Basics_dgl, f)

print(f"Converted {len(DE_G_Basics_dgl)} graphs to DGL format and saved to {output_file}.")

DE_G_Basics_dummy_dgl = distance_encoding(G_Basics_dummy_dgl)
output_file = '../data/BREC/Basics/DE_G_Basics_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(DE_G_Basics_dummy_dgl, f)

print(f"Converted {len(DE_G_Basics_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


Converted 120 graphs to DGL format and saved to ../data/BREC/Basics/DE_G_Basics_dataset.pkl.
Converted 180 graphs (including isomorphisms) to DGL format and saved to ../data/BREC/Basics/DE_G_Basics_dataset_dummy.pkl.


## Graph Encoding on Basics original and Basics dummy

In [136]:
GE_G_Basics_dgl = Graph_encoding(G_Basics_dgl_graphs, k=3)
output_file = '../data/BREC/Basics/GE_G_Basics_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(GE_G_Basics_dgl, f)

print(f"Converted {len(GE_G_Basics_dgl)} graphs to DGL format and saved to {output_file}.")

GE_G_Basics_dummy_dgl = Graph_encoding(G_Basics_dummy_dgl, k=3)
output_file = '../data/BREC/Basics/GE_G_Basics_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(GE_G_Basics_dummy_dgl, f)

print(f"Converted {len(GE_G_Basics_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


Converted 120 graphs to DGL format and saved to ../data/BREC/Basics/GE_G_Basics_dataset.pkl.
Converted 180 graphs (including isomorphisms) to DGL format and saved to ../data/BREC/Basics/GE_G_Basics_dataset_dummy.pkl.


## Subgraph Extraction on Basics original and Basics dummy

In [137]:
SE_G_Basics_dgl = subgraph_dataset(G_Basics_dgl_graphs, radius=3)
output_file = '../data/BREC/Basics/SE_G_Basics_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(SE_G_Basics_dgl, f)

print(f"Converted {len(SE_G_Basics_dgl)} graphs to DGL format and saved to {output_file}.")

SE_G_Basics_dummy_dgl = subgraph_dataset(G_Basics_dummy_dgl, radius=3)
output_file = '../data/BREC/Basics/SE_G_Basics_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(SE_G_Basics_dummy_dgl, f)

print(f"Converted {len(SE_G_Basics_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


Converted 120 graphs to DGL format and saved to ../data/BREC/Basics/SE_G_Basics_dataset.pkl.
Converted 180 graphs (including isomorphisms) to DGL format and saved to ../data/BREC/Basics/SE_G_Basics_dataset_dummy.pkl.


## Extra Node on Basics original and Basics dummy

In [138]:
exN_G_Basics_dgl = extra_node_dataset(G_Basics_dgl_graphs)
output_file = '../data/BREC/Basics/exN_G_Basics_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(exN_G_Basics_dgl, f)

print(f"Converted {len(exN_G_Basics_dgl)} graphs to DGL format and saved to {output_file}.")

exN_G_Basics_dummy_dgl = extra_node_dataset(G_Basics_dummy_dgl)
output_file = '../data/BREC/Basics/exN_G_Basics_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(exN_G_Basics_dummy_dgl, f)

print(f"Converted {len(exN_G_Basics_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


Converted 120 graphs to DGL format and saved to ../data/BREC/Basics/exN_G_Basics_dataset.pkl.
Converted 180 graphs (including isomorphisms) to DGL format and saved to ../data/BREC/Basics/exN_G_Basics_dataset_dummy.pkl.


## Graphlet-Based Encoding on Basics original and Basics dummy

In [139]:
gle_G_Basics_dgl = graphlet_encoding_dataset(G_Basics_dgl_graphs)
output_file = '../data/BREC/Basics/gle_G_Basics_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(gle_G_Basics_dgl, f)

print(f"Converted {len(gle_G_Basics_dgl)} graphs to DGL format and saved to {output_file}.")

gle_G_Basics_dummy_dgl = graphlet_encoding_dataset(G_Basics_dummy_dgl)
output_file = '../data/BREC/Basics/gle_G_Basics_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(gle_G_Basics_dummy_dgl, f)

print(f"Converted {len(gle_G_Basics_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


Converted 120 graphs to DGL format and saved to ../data/BREC/Basics/gle_G_Basics_dataset.pkl.
Converted 180 graphs (including isomorphisms) to DGL format and saved to ../data/BREC/Basics/gle_G_Basics_dataset_dummy.pkl.


## Equivalence Class

In [140]:
import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn import GINConv, SumPooling, PNAConv
import networkx as nx
import random
import pickle
import numpy as np
import glob

# Define a GIN model
class GIN(nn.Module):
    def __init__(self, in_feats, hidden_dim, out_dim, num_layers):
        super(GIN, self).__init__()
        self.in_feats = in_feats
        self.layers = nn.ModuleList()
        self.batch_norms = nn.ModuleList()

        # Input layer
        self.layers.append(GINConv(
            nn.Sequential(
                nn.Linear(in_feats, hidden_dim),
                nn.ReLU(),
                nn.Linear(hidden_dim, hidden_dim)
            ), 'sum'))
        self.batch_norms.append(nn.BatchNorm1d(hidden_dim))

        # Hidden layers
        for _ in range(num_layers - 2):
            self.layers.append(GINConv(
                nn.Sequential(
                    nn.Linear(hidden_dim, hidden_dim),
                    nn.ReLU(),
                    nn.Linear(hidden_dim, hidden_dim)
                ), 'sum'))
            self.batch_norms.append(nn.BatchNorm1d(hidden_dim))

        # Output layer
        self.layers.append(GINConv(
            nn.Sequential(
                nn.Linear(hidden_dim, out_dim),
                nn.ReLU(),
                nn.Linear(out_dim, out_dim)
            ), 'sum'))
        self.batch_norms.append(nn.BatchNorm1d(out_dim))

        # Pooling layer
        self.pool = SumPooling()

    def forward(self, g, h):
        h = torch.round(h * 100) / 100
        for layer, batch_norm in zip(self.layers, self.batch_norms):
            h = layer(g, h)
            h = batch_norm(h)
            h = F.relu(h)
        g_embedding = self.pool(g, h)
        return g_embedding

# Define a PNA model
class PNA(nn.Module):
    def __init__(self, in_feats, hidden_dim, out_dim, num_layers, aggregators, scalers, deg):
        super(PNA, self).__init__()
        self.in_feats = in_feats
        self.layers = nn.ModuleList()

        # Input layer
        self.layers.append(PNAConv(in_feats, hidden_dim, aggregators, scalers, deg))

        # Hidden layers
        for _ in range(num_layers - 2):
            self.layers.append(PNAConv(hidden_dim, hidden_dim, aggregators, scalers, deg))

        # Output layer
        self.layers.append(PNAConv(hidden_dim, out_dim, aggregators, scalers, deg))

        # Pooling layer
        self.pool = SumPooling()

    def forward(self, g, h):
        h = torch.round(h * 100) / 100
        for layer in self.layers:
            h = layer(g, h)
            h = F.relu(h)
        g_embedding = self.pool(g, h)
        return g_embedding



# Calculate the final integer embedding
def calculate_integer_embedding(embedding):
    sum_x = embedding.sum().item()

    # Handle variance calculation only if there's more than one element
    if embedding.numel() > 1:
        var_x = embedding.var().item()
    else:
        var_x = 0.0  # Set variance to 0 if there's only one element

    min_x = embedding.min().item()

    # Handle NaN variance by setting it to 0
    if np.isnan(var_x):
        var_x = 0.0

    final_embedding = int((sum_x * 100 + var_x * 10 + min_x * 10) * 10)
    return final_embedding


# Save graphs to a file
def save_graphs_to_file(graphs, filepath):
    with open(filepath, 'wb') as f:
        pickle.dump(graphs, f)

# Load graphs from a file
def load_graphs_from_file(filepath):
    with open(filepath, 'rb') as f:
        graphs = pickle.load(f)
    return graphs

# Compute equivalence classes
def compute_equivalence_classes(filepath, model, input_dim):
    graphs = load_graphs_from_file(filepath)

    # Train the model for a single epoch with a random target
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
    model.train()
    target_seed = 100
    for g in graphs:
        # Ensure that 'h' exists
        if 'x' not in g.ndata:
            g.ndata['x'] = torch.ones(g.number_of_nodes(), input_dim)  # Initialize with ones if 'h' is missing
        optimizer.zero_grad()

        output = model(g, g.ndata['x'])  # The output is a tensor of shape (1, out_dim)
        target = torch.randint(0, 2, output.shape)  # Target is a random tensor of the same shape as output

        loss = F.binary_cross_entropy_with_logits(output, target.float())  # Ensure the target is a float tensor
        loss.backward()
        optimizer.step()
        
    embeddings = set()

    with torch.no_grad():
        model.eval()
        for g in graphs:
            if 'x' not in g.ndata:
                g.ndata['x'] = torch.ones(g.number_of_nodes(), input_dim)  # Initialize with ones if 'h' is missing
            embedding = model(g, g.ndata['x'])
            final_embedding = calculate_integer_embedding(embedding)
            embeddings.add(final_embedding)

    print("embeddings: ", embeddings)
    return len(embeddings)

# Process all .pkl files in the current directory
for filepath in glob.glob("../data/BREC/Basics/*G_Basics_dataset*.pkl"):
    print(f"------------- Processing {filepath}...")

    # Load graphs to determine input_dim
    graphs = load_graphs_from_file(filepath)
    if 'x' in graphs[0].ndata:
        input_dim = graphs[0].ndata['x'].size(1)  # Determine the input dimension dynamically
    else:
        # If 'h' does not exist, assume a default input dimension
        input_dim = 1

    # Initialize the models
    gin_model = GIN(input_dim, hidden_dim=32, out_dim=8, num_layers=3)
    pna_model = PNA(input_dim, hidden_dim=32, out_dim=8, num_layers=3,
                    aggregators=['mean', 'max', 'sum', 'min', 'std'],
                    scalers=['identity', 'amplification', 'attenuation'],
                    deg=torch.tensor([1.0]))  # Example degree tensor
    

    # Compute the number of unique embeddings using GIN
    print("Computing equivalence classes using GIN model...")
    num_unique_embeddings_gin = compute_equivalence_classes(filepath, gin_model, input_dim)
    print(f"Number of unique embeddings with GIN: {num_unique_embeddings_gin}\n")

    # Compute the number of unique embeddings using PNA
    print("Computing equivalence classes using PNA model...")
    num_unique_embeddings_pna = compute_equivalence_classes(filepath, pna_model, input_dim)
    print(f"Number of unique embeddings with PNA: {num_unique_embeddings_pna}\n")



------------- Processing ../data/BREC/Basics/deg_G_Basics_dataset_dummy.pkl...
Computing equivalence classes using GIN model...
embeddings:  {98049, 519041, 84611, 41092, 74500, 1290503, 40969, 222218, 79114, 1522316, 1194510, 49041, 1845651, 1375637, 1375638, 28695, 92057, 65945, 371229, 814751, 23328, 83748, 119848, 38188, 57132, 4014255, 13384501, 61365, 84535, 105655, 18305981, 27582, 18305984, 1100995, 1078985, 1474635, 14176843, 14176844, 87758, 58702, 41805, 90705, 3388620, 131545, 185818, 35018459, 59228, 119386, 72670, 78684, 3259616, 999143, 97512, 1435625, 4358124, 94190, 2251631, 73070, 63348, 96116, 58106, 124028, 103423}
Number of unique embeddings with GIN: 63

Computing equivalence classes using PNA model...
embeddings:  {10881, 12931, 12676, 14274, 10635, 7950, 12558, 10256, 34577, 12562, 43668, 15637, 6805, 5781, 6932, 17686, 6170, 17687, 19989, 9758, 6948, 11942, 12584, 13355, 14636, 15921, 15285, 13752, 12221, 6589, 14399, 7102, 12993, 14018, 9922, 12867, 7362, 1670

## Distinguishing Test

In [141]:
import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn import GINConv, SumPooling, PNAConv
import networkx as nx
import random
import pickle
import numpy as np
import glob

# Define a GIN model
class GIN(nn.Module):
    def __init__(self, in_feats, hidden_dim, out_dim, num_layers):
        super(GIN, self).__init__()
        self.in_feats = in_feats
        self.layers = nn.ModuleList()
        self.batch_norms = nn.ModuleList()

        # Input layer
        self.layers.append(GINConv(
            nn.Sequential(
                nn.Linear(in_feats, hidden_dim),
                nn.ReLU(),
                nn.Linear(hidden_dim, hidden_dim)
            ), 'sum'))
        self.batch_norms.append(nn.BatchNorm1d(hidden_dim))

        # Hidden layers
        for _ in range(num_layers - 2):
            self.layers.append(GINConv(
                nn.Sequential(
                    nn.Linear(hidden_dim, hidden_dim),
                    nn.ReLU(),
                    nn.Linear(hidden_dim, hidden_dim)
                ), 'sum'))
            self.batch_norms.append(nn.BatchNorm1d(hidden_dim))

        # Output layer
        self.layers.append(GINConv(
            nn.Sequential(
                nn.Linear(hidden_dim, out_dim),
                nn.ReLU(),
                nn.Linear(out_dim, out_dim)
            ), 'sum'))
        self.batch_norms.append(nn.BatchNorm1d(out_dim))

        # Pooling layer
        self.pool = SumPooling()

    def forward(self, g, h):
        h = torch.round(h * 100) / 100
        for layer, batch_norm in zip(self.layers, self.batch_norms):
            h = layer(g, h)
            h = batch_norm(h)
            h = F.relu(h)
        g_embedding = self.pool(g, h)
        return g_embedding

# Define a PNA model
class PNA(nn.Module):
    def __init__(self, in_feats, hidden_dim, out_dim, num_layers, aggregators, scalers, deg):
        super(PNA, self).__init__()
        self.in_feats = in_feats
        self.layers = nn.ModuleList()

        # Input layer
        self.layers.append(PNAConv(in_feats, hidden_dim, aggregators, scalers, deg))

        # Hidden layers
        for _ in range(num_layers - 2):
            self.layers.append(PNAConv(hidden_dim, hidden_dim, aggregators, scalers, deg))

        # Output layer
        self.layers.append(PNAConv(hidden_dim, out_dim, aggregators, scalers, deg))

        # Pooling layer
        self.pool = SumPooling()

    def forward(self, g, h):
        h = torch.round(h * 100) / 100
        for layer in self.layers:
            h = layer(g, h)
            h = F.relu(h)
        g_embedding = self.pool(g, h)
        return g_embedding



for filepath in glob.glob("../data/BREC/Basics/*G_Basics_dataset_dummy.pkl"):
    print(f"------------- Processing {filepath}...")

    # Load graphs to determine input_dim
    graphs = load_graphs_from_file(filepath)

    non_isomorphic_pairs, isomorphic_pairs = organize_pairs(graphs, G_Basics_original_indices)

    if 'x' in graphs[0].ndata:
        input_dim = graphs[0].ndata['x'].size(1)  # Determine the input dimension dynamically
    else:
        # If 'x' does not exist, assume a default input dimension
        input_dim = 1


    # Initialize the models
    gin_model = GIN(input_dim, hidden_dim=16, out_dim=8, num_layers=4).to(device)
    pna_model = PNA(input_dim, hidden_dim=16, out_dim=8, num_layers=4,
                    aggregators=['mean', 'max', 'sum', 'min', 'std'],
                    scalers=['identity', 'amplification', 'attenuation'],
                    deg=torch.tensor([1.0]).to(device)).to(device)  # Example degree tensor

    # Compute the number of unique embeddings using PNA
    print("Using PNA model...")
    non_isomorphic_different_count, isomorphic_same_count, isomorphic_different_count = evaluate_model_with_pairs(non_isomorphic_pairs, isomorphic_pairs, pna_model, input_dim)

    # Compute the number of unique embeddings using GIN
    print("Using GIN model...")
    non_isomorphic_different_count, isomorphic_same_count, isomorphic_different_count = evaluate_model_with_pairs(non_isomorphic_pairs, isomorphic_pairs, gin_model, input_dim)



------------- Processing ../data/BREC/Basics/deg_G_Basics_dataset_dummy.pkl...
Using PNA model...
Epoch 1/100, Loss: 1.0000
Epoch 2/100, Loss: 1.0000
Epoch 3/100, Loss: 0.9999
Epoch 4/100, Loss: 0.9999
Epoch 5/100, Loss: 0.9999
Epoch 6/100, Loss: 1.0000
Epoch 7/100, Loss: 0.9999
Epoch 8/100, Loss: 0.9999
Epoch 9/100, Loss: 0.9999
Epoch 10/100, Loss: 0.9999
Epoch 11/100, Loss: 0.9999
Epoch 12/100, Loss: 0.9999
Epoch 13/100, Loss: 0.9999
Epoch 14/100, Loss: 0.9999
Epoch 15/100, Loss: 0.9999
Epoch 16/100, Loss: 0.9999
Epoch 17/100, Loss: 0.9999
Epoch 18/100, Loss: 0.9999
Epoch 19/100, Loss: 0.9999
Epoch 20/100, Loss: 0.9999
Epoch 21/100, Loss: 0.9999
Epoch 22/100, Loss: 0.9999
Epoch 23/100, Loss: 0.9999
Epoch 24/100, Loss: 0.9999
Epoch 25/100, Loss: 0.9999
Epoch 26/100, Loss: 0.9999
Epoch 27/100, Loss: 0.9999
Epoch 28/100, Loss: 0.9999
Epoch 29/100, Loss: 0.9998
Epoch 30/100, Loss: 0.9999
Epoch 31/100, Loss: 0.9998
Epoch 32/100, Loss: 0.9999
Epoch 33/100, Loss: 0.9998
Epoch 34/100, Loss: 

# BREC-Regular

## Original Regular

In [142]:
import dgl
from torch_geometric.utils import to_dgl, from_dgl

G_Regular_dgl_graphs = []
for G in G_Regular:
    dgl_graph = dgl.from_networkx(G)
    dgl_graph.ndata['x'] = torch.ones((G.number_of_nodes(), 1), dtype=torch.float32)
    G_Regular_dgl_graphs.append(dgl_graph)


# Step 4: Save the list of DGL graphs using pickling
output_file = '../data/BREC/Regular/G_Regular_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(G_Regular_dgl_graphs, f)

print(f"Converted {len(G_Regular_dgl_graphs)} graphs to DGL format and saved to {output_file}.")

Converted 200 graphs to DGL format and saved to ../data/BREC/Regular/G_Regular_dataset.pkl.


## Regular dummy with isomorphic pairs

In [143]:
G_Regular_dgl_graphs, G_Regular_isomorphic_pair, G_Regular_original_indices = add_isomorphic_pairs_dgl(G_Regular_dgl_graphs, num_pairs=100)
G_Regular_dummy_dgl = G_Regular_dgl_graphs + G_Regular_isomorphic_pair

output_file = '../data/BREC/Regular/G_Regular_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(G_Regular_dummy_dgl, f)

print(f"Converted {len(G_Regular_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")

Converted 300 graphs (including isomorphisms) to DGL format and saved to ../data/BREC/Regular/G_Regular_dataset_dummy.pkl.


## VN on Regular original and Regular dummy

In [144]:
vn_G_Regular_dgl = apply_vn(G_Regular_dgl_graphs)
output_file = '../data/BREC/Regular/vn_G_Regular_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(vn_G_Regular_dgl, f)

print(f"Converted {len(vn_G_Regular_dgl)} graphs to DGL format and saved to {output_file}.")

vn_G_Regular_dummy_dgl = apply_vn(G_Regular_dummy_dgl)
output_file = '../data/BREC/Regular/vn_G_Regular_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(vn_G_Regular_dummy_dgl, f)

print(f"Converted {len(vn_G_Regular_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


Converted 200 graphs to DGL format and saved to ../data/BREC/Regular/vn_G_Regular_dataset.pkl.
Converted 300 graphs (including isomorphisms) to DGL format and saved to ../data/BREC/Regular/vn_G_Regular_dataset_dummy.pkl.


## Degree Centrality on Regular original and Regular dummy

In [145]:
deg_G_Regular_dgl = degree_dataset(G_Regular_dgl_graphs)
output_file = '../data/BREC/Regular/deg_G_Regular_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(deg_G_Regular_dgl, f)

print(f"Converted {len(deg_G_Regular_dgl)} graphs to DGL format and saved to {output_file}.")

deg_G_Regular_dummy_dgl = degree_dataset(G_Regular_dummy_dgl)
output_file = '../data/BREC/Regular/deg_G_Regular_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(deg_G_Regular_dummy_dgl, f)

print(f"Converted {len(deg_G_Regular_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


Converted 200 graphs to DGL format and saved to ../data/BREC/Regular/deg_G_Regular_dataset.pkl.
Converted 300 graphs (including isomorphisms) to DGL format and saved to ../data/BREC/Regular/deg_G_Regular_dataset_dummy.pkl.


## Closeness Centrality on Regular original and Regular dummy

In [158]:
clo_G_Regular_dgl = closeness_dataset(G_Regular_dgl_graphs)
output_file = '../data/BREC/Regular/clo_G_Regular_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(clo_G_Regular_dgl, f)

print(f"Converted {len(clo_G_Regular_dgl)} graphs to DGL format and saved to {output_file}.")

clo_G_Regular_dummy_dgl = closeness_dataset(G_Regular_dummy_dgl)
output_file = '../data/BREC/Regular/clo_G_Regular_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(clo_G_Regular_dummy_dgl, f)

print(f"Converted {len(clo_G_Regular_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


Converted 200 graphs to DGL format and saved to ../data/BREC/Regular/clo_G_Regular_dataset.pkl.
Converted 300 graphs (including isomorphisms) to DGL format and saved to ../data/BREC/Regular/clo_G_Regular_dataset_dummy.pkl.


## Betweenness Centrality on Regular original and Regular dummy

In [147]:
bet_G_Regular_dgl = betweenness_dataset(G_Regular_dgl_graphs)
output_file = '../data/BREC/Regular/bet_G_Regular_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(bet_G_Regular_dgl, f)

print(f"Converted {len(bet_G_Regular_dgl)} graphs to DGL format and saved to {output_file}.")

bet_G_Regular_dummy_dgl = betweenness_dataset(G_Regular_dummy_dgl)
output_file = '../data/BREC/Regular/bet_G_Regular_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(bet_G_Regular_dummy_dgl, f)

print(f"Converted {len(bet_G_Regular_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


Converted 200 graphs to DGL format and saved to ../data/BREC/Regular/bet_G_Regular_dataset.pkl.
Converted 300 graphs (including isomorphisms) to DGL format and saved to ../data/BREC/Regular/bet_G_Regular_dataset_dummy.pkl.


## Eigenvector Centrality on Regular original and Regular dummy

In [148]:
eig_G_Regular_dgl = eigenvector_dataset(G_Regular_dgl_graphs)
output_file = '../data/BREC/Regular/eig_G_Regular_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(eig_G_Regular_dgl, f)

print(f"Converted {len(eig_G_Regular_dgl)} graphs to DGL format and saved to {output_file}.")

eig_G_Regular_dummy_dgl = eigenvector_dataset(G_Regular_dummy_dgl)
output_file = '../data/BREC/Regular/eig_G_Regular_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(eig_G_Regular_dummy_dgl, f)

print(f"Converted {len(eig_G_Regular_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


Converted 200 graphs to DGL format and saved to ../data/BREC/Regular/eig_G_Regular_dataset.pkl.
Converted 300 graphs (including isomorphisms) to DGL format and saved to ../data/BREC/Regular/eig_G_Regular_dataset_dummy.pkl.


## Distance Encoding on Regular original and Regular dummy

In [149]:
DE_G_Regular_dgl = distance_encoding(G_Regular_dgl_graphs)
output_file = '../data/BREC/Regular/DE_G_Regular_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(DE_G_Regular_dgl, f)

print(f"Converted {len(DE_G_Regular_dgl)} graphs to DGL format and saved to {output_file}.")

DE_G_Regular_dummy_dgl = distance_encoding(G_Regular_dummy_dgl)
output_file = '../data/BREC/Regular/DE_G_Regular_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(DE_G_Regular_dummy_dgl, f)

print(f"Converted {len(DE_G_Regular_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


Converted 200 graphs to DGL format and saved to ../data/BREC/Regular/DE_G_Regular_dataset.pkl.
Converted 300 graphs (including isomorphisms) to DGL format and saved to ../data/BREC/Regular/DE_G_Regular_dataset_dummy.pkl.


## Graph Encoding on Regular original and Regular dummy

In [150]:
GE_G_Regular_dgl = Graph_encoding(G_Regular_dgl_graphs, k=3)
output_file = '../data/BREC/Regular/GE_G_Regular_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(GE_G_Regular_dgl, f)

print(f"Converted {len(GE_G_Regular_dgl)} graphs to DGL format and saved to {output_file}.")

GE_G_Regular_dummy_dgl = Graph_encoding(G_Regular_dummy_dgl, k=3)
output_file = '../data/BREC/Regular/GE_G_Regular_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(GE_G_Regular_dummy_dgl, f)

print(f"Converted {len(GE_G_Regular_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


Converted 200 graphs to DGL format and saved to ../data/BREC/Regular/GE_G_Regular_dataset.pkl.
Converted 300 graphs (including isomorphisms) to DGL format and saved to ../data/BREC/Regular/GE_G_Regular_dataset_dummy.pkl.


## Subgraph Extraction on Regular original and Regular dummy

In [151]:
SE_G_Regular_dgl = subgraph_dataset(G_Regular_dgl_graphs, radius=3)
output_file = '../data/BREC/Regular/SE_G_Regular_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(SE_G_Regular_dgl, f)

print(f"Converted {len(SE_G_Regular_dgl)} graphs to DGL format and saved to {output_file}.")

SE_G_Regular_dummy_dgl = subgraph_dataset(G_Regular_dummy_dgl, radius=3)
output_file = '../data/BREC/Regular/SE_G_Regular_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(SE_G_Regular_dummy_dgl, f)

print(f"Converted {len(SE_G_Regular_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


Converted 200 graphs to DGL format and saved to ../data/BREC/Regular/SE_G_Regular_dataset.pkl.
Converted 300 graphs (including isomorphisms) to DGL format and saved to ../data/BREC/Regular/SE_G_Regular_dataset_dummy.pkl.


## Extra Node on Regular original and Regular dummy

In [152]:
exN_G_Regular_dgl = extra_node_dataset(G_Regular_dgl_graphs)
output_file = '../data/BREC/Regular/exN_G_Regular_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(exN_G_Regular_dgl, f)

print(f"Converted {len(exN_G_Regular_dgl)} graphs to DGL format and saved to {output_file}.")

exN_G_Regular_dummy_dgl = extra_node_dataset(G_Regular_dummy_dgl)
output_file = '../data/BREC/Regular/exN_G_Regular_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(exN_G_Regular_dummy_dgl, f)

print(f"Converted {len(exN_G_Regular_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


Converted 200 graphs to DGL format and saved to ../data/BREC/Regular/exN_G_Regular_dataset.pkl.
Converted 300 graphs (including isomorphisms) to DGL format and saved to ../data/BREC/Regular/exN_G_Regular_dataset_dummy.pkl.


## Graphlet-Based Encoding on Regular original and Regular dummy

In [153]:
gle_G_Regular_dgl = graphlet_encoding_dataset(G_Regular_dgl_graphs)
output_file = '../data/BREC/Regular/gle_G_Regular_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(gle_G_Regular_dgl, f)

print(f"Converted {len(gle_G_Regular_dgl)} graphs to DGL format and saved to {output_file}.")

gle_G_Regular_dummy_dgl = graphlet_encoding_dataset(G_Regular_dummy_dgl)
output_file = '../data/BREC/Regular/gle_G_Regular_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(gle_G_Regular_dummy_dgl, f)

print(f"Converted {len(gle_G_Regular_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


Converted 200 graphs to DGL format and saved to ../data/BREC/Regular/gle_G_Regular_dataset.pkl.
Converted 300 graphs (including isomorphisms) to DGL format and saved to ../data/BREC/Regular/gle_G_Regular_dataset_dummy.pkl.


## Equivalence Class

In [175]:
import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn import GINConv, SumPooling, PNAConv
import networkx as nx
import random
import pickle
import numpy as np
import glob

# Define a GIN model
class GIN(nn.Module):
    def __init__(self, in_feats, hidden_dim, out_dim, num_layers):
        super(GIN, self).__init__()
        self.in_feats = in_feats
        self.layers = nn.ModuleList()
        self.batch_norms = nn.ModuleList()

        # Input layer
        self.layers.append(GINConv(
            nn.Sequential(
                nn.Linear(in_feats, hidden_dim),
                nn.ReLU(),
                nn.Linear(hidden_dim, hidden_dim)
            ), 'sum'))
        self.batch_norms.append(nn.BatchNorm1d(hidden_dim))

        # Hidden layers
        for _ in range(num_layers - 2):
            self.layers.append(GINConv(
                nn.Sequential(
                    nn.Linear(hidden_dim, hidden_dim),
                    nn.ReLU(),
                    nn.Linear(hidden_dim, hidden_dim)
                ), 'sum'))
            self.batch_norms.append(nn.BatchNorm1d(hidden_dim))

        # Output layer
        self.layers.append(GINConv(
            nn.Sequential(
                nn.Linear(hidden_dim, out_dim),
                nn.ReLU(),
                nn.Linear(out_dim, out_dim)
            ), 'sum'))
        self.batch_norms.append(nn.BatchNorm1d(out_dim))

        # Pooling layer
        self.pool = SumPooling()

    def forward(self, g, h):
        h = torch.round(h * 100) / 100
        for layer, batch_norm in zip(self.layers, self.batch_norms):
            h = layer(g, h)
            h = batch_norm(h)
            h = F.relu(h)
        g_embedding = self.pool(g, h)
        return g_embedding

# Define a PNA model
class PNA(nn.Module):
    def __init__(self, in_feats, hidden_dim, out_dim, num_layers, aggregators, scalers, deg):
        super(PNA, self).__init__()
        self.in_feats = in_feats
        self.layers = nn.ModuleList()

        # Input layer
        self.layers.append(PNAConv(in_feats, hidden_dim, aggregators, scalers, deg))

        # Hidden layers
        for _ in range(num_layers - 2):
            self.layers.append(PNAConv(hidden_dim, hidden_dim, aggregators, scalers, deg))

        # Output layer
        self.layers.append(PNAConv(hidden_dim, out_dim, aggregators, scalers, deg))

        # Pooling layer
        self.pool = SumPooling()

    def forward(self, g, h):
        h = torch.round(h * 100) / 100
        for layer in self.layers:
            h = layer(g, h)
            h = F.relu(h)
        g_embedding = self.pool(g, h)
        return g_embedding



# Calculate the final integer embedding
def calculate_integer_embedding(embedding):
    sum_x = embedding.sum().item()

    # Handle variance calculation only if there's more than one element
    if embedding.numel() > 1:
        var_x = embedding.var().item()
    else:
        var_x = 0.0  # Set variance to 0 if there's only one element

    min_x = embedding.min().item()

    # Handle NaN variance by setting it to 0
    if np.isnan(var_x):
        var_x = 0.0

    final_embedding = int((sum_x * 100 + var_x * 10 + min_x * 10) * 10)
    return final_embedding


# Save graphs to a file
def save_graphs_to_file(graphs, filepath):
    with open(filepath, 'wb') as f:
        pickle.dump(graphs, f)

# Load graphs from a file
def load_graphs_from_file(filepath):
    with open(filepath, 'rb') as f:
        graphs = pickle.load(f)
    return graphs

# Compute equivalence classes
def compute_equivalence_classes(filepath, model, input_dim):
    graphs = load_graphs_from_file(filepath)

    # Train the model for a single epoch with a random target
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
    model.train()
    target_seed = 100
    for g in graphs:
        # Ensure that 'h' exists
        if 'x' not in g.ndata:
            g.ndata['x'] = torch.ones(g.number_of_nodes(), input_dim)  # Initialize with ones if 'h' is missing
        optimizer.zero_grad()

        output = model(g, g.ndata['x'])  # The output is a tensor of shape (1, out_dim)
        target = torch.randint(0, 2, output.shape)  # Target is a random tensor of the same shape as output

        loss = F.binary_cross_entropy_with_logits(output, target.float())  # Ensure the target is a float tensor
        loss.backward()
        optimizer.step()
        
    embeddings = set()

    with torch.no_grad():
        model.eval()
        for g in graphs:
            if 'x' not in g.ndata:
                g.ndata['x'] = torch.ones(g.number_of_nodes(), input_dim)  # Initialize with ones if 'h' is missing
            embedding = model(g, g.ndata['x'])
            final_embedding = calculate_integer_embedding(embedding)
            embeddings.add(final_embedding)

    print("embeddings: ", embeddings)
    return len(embeddings)

# Process all .pkl files in the current directory
for filepath in glob.glob("../data/BREC/Regular/clo_G_Regular_dataset*.pkl"):
    print(f"------------- Processing {filepath}...")

    # Load graphs to determine input_dim
    graphs = load_graphs_from_file(filepath)
    if 'x' in graphs[0].ndata:
        input_dim = graphs[0].ndata['x'].size(1)  # Determine the input dimension dynamically
    else:
        # If 'h' does not exist, assume a default input dimension
        input_dim = 1

    # Initialize the models
    gin_model = GIN(input_dim, hidden_dim=32, out_dim=8, num_layers=3)
    pna_model = PNA(input_dim, hidden_dim=32, out_dim=8, num_layers=3,
                    aggregators=['mean', 'max', 'sum', 'min', 'std'],
                    scalers=['identity', 'amplification', 'attenuation'],
                    deg=torch.tensor([1.0]))  # Example degree tensor
    

    # Compute the number of unique embeddings using GIN
    print("Computing equivalence classes using GIN model...")
    num_unique_embeddings_gin = compute_equivalence_classes(filepath, gin_model, input_dim)
    print(f"Number of unique embeddings with GIN: {num_unique_embeddings_gin}\n")

    # Compute the number of unique embeddings using PNA
    print("Computing equivalence classes using PNA model...")
    num_unique_embeddings_pna = compute_equivalence_classes(filepath, pna_model, input_dim)
    print(f"Number of unique embeddings with PNA: {num_unique_embeddings_pna}\n")



------------- Processing ../data/BREC/Regular/clo_G_Regular_dataset.pkl...
Computing equivalence classes using GIN model...
embeddings:  {546506273467430400, 546506219780339200, 540371822420748800, 1367498087771328000, 540371929794995200, 540371822420684800, 1367498195145574400, 1367498517268185600, 1367498409893939200, 1367498517268121600, 1069076997260595200, 269808283683795200, 4059200909006336000, 27824205796147916800, 244518801976729600, 1808664590115020800, 31402125088682496000, 269601212234764800, 265790822618432000, 40126186917251276800, 29609508717778944000, 927094773181158400, 927094665806976000, 549422665023820800, 167660496609817600, 163671837837747200, 555292760665625600, 1069077212009088000, 267350004044518400, 543591116708697600, 265017754887001600, 927094451058547200, 1033411244425984000, 1069077212009024000, 927094558432729600, 428064593520704000, 540371876107840000, 555292599604288000, 552351565672716800, 543591277770099200, 428064620364249600, 265790634713516800, 543

## Distinguishing Test

In [156]:
import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn import GINConv, SumPooling, PNAConv
import networkx as nx
import random
import pickle
import numpy as np
import glob

# Define a GIN model
class GIN(nn.Module):
    def __init__(self, in_feats, hidden_dim, out_dim, num_layers):
        super(GIN, self).__init__()
        self.in_feats = in_feats
        self.layers = nn.ModuleList()
        self.batch_norms = nn.ModuleList()

        # Input layer
        self.layers.append(GINConv(
            nn.Sequential(
                nn.Linear(in_feats, hidden_dim),
                nn.ReLU(),
                nn.Linear(hidden_dim, hidden_dim)
            ), 'sum'))
        self.batch_norms.append(nn.BatchNorm1d(hidden_dim))

        # Hidden layers
        for _ in range(num_layers - 2):
            self.layers.append(GINConv(
                nn.Sequential(
                    nn.Linear(hidden_dim, hidden_dim),
                    nn.ReLU(),
                    nn.Linear(hidden_dim, hidden_dim)
                ), 'sum'))
            self.batch_norms.append(nn.BatchNorm1d(hidden_dim))

        # Output layer
        self.layers.append(GINConv(
            nn.Sequential(
                nn.Linear(hidden_dim, out_dim),
                nn.ReLU(),
                nn.Linear(out_dim, out_dim)
            ), 'sum'))
        self.batch_norms.append(nn.BatchNorm1d(out_dim))

        # Pooling layer
        self.pool = SumPooling()

    def forward(self, g, h):
        h = torch.round(h * 100) / 100
        for layer, batch_norm in zip(self.layers, self.batch_norms):
            h = layer(g, h)
            h = batch_norm(h)
            h = F.relu(h)
        g_embedding = self.pool(g, h)
        return g_embedding

# Define a PNA model
class PNA(nn.Module):
    def __init__(self, in_feats, hidden_dim, out_dim, num_layers, aggregators, scalers, deg):
        super(PNA, self).__init__()
        self.in_feats = in_feats
        self.layers = nn.ModuleList()

        # Input layer
        self.layers.append(PNAConv(in_feats, hidden_dim, aggregators, scalers, deg))

        # Hidden layers
        for _ in range(num_layers - 2):
            self.layers.append(PNAConv(hidden_dim, hidden_dim, aggregators, scalers, deg))

        # Output layer
        self.layers.append(PNAConv(hidden_dim, out_dim, aggregators, scalers, deg))

        # Pooling layer
        self.pool = SumPooling()

    def forward(self, g, h):
        h = torch.round(h * 100) / 100
        for layer in self.layers:
            h = layer(g, h)
            h = F.relu(h)
        g_embedding = self.pool(g, h)
        return g_embedding



for filepath in glob.glob("../data/BREC/Regular/clo_G_Regular_dataset_dummy.pkl"):
    print(f"------------- Processing {filepath}...")

    # Load graphs to determine input_dim
    graphs = load_graphs_from_file(filepath)

    non_isomorphic_pairs, isomorphic_pairs = organize_pairs(graphs, G_Regular_original_indices)

    if 'x' in graphs[0].ndata:
        input_dim = graphs[0].ndata['x'].size(1)  # Determine the input dimension dynamically
    else:
        # If 'x' does not exist, assume a default input dimension
        input_dim = 1


    # Initialize the models
    gin_model = GIN(input_dim, hidden_dim=16, out_dim=8, num_layers=4).to(device)
    pna_model = PNA(input_dim, hidden_dim=16, out_dim=8, num_layers=4,
                    aggregators=['mean', 'max', 'sum', 'min', 'std'],
                    scalers=['identity', 'amplification', 'attenuation'],
                    deg=torch.tensor([1.0]).to(device)).to(device)  # Example degree tensor

    # Compute the number of unique embeddings using PNA
    print("Using PNA model...")
    non_isomorphic_different_count, isomorphic_same_count, isomorphic_different_count = evaluate_model_with_pairs(non_isomorphic_pairs, isomorphic_pairs, pna_model, input_dim)

    # Compute the number of unique embeddings using GIN
    print("Using GIN model...")
    non_isomorphic_different_count, isomorphic_same_count, isomorphic_different_count = evaluate_model_with_pairs(non_isomorphic_pairs, isomorphic_pairs, gin_model, input_dim)



------------- Processing ../data/BREC/Regular/clo_G_Regular_dataset_dummy.pkl...
Using PNA model...
Epoch 1/100, Loss: 1.4149
Epoch 2/100, Loss: 1.3382
Epoch 3/100, Loss: 1.3016
Epoch 4/100, Loss: 1.0251
Epoch 5/100, Loss: 0.8888
Epoch 6/100, Loss: 0.9436
Epoch 7/100, Loss: 0.7803
Epoch 8/100, Loss: 0.9934
Epoch 9/100, Loss: 1.3079
Epoch 10/100, Loss: 1.3109
Epoch 11/100, Loss: 1.2611
Epoch 12/100, Loss: 1.2082
Epoch 13/100, Loss: 1.1617
Epoch 14/100, Loss: 1.1616
Epoch 15/100, Loss: 1.2800
Epoch 16/100, Loss: 1.0518
Epoch 17/100, Loss: 1.1451
Epoch 18/100, Loss: 1.2377
Epoch 19/100, Loss: 1.1764
Epoch 20/100, Loss: 1.0332
Epoch 21/100, Loss: 1.0327
Epoch 22/100, Loss: 1.2446
Epoch 23/100, Loss: 0.9698
Epoch 24/100, Loss: 0.9159
Epoch 25/100, Loss: 0.9373
Epoch 26/100, Loss: 0.9605
Epoch 27/100, Loss: 1.0332
Epoch 28/100, Loss: 1.3650
Epoch 29/100, Loss: 1.3216
Epoch 30/100, Loss: 1.3620
Epoch 31/100, Loss: 1.1598
Epoch 32/100, Loss: 1.1615
Epoch 33/100, Loss: 1.3513
Epoch 34/100, Loss

# BREC-Extension

## Original Extension

In [None]:
import dgl
from torch_geometric.utils import to_dgl, from_dgl

G_Extension_dgl_graphs = []
for G in G_Extension:
    dgl_graph = dgl.from_networkx(G)
    dgl_graph.ndata['x'] = torch.ones((G.number_of_nodes(), 1), dtype=torch.float32)
    G_Extension_dgl_graphs.append(dgl_graph)


# Step 4: Save the list of DGL graphs using pickling
output_file = '../data/BREC/Extension/G_Extension_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(G_Extension_dgl_graphs, f)

print(f"Converted {len(G_Extension_dgl_graphs)} graphs to DGL format and saved to {output_file}.")

## Extension dummy with isomorphic pairs

In [None]:
G_Extension_dgl_graphs, G_Extension_isomorphic_pair, G_Extension_original_indices = add_isomorphic_pairs_dgl(G_Extension_dgl_graphs, num_pairs=100)
G_Extension_dummy_dgl = G_Extension_dgl_graphs + G_Extension_isomorphic_pair

output_file = '../data/BREC/Extension/G_Extension_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(G_Extension_dummy_dgl, f)

print(f"Converted {len(G_Extension_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")

## VN on Extension original and Extension dummy

In [None]:
vn_G_Extension_dgl = apply_vn(G_Extension_dgl_graphs)
output_file = '../data/BREC/Extension/vn_G_Extension_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(vn_G_Extension_dgl, f)

print(f"Converted {len(vn_G_Extension_dgl)} graphs to DGL format and saved to {output_file}.")

vn_G_Extension_dummy_dgl = apply_vn(G_Extension_dummy_dgl)
output_file = '../data/BREC/Extension/vn_G_Extension_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(vn_G_Extension_dummy_dgl, f)

print(f"Converted {len(vn_G_Extension_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Degree Centrality on Extension original and Extension dummy

In [None]:
deg_G_Extension_dgl = degree_dataset(G_Extension_dgl_graphs)
output_file = '../data/BREC/Extension/deg_G_Extension_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(deg_G_Extension_dgl, f)

print(f"Converted {len(deg_G_Extension_dgl)} graphs to DGL format and saved to {output_file}.")

deg_G_Extension_dummy_dgl = degree_dataset(G_Extension_dummy_dgl)
output_file = '../data/BREC/Extension/deg_G_Extension_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(deg_G_Extension_dummy_dgl, f)

print(f"Converted {len(deg_G_Extension_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Closeness Centrality on Extension original and Extension dummy

In [160]:
clo_G_Extension_dgl = closeness_dataset(G_Extension_dgl_graphs)
output_file = '../data/BREC/Extension/clo_G_Extension_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(clo_G_Extension_dgl, f)

print(f"Converted {len(clo_G_Extension_dgl)} graphs to DGL format and saved to {output_file}.")

clo_G_Extension_dummy_dgl = closeness_dataset(G_Extension_dummy_dgl)
output_file = '../data/BREC/Extension/clo_G_Extension_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(clo_G_Extension_dummy_dgl, f)

print(f"Converted {len(clo_G_Extension_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


Converted 200 graphs to DGL format and saved to ../data/BREC/Extension/clo_G_Extension_dataset.pkl.
Converted 300 graphs (including isomorphisms) to DGL format and saved to ../data/BREC/Extension/clo_G_Extension_dataset_dummy.pkl.


## Betweenness Centrality on Extension original and Extension dummy

In [None]:
bet_G_Extension_dgl = betweenness_dataset(G_Extension_dgl_graphs)
output_file = '../data/BREC/Extension/bet_G_Extension_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(bet_G_Extension_dgl, f)

print(f"Converted {len(bet_G_Extension_dgl)} graphs to DGL format and saved to {output_file}.")

bet_G_Extension_dummy_dgl = betweenness_dataset(G_Extension_dummy_dgl)
output_file = '../data/BREC/Extension/bet_G_Extension_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(bet_G_Extension_dummy_dgl, f)

print(f"Converted {len(bet_G_Extension_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Eigenvector Centrality on Extension original and Extension dummy

In [None]:
eig_G_Extension_dgl = eigenvector_dataset(G_Extension_dgl_graphs)
output_file = '../data/BREC/Extension/eig_G_Extension_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(eig_G_Extension_dgl, f)

print(f"Converted {len(eig_G_Extension_dgl)} graphs to DGL format and saved to {output_file}.")

eig_G_Extension_dummy_dgl = eigenvector_dataset(G_Extension_dummy_dgl)
output_file = '../data/BREC/Extension/eig_G_Extension_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(eig_G_Extension_dummy_dgl, f)

print(f"Converted {len(eig_G_Extension_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Distance Encoding on Extension original and Extension dummy

In [None]:
DE_G_Extension_dgl = distance_encoding(G_Extension_dgl_graphs)
output_file = '../data/BREC/Extension/DE_G_Extension_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(DE_G_Extension_dgl, f)

print(f"Converted {len(DE_G_Extension_dgl)} graphs to DGL format and saved to {output_file}.")

DE_G_Extension_dummy_dgl = distance_encoding(G_Extension_dummy_dgl)
output_file = '../data/BREC/Extension/DE_G_Extension_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(DE_G_Extension_dummy_dgl, f)

print(f"Converted {len(DE_G_Extension_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Graph Encoding on Extension original and Extension dummy

In [None]:
GE_G_Extension_dgl = Graph_encoding(G_Extension_dgl_graphs, k=3)
output_file = '../data/BREC/Extension/GE_G_Extension_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(GE_G_Extension_dgl, f)

print(f"Converted {len(GE_G_Extension_dgl)} graphs to DGL format and saved to {output_file}.")

GE_G_Extension_dummy_dgl = Graph_encoding(G_Extension_dummy_dgl, k=3)
output_file = '../data/BREC/Extension/GE_G_Extension_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(GE_G_Extension_dummy_dgl, f)

print(f"Converted {len(GE_G_Extension_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Subgraph Extraction on Extension original and Extension dummy

In [None]:
SE_G_Extension_dgl = subgraph_dataset(G_Extension_dgl_graphs, radius=3)
output_file = '../data/BREC/Extension/SE_G_Extension_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(SE_G_Extension_dgl, f)

print(f"Converted {len(SE_G_Extension_dgl)} graphs to DGL format and saved to {output_file}.")

SE_G_Extension_dummy_dgl = subgraph_dataset(G_Extension_dummy_dgl, radius=3)
output_file = '../data/BREC/Extension/SE_G_Extension_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(SE_G_Extension_dummy_dgl, f)

print(f"Converted {len(SE_G_Extension_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Extra Node on Extension original and Extension dummy

In [None]:
exN_G_Extension_dgl = extra_node_dataset(G_Extension_dgl_graphs)
output_file = '../data/BREC/Extension/exN_G_Extension_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(exN_G_Extension_dgl, f)

print(f"Converted {len(exN_G_Extension_dgl)} graphs to DGL format and saved to {output_file}.")

exN_G_Extension_dummy_dgl = extra_node_dataset(G_Extension_dummy_dgl)
output_file = '../data/BREC/Extension/exN_G_Extension_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(exN_G_Extension_dummy_dgl, f)

print(f"Converted {len(exN_G_Extension_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Graphlet-Based Encoding on Extension original and Extension dummy

In [None]:
gle_G_Extension_dgl = graphlet_encoding_dataset(G_Extension_dgl_graphs)
output_file = '../data/BREC/Extension/gle_G_Extension_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(gle_G_Extension_dgl, f)

print(f"Converted {len(gle_G_Extension_dgl)} graphs to DGL format and saved to {output_file}.")

gle_G_Extension_dummy_dgl = graphlet_encoding_dataset(G_Extension_dummy_dgl)
output_file = '../data/BREC/Extension/gle_G_Extension_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(gle_G_Extension_dummy_dgl, f)

print(f"Converted {len(gle_G_Extension_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Equivalence Class

In [161]:
import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn import GINConv, SumPooling, PNAConv
import networkx as nx
import random
import pickle
import numpy as np
import glob

# Define a GIN model
class GIN(nn.Module):
    def __init__(self, in_feats, hidden_dim, out_dim, num_layers):
        super(GIN, self).__init__()
        self.in_feats = in_feats
        self.layers = nn.ModuleList()
        self.batch_norms = nn.ModuleList()

        # Input layer
        self.layers.append(GINConv(
            nn.Sequential(
                nn.Linear(in_feats, hidden_dim),
                nn.ReLU(),
                nn.Linear(hidden_dim, hidden_dim)
            ), 'sum'))
        self.batch_norms.append(nn.BatchNorm1d(hidden_dim))

        # Hidden layers
        for _ in range(num_layers - 2):
            self.layers.append(GINConv(
                nn.Sequential(
                    nn.Linear(hidden_dim, hidden_dim),
                    nn.ReLU(),
                    nn.Linear(hidden_dim, hidden_dim)
                ), 'sum'))
            self.batch_norms.append(nn.BatchNorm1d(hidden_dim))

        # Output layer
        self.layers.append(GINConv(
            nn.Sequential(
                nn.Linear(hidden_dim, out_dim),
                nn.ReLU(),
                nn.Linear(out_dim, out_dim)
            ), 'sum'))
        self.batch_norms.append(nn.BatchNorm1d(out_dim))

        # Pooling layer
        self.pool = SumPooling()

    def forward(self, g, h):
        h = torch.round(h * 100) / 100
        for layer, batch_norm in zip(self.layers, self.batch_norms):
            h = layer(g, h)
            h = batch_norm(h)
            h = F.relu(h)
        g_embedding = self.pool(g, h)
        return g_embedding

# Define a PNA model
class PNA(nn.Module):
    def __init__(self, in_feats, hidden_dim, out_dim, num_layers, aggregators, scalers, deg):
        super(PNA, self).__init__()
        self.in_feats = in_feats
        self.layers = nn.ModuleList()

        # Input layer
        self.layers.append(PNAConv(in_feats, hidden_dim, aggregators, scalers, deg))

        # Hidden layers
        for _ in range(num_layers - 2):
            self.layers.append(PNAConv(hidden_dim, hidden_dim, aggregators, scalers, deg))

        # Output layer
        self.layers.append(PNAConv(hidden_dim, out_dim, aggregators, scalers, deg))

        # Pooling layer
        self.pool = SumPooling()

    def forward(self, g, h):
        h = torch.round(h * 100) / 100
        for layer in self.layers:
            h = layer(g, h)
            h = F.relu(h)
        g_embedding = self.pool(g, h)
        return g_embedding



# Calculate the final integer embedding
def calculate_integer_embedding(embedding):
    sum_x = embedding.sum().item()

    # Handle variance calculation only if there's more than one element
    if embedding.numel() > 1:
        var_x = embedding.var().item()
    else:
        var_x = 0.0  # Set variance to 0 if there's only one element

    min_x = embedding.min().item()

    # Handle NaN variance by setting it to 0
    if np.isnan(var_x):
        var_x = 0.0

    final_embedding = int((sum_x * 100 + var_x * 10 + min_x * 10) * 10)
    return final_embedding


# Save graphs to a file
def save_graphs_to_file(graphs, filepath):
    with open(filepath, 'wb') as f:
        pickle.dump(graphs, f)

# Load graphs from a file
def load_graphs_from_file(filepath):
    with open(filepath, 'rb') as f:
        graphs = pickle.load(f)
    return graphs

# Compute equivalence classes
def compute_equivalence_classes(filepath, model, input_dim):
    graphs = load_graphs_from_file(filepath)

    # Train the model for a single epoch with a random target
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
    model.train()
    target_seed = 100
    for g in graphs:
        # Ensure that 'h' exists
        if 'x' not in g.ndata:
            g.ndata['x'] = torch.ones(g.number_of_nodes(), input_dim)  # Initialize with ones if 'h' is missing
        optimizer.zero_grad()

        output = model(g, g.ndata['x'])  # The output is a tensor of shape (1, out_dim)
        target = torch.randint(0, 2, output.shape)  # Target is a random tensor of the same shape as output

        loss = F.binary_cross_entropy_with_logits(output, target.float())  # Ensure the target is a float tensor
        loss.backward()
        optimizer.step()
        
    embeddings = set()

    with torch.no_grad():
        model.eval()
        for g in graphs:
            if 'x' not in g.ndata:
                g.ndata['x'] = torch.ones(g.number_of_nodes(), input_dim)  # Initialize with ones if 'h' is missing
            embedding = model(g, g.ndata['x'])
            final_embedding = calculate_integer_embedding(embedding)
            embeddings.add(final_embedding)

    print("embeddings: ", embeddings)
    return len(embeddings)

# Process all .pkl files in the current directory
for filepath in glob.glob("../data/BREC/Extension/clo_G_Extension_dataset*.pkl"):
    print(f"------------- Processing {filepath}...")

    # Load graphs to determine input_dim
    graphs = load_graphs_from_file(filepath)
    if 'x' in graphs[0].ndata:
        input_dim = graphs[0].ndata['x'].size(1)  # Determine the input dimension dynamically
    else:
        # If 'h' does not exist, assume a default input dimension
        input_dim = 1

    # Initialize the models
    gin_model = GIN(input_dim, hidden_dim=32, out_dim=8, num_layers=3)
    pna_model = PNA(input_dim, hidden_dim=32, out_dim=8, num_layers=3,
                    aggregators=['mean', 'max', 'sum', 'min', 'std'],
                    scalers=['identity', 'amplification', 'attenuation'],
                    deg=torch.tensor([1.0]))  # Example degree tensor
    

    # Compute the number of unique embeddings using GIN
    print("Computing equivalence classes using GIN model...")
    num_unique_embeddings_gin = compute_equivalence_classes(filepath, gin_model, input_dim)
    print(f"Number of unique embeddings with GIN: {num_unique_embeddings_gin}\n")

    # Compute the number of unique embeddings using PNA
    print("Computing equivalence classes using PNA model...")
    num_unique_embeddings_pna = compute_equivalence_classes(filepath, pna_model, input_dim)
    print(f"Number of unique embeddings with PNA: {num_unique_embeddings_pna}\n")



------------- Processing ../data/BREC/Extension/clo_G_Extension_dataset.pkl...
Computing equivalence classes using GIN model...
embeddings:  {316657155, 3524102, 42123787, 22907411, 115643923, 306744339, 80503833, 190399513, 65968161, 836643, 34625062, 27870250, 389835822, 57462318, 3729966, 543053362, 526702645, 11643958, 3651127, 62354492, 27599425, 109444674, 310498883, 241303620, 62354501, 2294337, 2294338, 428118090, 127931470, 98687058, 29766740, 288953943, 30848603, 113158748, 5520993, 62001761, 25307235, 69026916, 455704682, 69026922, 2989164, 15357037, 15357038, 376102509, 149770349, 402031217, 66415221, 408118391, 6759550072, 4131448, 4131449, 739460, 149747845, 201846923, 45276813, 1379982, 75952274, 35378840, 35378841, 467768475, 148230811, 18454688, 1434273, 134315169, 18454691, 3832493, 10620078, 267394223, 148230837, 246704316, 285914303, 4445377, 745222851, 4683979, 4683983, 26802901, 5152984, 40376541, 1103399648, 1089574640, 114892533, 479650567, 627739911, 12122887, 

## Distinguishing Test

In [162]:
import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn import GINConv, SumPooling, PNAConv
import networkx as nx
import random
import pickle
import numpy as np
import glob

# Define a GIN model
class GIN(nn.Module):
    def __init__(self, in_feats, hidden_dim, out_dim, num_layers):
        super(GIN, self).__init__()
        self.in_feats = in_feats
        self.layers = nn.ModuleList()
        self.batch_norms = nn.ModuleList()

        # Input layer
        self.layers.append(GINConv(
            nn.Sequential(
                nn.Linear(in_feats, hidden_dim),
                nn.ReLU(),
                nn.Linear(hidden_dim, hidden_dim)
            ), 'sum'))
        self.batch_norms.append(nn.BatchNorm1d(hidden_dim))

        # Hidden layers
        for _ in range(num_layers - 2):
            self.layers.append(GINConv(
                nn.Sequential(
                    nn.Linear(hidden_dim, hidden_dim),
                    nn.ReLU(),
                    nn.Linear(hidden_dim, hidden_dim)
                ), 'sum'))
            self.batch_norms.append(nn.BatchNorm1d(hidden_dim))

        # Output layer
        self.layers.append(GINConv(
            nn.Sequential(
                nn.Linear(hidden_dim, out_dim),
                nn.ReLU(),
                nn.Linear(out_dim, out_dim)
            ), 'sum'))
        self.batch_norms.append(nn.BatchNorm1d(out_dim))

        # Pooling layer
        self.pool = SumPooling()

    def forward(self, g, h):
        h = torch.round(h * 100) / 100
        for layer, batch_norm in zip(self.layers, self.batch_norms):
            h = layer(g, h)
            h = batch_norm(h)
            h = F.relu(h)
        g_embedding = self.pool(g, h)
        return g_embedding

# Define a PNA model
class PNA(nn.Module):
    def __init__(self, in_feats, hidden_dim, out_dim, num_layers, aggregators, scalers, deg):
        super(PNA, self).__init__()
        self.in_feats = in_feats
        self.layers = nn.ModuleList()

        # Input layer
        self.layers.append(PNAConv(in_feats, hidden_dim, aggregators, scalers, deg))

        # Hidden layers
        for _ in range(num_layers - 2):
            self.layers.append(PNAConv(hidden_dim, hidden_dim, aggregators, scalers, deg))

        # Output layer
        self.layers.append(PNAConv(hidden_dim, out_dim, aggregators, scalers, deg))

        # Pooling layer
        self.pool = SumPooling()

    def forward(self, g, h):
        h = torch.round(h * 100) / 100
        for layer in self.layers:
            h = layer(g, h)
            h = F.relu(h)
        g_embedding = self.pool(g, h)
        return g_embedding



for filepath in glob.glob("../data/BREC/Extension/clo_G_Extension_dataset_dummy.pkl"):
    print(f"------------- Processing {filepath}...")

    # Load graphs to determine input_dim
    graphs = load_graphs_from_file(filepath)

    non_isomorphic_pairs, isomorphic_pairs = organize_pairs(graphs, G_Extension_original_indices)

    if 'x' in graphs[0].ndata:
        input_dim = graphs[0].ndata['x'].size(1)  # Determine the input dimension dynamically
    else:
        # If 'x' does not exist, assume a default input dimension
        input_dim = 1


    # Initialize the models
    gin_model = GIN(input_dim, hidden_dim=16, out_dim=8, num_layers=4).to(device)
    pna_model = PNA(input_dim, hidden_dim=16, out_dim=8, num_layers=4,
                    aggregators=['mean', 'max', 'sum', 'min', 'std'],
                    scalers=['identity', 'amplification', 'attenuation'],
                    deg=torch.tensor([1.0]).to(device)).to(device)  # Example degree tensor

    # Compute the number of unique embeddings using GIN
    print("Using GIN model...")
    non_isomorphic_different_count, isomorphic_same_count, isomorphic_different_count = evaluate_model_with_pairs(non_isomorphic_pairs, isomorphic_pairs, gin_model, input_dim)

    # Compute the number of unique embeddings using PNA
    print("Using PNA model...")
    non_isomorphic_different_count, isomorphic_same_count, isomorphic_different_count = evaluate_model_with_pairs(non_isomorphic_pairs, isomorphic_pairs, pna_model, input_dim)


------------- Processing ../data/BREC/Extension/clo_G_Extension_dataset_dummy.pkl...
Using GIN model...
Epoch 1/100, Loss: 0.3370
Epoch 2/100, Loss: 0.4768
Epoch 3/100, Loss: 0.3297
Epoch 4/100, Loss: 0.3003
Epoch 5/100, Loss: 0.2898
Epoch 6/100, Loss: 0.3188
Epoch 7/100, Loss: 0.3116
Epoch 8/100, Loss: 0.3287
Epoch 9/100, Loss: 0.2343
Epoch 10/100, Loss: 0.4425
Epoch 11/100, Loss: 0.2793
Epoch 12/100, Loss: 0.2280
Epoch 13/100, Loss: 0.2268
Epoch 14/100, Loss: 0.2006
Epoch 15/100, Loss: 0.1600
Epoch 16/100, Loss: 0.2388
Epoch 17/100, Loss: 0.0951
Epoch 18/100, Loss: 0.1597
Epoch 19/100, Loss: 0.3098
Epoch 20/100, Loss: 0.1525
Epoch 21/100, Loss: 0.1315
Epoch 22/100, Loss: 0.1272
Epoch 23/100, Loss: 0.1365
Epoch 24/100, Loss: 0.0771
Epoch 25/100, Loss: 0.1878
Epoch 26/100, Loss: 0.0998
Epoch 27/100, Loss: 0.1096
Epoch 28/100, Loss: 0.0872
Epoch 29/100, Loss: 0.1406
Epoch 30/100, Loss: 0.0898
Epoch 31/100, Loss: 0.0723
Epoch 32/100, Loss: 0.0944
Epoch 33/100, Loss: 0.1563
Epoch 34/100, 

# BREC-CFI

## Original CFI

In [None]:
import dgl
from torch_geometric.utils import to_dgl, from_dgl

G_CFI_dgl_graphs = []
for G in G_CFI:
    dgl_graph = dgl.from_networkx(G)
    dgl_graph.ndata['x'] = torch.ones((G.number_of_nodes(), 1), dtype=torch.float32)
    G_CFI_dgl_graphs.append(dgl_graph)


# Step 4: Save the list of DGL graphs using pickling
output_file = '../data/BREC/CFI/G_CFI_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(G_CFI_dgl_graphs, f)

print(f"Converted {len(G_CFI_dgl_graphs)} graphs to DGL format and saved to {output_file}.")

## CFI dummy with isomorphic pairs

In [None]:
G_CFI_dgl_graphs, G_CFI_isomorphic_pair, G_CFI_original_indices = add_isomorphic_pairs_dgl(G_CFI_dgl_graphs, num_pairs=100)
G_CFI_dummy_dgl = G_CFI_dgl_graphs + G_CFI_isomorphic_pair

output_file = '../data/BREC/CFI/G_CFI_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(G_CFI_dummy_dgl, f)

print(f"Converted {len(G_CFI_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")

## VN on CFI original and CFI dummy

In [None]:
vn_G_CFI_dgl = apply_vn(G_CFI_dgl_graphs)
output_file = '../data/BREC/CFI/vn_G_CFI_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(vn_G_CFI_dgl, f)

print(f"Converted {len(vn_G_CFI_dgl)} graphs to DGL format and saved to {output_file}.")

vn_G_CFI_dummy_dgl = apply_vn(G_CFI_dummy_dgl)
output_file = '../data/BREC/CFI/vn_G_CFI_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(vn_G_CFI_dummy_dgl, f)

print(f"Converted {len(vn_G_CFI_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Degree Centrality on CFI original and CFI dummy

In [None]:
deg_G_CFI_dgl = degree_dataset(G_CFI_dgl_graphs)
output_file = '../data/BREC/CFI/deg_G_CFI_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(deg_G_CFI_dgl, f)

print(f"Converted {len(deg_G_CFI_dgl)} graphs to DGL format and saved to {output_file}.")

deg_G_CFI_dummy_dgl = degree_dataset(G_CFI_dummy_dgl)
output_file = '../data/BREC/CFI/deg_G_CFI_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(deg_G_CFI_dummy_dgl, f)

print(f"Converted {len(deg_G_CFI_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Closeness Centrality on CFI original and CFI dummy

In [163]:
clo_G_CFI_dgl = closeness_dataset(G_CFI_dgl_graphs)
output_file = '../data/BREC/CFI/clo_G_CFI_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(clo_G_CFI_dgl, f)

print(f"Converted {len(clo_G_CFI_dgl)} graphs to DGL format and saved to {output_file}.")

clo_G_CFI_dummy_dgl = closeness_dataset(G_CFI_dummy_dgl)
output_file = '../data/BREC/CFI/clo_G_CFI_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(clo_G_CFI_dummy_dgl, f)

print(f"Converted {len(clo_G_CFI_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


Converted 200 graphs to DGL format and saved to ../data/BREC/CFI/clo_G_CFI_dataset.pkl.
Converted 300 graphs (including isomorphisms) to DGL format and saved to ../data/BREC/CFI/clo_G_CFI_dataset_dummy.pkl.


## Betweenness Centrality on CFI original and CFI dummy

In [None]:
bet_G_CFI_dgl = betweenness_dataset(G_CFI_dgl_graphs)
output_file = '../data/BREC/CFI/bet_G_CFI_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(bet_G_CFI_dgl, f)

print(f"Converted {len(bet_G_CFI_dgl)} graphs to DGL format and saved to {output_file}.")

bet_G_CFI_dummy_dgl = betweenness_dataset(G_CFI_dummy_dgl)
output_file = '../data/BREC/CFI/bet_G_CFI_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(bet_G_CFI_dummy_dgl, f)

print(f"Converted {len(bet_G_CFI_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Eigenvector Centrality on CFI original and CFI dummy

In [None]:
eig_G_CFI_dgl = eigenvector_dataset(G_CFI_dgl_graphs)
output_file = '../data/BREC/CFI/eig_G_CFI_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(eig_G_CFI_dgl, f)

print(f"Converted {len(eig_G_CFI_dgl)} graphs to DGL format and saved to {output_file}.")

eig_G_CFI_dummy_dgl = eigenvector_dataset(G_CFI_dummy_dgl)
output_file = '../data/BREC/CFI/eig_G_CFI_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(eig_G_CFI_dummy_dgl, f)

print(f"Converted {len(eig_G_CFI_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Distance Encoding on CFI original and CFI dummy

In [None]:
DE_G_CFI_dgl = distance_encoding(G_CFI_dgl_graphs)
output_file = '../data/BREC/CFI/DE_G_CFI_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(DE_G_CFI_dgl, f)

print(f"Converted {len(DE_G_CFI_dgl)} graphs to DGL format and saved to {output_file}.")

DE_G_CFI_dummy_dgl = distance_encoding(G_CFI_dummy_dgl)
output_file = '../data/BREC/CFI/DE_G_CFI_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(DE_G_CFI_dummy_dgl, f)

print(f"Converted {len(DE_G_CFI_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Graph Encoding on CFI original and CFI dummy

In [None]:
GE_G_CFI_dgl = Graph_encoding(G_CFI_dgl_graphs, k=3)
output_file = '../data/BREC/CFI/GE_G_CFI_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(GE_G_CFI_dgl, f)

print(f"Converted {len(GE_G_CFI_dgl)} graphs to DGL format and saved to {output_file}.")

GE_G_CFI_dummy_dgl = Graph_encoding(G_CFI_dummy_dgl, k=3)
output_file = '../data/BREC/CFI/GE_G_CFI_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(GE_G_CFI_dummy_dgl, f)

print(f"Converted {len(GE_G_CFI_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Subgraph Extraction on CFI original and CFI dummy

In [None]:
SE_G_CFI_dgl = subgraph_dataset(G_CFI_dgl_graphs, radius=3)
output_file = '../data/BREC/CFI/SE_G_CFI_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(SE_G_CFI_dgl, f)

print(f"Converted {len(SE_G_CFI_dgl)} graphs to DGL format and saved to {output_file}.")

SE_G_CFI_dummy_dgl = subgraph_dataset(G_CFI_dummy_dgl, radius=3)
output_file = '../data/BREC/CFI/SE_G_CFI_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(SE_G_CFI_dummy_dgl, f)

print(f"Converted {len(SE_G_CFI_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Extra Node on CFI original and CFI dummy

In [None]:
exN_G_CFI_dgl = extra_node_dataset(G_CFI_dgl_graphs)
output_file = '../data/BREC/CFI/exN_G_CFI_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(exN_G_CFI_dgl, f)

print(f"Converted {len(exN_G_CFI_dgl)} graphs to DGL format and saved to {output_file}.")

exN_G_CFI_dummy_dgl = extra_node_dataset(G_CFI_dummy_dgl)
output_file = '../data/BREC/CFI/exN_G_CFI_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(exN_G_CFI_dummy_dgl, f)

print(f"Converted {len(exN_G_CFI_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Graphlet-Based Encoding on CFI original and CFI dummy

In [None]:
gle_G_CFI_dgl = graphlet_encoding_dataset(G_CFI_dgl_graphs)
output_file = '../data/BREC/CFI/gle_G_CFI_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(gle_G_CFI_dgl, f)

print(f"Converted {len(gle_G_CFI_dgl)} graphs to DGL format and saved to {output_file}.")

gle_G_CFI_dummy_dgl = graphlet_encoding_dataset(G_CFI_dummy_dgl)
output_file = '../data/BREC/CFI/gle_G_CFI_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(gle_G_CFI_dummy_dgl, f)

print(f"Converted {len(gle_G_CFI_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Equivalence Class

In [164]:
import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn import GINConv, SumPooling, PNAConv
import networkx as nx
import random
import pickle
import numpy as np
import glob

# Define a GIN model
class GIN(nn.Module):
    def __init__(self, in_feats, hidden_dim, out_dim, num_layers):
        super(GIN, self).__init__()
        self.in_feats = in_feats
        self.layers = nn.ModuleList()
        self.batch_norms = nn.ModuleList()

        # Input layer
        self.layers.append(GINConv(
            nn.Sequential(
                nn.Linear(in_feats, hidden_dim),
                nn.ReLU(),
                nn.Linear(hidden_dim, hidden_dim)
            ), 'sum'))
        self.batch_norms.append(nn.BatchNorm1d(hidden_dim))

        # Hidden layers
        for _ in range(num_layers - 2):
            self.layers.append(GINConv(
                nn.Sequential(
                    nn.Linear(hidden_dim, hidden_dim),
                    nn.ReLU(),
                    nn.Linear(hidden_dim, hidden_dim)
                ), 'sum'))
            self.batch_norms.append(nn.BatchNorm1d(hidden_dim))

        # Output layer
        self.layers.append(GINConv(
            nn.Sequential(
                nn.Linear(hidden_dim, out_dim),
                nn.ReLU(),
                nn.Linear(out_dim, out_dim)
            ), 'sum'))
        self.batch_norms.append(nn.BatchNorm1d(out_dim))

        # Pooling layer
        self.pool = SumPooling()

    def forward(self, g, h):
        h = torch.round(h * 100) / 100
        for layer, batch_norm in zip(self.layers, self.batch_norms):
            h = layer(g, h)
            h = batch_norm(h)
            h = F.relu(h)
        g_embedding = self.pool(g, h)
        return g_embedding

# Define a PNA model
class PNA(nn.Module):
    def __init__(self, in_feats, hidden_dim, out_dim, num_layers, aggregators, scalers, deg):
        super(PNA, self).__init__()
        self.in_feats = in_feats
        self.layers = nn.ModuleList()

        # Input layer
        self.layers.append(PNAConv(in_feats, hidden_dim, aggregators, scalers, deg))

        # Hidden layers
        for _ in range(num_layers - 2):
            self.layers.append(PNAConv(hidden_dim, hidden_dim, aggregators, scalers, deg))

        # Output layer
        self.layers.append(PNAConv(hidden_dim, out_dim, aggregators, scalers, deg))

        # Pooling layer
        self.pool = SumPooling()

    def forward(self, g, h):
        h = torch.round(h * 100) / 100
        for layer in self.layers:
            h = layer(g, h)
            h = F.relu(h)
        g_embedding = self.pool(g, h)
        return g_embedding



# Calculate the final integer embedding
def calculate_integer_embedding(embedding):
    sum_x = embedding.sum().item()

    # Handle variance calculation only if there's more than one element
    if embedding.numel() > 1:
        var_x = embedding.var().item()
    else:
        var_x = 0.0  # Set variance to 0 if there's only one element

    min_x = embedding.min().item()

    # Handle NaN variance by setting it to 0
    if np.isnan(var_x):
        var_x = 0.0

    final_embedding = int((sum_x * 100 + var_x * 10 + min_x * 10) * 10)
    return final_embedding


# Save graphs to a file
def save_graphs_to_file(graphs, filepath):
    with open(filepath, 'wb') as f:
        pickle.dump(graphs, f)

# Load graphs from a file
def load_graphs_from_file(filepath):
    with open(filepath, 'rb') as f:
        graphs = pickle.load(f)
    return graphs

# Compute equivalence classes
def compute_equivalence_classes(filepath, model, input_dim):
    graphs = load_graphs_from_file(filepath)

    # Train the model for a single epoch with a random target
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
    model.train()
    target_seed = 100
    for g in graphs:
        # Ensure that 'h' exists
        if 'x' not in g.ndata:
            g.ndata['x'] = torch.ones(g.number_of_nodes(), input_dim)  # Initialize with ones if 'h' is missing
        optimizer.zero_grad()

        output = model(g, g.ndata['x'])  # The output is a tensor of shape (1, out_dim)
        target = torch.randint(0, 2, output.shape)  # Target is a random tensor of the same shape as output

        loss = F.binary_cross_entropy_with_logits(output, target.float())  # Ensure the target is a float tensor
        loss.backward()
        optimizer.step()
        
    embeddings = set()

    with torch.no_grad():
        model.eval()
        for g in graphs:
            if 'x' not in g.ndata:
                g.ndata['x'] = torch.ones(g.number_of_nodes(), input_dim)  # Initialize with ones if 'h' is missing
            embedding = model(g, g.ndata['x'])
            final_embedding = calculate_integer_embedding(embedding)
            embeddings.add(final_embedding)

    print("embeddings: ", embeddings)
    return len(embeddings)

# Process all .pkl files in the current directory
for filepath in glob.glob("../data/BREC/CFI/clo_G_CFI_dataset*.pkl"):
    print(f"------------- Processing {filepath}...")

    # Load graphs to determine input_dim
    graphs = load_graphs_from_file(filepath)
    if 'x' in graphs[0].ndata:
        input_dim = graphs[0].ndata['x'].size(1)  # Determine the input dimension dynamically
    else:
        # If 'h' does not exist, assume a default input dimension
        input_dim = 1

    # Initialize the models
    gin_model = GIN(input_dim, hidden_dim=32, out_dim=8, num_layers=3)
    pna_model = PNA(input_dim, hidden_dim=32, out_dim=8, num_layers=3,
                    aggregators=['mean', 'max', 'sum', 'min', 'std'],
                    scalers=['identity', 'amplification', 'attenuation'],
                    deg=torch.tensor([1.0]))  # Example degree tensor
    

    # Compute the number of unique embeddings using GIN
    print("Computing equivalence classes using GIN model...")
    num_unique_embeddings_gin = compute_equivalence_classes(filepath, gin_model, input_dim)
    print(f"Number of unique embeddings with GIN: {num_unique_embeddings_gin}\n")

    # Compute the number of unique embeddings using PNA
    print("Computing equivalence classes using PNA model...")
    num_unique_embeddings_pna = compute_equivalence_classes(filepath, pna_model, input_dim)
    print(f"Number of unique embeddings with PNA: {num_unique_embeddings_pna}\n")



------------- Processing ../data/BREC/CFI/clo_G_CFI_dataset_dummy.pkl...
Computing equivalence classes using GIN model...
embeddings:  {56847363, 52558862, 59414, 549910, 738841, 35756059, 383016, 154153, 180791, 82492, 466503, 84552, 398921, 330826, 67659, 141386, 255569, 262740, 163929, 410714, 79467, 70260, 54430330, 4988539, 32961676, 159373, 90258, 188564, 129694, 276127, 37970590, 85669, 836775, 318122, 127154, 683698, 257218, 209609, 168678, 327910, 324334, 178416, 644339, 103670, 580859, 406270, 278783, 62730, 107671820, 125205, 59163, 76573, 307491, 1844515, 59177, 569131, 265006, 821555, 924472, 63808, 166218, 33661262, 642898, 107862, 34032985, 322907, 42834782, 329057, 123240, 103201652, 25975, 189818, 198010, 34686, 234880, 128896, 785794, 81284, 134799237, 578957, 215440, 145300, 144788, 59806, 32812962, 32680, 44477866, 42412, 317873, 32551353, 312763, 392636, 124864, 115137, 556483, 264647, 266188, 165841, 316882, 226258, 325597, 161246, 369123, 197604, 506854, 756209, 

## Distinguishing Test

In [165]:
import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn import GINConv, SumPooling, PNAConv
import networkx as nx
import random
import pickle
import numpy as np
import glob

# Define a GIN model
class GIN(nn.Module):
    def __init__(self, in_feats, hidden_dim, out_dim, num_layers):
        super(GIN, self).__init__()
        self.in_feats = in_feats
        self.layers = nn.ModuleList()
        self.batch_norms = nn.ModuleList()

        # Input layer
        self.layers.append(GINConv(
            nn.Sequential(
                nn.Linear(in_feats, hidden_dim),
                nn.ReLU(),
                nn.Linear(hidden_dim, hidden_dim)
            ), 'sum'))
        self.batch_norms.append(nn.BatchNorm1d(hidden_dim))

        # Hidden layers
        for _ in range(num_layers - 2):
            self.layers.append(GINConv(
                nn.Sequential(
                    nn.Linear(hidden_dim, hidden_dim),
                    nn.ReLU(),
                    nn.Linear(hidden_dim, hidden_dim)
                ), 'sum'))
            self.batch_norms.append(nn.BatchNorm1d(hidden_dim))

        # Output layer
        self.layers.append(GINConv(
            nn.Sequential(
                nn.Linear(hidden_dim, out_dim),
                nn.ReLU(),
                nn.Linear(out_dim, out_dim)
            ), 'sum'))
        self.batch_norms.append(nn.BatchNorm1d(out_dim))

        # Pooling layer
        self.pool = SumPooling()

    def forward(self, g, h):
        h = torch.round(h * 100) / 100
        for layer, batch_norm in zip(self.layers, self.batch_norms):
            h = layer(g, h)
            h = batch_norm(h)
            h = F.relu(h)
        g_embedding = self.pool(g, h)
        return g_embedding

# Define a PNA model
class PNA(nn.Module):
    def __init__(self, in_feats, hidden_dim, out_dim, num_layers, aggregators, scalers, deg):
        super(PNA, self).__init__()
        self.in_feats = in_feats
        self.layers = nn.ModuleList()

        # Input layer
        self.layers.append(PNAConv(in_feats, hidden_dim, aggregators, scalers, deg))

        # Hidden layers
        for _ in range(num_layers - 2):
            self.layers.append(PNAConv(hidden_dim, hidden_dim, aggregators, scalers, deg))

        # Output layer
        self.layers.append(PNAConv(hidden_dim, out_dim, aggregators, scalers, deg))

        # Pooling layer
        self.pool = SumPooling()

    def forward(self, g, h):
        h = torch.round(h * 100) / 100
        for layer in self.layers:
            h = layer(g, h)
            h = F.relu(h)
        g_embedding = self.pool(g, h)
        return g_embedding



for filepath in glob.glob("../data/BREC/CFI/clo_G_CFI_dataset_dummy.pkl"):
    print(f"------------- Processing {filepath}...")

    # Load graphs to determine input_dim
    graphs = load_graphs_from_file(filepath)

    non_isomorphic_pairs, isomorphic_pairs = organize_pairs(graphs, G_CFI_original_indices)

    if 'x' in graphs[0].ndata:
        input_dim = graphs[0].ndata['x'].size(1)  # Determine the input dimension dynamically
    else:
        # If 'x' does not exist, assume a default input dimension
        input_dim = 1


    # Initialize the models
    gin_model = GIN(input_dim, hidden_dim=16, out_dim=8, num_layers=4).to(device)
    pna_model = PNA(input_dim, hidden_dim=16, out_dim=8, num_layers=4,
                    aggregators=['mean', 'max', 'sum', 'min', 'std'],
                    scalers=['identity', 'amplification', 'attenuation'],
                    deg=torch.tensor([1.0]).to(device)).to(device)  # Example degree tensor

    # Compute the number of unique embeddings using GIN
    print("Using GIN model...")
    non_isomorphic_different_count, isomorphic_same_count, isomorphic_different_count = evaluate_model_with_pairs(non_isomorphic_pairs, isomorphic_pairs, gin_model, input_dim)

    # Compute the number of unique embeddings using PNA
    print("Using PNA model...")
    non_isomorphic_different_count, isomorphic_same_count, isomorphic_different_count = evaluate_model_with_pairs(non_isomorphic_pairs, isomorphic_pairs, pna_model, input_dim)


------------- Processing ../data/BREC/CFI/clo_G_CFI_dataset_dummy.pkl...
Using GIN model...
Epoch 1/100, Loss: 1.5617
Epoch 2/100, Loss: 1.1333
Epoch 3/100, Loss: 1.1500
Epoch 4/100, Loss: 1.2029
Epoch 5/100, Loss: 1.2136
Epoch 6/100, Loss: 1.0650
Epoch 7/100, Loss: 1.2400
Epoch 8/100, Loss: 1.0281
Epoch 9/100, Loss: 1.2741
Epoch 10/100, Loss: 1.0241
Epoch 11/100, Loss: 1.0919
Epoch 12/100, Loss: 1.1757
Epoch 13/100, Loss: 1.2222
Epoch 14/100, Loss: 1.2038
Epoch 15/100, Loss: 1.1877
Epoch 16/100, Loss: 0.9175
Epoch 17/100, Loss: 1.1704
Epoch 18/100, Loss: 1.1847
Epoch 19/100, Loss: 1.0567
Epoch 20/100, Loss: 1.4482
Epoch 21/100, Loss: 1.1147
Epoch 22/100, Loss: 1.0983
Epoch 23/100, Loss: 1.0925
Epoch 24/100, Loss: 1.2326
Epoch 25/100, Loss: 0.9373
Epoch 26/100, Loss: 1.2782
Epoch 27/100, Loss: 1.0371
Epoch 28/100, Loss: 1.3068
Epoch 29/100, Loss: 1.0910
Epoch 30/100, Loss: 1.0186
Epoch 31/100, Loss: 1.2213
Epoch 32/100, Loss: 1.3757
Epoch 33/100, Loss: 1.0726
Epoch 34/100, Loss: 1.1949

# BREC-4Vertex

## Original 4-Vertex

In [None]:
import dgl
from torch_geometric.utils import to_dgl, from_dgl

G_4Vertex_dgl_graphs = []
for G in G_4Vertex:
    dgl_graph = dgl.from_networkx(G)
    dgl_graph.ndata['x'] = torch.ones((G.number_of_nodes(), 1), dtype=torch.float32)
    G_4Vertex_dgl_graphs.append(dgl_graph)


# Step 4: Save the list of DGL graphs using pickling
output_file = '../data/BREC/Vertex/G_4Vertex_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(G_4Vertex_dgl_graphs, f)

print(f"Converted {len(G_4Vertex_dgl_graphs)} graphs to DGL format and saved to {output_file}.")

## 4Vertex dummy with isomorphic pairs

In [None]:
G_4Vertex_dgl_graphs, G_4Vertex_isomorphic_pair, G_4Vertex_original_indices = add_isomorphic_pairs_dgl(G_4Vertex_dgl_graphs, num_pairs=20)
G_4Vertex_dummy_dgl = G_4Vertex_dgl_graphs + G_4Vertex_isomorphic_pair

output_file = '../data/BREC/Vertex/G_4Vertex_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(G_4Vertex_dummy_dgl, f)

print(f"Converted {len(G_4Vertex_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")

## VN on 4Vertex original and CFI dummy

In [None]:
vn_G_4Vertex_dgl = apply_vn(G_4Vertex_dgl_graphs)
output_file = '../data/BREC/Vertex/vn_G_4Vertex_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(vn_G_4Vertex_dgl, f)

print(f"Converted {len(vn_G_4Vertex_dgl)} graphs to DGL format and saved to {output_file}.")

vn_G_4Vertex_dummy_dgl = apply_vn(G_4Vertex_dummy_dgl)
output_file = '../data/BREC/Vertex/vn_G_4Vertex_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(vn_G_4Vertex_dummy_dgl, f)

print(f"Converted {len(vn_G_4Vertex_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Degree Centrality on 4Vertex original and 4Vertex dummy

In [None]:
deg_G_4Vertex_dgl = degree_dataset(G_4Vertex_dgl_graphs)
output_file = '../data/BREC/Vertex/deg_G_4Vertex_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(deg_G_4Vertex_dgl, f)

print(f"Converted {len(deg_G_4Vertex_dgl)} graphs to DGL format and saved to {output_file}.")

deg_G_4Vertex_dummy_dgl = degree_dataset(G_4Vertex_dummy_dgl)
output_file = '../data/BREC/Vertex/deg_G_4Vertex_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(deg_G_4Vertex_dummy_dgl, f)

print(f"Converted {len(deg_G_4Vertex_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Closeness Centrality on 4Vertex original and 4Vertex dummy

In [166]:
clo_G_4Vertex_dgl = closeness_dataset(G_4Vertex_dgl_graphs)
output_file = '../data/BREC/Vertex/clo_G_4Vertex_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(clo_G_4Vertex_dgl, f)

print(f"Converted {len(clo_G_4Vertex_dgl)} graphs to DGL format and saved to {output_file}.")

clo_G_4Vertex_dummy_dgl = closeness_dataset(G_4Vertex_dummy_dgl)
output_file = '../data/BREC/Vertex/clo_G_4Vertex_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(clo_G_4Vertex_dummy_dgl, f)

print(f"Converted {len(clo_G_4Vertex_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


Converted 40 graphs to DGL format and saved to ../data/BREC/Vertex/clo_G_4Vertex_dataset.pkl.
Converted 60 graphs (including isomorphisms) to DGL format and saved to ../data/BREC/Vertex/clo_G_4Vertex_dataset_dummy.pkl.


## Betweenness Centrality on 4Vertex original and 4Vertex dummy

In [None]:
bet_G_4Vertex_dgl = betweenness_dataset(G_4Vertex_dgl_graphs)
output_file = '../data/BREC/Vertex/bet_G_4Vertex_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(bet_G_4Vertex_dgl, f)

print(f"Converted {len(bet_G_4Vertex_dgl)} graphs to DGL format and saved to {output_file}.")

bet_G_4Vertex_dummy_dgl = betweenness_dataset(G_4Vertex_dummy_dgl)
output_file = '../data/BREC/Vertex/bet_G_4Vertex_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(bet_G_4Vertex_dummy_dgl, f)

print(f"Converted {len(bet_G_4Vertex_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Eigenvector Centrality on 4Vertex original and 4Vertex dummy

In [None]:
eig_G_4Vertex_dgl = eigenvector_dataset(G_4Vertex_dgl_graphs)
output_file = '../data/BREC/Vertex/eig_G_4Vertex_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(eig_G_4Vertex_dgl, f)

print(f"Converted {len(eig_G_4Vertex_dgl)} graphs to DGL format and saved to {output_file}.")

eig_G_4Vertex_dummy_dgl = eigenvector_dataset(G_4Vertex_dummy_dgl)
output_file = '../data/BREC/Vertex/eig_G_4Vertex_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(eig_G_4Vertex_dummy_dgl, f)

print(f"Converted {len(eig_G_4Vertex_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Distance Encoding on 4Vertex original and 4Vertex dummy

In [None]:
DE_G_4Vertex_dgl = distance_encoding(G_4Vertex_dgl_graphs)
output_file = '../data/BREC/Vertex/DE_G_4Vertex_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(DE_G_4Vertex_dgl, f)

print(f"Converted {len(DE_G_4Vertex_dgl)} graphs to DGL format and saved to {output_file}.")

DE_G_4Vertex_dummy_dgl = distance_encoding(G_4Vertex_dummy_dgl)
output_file = '../data/BREC/Vertex/DE_G_4Vertex_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(DE_G_4Vertex_dummy_dgl, f)

print(f"Converted {len(DE_G_4Vertex_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Graph Encoding on 4Vertex original and 4Vertex dummy

In [None]:
GE_G_4Vertex_dgl = Graph_encoding(G_4Vertex_dgl_graphs, k=3)
output_file = '../data/BREC/Vertex/GE_G_4Vertex_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(GE_G_4Vertex_dgl, f)

print(f"Converted {len(GE_G_4Vertex_dgl)} graphs to DGL format and saved to {output_file}.")

GE_G_4Vertex_dummy_dgl = Graph_encoding(G_4Vertex_dummy_dgl, k=3)
output_file = '../data/BREC/Vertex/GE_G_4Vertex_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(GE_G_4Vertex_dummy_dgl, f)

print(f"Converted {len(GE_G_4Vertex_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Subgraph Extraction on 4Vertex original and 4Vertex dummy

In [None]:
SE_G_4Vertex_dgl = subgraph_dataset(G_4Vertex_dgl_graphs, radius=3)
output_file = '../data/BREC/Vertex/SE_G_4Vertex_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(SE_G_4Vertex_dgl, f)

print(f"Converted {len(SE_G_4Vertex_dgl)} graphs to DGL format and saved to {output_file}.")

SE_G_4Vertex_dummy_dgl = subgraph_dataset(G_4Vertex_dummy_dgl, radius=3)
output_file = '../data/BREC/Vertex/SE_G_4Vertex_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(SE_G_4Vertex_dummy_dgl, f)

print(f"Converted {len(SE_G_4Vertex_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Extra Node on 4Vertex original and 4Vertex dummy

In [None]:
exN_G_4Vertex_dgl = extra_node_dataset(G_4Vertex_dgl_graphs)
output_file = '../data/BREC/Vertex/exN_G_4Vertex_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(exN_G_4Vertex_dgl, f)

print(f"Converted {len(exN_G_4Vertex_dgl)} graphs to DGL format and saved to {output_file}.")

exN_G_4Vertex_dummy_dgl = extra_node_dataset(G_4Vertex_dummy_dgl)
output_file = '../data/BREC/Vertex/exN_G_4Vertex_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(exN_G_4Vertex_dummy_dgl, f)

print(f"Converted {len(exN_G_4Vertex_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Graphlet-Based Encoding on 4Vertex original and 4Vertex dummy

In [None]:
gle_G_4Vertex_dgl = graphlet_encoding_dataset(G_4Vertex_dgl_graphs)
output_file = '../data/BREC/Vertex/gle_G_4Vertex_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(gle_G_4Vertex_dgl, f)

print(f"Converted {len(gle_G_4Vertex_dgl)} graphs to DGL format and saved to {output_file}.")

gle_G_4Vertex_dummy_dgl = graphlet_encoding_dataset(G_4Vertex_dummy_dgl)
output_file = '../data/BREC/Vertex/gle_G_4Vertex_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(gle_G_4Vertex_dummy_dgl, f)

print(f"Converted {len(gle_G_4Vertex_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Equivalence Class

In [170]:
import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn import GINConv, SumPooling, PNAConv
import networkx as nx
import random
import pickle
import numpy as np
import glob

# Define a GIN model
class GIN(nn.Module):
    def __init__(self, in_feats, hidden_dim, out_dim, num_layers):
        super(GIN, self).__init__()
        self.in_feats = in_feats
        self.layers = nn.ModuleList()
        self.batch_norms = nn.ModuleList()

        # Input layer
        self.layers.append(GINConv(
            nn.Sequential(
                nn.Linear(in_feats, hidden_dim),
                nn.ReLU(),
                nn.Linear(hidden_dim, hidden_dim)
            ), 'sum'))
        self.batch_norms.append(nn.BatchNorm1d(hidden_dim))

        # Hidden layers
        for _ in range(num_layers - 2):
            self.layers.append(GINConv(
                nn.Sequential(
                    nn.Linear(hidden_dim, hidden_dim),
                    nn.ReLU(),
                    nn.Linear(hidden_dim, hidden_dim)
                ), 'sum'))
            self.batch_norms.append(nn.BatchNorm1d(hidden_dim))

        # Output layer
        self.layers.append(GINConv(
            nn.Sequential(
                nn.Linear(hidden_dim, out_dim),
                nn.ReLU(),
                nn.Linear(out_dim, out_dim)
            ), 'sum'))
        self.batch_norms.append(nn.BatchNorm1d(out_dim))

        # Pooling layer
        self.pool = SumPooling()

    def forward(self, g, h):
        h = torch.round(h * 100) / 100
        for layer, batch_norm in zip(self.layers, self.batch_norms):
            h = layer(g, h)
            h = batch_norm(h)
            h = F.relu(h)
        g_embedding = self.pool(g, h)
        return g_embedding

# Define a PNA model
class PNA(nn.Module):
    def __init__(self, in_feats, hidden_dim, out_dim, num_layers, aggregators, scalers, deg):
        super(PNA, self).__init__()
        self.in_feats = in_feats
        self.layers = nn.ModuleList()

        # Input layer
        self.layers.append(PNAConv(in_feats, hidden_dim, aggregators, scalers, deg))

        # Hidden layers
        for _ in range(num_layers - 2):
            self.layers.append(PNAConv(hidden_dim, hidden_dim, aggregators, scalers, deg))

        # Output layer
        self.layers.append(PNAConv(hidden_dim, out_dim, aggregators, scalers, deg))

        # Pooling layer
        self.pool = SumPooling()

    def forward(self, g, h):
        h = torch.round(h * 100) / 100
        for layer in self.layers:
            h = layer(g, h)
            h = F.relu(h)
        g_embedding = self.pool(g, h)
        return g_embedding



# Calculate the final integer embedding
def calculate_integer_embedding(embedding):
    sum_x = embedding.sum().item()

    # Handle variance calculation only if there's more than one element
    if embedding.numel() > 1:
        var_x = embedding.var().item()
    else:
        var_x = 0.0  # Set variance to 0 if there's only one element

    min_x = embedding.min().item()

    # Handle NaN variance by setting it to 0
    if np.isnan(var_x):
        var_x = 0.0

    final_embedding = int((sum_x * 100 + var_x * 10 + min_x * 10) * 10)
    return final_embedding


# Save graphs to a file
def save_graphs_to_file(graphs, filepath):
    with open(filepath, 'wb') as f:
        pickle.dump(graphs, f)

# Load graphs from a file
def load_graphs_from_file(filepath):
    with open(filepath, 'rb') as f:
        graphs = pickle.load(f)
    return graphs

# Compute equivalence classes
def compute_equivalence_classes(filepath, model, input_dim):
    graphs = load_graphs_from_file(filepath)

    # Train the model for a single epoch with a random target
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
    model.train()
    target_seed = 100
    for g in graphs:
        # Ensure that 'h' exists
        if 'x' not in g.ndata:
            g.ndata['x'] = torch.ones(g.number_of_nodes(), input_dim)  # Initialize with ones if 'h' is missing
        optimizer.zero_grad()

        output = model(g, g.ndata['x'])  # The output is a tensor of shape (1, out_dim)
        target = torch.randint(0, 2, output.shape)  # Target is a random tensor of the same shape as output

        loss = F.binary_cross_entropy_with_logits(output, target.float())  # Ensure the target is a float tensor
        loss.backward()
        optimizer.step()
        
    embeddings = set()

    with torch.no_grad():
        model.eval()
        for g in graphs:
            if 'x' not in g.ndata:
                g.ndata['x'] = torch.ones(g.number_of_nodes(), input_dim)  # Initialize with ones if 'h' is missing
            embedding = model(g, g.ndata['x'])
            final_embedding = calculate_integer_embedding(embedding)
            embeddings.add(final_embedding)

    print("embeddings: ", embeddings)
    return len(embeddings)

# Process all .pkl files in the current directory
for filepath in glob.glob("../data/BREC/Vertex/clo_G_4Vertex_dataset*.pkl"):
    print(f"------------- Processing {filepath}...")

    # Load graphs to determine input_dim
    graphs = load_graphs_from_file(filepath)
    if 'x' in graphs[0].ndata:
        input_dim = graphs[0].ndata['x'].size(1)  # Determine the input dimension dynamically
    else:
        # If 'h' does not exist, assume a default input dimension
        input_dim = 1

    # Initialize the models
    gin_model = GIN(input_dim, hidden_dim=32, out_dim=8, num_layers=3)
    pna_model = PNA(input_dim, hidden_dim=32, out_dim=8, num_layers=3,
                    aggregators=['mean', 'max', 'sum', 'min', 'std'],
                    scalers=['identity', 'amplification', 'attenuation'],
                    deg=torch.tensor([1.0]))  # Example degree tensor
    

    # Compute the number of unique embeddings using GIN
    print("Computing equivalence classes using GIN model...")
    num_unique_embeddings_gin = compute_equivalence_classes(filepath, gin_model, input_dim)
    print(f"Number of unique embeddings with GIN: {num_unique_embeddings_gin}\n")

    # Compute the number of unique embeddings using PNA
    print("Computing equivalence classes using PNA model...")
    num_unique_embeddings_pna = compute_equivalence_classes(filepath, pna_model, input_dim)
    print(f"Number of unique embeddings with PNA: {num_unique_embeddings_pna}\n")



------------- Processing ../data/BREC/Vertex/clo_G_4Vertex_dataset.pkl...
Computing equivalence classes using GIN model...
embeddings:  {78075286731}
Number of unique embeddings with GIN: 1

Computing equivalence classes using PNA model...
embeddings:  {307884}
Number of unique embeddings with PNA: 1

------------- Processing ../data/BREC/Vertex/clo_G_4Vertex_dataset_dummy.pkl...
Computing equivalence classes using GIN model...
embeddings:  {951029692925}
Number of unique embeddings with GIN: 1

Computing equivalence classes using PNA model...
embeddings:  {258143}
Number of unique embeddings with PNA: 1



## Distinguishing Test

In [171]:
import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn import GINConv, SumPooling, PNAConv
import networkx as nx
import random
import pickle
import numpy as np
import glob

# Define a GIN model
class GIN(nn.Module):
    def __init__(self, in_feats, hidden_dim, out_dim, num_layers):
        super(GIN, self).__init__()
        self.in_feats = in_feats
        self.layers = nn.ModuleList()
        self.batch_norms = nn.ModuleList()

        # Input layer
        self.layers.append(GINConv(
            nn.Sequential(
                nn.Linear(in_feats, hidden_dim),
                nn.ReLU(),
                nn.Linear(hidden_dim, hidden_dim)
            ), 'sum'))
        self.batch_norms.append(nn.BatchNorm1d(hidden_dim))

        # Hidden layers
        for _ in range(num_layers - 2):
            self.layers.append(GINConv(
                nn.Sequential(
                    nn.Linear(hidden_dim, hidden_dim),
                    nn.ReLU(),
                    nn.Linear(hidden_dim, hidden_dim)
                ), 'sum'))
            self.batch_norms.append(nn.BatchNorm1d(hidden_dim))

        # Output layer
        self.layers.append(GINConv(
            nn.Sequential(
                nn.Linear(hidden_dim, out_dim),
                nn.ReLU(),
                nn.Linear(out_dim, out_dim)
            ), 'sum'))
        self.batch_norms.append(nn.BatchNorm1d(out_dim))

        # Pooling layer
        self.pool = SumPooling()

    def forward(self, g, h):
        h = torch.round(h * 100) / 100
        for layer, batch_norm in zip(self.layers, self.batch_norms):
            h = layer(g, h)
            h = batch_norm(h)
            h = F.relu(h)
        g_embedding = self.pool(g, h)
        return g_embedding

# Define a PNA model
class PNA(nn.Module):
    def __init__(self, in_feats, hidden_dim, out_dim, num_layers, aggregators, scalers, deg):
        super(PNA, self).__init__()
        self.in_feats = in_feats
        self.layers = nn.ModuleList()

        # Input layer
        self.layers.append(PNAConv(in_feats, hidden_dim, aggregators, scalers, deg))

        # Hidden layers
        for _ in range(num_layers - 2):
            self.layers.append(PNAConv(hidden_dim, hidden_dim, aggregators, scalers, deg))

        # Output layer
        self.layers.append(PNAConv(hidden_dim, out_dim, aggregators, scalers, deg))

        # Pooling layer
        self.pool = SumPooling()

    def forward(self, g, h):
        h = torch.round(h * 100) / 100
        for layer in self.layers:
            h = layer(g, h)
            h = F.relu(h)
        g_embedding = self.pool(g, h)
        return g_embedding



for filepath in glob.glob("../data/BREC/Vertex/clo_G_4Vertex_dataset_dummy.pkl"):
    print(f"------------- Processing {filepath}...")

    # Load graphs to determine input_dim
    graphs = load_graphs_from_file(filepath)

    non_isomorphic_pairs, isomorphic_pairs = organize_pairs(graphs, G_4Vertex_original_indices)

    if 'x' in graphs[0].ndata:
        input_dim = graphs[0].ndata['x'].size(1)  # Determine the input dimension dynamically
    else:
        # If 'x' does not exist, assume a default input dimension
        input_dim = 1


    # Initialize the models
    gin_model = GIN(input_dim, hidden_dim=16, out_dim=8, num_layers=4).to(device)
    pna_model = PNA(input_dim, hidden_dim=16, out_dim=8, num_layers=4,
                    aggregators=['mean', 'max', 'sum', 'min', 'std'],
                    scalers=['identity', 'amplification', 'attenuation'],
                    deg=torch.tensor([1.0]).to(device)).to(device)  # Example degree tensor

    # Compute the number of unique embeddings using GIN
    print("Using GIN model...")
    non_isomorphic_different_count, isomorphic_same_count, isomorphic_different_count = evaluate_model_with_pairs(non_isomorphic_pairs, isomorphic_pairs, gin_model, input_dim)

    # Compute the number of unique embeddings using PNA
    print("Using PNA model...")
    non_isomorphic_different_count, isomorphic_same_count, isomorphic_different_count = evaluate_model_with_pairs(non_isomorphic_pairs, isomorphic_pairs, pna_model, input_dim)


------------- Processing ../data/BREC/Vertex/clo_G_4Vertex_dataset_dummy.pkl...
Using GIN model...
Epoch 1/100, Loss: 1.0000
Epoch 2/100, Loss: 1.0000
Epoch 3/100, Loss: 1.0000
Epoch 4/100, Loss: 1.0000
Epoch 5/100, Loss: 1.0000
Epoch 6/100, Loss: 1.0000
Epoch 7/100, Loss: 1.0000
Epoch 8/100, Loss: 1.0000
Epoch 9/100, Loss: 1.0000
Epoch 10/100, Loss: 1.0000
Epoch 11/100, Loss: 1.0000
Epoch 12/100, Loss: 1.0000
Epoch 13/100, Loss: 1.0000
Epoch 14/100, Loss: 1.0000
Epoch 15/100, Loss: 1.0000
Epoch 16/100, Loss: 1.0000
Epoch 17/100, Loss: 1.0000
Epoch 18/100, Loss: 1.0000
Epoch 19/100, Loss: 1.0000
Epoch 20/100, Loss: 1.0000
Epoch 21/100, Loss: 1.0000
Epoch 22/100, Loss: 1.0000
Epoch 23/100, Loss: 1.0000
Epoch 24/100, Loss: 1.0000
Epoch 25/100, Loss: 0.1329
Epoch 26/100, Loss: 1.0000
Epoch 27/100, Loss: 1.0000
Epoch 28/100, Loss: 1.0000
Epoch 29/100, Loss: 0.7000
Epoch 30/100, Loss: 1.0000
Epoch 31/100, Loss: 1.0000
Epoch 32/100, Loss: 0.9983
Epoch 33/100, Loss: 1.0000
Epoch 34/100, Loss:

# BREC-Distance_Regular

## Original Distance_Regular

In [None]:
import dgl
from torch_geometric.utils import to_dgl, from_dgl

G_Distance_Regular_dgl_graphs = []
for G in G_Distance_Regular:
    dgl_graph = dgl.from_networkx(G)
    dgl_graph.ndata['x'] = torch.ones((G.number_of_nodes(), 1), dtype=torch.float32)
    G_Distance_Regular_dgl_graphs.append(dgl_graph)


# Step 4: Save the list of DGL graphs using pickling
output_file = '../data/BREC/Distance_Regular/G_Distance_Regular_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(G_Distance_Regular_dgl_graphs, f)

print(f"Converted {len(G_Distance_Regular_dgl_graphs)} graphs to DGL format and saved to {output_file}.")

## Distance_Regular dummy with isomorphic pairs

In [None]:
G_Distance_Regular_dgl_graphs, G_Distance_Regular_isomorphic_pair, G_Distance_Regular_original_indices = add_isomorphic_pairs_dgl(G_Distance_Regular_dgl_graphs, num_pairs=20)
G_Distance_Regular_dummy_dgl = G_Distance_Regular_dgl_graphs + G_Distance_Regular_isomorphic_pair

output_file = '../data/BREC/Distance_Regular/G_Distance_Regular_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(G_Distance_Regular_dummy_dgl, f)

print(f"Converted {len(G_Distance_Regular_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")

## VN on 4Vertex original and CFI dummy

In [None]:
vn_G_Distance_Regular_dgl = apply_vn(G_Distance_Regular_dgl_graphs)
output_file = '../data/BREC/Distance_Regular/vn_G_Distance_Regular_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(vn_G_Distance_Regular_dgl, f)

print(f"Converted {len(vn_G_Distance_Regular_dgl)} graphs to DGL format and saved to {output_file}.")

vn_G_Distance_Regular_dummy_dgl = apply_vn(G_Distance_Regular_dummy_dgl)
output_file = '../data/BREC/Distance_Regular/vn_G_Distance_Regular_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(vn_G_Distance_Regular_dummy_dgl, f)

print(f"Converted {len(vn_G_Distance_Regular_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Degree Centrality on Distance_Regular original and Distance_Regular dummy

In [None]:
deg_G_Distance_Regular_dgl = degree_dataset(G_Distance_Regular_dgl_graphs)
output_file = '../data/BREC/Distance_Regular/deg_G_Distance_Regular_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(deg_G_Distance_Regular_dgl, f)

print(f"Converted {len(deg_G_Distance_Regular_dgl)} graphs to DGL format and saved to {output_file}.")

deg_G_Distance_Regular_dummy_dgl = degree_dataset(G_Distance_Regular_dummy_dgl)
output_file = '../data/BREC/Distance_Regular/deg_G_Distance_Regular_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(deg_G_Distance_Regular_dummy_dgl, f)

print(f"Converted {len(deg_G_Distance_Regular_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Closeness Centrality on Distance_Regular original and Distance_Regular dummy

In [172]:
clo_G_Distance_Regular_dgl = closeness_dataset(G_Distance_Regular_dgl_graphs)
output_file = '../data/BREC/Distance_Regular/clo_G_Distance_Regular_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(clo_G_Distance_Regular_dgl, f)

print(f"Converted {len(clo_G_Distance_Regular_dgl)} graphs to DGL format and saved to {output_file}.")

clo_G_Distance_Regular_dummy_dgl = closeness_dataset(G_Distance_Regular_dummy_dgl)
output_file = '../data/BREC/Distance_Regular/clo_G_Distance_Regular_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(clo_G_Distance_Regular_dummy_dgl, f)

print(f"Converted {len(clo_G_Distance_Regular_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


Converted 40 graphs to DGL format and saved to ../data/BREC/Distance_Regular/clo_G_Distance_Regular_dataset.pkl.
Converted 60 graphs (including isomorphisms) to DGL format and saved to ../data/BREC/Distance_Regular/clo_G_Distance_Regular_dataset_dummy.pkl.


## Betweenness Centrality on Distance_Regular original and Distance_Regular dummy

In [None]:
bet_G_Distance_Regular_dgl = betweenness_dataset(G_Distance_Regular_dgl_graphs)
output_file = '../data/BREC/Distance_Regular/bet_G_Distance_Regular_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(bet_G_Distance_Regular_dgl, f)

print(f"Converted {len(bet_G_Distance_Regular_dgl)} graphs to DGL format and saved to {output_file}.")

bet_G_Distance_Regular_dummy_dgl = betweenness_dataset(G_Distance_Regular_dummy_dgl)
output_file = '../data/BREC/Distance_Regular/bet_G_Distance_Regular_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(bet_G_Distance_Regular_dummy_dgl, f)

print(f"Converted {len(bet_G_Distance_Regular_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Eigenvector Centrality on Distance_Regular original and Distance_Regular dummy

In [None]:
eig_G_Distance_Regular_dgl = eigenvector_dataset(G_Distance_Regular_dgl_graphs)
output_file = '../data/BREC/Distance_Regular/eig_G_Distance_Regular_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(eig_G_Distance_Regular_dgl, f)

print(f"Converted {len(eig_G_Distance_Regular_dgl)} graphs to DGL format and saved to {output_file}.")

eig_G_Distance_Regular_dummy_dgl = eigenvector_dataset(G_Distance_Regular_dummy_dgl)
output_file = '../data/BREC/Distance_Regular/eig_G_Distance_Regular_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(eig_G_Distance_Regular_dummy_dgl, f)

print(f"Converted {len(eig_G_Distance_Regular_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Distance Encoding on Distance_Regular original and Distance_Regular dummy

In [None]:
DE_G_Distance_Regular_dgl = distance_encoding(G_Distance_Regular_dgl_graphs)
output_file = '../data/BREC/Distance_Regular/DE_G_Distance_Regular_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(DE_G_Distance_Regular_dgl, f)

print(f"Converted {len(DE_G_Distance_Regular_dgl)} graphs to DGL format and saved to {output_file}.")

DE_G_Distance_Regular_dummy_dgl = distance_encoding(G_Distance_Regular_dummy_dgl)
output_file = '../data/BREC/Distance_Regular/DE_G_Distance_Regular_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(DE_G_Distance_Regular_dummy_dgl, f)

print(f"Converted {len(DE_G_Distance_Regular_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Graph Encoding on Distance_Regular original and Distance_Regular dummy

In [None]:
GE_G_Distance_Regular_dgl = Graph_encoding(G_Distance_Regular_dgl_graphs, k=3)
output_file = '../data/BREC/Distance_Regular/GE_G_Distance_Regular_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(GE_G_Distance_Regular_dgl, f)

print(f"Converted {len(GE_G_Distance_Regular_dgl)} graphs to DGL format and saved to {output_file}.")

GE_G_Distance_Regular_dummy_dgl = Graph_encoding(G_Distance_Regular_dummy_dgl, k=3)
output_file = '../data/BREC/Distance_Regular/GE_G_Distance_Regular_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(GE_G_Distance_Regular_dummy_dgl, f)

print(f"Converted {len(GE_G_Distance_Regular_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Subgraph Extraction on Distance_Regular original and Distance_Regular dummy

In [None]:
SE_G_Distance_Regular_dgl = subgraph_dataset(G_Distance_Regular_dgl_graphs, radius=3)
output_file = '../data/BREC/Distance_Regular/SE_G_Distance_Regular_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(SE_G_Distance_Regular_dgl, f)

print(f"Converted {len(SE_G_Distance_Regular_dgl)} graphs to DGL format and saved to {output_file}.")

SE_G_Distance_Regular_dummy_dgl = subgraph_dataset(G_Distance_Regular_dummy_dgl, radius=3)
output_file = '../data/BREC/Distance_Regular/SE_G_Distance_Regular_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(SE_G_Distance_Regular_dummy_dgl, f)

print(f"Converted {len(SE_G_Distance_Regular_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Extra Node on Distance_Regular original and Distance_Regular dummy

In [None]:
exN_G_Distance_Regular_dgl = extra_node_dataset(G_Distance_Regular_dgl_graphs)
output_file = '../data/BREC/Distance_Regular/exN_G_Distance_Regular_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(exN_G_Distance_Regular_dgl, f)

print(f"Converted {len(exN_G_Distance_Regular_dgl)} graphs to DGL format and saved to {output_file}.")

exN_G_Distance_Regular_dummy_dgl = extra_node_dataset(G_Distance_Regular_dummy_dgl)
output_file = '../data/BREC/Distance_Regular/exN_G_Distance_Regular_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(exN_G_Distance_Regular_dummy_dgl, f)

print(f"Converted {len(exN_G_Distance_Regular_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Graphlet-Based Encoding on Distance_Regular original and Distance_Regular dummy

In [None]:
gle_G_Distance_Regular_dgl = graphlet_encoding_dataset(G_Distance_Regular_dgl_graphs)
output_file = '../data/BREC/Distance_Regular/gle_G_Distance_Regular_dataset.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(gle_G_Distance_Regular_dgl, f)

print(f"Converted {len(gle_G_Distance_Regular_dgl)} graphs to DGL format and saved to {output_file}.")

gle_G_Distance_Regular_dummy_dgl = graphlet_encoding_dataset(G_Distance_Regular_dummy_dgl)
output_file = '../data/BREC/Distance_Regular/gle_G_Distance_Regular_dataset_dummy.pkl'
with open(output_file, 'wb') as f:
    pickle.dump(gle_G_Distance_Regular_dummy_dgl, f)

print(f"Converted {len(gle_G_Distance_Regular_dummy_dgl)} graphs (including isomorphisms) to DGL format and saved to {output_file}.")


## Equivalence Class

In [173]:
import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn import GINConv, SumPooling, PNAConv
import networkx as nx
import random
import pickle
import numpy as np
import glob

# Define a GIN model
class GIN(nn.Module):
    def __init__(self, in_feats, hidden_dim, out_dim, num_layers):
        super(GIN, self).__init__()
        self.in_feats = in_feats
        self.layers = nn.ModuleList()
        self.batch_norms = nn.ModuleList()

        # Input layer
        self.layers.append(GINConv(
            nn.Sequential(
                nn.Linear(in_feats, hidden_dim),
                nn.ReLU(),
                nn.Linear(hidden_dim, hidden_dim)
            ), 'sum'))
        self.batch_norms.append(nn.BatchNorm1d(hidden_dim))

        # Hidden layers
        for _ in range(num_layers - 2):
            self.layers.append(GINConv(
                nn.Sequential(
                    nn.Linear(hidden_dim, hidden_dim),
                    nn.ReLU(),
                    nn.Linear(hidden_dim, hidden_dim)
                ), 'sum'))
            self.batch_norms.append(nn.BatchNorm1d(hidden_dim))

        # Output layer
        self.layers.append(GINConv(
            nn.Sequential(
                nn.Linear(hidden_dim, out_dim),
                nn.ReLU(),
                nn.Linear(out_dim, out_dim)
            ), 'sum'))
        self.batch_norms.append(nn.BatchNorm1d(out_dim))

        # Pooling layer
        self.pool = SumPooling()

    def forward(self, g, h):
        h = torch.round(h * 100) / 100
        for layer, batch_norm in zip(self.layers, self.batch_norms):
            h = layer(g, h)
            h = batch_norm(h)
            h = F.relu(h)
        g_embedding = self.pool(g, h)
        return g_embedding

# Define a PNA model
class PNA(nn.Module):
    def __init__(self, in_feats, hidden_dim, out_dim, num_layers, aggregators, scalers, deg):
        super(PNA, self).__init__()
        self.in_feats = in_feats
        self.layers = nn.ModuleList()

        # Input layer
        self.layers.append(PNAConv(in_feats, hidden_dim, aggregators, scalers, deg))

        # Hidden layers
        for _ in range(num_layers - 2):
            self.layers.append(PNAConv(hidden_dim, hidden_dim, aggregators, scalers, deg))

        # Output layer
        self.layers.append(PNAConv(hidden_dim, out_dim, aggregators, scalers, deg))

        # Pooling layer
        self.pool = SumPooling()

    def forward(self, g, h):
        h = torch.round(h * 100) / 100
        for layer in self.layers:
            h = layer(g, h)
            h = F.relu(h)
        g_embedding = self.pool(g, h)
        return g_embedding



# Calculate the final integer embedding
def calculate_integer_embedding(embedding):
    sum_x = embedding.sum().item()

    # Handle variance calculation only if there's more than one element
    if embedding.numel() > 1:
        var_x = embedding.var().item()
    else:
        var_x = 0.0  # Set variance to 0 if there's only one element

    min_x = embedding.min().item()

    # Handle NaN variance by setting it to 0
    if np.isnan(var_x):
        var_x = 0.0

    final_embedding = int((sum_x * 100 + var_x * 10 + min_x * 10) * 10)
    return final_embedding


# Save graphs to a file
def save_graphs_to_file(graphs, filepath):
    with open(filepath, 'wb') as f:
        pickle.dump(graphs, f)

# Load graphs from a file
def load_graphs_from_file(filepath):
    with open(filepath, 'rb') as f:
        graphs = pickle.load(f)
    return graphs

# Compute equivalence classes
def compute_equivalence_classes(filepath, model, input_dim):
    graphs = load_graphs_from_file(filepath)

    # Train the model for a single epoch with a random target
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
    model.train()
    target_seed = 100
    for g in graphs:
        # Ensure that 'h' exists
        if 'x' not in g.ndata:
            g.ndata['x'] = torch.ones(g.number_of_nodes(), input_dim)  # Initialize with ones if 'h' is missing
        optimizer.zero_grad()

        output = model(g, g.ndata['x'])  # The output is a tensor of shape (1, out_dim)
        target = torch.randint(0, 2, output.shape)  # Target is a random tensor of the same shape as output

        loss = F.binary_cross_entropy_with_logits(output, target.float())  # Ensure the target is a float tensor
        loss.backward()
        optimizer.step()
        
    embeddings = set()

    with torch.no_grad():
        model.eval()
        for g in graphs:
            if 'x' not in g.ndata:
                g.ndata['x'] = torch.ones(g.number_of_nodes(), input_dim)  # Initialize with ones if 'h' is missing
            embedding = model(g, g.ndata['x'])
            final_embedding = calculate_integer_embedding(embedding)
            embeddings.add(final_embedding)

    print("embeddings: ", embeddings)
    return len(embeddings)

# Process all .pkl files in the current directory
for filepath in glob.glob("../data/BREC/Distance_Regular/clo_G_Distance_Regular_dataset*.pkl"):
    print(f"------------- Processing {filepath}...")

    # Load graphs to determine input_dim
    graphs = load_graphs_from_file(filepath)
    if 'x' in graphs[0].ndata:
        input_dim = graphs[0].ndata['x'].size(1)  # Determine the input dimension dynamically
    else:
        # If 'h' does not exist, assume a default input dimension
        input_dim = 1

    # Initialize the models
    gin_model = GIN(input_dim, hidden_dim=32, out_dim=8, num_layers=3)
    pna_model = PNA(input_dim, hidden_dim=32, out_dim=8, num_layers=3,
                    aggregators=['mean', 'max', 'sum', 'min', 'std'],
                    scalers=['identity', 'amplification', 'attenuation'],
                    deg=torch.tensor([1.0]))  # Example degree tensor
    

    # Compute the number of unique embeddings using GIN
    print("Computing equivalence classes using GIN model...")
    num_unique_embeddings_gin = compute_equivalence_classes(filepath, gin_model, input_dim)
    print(f"Number of unique embeddings with GIN: {num_unique_embeddings_gin}\n")

    # Compute the number of unique embeddings using PNA
    print("Computing equivalence classes using PNA model...")
    num_unique_embeddings_pna = compute_equivalence_classes(filepath, pna_model, input_dim)
    print(f"Number of unique embeddings with PNA: {num_unique_embeddings_pna}\n")



------------- Processing ../data/BREC/Distance_Regular/clo_G_Distance_Regular_dataset_dummy.pkl...
Computing equivalence classes using GIN model...
embeddings:  {466849564612, 34785838543400, 6916859309, 2162355892912, 2523934398525, 322262944637, 62518170206}
Number of unique embeddings with GIN: 7

Computing equivalence classes using PNA model...
embeddings:  {267618, 618403, 256965, 466823, 216104, 279598, 68847}
Number of unique embeddings with PNA: 7

------------- Processing ../data/BREC/Distance_Regular/clo_G_Distance_Regular_dataset.pkl...
Computing equivalence classes using GIN model...
embeddings:  {2551778690, 1642392394, 12874603253, 16375521270, 488394101, 483693049, 1842147293}
Number of unique embeddings with GIN: 7

Computing equivalence classes using PNA model...
embeddings:  {27291, 85065, 18154, 79086, 14550, 16251, 69724}
Number of unique embeddings with PNA: 7



## Distinguishing Test

In [174]:
import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn import GINConv, SumPooling, PNAConv
import networkx as nx
import random
import pickle
import numpy as np
import glob

# Define a GIN model
class GIN(nn.Module):
    def __init__(self, in_feats, hidden_dim, out_dim, num_layers):
        super(GIN, self).__init__()
        self.in_feats = in_feats
        self.layers = nn.ModuleList()
        self.batch_norms = nn.ModuleList()

        # Input layer
        self.layers.append(GINConv(
            nn.Sequential(
                nn.Linear(in_feats, hidden_dim),
                nn.ReLU(),
                nn.Linear(hidden_dim, hidden_dim)
            ), 'sum'))
        self.batch_norms.append(nn.BatchNorm1d(hidden_dim))

        # Hidden layers
        for _ in range(num_layers - 2):
            self.layers.append(GINConv(
                nn.Sequential(
                    nn.Linear(hidden_dim, hidden_dim),
                    nn.ReLU(),
                    nn.Linear(hidden_dim, hidden_dim)
                ), 'sum'))
            self.batch_norms.append(nn.BatchNorm1d(hidden_dim))

        # Output layer
        self.layers.append(GINConv(
            nn.Sequential(
                nn.Linear(hidden_dim, out_dim),
                nn.ReLU(),
                nn.Linear(out_dim, out_dim)
            ), 'sum'))
        self.batch_norms.append(nn.BatchNorm1d(out_dim))

        # Pooling layer
        self.pool = SumPooling()

    def forward(self, g, h):
        h = torch.round(h * 100) / 100
        for layer, batch_norm in zip(self.layers, self.batch_norms):
            h = layer(g, h)
            h = batch_norm(h)
            h = F.relu(h)
        g_embedding = self.pool(g, h)
        return g_embedding

# Define a PNA model
class PNA(nn.Module):
    def __init__(self, in_feats, hidden_dim, out_dim, num_layers, aggregators, scalers, deg):
        super(PNA, self).__init__()
        self.in_feats = in_feats
        self.layers = nn.ModuleList()

        # Input layer
        self.layers.append(PNAConv(in_feats, hidden_dim, aggregators, scalers, deg))

        # Hidden layers
        for _ in range(num_layers - 2):
            self.layers.append(PNAConv(hidden_dim, hidden_dim, aggregators, scalers, deg))

        # Output layer
        self.layers.append(PNAConv(hidden_dim, out_dim, aggregators, scalers, deg))

        # Pooling layer
        self.pool = SumPooling()

    def forward(self, g, h):
        h = torch.round(h * 100) / 100
        for layer in self.layers:
            h = layer(g, h)
            h = F.relu(h)
        g_embedding = self.pool(g, h)
        return g_embedding



for filepath in glob.glob("../data/BREC/Distance_Regular/clo_G_Distance_Regular_dataset_dummy.pkl"):
    print(f"------------- Processing {filepath}...")

    # Load graphs to determine input_dim
    graphs = load_graphs_from_file(filepath)

    non_isomorphic_pairs, isomorphic_pairs = organize_pairs(graphs, G_Distance_Regular_original_indices)

    if 'x' in graphs[0].ndata:
        input_dim = graphs[0].ndata['x'].size(1)  # Determine the input dimension dynamically
    else:
        # If 'x' does not exist, assume a default input dimension
        input_dim = 1


    # Initialize the models
    gin_model = GIN(input_dim, hidden_dim=16, out_dim=8, num_layers=4).to(device)
    pna_model = PNA(input_dim, hidden_dim=16, out_dim=8, num_layers=4,
                    aggregators=['mean', 'max', 'sum', 'min', 'std'],
                    scalers=['identity', 'amplification', 'attenuation'],
                    deg=torch.tensor([1.0]).to(device)).to(device)  # Example degree tensor

    # Compute the number of unique embeddings using GIN
    print("Using GIN model...")
    non_isomorphic_different_count, isomorphic_same_count, isomorphic_different_count = evaluate_model_with_pairs(non_isomorphic_pairs, isomorphic_pairs, gin_model, input_dim)

    # Compute the number of unique embeddings using PNA
    print("Using PNA model...")
    non_isomorphic_different_count, isomorphic_same_count, isomorphic_different_count = evaluate_model_with_pairs(non_isomorphic_pairs, isomorphic_pairs, pna_model, input_dim)


------------- Processing ../data/BREC/Distance_Regular/clo_G_Distance_Regular_dataset_dummy.pkl...
Using GIN model...
Epoch 1/100, Loss: 1.0000
Epoch 2/100, Loss: 1.0000
Epoch 3/100, Loss: 1.0000
Epoch 4/100, Loss: 1.0000
Epoch 5/100, Loss: 1.0000
Epoch 6/100, Loss: 1.0000
Epoch 7/100, Loss: 1.0000
Epoch 8/100, Loss: 1.0000
Epoch 9/100, Loss: 1.0000
Epoch 10/100, Loss: 1.0000
Epoch 11/100, Loss: 1.0000
Epoch 12/100, Loss: 1.0000
Epoch 13/100, Loss: 0.9999
Epoch 14/100, Loss: 0.9999
Epoch 15/100, Loss: 1.0000
Epoch 16/100, Loss: 1.0000
Epoch 17/100, Loss: 0.9999
Epoch 18/100, Loss: 0.9999
Epoch 19/100, Loss: 0.9999
Epoch 20/100, Loss: 0.9999
Epoch 21/100, Loss: 0.9999
Epoch 22/100, Loss: 0.9999
Epoch 23/100, Loss: 0.9999
Epoch 24/100, Loss: 0.9999
Epoch 25/100, Loss: 0.9999
Epoch 26/100, Loss: 0.9999
Epoch 27/100, Loss: 0.9999
Epoch 28/100, Loss: 0.9999
Epoch 29/100, Loss: 0.9999
Epoch 30/100, Loss: 0.9999
Epoch 31/100, Loss: 0.9999
Epoch 32/100, Loss: 0.9999
Epoch 33/100, Loss: 0.9999
