In [86]:
import torch
import os
from torch_geometric.data import Data
from torch_geometric.data import InMemoryDataset
import torch_geometric.utils
import torch.nn.functional as F
from torch import Tensor
from typing import Tuple
from typing import List

# Define the paths to each file.
# The dataset directory can be overridden through the QM9_DIR environment
# variable; the original local path is kept as the fallback so existing
# setups keep working unchanged.
base_path = os.environ.get('QM9_DIR', r'C:\Users\firda\Downloads\QM9\QM9')
adjacency_file = os.path.join(base_path, 'QM9_A.txt')
graph_indicator_file = os.path.join(base_path, 'QM9_graph_indicator.txt')
node_attributes_file = os.path.join(base_path, 'QM9_node_attributes.txt')

# Load adjacency list: one comma-separated "src, dst" pair per line
edges = []
with open(adjacency_file, 'r') as f:
    for line in f:
        if not line.strip():
            continue  # tolerate blank / trailing lines instead of crashing in int('')
        src, dst = map(int, line.split(','))
        edges.append((src - 1, dst - 1))  # Subtract 1 to start node indexing from 0

# Load graph indicators: one graph id per node line, shifted to start at 0
with open(graph_indicator_file, 'r') as f:
    graph_ids = [int(line) - 1 for line in f if line.strip()]

# Load node features: one comma-separated row of floats per node line
with open(node_attributes_file, 'r') as f:
    feature_rows = [
        [float(value) for value in line.split(',')]
        for line in f
        if line.strip()
    ]

# Convert lists to tensors.
# reshape(-1, 2) is a no-op for a non-empty edge list and keeps the result
# at shape [2, 0] (rather than [0]) when the adjacency file has no edges.
edge_index = torch.tensor(edges, dtype=torch.long).reshape(-1, 2).t().contiguous()
node_features = torch.tensor(feature_rows, dtype=torch.float)
graph_indicator = torch.tensor(graph_ids, dtype=torch.long)

In [84]:
# Define the number of graphs to process since they are much more than 1000
num_graphs_to_process = 1000
data_list = []  # List to store Data objects for each graph

# Lookup table: global node index -> position of that node inside its own graph.
# Allocated once; each iteration only overwrites the entries of the current
# graph's nodes, and only those entries are read back in that iteration.
node_to_local = torch.zeros(graph_indicator.numel(), dtype=torch.long)

# Graph id of every edge endpoint. This does not depend on g_id, so it is
# computed once instead of on every loop iteration.
src_graph = graph_indicator[edge_index[0]]
dst_graph = graph_indicator[edge_index[1]]

for g_id in range(num_graphs_to_process):
    # Identify nodes belonging to the current graph (ascending global indices)
    graph_nodes = torch.where(graph_indicator == g_id)[0]

    # Select node features for this graph
    x = node_features[graph_nodes]

    # Keep only edges whose both endpoints belong to this graph
    mask = (src_graph == g_id) & (dst_graph == g_id)

    # Reindex the edges for this graph: the k-th node of graph_nodes becomes
    # local index k. Done with tensor indexing rather than a Python dict and
    # per-element .item() calls.
    node_to_local[graph_nodes] = torch.arange(graph_nodes.numel())
    edge_index_graph = node_to_local[edge_index[:, mask]]

    # Create the Data object for the current graph and add it to the list
    data = Data(x=x, edge_index=edge_index_graph)
    data_list.append(data)
print(data_list)


[Data(x=[5, 16], edge_index=[2, 8]), Data(x=[4, 16], edge_index=[2, 6]), Data(x=[3, 16], edge_index=[2, 4]), Data(x=[4, 16], edge_index=[2, 6]), Data(x=[3, 16], edge_index=[2, 4]), Data(x=[4, 16], edge_index=[2, 6]), Data(x=[8, 16], edge_index=[2, 14]), Data(x=[6, 16], edge_index=[2, 10]), Data(x=[7, 16], edge_index=[2, 12]), Data(x=[6, 16], edge_index=[2, 10]), Data(x=[7, 16], edge_index=[2, 12]), Data(x=[6, 16], edge_index=[2, 10]), Data(x=[11, 16], edge_index=[2, 20]), Data(x=[9, 16], edge_index=[2, 16]), Data(x=[9, 16], edge_index=[2, 16]), Data(x=[9, 16], edge_index=[2, 18]), Data(x=[7, 16], edge_index=[2, 14]), Data(x=[10, 16], edge_index=[2, 18]), Data(x=[9, 16], edge_index=[2, 16]), Data(x=[8, 16], edge_index=[2, 14]), Data(x=[14, 16], edge_index=[2, 26]), Data(x=[12, 16], edge_index=[2, 22]), Data(x=[6, 16], edge_index=[2, 10]), Data(x=[5, 16], edge_index=[2, 8]), Data(x=[4, 16], edge_index=[2, 6]), Data(x=[6, 16], edge_index=[2, 10]), Data(x=[5, 16], edge_index=[2, 8]), Data(

In [94]:
def generate_feature_mask(feature_matrix: torch.Tensor) -> torch.Tensor:
    """
    Build a per-feature type mask for a node feature matrix.

    The columns reported by ``detect_discrete_indices`` (defined elsewhere in
    the notebook) are marked 1; every other column stays 0.

    Args:
        feature_matrix (torch.Tensor): Node feature matrix (num_nodes, num_features).

    Returns:
        torch.Tensor: 1-D ``torch.int`` tensor with one entry per feature column;
        1 marks a discrete feature, 0 a continuous one.
    """
    # Ask the helper which columns are discrete before allocating anything
    discrete_cols = detect_discrete_indices(feature_matrix)

    # Start from "everything is continuous" ...
    num_features = feature_matrix.size(1)
    mask = torch.zeros(num_features, dtype=torch.int)

    # ... then flag the discrete columns
    mask[discrete_cols] = 1
    return mask

In [110]:
# Compute the discrete/continuous mask from the features of graph 3
graph_x = data_list[3].x
feature_types = generate_feature_mask(graph_x)
print("Feature Types Mask:", feature_types)

Feature Types Mask: tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1], dtype=torch.int32)


In [112]:
# Show graph 3's feature matrix, then build an all-ones perturbation
# matrix with the same shape
graph_x = data_list[3].x
print(graph_x)
P = torch.ones(graph_x.shape)
print(P)

tensor([[ 0.0000,  1.0000,  0.0000,  0.0000,  0.0000,  6.0000,  0.0000,  0.0000,
          0.0000,  1.0000,  0.0000,  0.0000,  1.0000,  0.5995,  0.0000,  1.0000],
        [ 0.0000,  1.0000,  0.0000,  0.0000,  0.0000,  6.0000,  0.0000,  0.0000,
          0.0000,  1.0000,  0.0000,  0.0000,  1.0000, -0.5995,  0.0000,  1.0000],
        [ 1.0000,  0.0000,  0.0000,  0.0000,  0.0000,  1.0000,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000,  0.0000, -1.6616,  0.0000,  1.0000],
        [ 1.0000,  0.0000,  0.0000,  0.0000,  0.0000,  1.0000,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  1.6616,  0.0000,  1.0000]])
tensor([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])


In [121]:
def forward(V_x: Tensor, P_x: Tensor, feature_types: Tensor,
            features_add: bool = False) -> Tuple[Tensor, Tensor, Tensor]:
    """
    Differentiable forward pass for training.

    Every entry of ``P_x`` is squashed through a sigmoid and the result is
    either multiplied element-wise into ``V_x`` or used directly. No
    thresholding is applied, so the output stays differentiable w.r.t. ``P_x``.

    Args:
        V_x (Tensor): Original node feature matrix (num_nodes, num_features).
        P_x (Tensor): Perturbation matrix (num_nodes, num_features).
        feature_types (Tensor): Feature type mask (1 for discrete, 0 for continuous).
            Not used by this function; accepted so the call signature matches
            ``forward_prediction``.
        features_add (bool): If False (default), the perturbations scale ``V_x``
            element-wise. If True, the perturbations themselves are returned as
            the perturbed features.

    Returns:
        Tuple[Tensor, Tensor, Tensor]: ``(V_pert, real_perturbations, P_x)`` where
        ``V_pert`` is the perturbed feature matrix, ``real_perturbations`` is
        ``sigmoid(P_x)``, and ``P_x`` is the input perturbation matrix returned
        unchanged (same object).
    """
    # Apply sigmoid to create continuous perturbations in (0, 1)
    real_perturbations = torch.sigmoid(P_x)

    if features_add:
        # Use perturbations directly as the perturbed features
        V_pert = real_perturbations
    else:
        # Scale the input features by the continuous perturbations
        V_pert = real_perturbations * V_x

    return V_pert, real_perturbations, P_x
def forward_prediction(V_x: Tensor, P_x: Tensor, feature_types: Tensor,
                       features_add: bool = False) -> Tuple[Tensor, Tensor, Tensor]:
    """
    Forward pass for prediction with hard thresholding on discrete features.

    ``P_x`` is squashed through a sigmoid. Where ``feature_types == 1`` the
    sigmoid output is replaced by a hard 0/1 value (1.0 when the sigmoid output
    is >= 0.5, else 0.0); elsewhere the continuous sigmoid value is kept.

    Args:
        V_x (Tensor): Original node feature matrix (num_nodes, num_features).
        P_x (Tensor): Perturbation matrix (num_nodes, num_features).
        feature_types (Tensor): Feature type mask (1 for discrete, 0 for continuous).
            Must be broadcastable against ``P_x``, e.g. shape (num_features,).
        features_add (bool): If False (default), the perturbations scale ``V_x``
            element-wise. If True, the perturbations themselves are returned as
            the perturbed features.

    Returns:
        Tuple[Tensor, Tensor, Tensor]: ``(V_pert, perturbations, P_x)`` where
        ``V_pert`` is the perturbed feature matrix, ``perturbations`` is the mixed
        binary/continuous perturbation matrix, and ``P_x`` is the input
        perturbation matrix returned unchanged (same object).
    """
    real_perturbations = torch.sigmoid(P_x)

    # Binary thresholding for discrete features
    binary_perturbations = (real_perturbations >= 0.5).float()

    # Pick the thresholded value for discrete columns and the raw sigmoid
    # value for continuous ones; the mask broadcasts over the node dimension
    perturbations = torch.where(feature_types == 1, binary_perturbations, real_perturbations)

    if features_add:
        # Use perturbations directly as the perturbed features
        V_pert = perturbations
    else:
        # Apply perturbations to the input features
        V_pert = perturbations * V_x

    return V_pert, perturbations, P_x

In [122]:
# Run the training-time pass on graph 3 and print each returned tensor,
# followed by the original features for comparison
T1, T2, T3 = forward(data_list[3].x, P, feature_types)
for shown in (T1, T2, T3, data_list[3].x):
    print(shown)

tensor([[ 0.0000,  0.7311,  0.0000,  0.0000,  0.0000,  4.3864,  0.0000,  0.0000,
          0.0000,  0.7311,  0.0000,  0.0000,  0.7311,  0.4383,  0.0000,  0.7311],
        [ 0.0000,  0.7311,  0.0000,  0.0000,  0.0000,  4.3864,  0.0000,  0.0000,
          0.0000,  0.7311,  0.0000,  0.0000,  0.7311, -0.4383,  0.0000,  0.7311],
        [ 0.7311,  0.0000,  0.0000,  0.0000,  0.0000,  0.7311,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000,  0.0000, -1.2147,  0.0000,  0.7311],
        [ 0.7311,  0.0000,  0.0000,  0.0000,  0.0000,  0.7311,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  1.2147,  0.0000,  0.7311]])
tensor([[0.7311, 0.7311, 0.7311, 0.7311, 0.7311, 0.7311, 0.7311, 0.7311, 0.7311,
         0.7311, 0.7311, 0.7311, 0.7311, 0.7311, 0.7311, 0.7311],
        [0.7311, 0.7311, 0.7311, 0.7311, 0.7311, 0.7311, 0.7311, 0.7311, 0.7311,
         0.7311, 0.7311, 0.7311, 0.7311, 0.7311, 0.7311, 0.7311],
        [0.7311, 0.7311, 0.7311, 0.7311, 0.7311, 0.73

In [124]:
# Run the prediction-time pass on graph 3 and print each returned tensor,
# followed by the original features for comparison
T4, T5, T6 = forward_prediction(data_list[3].x, P, feature_types)
for shown in (T4, T5, T6, data_list[3].x):
    print(shown)

tensor([[ 0.0000,  1.0000,  0.0000,  0.0000,  0.0000,  6.0000,  0.0000,  0.0000,
          0.0000,  1.0000,  0.0000,  0.0000,  1.0000,  0.4383,  0.0000,  1.0000],
        [ 0.0000,  1.0000,  0.0000,  0.0000,  0.0000,  6.0000,  0.0000,  0.0000,
          0.0000,  1.0000,  0.0000,  0.0000,  1.0000, -0.4383,  0.0000,  1.0000],
        [ 1.0000,  0.0000,  0.0000,  0.0000,  0.0000,  1.0000,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000,  0.0000, -1.2147,  0.0000,  1.0000],
        [ 1.0000,  0.0000,  0.0000,  0.0000,  0.0000,  1.0000,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  1.2147,  0.0000,  1.0000]])
tensor([[1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
         1.0000, 1.0000, 1.0000, 1.0000, 0.7311, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
         1.0000, 1.0000, 1.0000, 1.0000, 0.7311, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.00