NEW TRY

In [1]:
import sys
sys.path.append('../../')
import torch
from torch_geometric.data import Data, Dataset
import networkx as nx
from networkx.algorithms.centrality import betweenness_centrality

from Datasets.synthetics import BA_2grid, BA_2grid_house, ProbingDataset, BA_2grid_to_test, BA_2grid_house_with_node_degree_as_features_and_expand_10_dimensions
from sklearn.model_selection import train_test_split
from torch_geometric.loader import DataLoader

import torch_geometric.utils as pyg_utils
import pickle as pkl
from torch_geometric.utils import from_networkx
import random
import torch.nn as nn

from models.models_BA_2grid import GIN_framework as framework
import torch.nn.functional as F
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
import torch.optim as optim

from sklearn.metrics import r2_score
import matplotlib.pyplot as plt

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
def set_seed(seed):
    if seed == -1:
        seed = random.randint(0, 1000)
    # Pandas also uses np random state by default
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)

    # if you are using GPU
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.enabled = False
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

In [4]:
set_seed(43)

probe

In [5]:
MODEL = "RGCN"
DATASET = "BA_2grid_house"

# import the model
from models.models_BA_2grid_house import RGCN_framework as framework
# import the dataset
from Datasets.synthetics import BA_2grid_house_with_node_degree_as_features_and_expand_10_dimensions

In [6]:
# inizialize the framework
dataset = BA_2grid_house_with_node_degree_as_features_and_expand_10_dimensions()
gnn = framework(dataset,device="cpu")



In [7]:
# the gnn object contains the train test split and the model.

print(gnn.model)
print(gnn.train_idx)
print(gnn.dataset[gnn.train_idx])

Net(
  (conv1): RGCNConv(10, 30, num_relations=2)
  (conv2): RGCNConv(30, 30, num_relations=2)
  (lin1): Linear(in_features=30, out_features=30, bias=True)
  (lin2): Linear(in_features=30, out_features=2, bias=True)
)
tensor([ 701, 1225,   47,  ...,   40,  423,  494])
BA_2grid_house_with_node_degree_as_features_and_expand_10_dimensions(1600)


In [8]:
#now that the model is instaziated, we have to load the weights
gnn.load_model("models/"+DATASET+"_"+MODEL)
print("we loaded the weights")
# right now the model has trained weights.
print(gnn.model)

INFO:root:Model loaded from: models/BA_2grid_house_RGCN


we loaded the weights
Net(
  (conv1): RGCNConv(10, 30, num_relations=2)
  (conv2): RGCNConv(30, 30, num_relations=2)
  (lin1): Linear(in_features=30, out_features=30, bias=True)
  (lin2): Linear(in_features=30, out_features=2, bias=True)
)


In [9]:
gnn.evaluate()

INFO:root:Train Loss: 0.303, Train Acc: 0.846, Test Loss: 0.368, Test Acc: 0.823


In [10]:
import networkx as nx

def compute_graph_properties(data):
    properties = []
    for graph_data in data:
        G = nx.from_edgelist(graph_data.edge_index.t().tolist())
        num_nodes = G.number_of_nodes()
        num_edges = G.number_of_edges()
        density = nx.density(G)
        avg_path_len = nx.average_shortest_path_length(G) if nx.is_connected(G) else None
        num_cliques = len(list(nx.find_cliques(G)))
        #small_world = nx.algorithms.smallworld.sigma(G)

        properties.append((num_nodes, num_edges, density, avg_path_len, num_cliques)) #, small_world))
    return properties

train_properties = compute_graph_properties(gnn.dataset[gnn.train_idx])
test_properties = compute_graph_properties(gnn.dataset[gnn.test_idx])


In [11]:
#save the properties in a file
# with open("results/"+DATASET+"_"+MODEL+"_train_properties.pkl", "wb") as f:
#     pkl.dump(train_properties, f)

# with open("results/"+DATASET+"_"+MODEL+"_test_properties.pkl", "wb") as f:
#     pkl.dump(test_properties, f)

    

In [12]:
len(test_properties)

400

In [13]:
train_features, test_features = gnn.evaluate_with_features2()

In [14]:
len(train_features), len(test_features)

(1600, 400)

In [15]:
len(train_features[0])

5

### probing

In [16]:
# Evaluate to get features
train_features, test_features = gnn.evaluate_with_features2()

# Extract x_global and x_lin1 embeddings for training set
train_x_global = np.array([feat[0] for feat in train_features])
train_x_lin1 = np.array([feat[1] for feat in train_features])

#take only the first 10 elements
# train_x_global = train_x_global[:10]
# train_x_lin1 = train_x_lin1[:10]

# Extract x_global and x_lin1 embeddings for test set
test_x_global = np.array([feat[0] for feat in test_features])
test_x_lin1 = np.array([feat[1] for feat in test_features])

#take only the first 10 elements
# test_x_global = test_x_global[:10]
# test_x_lin1 = test_x_lin1[:10]


linear regression

In [17]:
import numpy as np

# Prepare training data
train_x = train_x_global  # or use train_x_lin1 if you prefer
train_y = np.array(train_properties)

# Prepare testing data
test_x = test_x_global  # or use test_x_lin1 if you prefer
test_y = np.array(test_properties)


In [18]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Train a regression model for each graph property
models = []
for i in range(train_y.shape[1]):
    model = LinearRegression()
    model.fit(train_x, train_y[:, i])
    models.append(model)

# Evaluate the models
for i, model in enumerate(models):
    train_pred = model.predict(train_x)
    test_pred = model.predict(test_x)
    
    train_mse = mean_squared_error(train_y[:, i], train_pred)
    test_mse = mean_squared_error(test_y[:, i], test_pred)
    
    print(f'Property {i}: Train MSE: {train_mse:.3f}, Test MSE: {test_mse:.3f}')


Property 0: Train MSE: 8.223, Test MSE: 8.049
Property 1: Train MSE: 10.675, Test MSE: 10.887
Property 2: Train MSE: 0.000, Test MSE: 0.000
Property 3: Train MSE: 0.241, Test MSE: 0.244
Property 4: Train MSE: 11.329, Test MSE: 11.301


Linear model

In [16]:
import torch
import torch.nn as nn
import torch.optim as optim

class LinearModel(nn.Module):
    def __init__(self, input_size, output_size):
        super(LinearModel, self).__init__()
        self.linear = nn.Linear(input_size, output_size)

    def forward(self, x):
        return self.linear(x)


In [17]:
# Convert to PyTorch tensors
train_x = torch.tensor(train_x_global, dtype=torch.float32)
train_y = torch.tensor(train_properties, dtype=torch.float32)

test_x = torch.tensor(test_x_global, dtype=torch.float32)
test_y = torch.tensor(test_properties, dtype=torch.float32)


In [18]:
len(train_x)

1600

In [19]:
train_y[:, 4]

tensor([28., 30., 30.,  ..., 26., 29., 22.])

In [21]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Define the linear model
class LinearModel(nn.Module):
    def __init__(self, input_size, output_size):
        super(LinearModel, self).__init__()
        self.linear = nn.Linear(input_size, output_size)

    def forward(self, x):
        return self.linear(x)

# Assume we have already evaluated to get features
# train_features, test_features = gnn.evaluate_with_features()

# Extract embeddings (in order x1, x2, x_global, x_lin1, out)
train_x1 = np.array([feat[0] for feat in train_features])
test_x1 = np.array([feat[0] for feat in test_features])

# Extract 5th, 6th and 7th embeddings
train_x2 = np.array([feat[1] for feat in train_features])
test_x2 = np.array([feat[1] for feat in test_features])

train_x_global = np.array([feat[2] for feat in train_features])
test_x_global = np.array([feat[2] for feat in test_features])

train_x_lin1 = np.array([feat[3] for feat in train_features])
test_x_lin1 = np.array([feat[3] for feat in test_features])

train_out = np.array([feat[4] for feat in train_features])
test_out = np.array([feat[4] for feat in test_features])

# Compute graph properties
train_properties = compute_graph_properties(gnn.dataset[gnn.train_idx])
test_properties = compute_graph_properties(gnn.dataset[gnn.test_idx])

# Convert to PyTorch tensors
train_x = torch.tensor(train_x1, dtype=torch.float32)
train_y = torch.tensor(train_properties, dtype=torch.float32)

test_x = torch.tensor(test_x1, dtype=torch.float32)
test_y = torch.tensor(test_properties, dtype=torch.float32)

train_x2 = torch.tensor(train_x2, dtype=torch.float32)
test_x2 = torch.tensor(test_x2, dtype=torch.float32)

train_x_global = torch.tensor(train_x_global, dtype=torch.float32)
test_x_global = torch.tensor(test_x_global, dtype=torch.float32)

train_x_lin1 = torch.tensor(train_x_lin1, dtype=torch.float32)
test_x_lin1 = torch.tensor(test_x_lin1, dtype=torch.float32)

train_out = torch.tensor(train_out, dtype=torch.float32)
test_out = torch.tensor(test_out, dtype=torch.float32)

# Train and evaluate a model for each graph property and each embeddings
input_size = train_x.shape[1]
output_size = 1  # Predicting one property at a time


property_names = ['num_nodes', 'num_edges', 'density', 'avg_path_len', 'num_cliques']

embeddings = [train_x, train_x2, train_x_global, train_x_lin1, train_out]
embeddings_names = ['train_x', 'train_x2', 'train_x_global', 'train_x_lin1', 'train_out']

for j, embedding in enumerate(embeddings):
    # Train and evaluate a model for each graph property
    for i, property_name in enumerate(property_names):
        model = LinearModel(input_size, output_size)
        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=0.001)
        num_epochs = 10000

        for epoch in range(num_epochs):
            model.train()
            optimizer.zero_grad()

            outputs = model(embedding).squeeze()
            target = train_y[:, i].squeeze()

            loss = criterion(outputs, target)
            loss.backward()
            optimizer.step()

            if (epoch+1) % 1000 == 0:
                print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

        # Evaluate the model
        model.eval()
        with torch.no_grad():
            train_pred = model(embedding).squeeze().cpu().numpy()
            test_pred = model(test_x).squeeze().cpu().numpy()

            train_target = train_y[:, i].cpu().numpy()
            test_target = test_y[:, i].cpu().numpy()

            train_mse = mean_squared_error(train_target, train_pred)
            test_mse = mean_squared_error(test_target, test_pred)

            train_r2 = r2_score(train_target, train_pred)
            test_r2 = r2_score(test_target, test_pred)

            print(f'Embedding: {embedding}')
            #print the name of the embedding, literally the name of the variable
            print(f'Embedding name: {embeddings_names[j]}')
            print(f'Property: {property_name}')
            print(f'  Train MSE: {train_mse:.4f}, Test MSE: {test_mse:.4f}')
            print(f'  Train R²: {train_r2:.4f}, Test R²: {test_r2:.4f}')


Epoch [1000/10000], Loss: 65.8506
Epoch [2000/10000], Loss: 37.3973
Epoch [3000/10000], Loss: 32.1226
Epoch [4000/10000], Loss: 29.7836
Epoch [5000/10000], Loss: 27.3770
Epoch [6000/10000], Loss: 24.9685
Epoch [7000/10000], Loss: 22.6811
Epoch [8000/10000], Loss: 20.5124
Epoch [9000/10000], Loss: 18.4646
Epoch [10000/10000], Loss: 16.5123
Embedding: tensor([[0.0000, 0.9567, 4.4559,  ..., 0.0000, 0.7612, 3.0745],
        [0.0000, 0.4834, 3.9561,  ..., 0.0000, 0.9605, 3.2237],
        [1.0854, 0.0000, 0.0000,  ..., 3.2084, 2.7875, 3.3425],
        ...,
        [2.0631, 0.0000, 0.0000,  ..., 4.2904, 2.9038, 2.8054],
        [2.0631, 0.0000, 0.0000,  ..., 4.2904, 2.9038, 2.8054],
        [0.0000, 0.0000, 1.2693,  ..., 0.0000, 1.3425, 2.8854]])
Embedding name: train_x
Property: num_nodes
  Train MSE: 16.5104, Test MSE: 15.5250
  Train R²: -0.9992, Test R²: -0.9291
Epoch [1000/10000], Loss: 108.2927
Epoch [2000/10000], Loss: 55.5548
Epoch [3000/10000], Loss: 44.1973
Epoch [4000/10000], Loss:

RuntimeError: mat1 and mat2 shapes cannot be multiplied (1600x2 and 30x1)

In [22]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
output_size = 1  # Predicting one property at a time
# Define the linear model
class LinearModel(nn.Module):
    def __init__(self, input_size, output_size):
        super(LinearModel, self).__init__()
        self.linear = nn.Linear(input_size, output_size)

    def forward(self, x):
        return self.linear(x)

# Assume we have already evaluated to get features
# train_features, test_features = gnn.evaluate_with_features()

# Extract embeddings (in order x1, x2, x_global, x_lin1, out)
train_x1 = np.array([feat[0] for feat in train_features])
test_x1 = np.array([feat[0] for feat in test_features])

# Extract 5th, 6th and 7th embeddings
train_x2 = np.array([feat[1] for feat in train_features])
test_x2 = np.array([feat[1] for feat in test_features])

train_x_global = np.array([feat[2] for feat in train_features])
test_x_global = np.array([feat[2] for feat in test_features])

train_x_lin1 = np.array([feat[3] for feat in train_features])
test_x_lin1 = np.array([feat[3] for feat in test_features])

train_out = np.array([feat[4] for feat in train_features])
test_out = np.array([feat[4] for feat in test_features])

# Compute graph properties
train_properties = compute_graph_properties(gnn.dataset[gnn.train_idx])
test_properties = compute_graph_properties(gnn.dataset[gnn.test_idx])

# Convert to PyTorch tensors
train_x = torch.tensor(train_x1, dtype=torch.float32)
train_y = torch.tensor(train_properties, dtype=torch.float32)

test_x = torch.tensor(test_x1, dtype=torch.float32)
test_y = torch.tensor(test_properties, dtype=torch.float32)

train_x2 = torch.tensor(train_x2, dtype=torch.float32)
test_x2 = torch.tensor(test_x2, dtype=torch.float32)

train_x_global = torch.tensor(train_x_global, dtype=torch.float32)
test_x_global = torch.tensor(test_x_global, dtype=torch.float32)

train_x_lin1 = torch.tensor(train_x_lin1, dtype=torch.float32)
test_x_lin1 = torch.tensor(test_x_lin1, dtype=torch.float32)

train_out = torch.tensor(train_out, dtype=torch.float32)
test_out = torch.tensor(test_out, dtype=torch.float32)

# Train and evaluate a model for each graph property and each embedding
property_names = ['num_nodes', 'num_edges', 'density', 'avg_path_len', 'num_cliques']
embeddings = [(train_x, test_x), (train_x2, test_x2), (train_x_global, test_x_global), (train_x_lin1, test_x_lin1), (train_out, test_out)]

for train_embedding, test_embedding in embeddings:
    input_size = train_embedding.shape[1]

    for i, property_name in enumerate(property_names):
        model = LinearModel(input_size, output_size)
        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=0.001)
        num_epochs = 10000  # Adjust this as needed

        for epoch in range(num_epochs):
            model.train()
            optimizer.zero_grad()

            outputs = model(train_embedding).squeeze()
            target = train_y[:, i].squeeze()

            loss = criterion(outputs, target)
            loss.backward()
            optimizer.step()

            if (epoch+1) % 1000 == 0:  # Adjust this for more frequent/lower print frequency
                print(f'Epoch [{epoch+1}/{num_epochs}], Property: {property_name}, Loss: {loss.item():.4f}')

        # Evaluate the model
        model.eval()
        with torch.no_grad():
            train_pred = model(train_embedding).squeeze().cpu().numpy()
            test_pred = model(test_embedding).squeeze().cpu().numpy()

            train_target = train_y[:, i].cpu().numpy()
            test_target = test_y[:, i].cpu().numpy()

            train_mse = mean_squared_error(train_target, train_pred)
            test_mse = mean_squared_error(test_target, test_pred)

            train_r2 = r2_score(train_target, train_pred)
            test_r2 = r2_score(test_target, test_pred)

            print(f'Embedding: {train_embedding.shape}')
            print(f'Property: {property_name}')
            print(f'  Train MSE: {train_mse:.4f}, Test MSE: {test_mse:.4f}')
            print(f'  Train R²: {train_r2:.4f}, Test R²: {test_r2:.4f}')


Epoch [1000/10000], Property: num_nodes, Loss: 65.6111
Epoch [2000/10000], Property: num_nodes, Loss: 38.4882
Epoch [3000/10000], Property: num_nodes, Loss: 33.1509
Epoch [4000/10000], Property: num_nodes, Loss: 30.5764
Epoch [5000/10000], Property: num_nodes, Loss: 27.8995
Epoch [6000/10000], Property: num_nodes, Loss: 25.2782
Epoch [7000/10000], Property: num_nodes, Loss: 22.8601
Epoch [8000/10000], Property: num_nodes, Loss: 20.6307
Epoch [9000/10000], Property: num_nodes, Loss: 18.5701
Epoch [10000/10000], Property: num_nodes, Loss: 16.6302
Embedding: torch.Size([1600, 30])
Property: num_nodes
  Train MSE: 16.6283, Test MSE: 15.6328
  Train R²: -1.0134, Test R²: -0.9425
Epoch [1000/10000], Property: num_edges, Loss: 92.6828
Epoch [2000/10000], Property: num_edges, Loss: 52.3110
Epoch [3000/10000], Property: num_edges, Loss: 43.6442
Epoch [4000/10000], Property: num_edges, Loss: 40.3993
Epoch [5000/10000], Property: num_edges, Loss: 37.2498
Epoch [6000/10000], Property: num_edges, L