In [2]:
%load_ext autoreload
%autoreload 2

import pandas as pd
from analysis import *
from getters import * 
from plotters import * 
from helpers import *

In [3]:

gcn_args = {
    "num_epochs":50, 
    "lr": 0.0001,
    "weight_decay":5e-4, 
    "hidden_dim":64,
    "dropout":0,
    "threshold":"median", # Threshold the graph adjacency matrix. Possible values: no_threshold, median, mean
    "model_name":"gcn",
    "layers":2,
    "evaluation_method": "model_assessment" # model selection or model assessment
}

gcn_3_args = {
    "num_epochs":50, 
    "lr": 0.0001,
    "weight_decay":5e-4, 
    "hidden_dim":64,
    "dropout":0,
    "threshold":"median", # Threshold the graph adjacency matrix. Possible values: no_threshold, median, mean
    "model_name":"gcn",
    "layers":3,
    "evaluation_method": "model_assessment" # model selection or model assessment
}

gcn_student_args = {
    "num_epochs":50, 
    "lr": 0.0001, 
    "weight_decay":5e-4, 
    "hidden_dim":64,
    "dropout":0,
    "threshold":"median", # Threshold the graph adjacency matrix. Possible values: no_threshold, median, mean
    "model_name":"gcn_student",
    "evaluation_method": "model_assessment", # model selection or model assessment
    "alpha_ce": 1, 
    "T": 3, 
    "alpha_soft_ce": 2,
    "alpha_weight": 0
}

## Model Size 

In [5]:
model_1_dir = '/Users/lorenzostigliano/Documents/University/Imperial/Summer Term/thesis-imperial/model_data/model_assessment/gcn_student/models/gcn_student_MainModel_3Fold_gender_data_gcn_student_run_0_fixed_init_CV_0_view_0.pt'
model_2_dir = '/Users/lorenzostigliano/Documents/University/Imperial/Summer Term/thesis-imperial/model_data/model_assessment/gcn/models/gcn_MainModel_3Fold_gender_data_gcn_run_0_fixed_init_CV_0_view_0.pt'

model_layer_1 = torch.load(model_1_dir)
model_layer_2 = torch.load(model_2_dir)


In [6]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

count_parameters(model_layer_1), count_parameters(model_layer_2)

(108, 2470)

## Inference time 

In [98]:
import torch

def lsp(node_embeddings, adjacency_matrix, sigma=1.0):

    # Compute the squared Euclidean distance matrix between node embeddings
    squared_distances = torch.cdist(node_embeddings, node_embeddings, p=2).pow(2)
    
    # Apply the RBF kernel to the squared distance matrix
    similarity_matrix = torch.exp(-squared_distances / (2 * sigma**2))
    
    # Cast the adjacency matrix to Float
    adjacency_matrix = adjacency_matrix.float()
    
    # Compute the sum of similarities for each node's neighbors
    sum_similarities = torch.sum(adjacency_matrix * similarity_matrix, dim=1)
    
    # Compute the local structure by dividing each node's similarity by the sum
    local_structure = similarity_matrix / sum_similarities.unsqueeze(1)
    
    return local_structure

# Example usage
node_embeddings = torch.tensor([[1.0,1.0], 
                                [2.0,2.0], 
                                [3.0,3.0]])

adjacency_matrix = torch.tensor([[1, 0, 1], 
                                 [0, 0, 1], 
                                 [1, 1, 1]])

local_structure = lsp(node_embeddings, adjacency_matrix, sigma=1.0)

In [99]:
def extract_ls_vectors(local_structure, adjacency_matrix):
    # Create a sparse mask tensor from the adjacency matrix
    mask = adjacency_matrix.to_sparse().to_dense()
    
    # Multiply the mask tensor element-wise with the local structure tensor
    ls_vectors = mask * local_structure

    non_zero_rows = []
    for row in ls_vectors:
        # Select non-zero elements in the row
        non_zero_elements = row[row != 0.0]
        non_zero_rows.append(non_zero_elements)
    
    return non_zero_rows

# Example usage
ls_vectors = extract_ls_vectors(local_structure, adjacency_matrix)

print(ls_vectors)

[tensor([0.9820, 0.0180]), tensor([1.]), tensor([0.0132, 0.2654, 0.7214])]


In [114]:
import time
from torch.autograd import Variable

def inference_time(model, model_args):
    
    model.eval()
    
    G_list = load_data("gender_data", 0, NormalizeInputGraphs=False)

    folds = stratify_splits(G_list, 3)
        
    [random.shuffle(folds[i]) for i in range(len(folds))]
    train_set, validation_set, test_set = datasets_splits(folds, model_args, 0)
    train_dataset, val_dataset, threshold_value = model_assessment_split(train_set, validation_set, test_set, gcn_student_args)

    for batch_idx, data in enumerate(train_dataset):

        adj = Variable(data['adj'].float(), requires_grad=False).to(device)
        adj = torch.squeeze(adj)

        features = np.identity(adj.shape[0])
        features = Variable(torch.from_numpy(features).float(), requires_grad=False).to("cpu")
        if gcn_student_args["threshold"] in ["median", "mean"]:
            adj = torch.where(adj > threshold_value, torch.tensor([1.0]).to("cpu"), torch.tensor([0.0]).to("cpu"))

        begin_time = time.time()

        _, node_embeddings = model(features, adj)
        extract_ls_vectors(lsp(node_embeddings, adj),adj)
        print(len(extract_ls_vectors(lsp(node_embeddings, adj),adj)))
        
        student_ls_vectors_reshaped = torch.cat(student_non_zero_rows, dim=0).view(-1, student_non_zero_rows[0].size(-1))
        teacher_ls_vectors_reshaped = torch.cat(teacher_non_zero_rows, dim=0).view(-1, teacher_non_zero_rows[0].size(-1))

        return time.time() - begin_time
      
      

In [115]:
inference_time(model_layer_1, gcn_student_args)

Num training graphs:  466 ; Num test graphs:  232
35


0.006639003753662109