In [68]:
from scipy.sparse import csgraph
from main.utils import load_dataset
import torch.nn.functional as F
import copy
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score,roc_auc_score
import torch
import torch.nn as nn
from scipy.sparse import coo_matrix
import numpy as np
import pandas as pd
import pickle
import networkx as nx


In [69]:
# %pip install torch-geometric
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree

In [70]:
print('Is GPU available? {}\n'.format(torch.cuda.is_available()))
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

Is GPU available? False



In [71]:
def write_to_gephi(dataset, gt_y, gt_x, pred_x):
    graph = load_dataset(dataset)
    true_G = nx.from_scipy_sparse_matrix(graph.adj_matrix)
    pred_G = nx.from_scipy_sparse_matrix(graph.adj_matrix)
    true_y = nx.from_scipy_sparse_matrix(graph.adj_matrix)
    
    for i in true_G.nodes:
        true_G.nodes[i]['class'] = gt_x[i]
        pred_G.nodes[i]['class'] = pred_x[i]
        true_y.nodes[i]['inf_rate'] = gt_y[i]
        
    nx.write_gexf(true_G, "{}_GCNSI_real.gexf".format(dataset))
    nx.write_gexf(pred_G, "{}_GCNSI_pred.gexf".format(dataset))
    nx.write_gexf(true_y, "{}_GCNSI_y.gexf".format(dataset))

In [72]:
class GCNConv(MessagePassing):
    def __init__(self, in_channels, out_channels):
        super(GCNConv, self).__init__(aggr='add')  # "Add" aggregation
        self.lin = torch.nn.Linear(in_channels, out_channels)

    def forward(self, x, edge_index):
        # Step 1: Add self-loops
        edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0))

        # Step 2: Multiply with weights
        x = self.lin(x)

        # Step 3: Calculate the normalization
        row, col = edge_index
        deg = degree(row, x.size(0), dtype=x.dtype)
        deg_inv_sqrt = deg.pow(-0.5)
        norm = deg_inv_sqrt[row] * deg_inv_sqrt[col]

        # Step 4: Propagate the embeddings to the next layer
        return self.propagate(edge_index, size=(x.size(0), x.size(0)), x=x,
                              norm=norm)

    def message(self, x_j, norm):
        # Normalize node features.
        return norm.view(-1, 1) * x_j

In [73]:
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GCNConv(4, 128)
        self.conv2 = GCNConv(128, 128)
        self.fc =torch.nn.Linear(128,2)

    def forward(self, x,edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)
        x = self.fc(x)

        return x

In [74]:
dataset = 'new_data/Combo_github2stack_'
diffusion_model_proj = 'LT'
diffusion_model_rec = 'IC'
seed_rate = 1
sample = 100
dataset_file = '{}{}2{}_{}_{}.SG'.format(dataset, diffusion_model_proj,
                                    diffusion_model_rec, str(10*seed_rate), sample)
with open(dataset_file, 'rb') as f:
    graph = pickle.load(f)
    
Combined_Graph, adj, prob_matrix, inverse_pairs = graph['Combined_Graph'], graph['adj'], graph['prob'], graph['inverse_pairs']

# with open('new_data/github2stack_IC2SIS_10_100.SG', 'rb') as f:
#     graph = pickle.load(f)

1. training of both networks 

In [75]:
# 0.15
# 0.05
threshold = 0.04

In [76]:
# adj, inverse_pairs, prob = graph['adj_received'], np.array(graph['inverse_pairs_received']), graph['prob_received']

In [77]:
np.unique(inverse_pairs)

array([0., 1.], dtype=float32)

In [78]:
# %pip install --upgrade numpy scipy
print(f"NumPy version: {np.__version__}")
print(f"PyTorch version: {torch.__version__}")

NumPy version: 1.26.2
PyTorch version: 2.1.0+cpu


In [79]:
# Splitting data
random_seed = 42
split_ratio = 0.8

# Define the total number of samples
total_samples = inverse_pairs.shape[0]

# Calculate the number of samples for training and testing
num_train_samples = int(total_samples * split_ratio)
num_test_samples = total_samples - num_train_samples

# Create a list of shuffled indices
indices = list(range(total_samples))
np.random.seed(random_seed)
np.random.shuffle(indices)

# Use the shuffled indices to split the data
train_indices = indices[:num_train_samples]
test_indices = indices[num_train_samples:]

# Split the data into training and testing sets
inverse_pairs_train = inverse_pairs[train_indices]
inverse_pairs_test = inverse_pairs[test_indices]

# print(inverse_pairs)
# print(inverse_pairs_train)
# print(inverse_pairs_test)

In [80]:
S = csgraph.laplacian(adj, normed=False)
print(type(S))

# S = np.array(coo_matrix.todense(S)) REMOVED LINE

<class 'numpy.ndarray'>


In [81]:
num_node = adj.shape[0]

In [82]:
alpha = 0.4

In [83]:
adj_sparse = coo_matrix(adj) # ADDED LINE

coo = adj_sparse.tocoo()
row = torch.from_numpy(coo.row.astype(np.int64)).to(torch.long).to(device)
col = torch.from_numpy(coo.col.astype(np.int64)).to(torch.long).to(device)

In [84]:
edge_index = torch.stack([row, col], dim=0).to(device)

In [85]:
model = Net().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, weight_decay=1e-4)
criterion=torch.nn.CrossEntropyLoss(weight=torch.tensor([1.0, 2.5]).to(device)) #[2 3]

In [86]:
import time

In [87]:
begin = time.time()
prediction_train = []
for epoch in range(30):
    print("epoch:" + str(epoch))
    optimizer.zero_grad()

    total_loss=0
    train_acc = 0
    train_pr = 0
    train_re = 0
    train_fs = 0
    train_auc = 0
    for i, influ_mat in enumerate(inverse_pairs_train):
        # print(i)
        seed_vec = influ_mat[0]
        # seed = torch.LongTensor(seed_vec).to(device)
        influ_vec = influ_mat[1]
        # print(influ_mat)
        # print(influ_mat.shape)
        # print(seed_vec)
        # print(influ_vec)
        V3 = copy.copy(influ_vec)
        V4 = copy.copy(influ_vec)
        
        V3[influ_vec < 0.5] =  0.5 # play with this 0
        V4[influ_vec >= 0.5] =  0.5 # 1
        d1 = influ_vec
        d1 = d1[:, np.newaxis]
        d2 = (1 - alpha) * np.matmul(np.linalg.inv(np.eye(N=num_node) - alpha * S), influ_vec)
        d2 = d2[:, np.newaxis]
        d3 = (1 - alpha) * np.matmul(np.linalg.inv(np.eye(N=num_node) - alpha * S), V3)
        d3 = d3[:, np.newaxis]
        d4 = (1 - alpha) * np.matmul(np.linalg.inv(np.eye(N=num_node) - alpha * S), V4)
        d4 = d4[:, np.newaxis]
        x = np.concatenate((d1, d2, d3, d4), axis=1)
        x = torch.tensor(x,dtype=torch.float).to(device)
        # seed_vec = torch.tensor(seed_vec).squeeze(-1).long().to(device)
        seed_vec = seed_vec.clone().detach().requires_grad_(True).squeeze(-1).long().to(device)


        # check github code
        pred = model(x, edge_index)
        # print(pred)

        loss = criterion(pred, seed_vec)
        total_loss += loss
        loss.backward()
        optimizer.step()

        # CHECK THE RANGE OF THIS TO RESET THRESHOLD
        pred = torch.softmax(pred,dim=1)
        pred = pred[:,1].squeeze(-1).cpu().detach().numpy()
        # print(np.unique(pred))

        train_auc += roc_auc_score(seed_vec, pred)

        for i in range(len(pred)):
            if pred[i]>=threshold:
                pred[i] = 1
            else:
                pred[i] = 0

        train_acc += accuracy_score(seed_vec, pred)
        train_pr += precision_score(seed_vec, pred , zero_division=0) # change to 0
        train_re += recall_score(seed_vec, pred, zero_division=0)
        train_fs += f1_score(seed_vec, pred, zero_division=0)


        prediction_train.append(pred)
    
    print("loss:{:0.6f}".format(total_loss/inverse_pairs_train.shape[0]))
    print('training acc:', train_acc / inverse_pairs_train.shape[0])
    print('training pr:', train_pr / inverse_pairs_train.shape[0])
    print('training re:', train_re / inverse_pairs_train.shape[0])
    print('training fs:', train_fs / inverse_pairs_train.shape[0])
    print('training auc:', train_auc / inverse_pairs_train.shape[0])
end = time.time()

epoch:0
[0.32344225 0.360383   0.38254073 ... 0.5458678  0.547978   0.5587175 ]
[0.3186379  0.35456502 0.3697801  ... 0.5424421  0.542574   0.5517381 ]
[0.28516334 0.34013483 0.36088613 ... 0.53713596 0.54604036 0.5523653 ]
[0.26846415 0.33321676 0.34316474 ... 0.5538393  0.5559033  0.56913155]
[0.24957341 0.30920815 0.33885983 ... 0.5291483  0.53076756 0.5497614 ]
[0.22070715 0.27547497 0.30773592 ... 0.5264558  0.5314439  0.5369488 ]
[0.18837304 0.2426808  0.2718312  ... 0.52047664 0.52546054 0.53434247]
[0.14555104 0.21299508 0.24159002 ... 0.513309   0.5152316  0.5154857 ]
[0.1180643  0.1802118  0.20594403 ... 0.50473964 0.5087187  0.51708335]
[0.08616343 0.14096381 0.16943246 ... 0.49178484 0.49489188 0.49900547]
[0.0611346  0.11124712 0.13863358 ... 0.4794088  0.47991663 0.48626524]
[0.04269394 0.0848571  0.11089912 ... 0.46930414 0.47259912 0.47291014]
[0.03106524 0.0672646  0.09178001 ... 0.4617627  0.46479273 0.46821076]
[0.01988619 0.04579748 0.06526633 ... 0.44724792 0.45323

KeyboardInterrupt: 

In [None]:
print(end - begin)


3222.4704134464264


2. testing of the entire network

In [None]:
testing_threshold = 0.05

In [None]:
test_acc = 0
test_pr = 0
test_re = 0
test_fs = 0
test_auc = 0


num_node = adj.shape[0]
# setting the model in evaluation mode
model.eval()


for i, influ_mat in enumerate(inverse_pairs_test):
    print("i={:d}".format(i))
    seed_vec = influ_mat[0]
    influ_vec = influ_mat[1]
    V3 = copy.copy(influ_vec)
    V4 = copy.copy(influ_vec)
    V3[influ_vec < 0.5] =  0.5
    V4[influ_vec >= 0.5] =  0.5
    d1 = influ_vec
    d1 = d1[:, np.newaxis]
    d2 = (1 - alpha) * np.matmul(np.linalg.inv(np.eye(N=num_node) - alpha * S), influ_vec)
    d2 = d2[:, np.newaxis]
    d3 = (1 - alpha) * np.matmul(np.linalg.inv(np.eye(N=num_node) - alpha * S), V3)
    d3 = d3[:, np.newaxis]
    d4 = (1 - alpha) * np.matmul(np.linalg.inv(np.eye(N=num_node) - alpha * S), V4)
    d4 = d4[:, np.newaxis]
    x = np.concatenate((d1, d2, d3, d4), axis=1)
    x = torch.tensor(x,dtype=torch.float).to(device)
    # seed_vec = torch.tensor(seed_vec).squeeze(-1).long().to(device)
    seed_vec = seed_vec.clone().detach().requires_grad_(True).squeeze(-1).long().to(device)

    pred = model(x, edge_index)
    pred = torch.softmax(pred,dim=1)
    pred = pred[:,1].squeeze(-1).cpu().detach().numpy()

    test_auc += roc_auc_score(seed_vec, pred )
        
    for i in range(len(pred)):
        if pred[i]>=testing_threshold:
            pred[i] = 1
        else:
            pred[i] = 0

    print("accuracy: " + str(accuracy_score(seed_vec, pred)))
    print("precision_score: " + str(precision_score(seed_vec, pred, zero_division=0)))
    print("recall_score: " + str(recall_score(seed_vec, pred , zero_division=0)))
    print("f1_score: " + str(f1_score(seed_vec, pred , zero_division=0)))
    print("roc_auc_score: " + str(roc_auc_score(seed_vec, pred)))

    test_acc += accuracy_score(seed_vec, pred)
    test_pr += precision_score(seed_vec, pred , zero_division=0)
    test_re += recall_score(seed_vec, pred , zero_division=0)
    test_fs += f1_score(seed_vec, pred , zero_division=0)


i=0
accuracy: 0.9793956043956044
precision_score: 0.0
recall_score: 0.0
f1_score: 0.0
roc_auc_score: 0.5
i=1
accuracy: 0.9793956043956044
precision_score: 0.0
recall_score: 0.0
f1_score: 0.0
roc_auc_score: 0.5
i=2
accuracy: 0.9793956043956044
precision_score: 0.0
recall_score: 0.0
f1_score: 0.0
roc_auc_score: 0.5
i=3
accuracy: 0.9793956043956044
precision_score: 0.0
recall_score: 0.0
f1_score: 0.0
roc_auc_score: 0.5
i=4
accuracy: 0.9793956043956044
precision_score: 0.0
recall_score: 0.0
f1_score: 0.0
roc_auc_score: 0.5
i=5
accuracy: 0.9793956043956044
precision_score: 0.0
recall_score: 0.0
f1_score: 0.0
roc_auc_score: 0.5
i=6
accuracy: 0.9793956043956044
precision_score: 0.0
recall_score: 0.0
f1_score: 0.0
roc_auc_score: 0.5
i=7
accuracy: 0.9793956043956044
precision_score: 0.0
recall_score: 0.0
f1_score: 0.0
roc_auc_score: 0.5
i=8
accuracy: 0.9793956043956044
precision_score: 0.0
recall_score: 0.0
f1_score: 0.0
roc_auc_score: 0.5
i=9
accuracy: 0.9793956043956044
precision_score: 0.0
r

In [None]:

print('test acc:', test_acc / inverse_pairs_test.shape[0])
print('test pr:', test_pr / inverse_pairs_test.shape[0])
print('test re:', test_re / inverse_pairs_test.shape[0])
print('test fs:', test_fs / inverse_pairs_test.shape[0])
print('test auc:', test_auc / inverse_pairs_test.shape[0])


test acc: 0.9793956043956044
test pr: 0.0
test re: 0.0
test fs: 0.0
test auc: 0.5
