In [1]:
import sys
sys.path.append('/home/zss/estimation_new/baseline_new/')
import Utils
import copy
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split 
import matplotlib.pyplot as plt
import easygraph as eg
import random
import os
import torch.nn.functional as F
import pickle

In [2]:
def seed_torch(seed=42):
    """Seed Python, NumPy and PyTorch and pin the cuDNN backend flags.

    Args:
        seed: Anything ``int()`` accepts. The resulting integer is passed to
            ``random.seed``, ``np.random.seed``, ``torch.manual_seed`` and the
            two ``torch.cuda`` seeding functions.

    Side effects:
        Writes the seed (as a string) to ``os.environ['PYTHONHASHSEED']`` and
        sets ``torch.backends.cudnn.deterministic = True``,
        ``benchmark = False`` and ``enabled = True``.
    """
    seed_value = int(seed)

    # NOTE(review): this only writes the environment variable; presumably it
    # does not change hashing in the already-running interpreter - confirm.
    os.environ['PYTHONHASHSEED'] = str(seed_value)

    # Every generator receives the same integer.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed_value)

    # cuDNN stays enabled, but autotuning is off and deterministic mode is on.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.enabled = True


In [13]:
class BiGRUModel(nn.Module):
    """Bidirectional GRU over a length-1 sequence, followed by a small linear head.

    Pipeline in ``forward``: ``gru`` -> last time step -> dropout -> ``l1`` ->
    ``l2`` -> ReLU.

    Args:
        input_size: Number of features per sample fed to the first GRU.
        hidden_size: Hidden width of the first GRU per direction; its output
            therefore has ``2 * hidden_size`` features.
        hidden_size2: Output width of the first linear layer.
        out_features: Output width of the final linear layer.

    Notes:
        ``gru2`` is constructed but never called in ``forward``. It is still a
        registered sub-module, so its weights appear in ``state_dict()``.
        NOTE(review): there is no activation between ``l1`` and ``l2``, and the
        final ReLU clamps predictions at zero - confirm both are intended.
    """

    def __init__(
            self,
            input_size: int,
            hidden_size: int,
            hidden_size2: int,
            out_features: int
    ):
        super().__init__()
        # A bidirectional GRU concatenates both directions, doubling the width.
        bi_width = 2 * hidden_size

        # Recurrent layers (only `gru` takes part in forward()).
        self.gru = nn.GRU(input_size, hidden_size, batch_first=True, bidirectional=True)
        self.gru2 = nn.GRU(bi_width, bi_width, batch_first=True, bidirectional=True)

        self.dropout = nn.Dropout(p=0.5)

        # Fully connected head applied to the GRU output of the last step.
        self.l1 = nn.Linear(bi_width, hidden_size2)
        self.l2 = nn.Linear(hidden_size2, out_features)

    def forward(self, x):
        """Return non-negative predictions for ``x``.

        ``x`` gets a size-1 axis inserted at position 1, which is the sequence
        axis because the GRU uses ``batch_first=True``.
        Assumes ``x`` is (num_samples, input_size) - TODO confirm.
        """
        sequence = x.unsqueeze(1)

        # Keep only the per-step outputs; the final hidden state is discarded.
        encoded, _ = self.gru(sequence)
        final_step = self.dropout(encoded[:, -1, :])

        # Linear head, then ReLU on the final layer's output.
        hidden = self.l1(final_step)
        return F.relu(self.l2(hidden))


In [4]:
def preprocess(hg, final_features, label):
    """Split the nodes into train/val/test sets and build an untrained BiGRUModel.

    Args:
        hg: Object whose ``v`` attribute is the collection handed to
            ``train_test_split`` (the notebook passes an ``eg.Hypergraph``).
        final_features: Feature tensor; ``shape[1]`` becomes the model's
            ``input_size``. It is cast with ``.float()``.
        label: Target tensor, cast with ``.float()``.

    Returns:
        ``(dataset, model)``. ``dataset`` is a dict with the keys
        ``"features"``, ``"labels"``, ``"train_mask"``, ``"val_mask"`` and
        ``"test_mask"``. ``model`` is a new ``BiGRUModel`` with hidden sizes
        128 and 32 and a single output feature.
    """
    input_feature_dim = final_features.shape[1]

    # First split: 90% of the nodes for training + validation, 10% for testing.
    # Second split: 80% of that pool for training, 20% for validation.
    # Both use a fixed random_state.
    train_val_nodes, test_nodes = train_test_split(hg.v, test_size=0.1, random_state=42)
    train_nodes, val_nodes = train_test_split(train_val_nodes, test_size=0.2, random_state=42)

    # The "*_mask" entries hold whatever train_test_split returned.
    # Presumably these are lists of node ids rather than boolean masks - confirm.
    dataset = {
        "features": final_features.float(),
        "labels": label.float(),
        "train_mask": train_nodes,
        "val_mask": val_nodes,
        "test_mask": test_nodes,
    }

    model = BiGRUModel(
        input_size=input_feature_dim,
        hidden_size=128,
        hidden_size2=32,
        out_features=1,
    )

    return dataset, model

In [5]:
def train(
    data: dict,
    model: nn.Module,
    optimizer: torch.optim.Optimizer,
    criterion: nn.Module,
):
    """Run one optimisation step over the training nodes.

    The model is evaluated on all of ``data["features"]``; only the rows
    selected by ``data["train_mask"]`` enter the loss. Predictions and labels
    are both mapped through ``log(value + 1)`` before ``criterion`` is applied.

    This function does not switch the model to training mode; the caller is
    expected to do that.

    Args:
        data: Dict providing ``"features"``, ``"labels"`` and ``"train_mask"``.
        model: Module called as ``model(features)``.
        optimizer: Optimizer whose gradients are zeroed before the forward
            pass and which is stepped once after ``backward()``.
        criterion: Loss callable taking ``(prediction, target)``.

    Returns:
        The loss tensor produced by ``criterion`` (not detached).
    """
    inputs = data["features"]
    node_ids, targets = data["train_mask"], data["labels"]

    optimizer.zero_grad()
    predictions = model(inputs)

    # Compare predictions and targets in log(1 + value) space.
    log_predictions = torch.log(predictions[node_ids].squeeze() + 1)
    log_targets = torch.log(targets[node_ids] + 1)

    step_loss = criterion(log_predictions, log_targets)
    step_loss.backward()
    optimizer.step()
    return step_loss

# Applied as a decorator so autograd is disabled for the whole call; a bare
# `torch.no_grad()` statement above the function would have no effect.
@torch.no_grad()
def valid(model, data):
    """Return the log-space mean squared error on the validation nodes.

    Predictions and labels are both mapped through ``log(value + 1)`` before
    the MSE is taken.

    Args:
        model: Module called as ``model(data["features"])``.
        data: Dict providing ``"features"``, ``"labels"`` and ``"val_mask"``.

    Returns:
        The loss as a Python float.

    Side effects:
        Puts ``model`` into eval mode and leaves it there. Callers that keep
        training must call ``model.train()`` again.
    """
    features, labels = data["features"], data["labels"]
    val_mask = data["val_mask"]

    model.eval()
    outputs = model(features)
    return nn.functional.mse_loss(
        torch.log(outputs[val_mask].squeeze() + 1),
        torch.log(labels[val_mask] + 1),
    ).item()


torch.no_grad()
def test(model, data):
    features = data["features"]
    test_mask, labels = data["test_mask"], data["labels"]
    # weights = weights
    model.eval()
    with torch.no_grad():
        outputs = model(features)
        # criterion = WeightedMSELoss(weights[data["test_mask"]])
        mse = nn.functional.mse_loss(torch.log(outputs[test_mask].squeeze()+1), torch.log(labels[test_mask]+1)).item()
        # mse = criterion(outputs[test_mask].squeeze(), labels[test_mask])
    return mse

In [6]:
def draw_loss_curve(loss1, save_path="loss_pic.png"):
    """Plot one loss series against 1-based epoch numbers on the current figure.

    Args:
        loss1: Sequence of per-epoch loss values; its length sets the x range.
        save_path: File to write the figure to before showing it. Pass ``None``
            to skip saving.
    """
    plt.clf()  # wipe whatever the current pyplot figure contained

    epoch_axis = range(1, len(loss1) + 1)
    plt.plot(epoch_axis, loss1, 'b', label='EG Training loss')

    # Figure decoration.
    plt.grid(True)
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title('Training Loss Comparison')
    plt.legend()

    # Save first, then display.
    if save_path is not None:
        plt.savefig(save_path)
    plt.show()

In [7]:
def generate_dict(matrix):
    """Build adjacency lists in both directions from a 2-D incidence matrix.

    An entry counts as a membership only when it compares equal to 1.

    Args:
        matrix: 2-D array (or array-like) indexed as ``[row, column]``.

    Returns:
        ``(nd_he, he_nd)``:
        ``nd_he`` maps each row index to the list of column indices holding a 1.
        ``he_nd`` maps each column index to the list of row indices holding a 1.
        Rows or columns without any 1 are absent from the dicts. Keys and list
        items are plain Python ints, and all orderings follow a row-major scan
        of the matrix.

    Raises:
        ValueError: If ``matrix`` is not two-dimensional.
    """
    incidence = np.asarray(matrix)
    if incidence.ndim != 2:
        raise ValueError(
            f"generate_dict expects a 2-D matrix, got {incidence.ndim} dimension(s)"
        )

    nd_he = {}
    he_nd = {}

    # np.argwhere lists the matching (row, col) pairs in row-major order, the
    # same order a nested row/column scan would visit them, without touching
    # every cell from Python. tolist() turns the indices into plain ints.
    for row_idx, col_idx in np.argwhere(incidence == 1).tolist():
        nd_he.setdefault(row_idx, []).append(col_idx)
        he_nd.setdefault(col_idx, []).append(row_idx)
    return nd_he, he_nd

In [8]:
# Names of the networks processed by the cells below. Each name is interpolated
# verbatim into the .npy / .pth file paths, so spelling and capitalisation must
# match the files on disk.
networkname = [
    'Algebra', 
    'Bars-Rev', 
    'Geometry', 
    'iAF1260b', 
    'iJO1366', 
    'Music-Rev', 
    'Restaurants-Rev', 
    'senate-committees', 
    'fb-tvshow', 
    'HighSchool_2013_hour', 
    'Highschool_2012_hour', 
    'ht09_contact_hour', 
    'soc-hamsterster', 
    'house-committees'
]

In [25]:
# For every network: build the features, train a BiGRUModel, save its weights,
# reload them into a fresh model, and report Kendall's tau plus the log-space
# MSE on the test nodes.
# NOTE(review): this import sits mid-notebook; consider moving it to the import cell.
from scipy.stats import kendalltau

# Per-network results. all_y / all_x are keyed by the index into `networkname`;
# all_test_list / all_ken are appended to in loop order.
all_y={}
all_x={}
all_test_list = []
all_ken=[]
seed_torch(42)
for net in range(len(networkname)):
    # Re-seed on every iteration so each network starts from the same RNG state.
    seed_torch(42)
    print(networkname[net])
    matrix = np.load(f'/home/zss/estimation_new/datamatrix/{networkname[net]}_incmatrix.npy')
    # NOTE(review): N, M and nd_he are not used again in this cell.
    N, M = matrix.shape
    nd_he, he_nd = generate_dict(matrix)
    # with open('/home/zss/estimation_new/N_E_dict/fb-pages-sport_nd_he.pkl', 'rb') as f:
    #     nd_he = pickle.load(f)

    # with open('/home/zss/estimation_new/N_E_dict/fb-pages-sport_he_nd.pkl', 'rb') as f:
    #     he_nd = pickle.load(f)

    # Keep only the hyperedges (columns) that contain more than one node.
    filtered_dict = {k: v for k, v in he_nd.items() if len(v) > 1}

    # Flatten the surviving hyperedges into a list of node lists.
    data = [] 
    for key in filtered_dict.keys():
        data.append(filtered_dict[key])

    # Labels: one value per node, loaded from disk
    # (presumably simulated spreading influence - confirm against the generator).
    npy_array = np.load(f'/home/zss/estimation_new/spread_RP_threshold/{networkname[net]}_ICRP_threshold_iter2000_time50.npy')
    label = torch.from_numpy(npy_array)
    # npy_array_normalized = (npy_array - np.min(npy_array)) / (np.max(npy_array) - np.min(npy_array))
    # label = torch.from_numpy(npy_array_normalized)

    # Hypergraph object; preprocess() only reads its `v` attribute.
    hg = eg.Hypergraph(num_v = label.shape[0], e_list = data,  merge_op="sum")

    # Features: centrality features (disabled path that would recompute them).
    # center_feature= future_generate(matrix,networkname[net])
    # final_features =  center_feature

    # np.save(f'../center_feature/{networkname[net]}_center.npy', final_features)
    # Distance features are combined with precomputed centrality features.
    center_feature=np.load(f'/home/zss/estimation_new/center_feature/{networkname[net]}_center.npy')
    center_feature=torch.from_numpy(center_feature).to(torch.float32)

    # Distance features: a square matrix sized by the incidence matrix's row
    # count, accumulated from element-wise reciprocal distances. The file for
    # order s is weighted by 0.1 ** (order - s); with order = 1 only the s = 1
    # file is read, with weight 1.
    hydis = np.zeros((matrix.shape[0], matrix.shape[0]))
    order=1
    for s in range(1,order+1):
        temp=np.load(f'/home/zss/estimation_new/hyper_distance/{networkname[net]}_distance_{s}.npy')
        # NOTE(review): zero entries in `temp` turn into inf here. Only the
        # diagonal is reset below - confirm no off-diagonal distance is 0.
        temp=1/temp
        hydis+=temp*((0.1)**(order-s))
        
    np.fill_diagonal(hydis,0)
    hydis=torch.from_numpy(hydis).to(torch.float32)

    # Column-wise concatenation: distance features first, then centrality features.
    final_features = torch.cat((hydis,center_feature), dim=1).to(torch.float32)

    # hydis = np.zeros((label.shape[0], label.shape[0]))
    # num = order-1
    # for _o in range(order):
    #     _o += 1
    #     temp = np.load(f'/home/zss/estimation_new/hyper_distance/{name}_distance_{_o}.npy')
    #     temp = 1/temp
    #     hydis += temp*((0.1)**num)
    #     num -= 1

    dataset, model = preprocess(hg, final_features, label)
    loss_lst = []
    # NOTE(review): acc and test_ls are never filled in this cell.
    acc = []
    test_ls = []
    epoch = 6000
    lr = 0.01
    
    loss_fn = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr = lr)
    
    # NOTE(review): best_val_loss is never updated and val_acc (a validation
    # MSE, despite the name) is computed every epoch but unused, because the
    # checkpoint-on-improvement code below is commented out.
    best_val_loss = float('inf')  # initialise the lowest validation loss to +inf
    for i in range(epoch):
        model.train()
        loss = train(data = dataset, model = model, optimizer=optimizer, criterion=loss_fn)
        loss_lst.append(loss.detach().numpy())

        model.eval()
        val_acc = valid(model = model, data = dataset)

        # acc.append(val_acc)
        # if val_acc < best_val_loss:
        #     best_val_loss = val_acc
        #     torch.save(model.state_dict(), f'/home/zss/estimation_new/baseline_new/Bi-GRU/BI-GRU_best_model_threshold/best_model_{networkname[net]}_1.pth')  # save the current model parameters
    # The weights saved here are those of the final epoch, even though the file
    # is named best_model_*.
    torch.save(model.state_dict(), f'/home/zss/estimation_new/baseline_new/Bi-GRU/BI-GRU_best_model_threshold/best_model_{networkname[net]}_1.pth')

    # draw_loss_curve(loss_lst)
    # draw_loss_curve(acc)

    # dataset, model = preprocess()
    # Keep references from the first preprocess() call; `dataset` is rebound below.
    # NOTE(review): train_mask is not used afterwards.
    features, labels = dataset["features"], dataset["labels"]
    test_mask = dataset["test_mask"]
    train_mask = dataset["train_mask"]
    # features = features.unsqueeze(1)

    # Fresh model (preprocess uses fixed random_state values for the split),
    # then load the weights that were just written to disk.
    dataset, model_T = preprocess(hg, final_features, label)
    model_T.load_state_dict(torch.load(f'/home/zss/estimation_new/baseline_new/Bi-GRU/BI-GRU_best_model_threshold/best_model_{networkname[net]}_1.pth'))
    model_T.eval()
    with torch.no_grad():
        outputs = model_T(features)
    
    # test() runs its own forward pass and returns the log-space MSE on the test nodes.
    test_acc = test(model = model_T, data=dataset)
    # print("test mse:", round(test_acc,4))
    all_test_list.append(round(test_acc,6))

    # min_value = min(npy_array)
    # max_value = max(npy_array) 

    # y = labels[test_mask].squeeze()* (max_value - min_value) + min_value
    # x = outputs[test_mask].squeeze()* (max_value - min_value) + min_value

    # y: true labels, x: predictions, both restricted to the test nodes.
    y = labels[test_mask].squeeze()
    x = outputs[test_mask].squeeze()
    all_y[net] = y
    all_x[net] = x
    # print(y)
    # NOTE(review): the recorded output shows `nan` for iJO1366. Presumably the
    # predictions were constant there (kendalltau is undefined for a constant
    # input) - confirm.
    ken=round(kendalltau(x, y)[0],6)
    all_ken.append(ken)
    # print(x)
    print(networkname[net])
    print(f'{networkname[net]} kendall:{round(ken,6)}')
    print('loss:',test_acc)

print(all_ken)
print(all_test_list)



Algebra
Algebra
Algebra kendall:0.918051
loss: 0.038519859313964844
Bars-Rev
Bars-Rev
Bars-Rev kendall:0.919785
loss: 0.0068529462441802025
Geometry
Geometry
Geometry kendall:0.898033
loss: 0.08746229112148285
iAF1260b
iAF1260b
iAF1260b kendall:0.854759
loss: 0.001076948712579906
iJO1366
iJO1366
iJO1366 kendall:nan
loss: 0.7929626107215881
Music-Rev
Music-Rev
Music-Rev kendall:0.940675
loss: 0.007641966454684734
Restaurants-Rev
Restaurants-Rev
Restaurants-Rev kendall:0.940771
loss: 0.003949963953346014
senate-committees
senate-committees
senate-committees kendall:0.935961
loss: 0.019843418151140213
fb-tvshow


KeyboardInterrupt: 

In [None]:
##### Produce predictions by loading the saved models directly (no training).
# For every network: rebuild the same features as the training cell, load the
# stored weights, predict for all nodes and save the predictions as .npy.

# NOTE(review): kendalltau is imported here but not used in this cell.
from scipy.stats import kendalltau


for net in range(len(networkname)):
    seed_torch(42)
    print(networkname[net])
    matrix = np.load(f'/home/zss/estimation_new/datamatrix/{networkname[net]}_incmatrix.npy')
    # NOTE(review): N, M and nd_he are not used again in this cell.
    N, M = matrix.shape
    nd_he, he_nd = generate_dict(matrix)

    # Keep only the hyperedges (columns) that contain more than one node.
    filtered_dict = {k: v for k, v in he_nd.items() if len(v) > 1}

    # Flatten the surviving hyperedges into a list of node lists.
    data = [] 
    for key in filtered_dict.keys():
        data.append(filtered_dict[key])

    # Labels: one value per node, loaded from disk.
    npy_array = np.load(f'/home/zss/estimation_new/spread_RP_threshold/{networkname[net]}_ICRP_threshold_iter2000_time50.npy')
    label = torch.from_numpy(npy_array)


    # Hypergraph object; preprocess() only reads its `v` attribute.
    hg = eg.Hypergraph(num_v = label.shape[0], e_list = data,  merge_op="sum")

    # Distance features are combined with precomputed centrality features.
    center_feature=np.load(f'/home/zss/estimation_new/center_feature/{networkname[net]}_center.npy')
    center_feature=torch.from_numpy(center_feature).to(torch.float32)

    # Distance features: element-wise reciprocal distances, where the file for
    # order s is weighted by 0.1 ** (order - s). With order = 1 only the s = 1
    # file is read, with weight 1.
    hydis = np.zeros((matrix.shape[0], matrix.shape[0]))
    order=1
    for s in range(1,order+1):
        temp=np.load(f'/home/zss/estimation_new/hyper_distance/{networkname[net]}_distance_{s}.npy')
        # NOTE(review): zero entries in `temp` turn into inf here. Only the
        # diagonal is reset below - confirm no off-diagonal distance is 0.
        temp=1/temp
        hydis+=temp*((0.1)**(order-s))
        
    np.fill_diagonal(hydis,0)
    hydis=torch.from_numpy(hydis).to(torch.float32)

    # Column-wise concatenation: distance features first, then centrality features.
    final_features = torch.cat((hydis,center_feature), dim=1).to(torch.float32)


    # NOTE(review): labels, test_mask and train_mask are not used below.
    dataset, model_T = preprocess(hg, final_features, label)
    features, labels = dataset["features"], dataset["labels"]
    test_mask = dataset["test_mask"]
    train_mask = dataset["train_mask"]

    
    # NOTE(review): this loads best_model_<name>.pth, whereas the training cell
    # writes best_model_<name>_1.pth - confirm which checkpoint is intended.
    model_T.load_state_dict(torch.load(f'/home/zss/estimation_new/baseline_new/Bi-GRU/BI-GRU_best_model_threshold/best_model_{networkname[net]}.pth'))
    model_T.eval()
    with torch.no_grad():
        outputs = model_T(features)
    

    # y = labels[range(len(matrix))].squeeze()
    # Select the predictions for rows 0 .. len(matrix) - 1, in order.
    x = outputs[range(len(matrix))].squeeze()

    # x is a torch tensor handed straight to np.save.
    np.save(f'/home/zss/estimation_new/baseline_new/Bi-GRU/predict_result/{networkname[net]}_Bi-GRU_predict_threshold.npy', x)


Algebra
Bars-Rev
Geometry
iAF1260b
iJO1366
Music-Rev
Restaurants-Rev
senate-committees
fb-tvshow
HighSchool_2013_hour
Highschool_2012_hour
ht09_contact_hour
soc-hamsterster
house-committees
