In [1]:
import torch
import sys
sys.path.append('/home/zjy/project/MetaIM')
pwd = '/home/zjy/project/MetaIM'
device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=1)

In [2]:
from torch_geometric.datasets import Planetoid

cora_dataset = Planetoid(root=pwd+'/data/cora', name='cora')
data = cora_dataset[0]
edge_index = data.edge_index

In [3]:
import numpy as np
individual_infection_path = pwd+'/data/for_meta/cora_individual_infection_sir_200.npy'
seeds_infection_path = pwd+'/data/for_meta/cora_seed_infection_sir_200_sample_1000.npy'

individual_infection = np.load(individual_infection_path)
seeds_infection = np.load(seeds_infection_path)
individual_infection.shape,seeds_infection.shape

((2708, 2708), (1000, 2, 2708))

In [4]:
from torch.utils.data import Dataset, DataLoader, random_split


class CustomDataset(Dataset):
    def __init__(self, individual_infection,seeds_infection):
        self.individual_infection = individual_infection
        self.seeds_infection = seeds_infection

    def __len__(self):
        return len(self.seeds_infection)

    def __getitem__(self, idx):
        
        return self.seeds_infection[idx][0], self.seeds_infection[idx][1]

dataset = CustomDataset(individual_infection, seeds_infection)

In [5]:
# 定义划分比例
train_ratio = 0.8
test_ratio = 0.2

# 划分数据集
train_dataset, test_dataset = random_split(dataset, [int(len(dataset)*train_ratio), int(len(dataset)*test_ratio)])

train_batch_size = 32
test_batch_size = 2

# 创建数据加载器
train_loader = DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True, drop_last=False)
test_loader = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=False)

In [6]:
import torch
from torch.utils.data import Dataset, DataLoader

# 自定义 Dataset 类
class MatrixDataset(Dataset):
    def __init__(self, matrix):
        self.matrix = matrix

    def __len__(self):
        return len(self.matrix)

    def __getitem__(self, idx):
        sample = self.matrix[idx]
        return torch.tensor(sample, dtype=torch.float)



# 创建自定义 Dataset 对象
dataset = MatrixDataset(individual_infection)

# 创建 DataLoader 对象
vae_train_batch_size = 32
vae_data_loader = DataLoader(dataset, batch_size=vae_train_batch_size, shuffle=True)

# # 遍历 DataLoader 加载数据
# for batch_idx, data in enumerate(vae_data_loader):
#     # data 是一个包含了 batch_size 个样本的张量，每个样本的形状为 (10,)，代表矩阵的一行数据
#     # 在这里可以将 data 输入模型进行训练
#     print("Batch", batch_idx, "Data shape:", data.shape)


In [7]:
import torch.nn as nn

import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class Encoder(nn.Module):
    def __init__(self, input_dim, hidden_dim, latent_dim):
        super(Encoder, self).__init__()
        self.FC_input = nn.Linear(input_dim, hidden_dim)
        self.FC_input2 = nn.Linear(hidden_dim, hidden_dim)
        self.FC_output = nn.Linear(hidden_dim, latent_dim)
        
        # self.bn = nn.BatchNorm1d(latent_dim)
        
    def forward(self, x):
        h_ = F.relu(self.FC_input(x))
        h_ = F.relu(self.FC_input2(h_))
        h_ = F.relu(self.FC_input2(h_))
        output = self.FC_output(h_)

        return output

# class GCNEncoder(nn.Module):
#     """
#     Simple GCN-structured Encoder
#     """
#     def __init__(self, input_dim, hidden_dim, latent_dim, gcn_outdim, out_dim, dropout=0.0):
#         super(GCNEncoder, self).__init__()
#         self.gc1 = GCNConv(input_dim, hidden_dim)
#         self.gc2 = GCNConv(hidden_dim, latent_dim)
#         self.FC_mean = nn.Linear(2810*latent_dim, out_dim)
#         self.FC_var = nn.Linear(2810*latent_dim, out_dim)
        
#         self.dropout = nn.Dropout(dropout)
        
#     def forward(self, x, adj):
#         # x = self.dropout(x)
#         x = F.relu(self.gc1(x, adj))
#         x = self.dropout(x)
#         x = F.relu(self.gc2(x, adj))
#         '''
#         # max pooling over nodes
#         x = torch.max(x, dim=1)[0].squeeze()
#         '''
#         mean = self.FC_mean(x.view(8, -1))
#         log_var = self.FC_var(x.view(8, -1))
#         return mean, log_var

In [8]:
class Decoder(nn.Module):
    def __init__(self, input_dim, latent_dim, hidden_dim, output_dim):
        super(Decoder, self).__init__()
        self.FC_input = nn.Linear(input_dim, latent_dim)
        self.FC_hidden_1 = nn.Linear(latent_dim, hidden_dim)
        self.FC_hidden_2 = nn.Linear(hidden_dim, hidden_dim)
        self.FC_output = nn.Linear(hidden_dim, output_dim)
        
        #self.prelu = nn.PReLU()
        
    def forward(self, x):
        h = F.relu(self.FC_input(x))
        h = F.relu(self.FC_hidden_1(h))
        h = F.relu(self.FC_hidden_2(h))
        # x_hat = self.FC_output(h)
        x_hat = F.sigmoid(self.FC_output(h))
        return x_hat

In [9]:
class VAEModel(nn.Module):
    def __init__(self, Encoder, Decoder):
        super(VAEModel, self).__init__()
        self.Encoder = Encoder
        self.Decoder = Decoder
        
    def reparameterization(self, mean, var):
        std = torch.exp(0.5*var) # standard deviation
        epsilon = torch.randn_like(var)
        return mean + std*epsilon

    def forward(self, x, adj=None):
        if adj != None:
            mean,log_var = self.Encoder(x, adj)
        else:
            z = self.Encoder(x)
        # z = mean + log_var # takes exponential function (log var -> var)
        x_hat = self.Decoder(z)
        return x_hat

In [12]:
# from data import model 
# from data.model.model import VAEModel, Encoder, Decoder
from torch.optim import Adam, SGD
import torch.nn.functional as F

# # hidden_dim = 256
# # latent_dim = 64
hidden_dim = 1024
latent_dim = 128

encoder = Encoder(input_dim= len(seeds_infection[0][0]), 
                  hidden_dim=hidden_dim, 
                  latent_dim=latent_dim)
# encoder = GCNEncoder(input_dim= len(seeds_infection[0][0]), 
#                   hidden_dim=hidden_dim, 
#                   latent_dim=latent_dim)

decoder = Decoder(input_dim=latent_dim, 
                  latent_dim=latent_dim, 
                  hidden_dim=hidden_dim, 
                  output_dim=len(seeds_infection[0][0]))

vae_model = VAEModel(Encoder=encoder, Decoder=decoder).to(device)

optimizer_vae = Adam([{'params': vae_model.parameters()}], 
                 lr=1e-3)
vae_model.train()

VAEModel(
  (Encoder): Encoder(
    (FC_input): Linear(in_features=2708, out_features=1024, bias=True)
    (FC_input2): Linear(in_features=1024, out_features=1024, bias=True)
    (FC_output): Linear(in_features=1024, out_features=128, bias=True)
  )
  (Decoder): Decoder(
    (FC_input): Linear(in_features=128, out_features=128, bias=True)
    (FC_hidden_1): Linear(in_features=128, out_features=1024, bias=True)
    (FC_hidden_2): Linear(in_features=1024, out_features=1024, bias=True)
    (FC_output): Linear(in_features=1024, out_features=2708, bias=True)
  )
)

In [13]:
for epoch in range(200):
    train_vae_loss = 0
    mean_train_accuracy = 0
    count = 0
    for batch_idx, seeds_label in enumerate(train_loader): 
        count += 1       
        x = seeds_label[0].to(device)
        optimizer_vae.zero_grad()
        loss = 0
        for i, x_i in enumerate(x):
            x_hat = vae_model(x_i)

            reproduction_loss = F.binary_cross_entropy(x_hat, x_i, reduction='sum')   
            # reproduction_loss = F.mse_loss(x_hat, x_i, reduction='sum')   
            loss += reproduction_loss    
        train_vae_loss += loss.item()
        loss = loss/x.size(0)
        loss.backward()
        optimizer_vae.step()
        
    print("Epoch: {}".format(epoch+1), 
        "\tTrain_vae_loss: {:.4f}".format(train_vae_loss / count),
        )


Epoch: 1 	Train_vae_loss: 26440.9109
Epoch: 2 	Train_vae_loss: 17851.7845
Epoch: 3 	Train_vae_loss: 17407.8187
Epoch: 4 	Train_vae_loss: 17321.3830
Epoch: 5 	Train_vae_loss: 17317.1844
Epoch: 6 	Train_vae_loss: 17301.9598
Epoch: 7 	Train_vae_loss: 17293.2405
Epoch: 8 	Train_vae_loss: 17284.7123
Epoch: 9 	Train_vae_loss: 17281.1514
Epoch: 10 	Train_vae_loss: 17271.2961
Epoch: 11 	Train_vae_loss: 17266.8573
Epoch: 12 	Train_vae_loss: 17258.7166
Epoch: 13 	Train_vae_loss: 17254.8398
Epoch: 14 	Train_vae_loss: 17249.4723
Epoch: 15 	Train_vae_loss: 17253.2235
Epoch: 16 	Train_vae_loss: 17245.9419
Epoch: 17 	Train_vae_loss: 17241.5158
Epoch: 18 	Train_vae_loss: 17235.0925
Epoch: 19 	Train_vae_loss: 17236.4422
Epoch: 20 	Train_vae_loss: 17236.8417
Epoch: 21 	Train_vae_loss: 17229.7601
Epoch: 22 	Train_vae_loss: 17228.1012
Epoch: 23 	Train_vae_loss: 17226.6052
Epoch: 24 	Train_vae_loss: 17223.9847
Epoch: 25 	Train_vae_loss: 17222.8549
Epoch: 26 	Train_vae_loss: 17219.5551
Epoch: 27 	Train_vae_

In [36]:
import torch
import torch.nn as nn

from torch_geometric.nn import GATConv
from torch.optim import Adam, SGD
import torch.nn.functional as F

class GAT(nn.Module):
    def __init__(self, seeds_dim, inflect_dim, hidden_channels, out_channels, num_heads):
        super(GAT, self).__init__()
        self.linear1 = nn.Linear(seeds_dim + inflect_dim, seeds_dim + inflect_dim)
        self.bn1 = nn.BatchNorm1d(seeds_dim + inflect_dim)
        self.conv1 = GATConv(seeds_dim + inflect_dim, hidden_channels, heads=num_heads)
        self.bn2 = nn.BatchNorm1d(hidden_channels * num_heads)
        self.conv2 = GATConv(hidden_channels * num_heads, hidden_channels * num_heads, heads=1)
        self.bn3 = nn.BatchNorm1d(seeds_dim)
        self.linear2 = nn.Linear(hidden_channels * num_heads + seeds_dim, out_channels)

    def forward(self, seeds_i, inflect_i, edge_index):
        x =  torch.cat((seeds_i, inflect_i), dim=-1)
        x = self.linear1(x)
        x = self.bn1(x)
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.bn2(x)
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv2(x, edge_index)
        x = F.relu(x)
        seeds_i = self.bn3(seeds_i)
        x =  torch.cat((x, seeds_i), dim=-1)
        x = self.linear2(x)
        return F.relu(x)

In [37]:
import torch
from torch_geometric.utils import to_scipy_sparse_matrix
import scipy.sparse as sp

# 转换为 scipy 稀疏矩阵
adj = to_scipy_sparse_matrix(edge_index)

adj = torch.Tensor(adj.toarray()).to_sparse()
adj = adj.to(device)

In [38]:
inflect_dim = latent_dim
seeds_dim = latent_dim

forward_model = GAT(seeds_dim,inflect_dim, 512, 1, 4)

optimizer = Adam([{'params': forward_model.parameters()}], 
                 lr=0.0001)

adj = adj.to(device)
forward_model = forward_model.to(device)
forward_model.train()

GAT(
  (linear1): Linear(in_features=256, out_features=256, bias=True)
  (bn1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv1): GATConv(256, 512, heads=4)
  (bn2): BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): GATConv(2048, 2048, heads=1)
  (bn3): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (linear2): Linear(in_features=2176, out_features=1, bias=True)
)

In [39]:
for param in vae_model.parameters():
    param.requires_grad = False 
encoder = vae_model.Encoder

In [41]:
inflected = torch.tensor(individual_infection).T.to(device)
inflected

tensor([[1.0000, 0.0069, 0.0076,  ..., 0.0000, 0.0070, 0.0076],
        [0.0028, 1.0000, 0.1019,  ..., 0.0000, 0.0028, 0.0029],
        [0.0273, 0.1277, 1.0000,  ..., 0.0000, 0.0273, 0.0287],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 1.0000, 0.0000, 0.0000],
        [0.0050, 0.0048, 0.0055,  ..., 0.0000, 1.0000, 0.1276],
        [0.0156, 0.0150, 0.0170,  ..., 0.0000, 0.1388, 1.0000]],
       device='cuda:1')

In [43]:
edge_index = edge_index.to(device)
top_num = 500


for epoch in range(2000):

    total_loss = 0
    

    total_train_accuracy = 0
    
    count_train = 0
    for batch_idx, seeds_label in enumerate(train_loader): 
        count_train += 1
        forward_loss = 0 
        seeds =  seeds_label[0].to(device)     
        labels = seeds_label[1].to(device)
        optimizer.zero_grad()
        
        loss = 0
        train_accuracy = 0
        for i, seeds_i in enumerate(seeds):
            infection = inflected * seeds_i.view(-1, 1)

            
            infection_i = encoder(infection).detach()
            seeds_i = encoder(seeds_i).detach()
            seeds_i = seeds_i.expand(seeds.shape[1], -1)
            
            y_i = labels[i]
            y_hat = forward_model(seeds_i, infection_i, edge_index)
            
            
            
            _, top_indices_true = torch.topk(y_i.clone(), top_num)
            label_2 = torch.zeros(y_i.shape).to(device)
            label_2[top_indices_true] = 1
            
            _, top_indices_predict = torch.topk(y_hat.clone().squeeze(-1), top_num)
            
            # 将张量数组转换为Python列表
            list1 = top_indices_true.tolist()
            list_pre = top_indices_predict.tolist()

            # 使用集合操作找到交集
            intersection = list(set(list1) & set(list_pre))
            accuracy_i = len(intersection) / top_num       
            train_accuracy += accuracy_i 

            forward_loss = 0.5*F.mse_loss(y_hat.squeeze(-1), y_i, reduction='sum') + F.mse_loss(y_hat.squeeze(-1), label_2, reduction='sum')    
            loss += forward_loss    
        
           
        train_accuracy /= seeds.size(0)
        total_train_accuracy += train_accuracy
        loss = loss/seeds.size(0)
        total_loss += loss.item() 
        loss.backward()
        optimizer.step()
        # for p in forward_model.parameters():
        #     p.data.clamp_(min=0)
        

    print("Epoch: {}".format(epoch+1), 
        "\tTotal: {:.4f}".format(total_loss / count_train),
        "\tMean_train_accuracy: {:.4f}".format(total_train_accuracy/ count_train),
        )  
    
    total_test_accuracy = 0
    
    count_test = 0

    for batch_idx, seeds_label in enumerate(test_loader): 
        count_test += 1 
        seeds =  seeds_label[0].to(device)     
        labels = seeds_label[1].to(device)
        test_accuracy = 0
        for i, seeds_i in enumerate(seeds):
            infection = inflected * seeds_i.view(-1, 1)

            
            infection_i = encoder(infection).detach()
            seeds_i = encoder(seeds_i).detach()
            seeds_i = seeds_i.expand(seeds.shape[1], -1)
            
            y_i = labels[i]
            
            y_hat = forward_model(seeds_i, infection_i, edge_index)
            
            _, top_indices_true = torch.topk(y_i, top_num)
            
            
            _, top_indices_predict = torch.topk(y_hat.squeeze(-1), top_num)

            
            # 将张量数组转换为Python列表
            list1 = top_indices_true.tolist()
            list_pre = top_indices_predict.tolist()
            

            # 使用集合操作找到交集
            intersection = list(set(list1) & set(list_pre))

            
            accuracy_i = len(intersection) / top_num       
            test_accuracy += accuracy_i 
        test_accuracy /= len(seeds)
        total_test_accuracy += test_accuracy
        

    print(
        "\tMean_test_accuracy: {:.4f}".format(total_test_accuracy / count_test),
        )  

    

Epoch: 1 	Total: 331.2693 	Mean_train_accuracy: 0.6399
	Mean_test_accuracy: 0.6634
Epoch: 2 	Total: 293.8675 	Mean_train_accuracy: 0.6769
	Mean_test_accuracy: 0.6959
Epoch: 3 	Total: 272.0915 	Mean_train_accuracy: 0.7023
	Mean_test_accuracy: 0.7105
Epoch: 4 	Total: 258.1620 	Mean_train_accuracy: 0.7190
	Mean_test_accuracy: 0.7302
Epoch: 5 	Total: 247.4603 	Mean_train_accuracy: 0.7327
	Mean_test_accuracy: 0.7320
Epoch: 6 	Total: 240.4906 	Mean_train_accuracy: 0.7420
	Mean_test_accuracy: 0.7462
Epoch: 7 	Total: 235.0600 	Mean_train_accuracy: 0.7500
	Mean_test_accuracy: 0.7540
Epoch: 8 	Total: 234.0777 	Mean_train_accuracy: 0.7535
	Mean_test_accuracy: 0.7550
Epoch: 9 	Total: 229.1401 	Mean_train_accuracy: 0.7602
	Mean_test_accuracy: 0.7649
Epoch: 10 	Total: 226.7969 	Mean_train_accuracy: 0.7632
	Mean_test_accuracy: 0.7700
Epoch: 11 	Total: 222.3134 	Mean_train_accuracy: 0.7691
	Mean_test_accuracy: 0.7733
Epoch: 12 	Total: 220.1972 	Mean_train_accuracy: 0.7715
	Mean_test_accuracy: 0.7757
E