In [107]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from scipy.spatial.distance import cdist
import numpy as np
import pandas as pd
from sklearn.preprocessing import scale

In [108]:
df = pd.read_csv('Dynamicgraph.csv').iloc[:,1:-1]
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,1.0,0.108302,-0.535908,-0.351269,0.108302,1.0,0.230777,-0.907482,-0.535908,0.230777,1.0,-0.330796,-0.351269,-0.907482,-0.330796,1.0
1,1.0,0.696206,0.233532,-0.851933,0.696206,1.0,-0.133325,-0.58246,0.233532,-0.133325,1.0,-0.66604,-0.851933,-0.58246,-0.66604,1.0
2,1.0,0.738704,-0.234767,-0.529127,0.738704,1.0,-0.133308,-0.693722,-0.234767,-0.133308,1.0,-0.600907,-0.529127,-0.693722,-0.600907,1.0
3,1.0,0.786672,-0.842429,-0.000359,0.786672,1.0,-0.488673,-0.533993,-0.842429,-0.488673,1.0,-0.474209,-0.000359,-0.533993,-0.474209,1.0
4,1.0,0.097516,-0.956668,0.673904,0.097516,1.0,-0.161021,-0.621298,-0.956668,-0.161021,1.0,-0.672272,0.673904,-0.621298,-0.672272,1.0


In [109]:
df = scale(pd.read_csv('Dynamicgraph.csv').iloc[:,1:-1].values)

In [110]:
df.shape

(200, 16)

In [111]:
for i in range(df.shape[0]):
    for j in range(df.shape[1]):
        if df[i][j] == 0:
            df[i][j] = 1

In [112]:
data = np.abs(df.reshape(df.shape[0],4,4))

In [113]:
def preprocess_data(data):
    # 替换NaN值为均值
    nan_mask = np.isnan(data)
    data[nan_mask] = np.nanmean(data)
    
    # 或者替换为特定的值，例如 0
    # data[nan_mask] = 0  # 替换为0
    
    return data

In [114]:
data = preprocess_data(data)

In [115]:
data[5]

array([[1.        , 0.17714915, 0.36213489, 0.42320646],
       [0.17714915, 1.        , 0.52156084, 0.74787603],
       [0.36213489, 0.52156084, 1.        , 1.39719796],
       [0.42320646, 0.74787603, 1.39719796, 1.        ]])

In [116]:
def entropy(X):
    E = []
    for i in range(X.shape[0]):
        P = []
        for j in range(X.shape[1]):
            if i !=j:
                e = -X[i][j]*np.log(X[i][j])
                P.append(e)
        P = np.array(P)
        E.append(np.sum(P))
    return np.array(E)

In [117]:
def graphentropy(X):
    E = []
    for i in range(X.shape[0]):
        e = entropy(X[i])
        E.append(np.sum(e))
    return np.array(E)

In [118]:
entropies = graphentropy(data)

In [119]:
np.inf in entropies

False

In [120]:
distances = cdist(entropies.reshape(-1, 1), entropies.reshape(-1, 1), metric='euclidean')
distances.shape


(200, 200)

In [121]:
most_similar_indices = np.argmin(distances, axis=1)


In [122]:
most_similar_indices

array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
        65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
        78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
        91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
       104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
       117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
       130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
       143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
       156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
       169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 18

In [123]:
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=16):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return x

# Transformer Encoder
class TransformerEncoder(nn.Module):
    def __init__(self, input_dim, embed_dim, num_heads, ff_dim, num_layers):
        super(TransformerEncoder, self).__init__()
        self.embedding = nn.Linear(input_dim, embed_dim)
        self.pos_encoder = PositionalEncoding(embed_dim)
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=num_heads, dim_feedforward=ff_dim)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)

    def forward(self, x):
        x = self.embedding(x)
        x = self.pos_encoder(x)
        x = self.transformer_encoder(x)
        return x

# Autoencoder
class Autoencoder(nn.Module):
    def __init__(self, input_dim, hidden_dim, bottleneck_dim):
        super(Autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, bottleneck_dim)
        )
        self.decoder = nn.Sequential(
            nn.Linear(bottleneck_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim),
            nn.Sigmoid()
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return encoded, decoded

In [124]:
input_dim = 16
embed_dim = 16
num_heads = 4
ff_dim = 128
num_layers = 3
hidden_dim = 32
bottleneck_dim = 16
lr = 0.01
num_epochs = 100

In [125]:
transformer_encoder = TransformerEncoder(input_dim, embed_dim, num_heads, ff_dim, num_layers)
autoencoder = Autoencoder(input_dim, hidden_dim, bottleneck_dim)



In [126]:
optimizer = optim.Adam(list(transformer_encoder.parameters()) + list(autoencoder.parameters()), lr=lr)


In [127]:
mse_loss = nn.MSELoss()


In [128]:
len(data)

200

In [129]:
data[0]

array([[1.        , 0.50752799, 0.48567148, 1.31222442],
       [0.50752799, 1.        , 1.03814172, 1.08521685],
       [0.48567148, 1.03814172, 1.        , 0.76051659],
       [1.31222442, 1.08521685, 0.76051659, 1.        ]])

In [130]:
x = torch.tensor(data[2].flatten(), dtype=torch.float32).unsqueeze(0)
similar_x = torch.tensor(data[most_similar_indices[2]].flatten(), dtype=torch.float32).unsqueeze(0)

similar_x

tensor([[1.0000, 1.0054, 1.2586, 1.6396, 1.0054, 1.0000, 0.2559, 0.4589, 1.2586,
         0.2559, 1.0000, 0.1213, 1.6396, 0.4589, 0.1213, 1.0000]])

In [131]:
data[most_similar_indices[2]],data[5],most_similar_indices[2]

(array([[1.        , 1.0054204 , 1.2586383 , 1.63960144],
        [1.0054204 , 1.        , 0.25585827, 0.45890966],
        [1.2586383 , 0.25585827, 1.        , 0.12129512],
        [1.63960144, 0.45890966, 0.12129512, 1.        ]]),
 array([[1.        , 0.17714915, 0.36213489, 0.42320646],
        [0.17714915, 1.        , 0.52156084, 0.74787603],
        [0.36213489, 0.52156084, 1.        , 1.39719796],
        [0.42320646, 0.74787603, 1.39719796, 1.        ]]),
 2)

In [132]:
for epoch in range(num_epochs):
    total_loss = 0
    for i in range(len(data)):
        x = torch.tensor(data[i].flatten(), dtype=torch.float32).unsqueeze(0)
        similar_x = torch.tensor(data[most_similar_indices[i]].flatten(), dtype=torch.float32).unsqueeze(0)

        # Transformer Encoder
        transformer_output = transformer_encoder(x)

        # Autoencoder
        encoded, decoded = autoencoder(similar_x)

        # 损失计算
        loss1 = mse_loss(transformer_output, encoded)
        loss2 = mse_loss(similar_x, decoded)
        loss = loss1 + loss2

        # 反向传播
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {total_loss / len(data)}')


  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1/100, Loss: 0.33832161225378515
Epoch 2/100, Loss: 0.25754885010421275
Epoch 3/100, Loss: 0.22625023159198462
Epoch 4/100, Loss: 0.22103169823996724
Epoch 5/100, Loss: 0.21953314787708222
Epoch 6/100, Loss: 0.21266552071087064
Epoch 7/100, Loss: 0.21086209332104772
Epoch 8/100, Loss: 0.2099779650522396
Epoch 9/100, Loss: 0.20685343314427881
Epoch 10/100, Loss: 0.21016464163083584
Epoch 11/100, Loss: 0.213230221667327
Epoch 12/100, Loss: 0.20689185524825007
Epoch 13/100, Loss: 0.20251080514863135
Epoch 14/100, Loss: 0.20649932875763624
Epoch 15/100, Loss: 0.20571111652068794
Epoch 16/100, Loss: 0.21376956368796526
Epoch 17/100, Loss: 0.20311998409219087
Epoch 18/100, Loss: 0.20837560452520848
Epoch 19/100, Loss: 0.20967716386541724
Epoch 20/100, Loss: 0.2083749811211601
Epoch 21/100, Loss: 0.20465779855847357
Epoch 22/100, Loss: 0.2067534336913377
Epoch 23/100, Loss: 0.20763037267606704
Epoch 24/100, Loss: 0.20826562562026082
Epoch 25/100, Loss: 0.20598248824477194
Epoch 26/100, 

In [133]:
def get_embeddings(model, data):
    model.eval()
    embeddings = []
    with torch.no_grad():
        for i in range(len(data)):
            x = torch.tensor(data[i].flatten(), dtype=torch.float32).unsqueeze(0)
            embedding = model(x)
            embeddings.append(embedding.squeeze(0).numpy())
    return np.array(embeddings)

# 获取嵌入向量
embeddings = get_embeddings(transformer_encoder, data)
#np.savetxt('Embedding_vector.csv', embeddings, delimiter=',')

In [134]:
np.save('embeddings.npy', embeddings)

print("Embeddings saved to embeddings.npy")

Embeddings saved to embeddings.npy
