In [20]:
import torch
import torch.nn.functional as F
import torch_geometric
import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from karateclub import DeepWalk, Node2Vec, Diff2Vec
import pandas as pd
import numpy as np
import networkx as nx
from torch_geometric.data import Data
from torch_geometric.utils import from_networkx
import torch_geometric.nn as pyg_nn
import glob

In [67]:
# GCN Model
class GCN(torch.nn.Module):
    """Two-layer graph convolutional network producing per-node embeddings.

    The first ``GCNConv`` layer maps ``num_node_features`` input channels to
    ``2 * embedding_dim`` channels; after a ReLU, the second layer maps those
    to ``embedding_dim`` channels. No activation is applied to the output.
    """

    def __init__(self, num_node_features, embedding_dim):
        super().__init__()
        hidden_channels = 2 * embedding_dim
        self.conv1 = GCNConv(num_node_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, embedding_dim)

    def forward(self, data):
        """Run both convolutions on ``data.x`` using ``data.edge_index``.

        Returns the raw output of the second convolution.
        """
        edge_index = data.edge_index
        hidden = F.relu(self.conv1(data.x, edge_index))
        return self.conv2(hidden, edge_index)

# AERNN Model (autoencoder; despite the "RNN" in its name it uses only fully connected layers)
class AERNN(torch.nn.Module):
    """Fully connected autoencoder with a single hidden layer on each side.

    The encoder maps ``input_dim -> hidden_dim -> embedding_dim`` and the
    decoder mirrors it with ``embedding_dim -> hidden_dim -> input_dim``.
    Both halves use a ReLU between their two linear layers.
    """

    def __init__(self, input_dim, embedding_dim, hidden_dim):
        super().__init__()
        nn = torch.nn
        # The encoder is created before the decoder so that parameters are
        # registered (and randomly initialised) in the same order as before.
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, embedding_dim),
        )
        self.decoder = nn.Sequential(
            nn.Linear(embedding_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim),
        )

    def forward(self, x):
        """Return ``(encoded, decoded)``: the embedding of ``x`` and its reconstruction."""
        latent = self.encoder(x)
        reconstruction = self.decoder(latent)
        return latent, reconstruction

def load_data(filename):
    """Read a CSV file and return its interior columns as a NumPy array.

    Args:
        filename: Path of the CSV file, passed straight to ``pandas.read_csv``.

    Returns:
        The values of every column except the first and the last one.
    """
    # Drop the first and the last column, keep everything in between.
    return pd.read_csv(filename).iloc[:, 1:-1].values

def preprocess_data(data):
    """Build a PyTorch Geometric graph object from a 2-D data array.

    The graph structure comes from ``np.corrcoef(data.T)``, i.e. the pairwise
    correlation between the *columns* of ``data``; every non-zero correlation
    becomes a weighted edge. The raw ``data`` array is attached as the node
    feature matrix ``x`` (float32).

    NOTE(review): the graph has one node per column of ``data`` while ``x``
    has one row per row of ``data``; the two only agree when ``data`` is
    square - confirm this is what the caller intends.

    Args:
        data: 2-D NumPy array.

    Returns:
        The object produced by ``from_networkx`` with ``x`` set to ``data``.
    """
    correlation = np.corrcoef(data.T)
    # nx.from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array is the
    # replacement and is also available in the 2.x series.
    G = nx.from_numpy_array(correlation)
    g_data = from_networkx(G)
    g_data.x = torch.tensor(data, dtype=torch.float)
    return g_data

def train_gcn(data, num_features, embedding_dim, epochs=50, lr=0.01):
    """Train a ``GCN`` to reproduce the node features and return its output.

    The model output (width ``embedding_dim``) is compared directly against
    ``data.x`` with an MSE loss, so the two must have the same width. Training
    uses Adam with ``weight_decay=5e-4`` and runs on CUDA when available.

    Args:
        data: Graph object exposing ``x`` and ``edge_index``.
        num_features: Number of input channels given to the first GCN layer.
        embedding_dim: Width of the GCN output.
        epochs: Number of full-batch optimisation steps.
        lr: Adam learning rate.

    Returns:
        NumPy array with the model output computed in eval mode after training.

    Raises:
        ValueError: If the width of ``data.x`` differs from ``embedding_dim``.
    """
    # The loss below compares the embedding with data.x element-wise. With
    # mismatched widths PyTorch would either broadcast (giving a meaningless
    # loss) or fail with an opaque shape error, so reject that case up front.
    feature_width = data.x.size(-1)
    if feature_width != embedding_dim:
        raise ValueError(
            f"train_gcn reconstructs data.x directly from the embedding, so "
            f"embedding_dim ({embedding_dim}) must equal the feature width "
            f"of data.x ({feature_width})."
        )

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = GCN(num_features, embedding_dim).to(device)
    data = data.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=5e-4)

    model.train()
    for _ in range(epochs):
        optimizer.zero_grad()
        out = model(data)
        loss = F.mse_loss(out, data.x)
        loss.backward()
        optimizer.step()

    # Final forward pass in eval mode without gradient tracking.
    model.eval()
    with torch.no_grad():
        embedding = model(data)
    return embedding.cpu().numpy()

def train_aernn(data, input_dim, embedding_dim, hidden_dim, epochs=50, lr=0.01):
    """Fit an ``AERNN`` autoencoder on ``data`` and return the encoded rows.

    The autoencoder is trained full-batch with Adam to minimise the MSE
    between its reconstruction and the input. CUDA is used when available.

    Args:
        data: Array-like converted to a float32 tensor.
        input_dim: Input width handed to ``AERNN``.
        embedding_dim: Width of the encoded representation.
        hidden_dim: Width of the hidden layers.
        epochs: Number of optimisation steps.
        lr: Adam learning rate.

    Returns:
        NumPy array with the encoder output computed in eval mode after training.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')

    autoencoder = AERNN(input_dim, embedding_dim, hidden_dim).to(device)
    inputs = torch.tensor(data, dtype=torch.float).to(device)
    optimizer = torch.optim.Adam(autoencoder.parameters(), lr=lr)

    autoencoder.train()
    for _ in range(epochs):
        optimizer.zero_grad()
        _, reconstruction = autoencoder(inputs)
        F.mse_loss(reconstruction, inputs).backward()
        optimizer.step()

    # Encode once more in eval mode, without tracking gradients.
    autoencoder.eval()
    with torch.no_grad():
        latent = autoencoder(inputs)[0]
    return latent.cpu().numpy()

def apply_deepwalk_networkx(data, embedding_dim):
    """Embed the nodes of a weighted graph given as a matrix using DeepWalk.

    ``data[i, j]`` is read for every pair ``i < j`` with both indices below
    ``data.shape[0]``; a strictly positive entry becomes an undirected edge
    whose ``weight`` attribute is that entry.

    Args:
        data: 2-D array indexed as ``data[i, j]``; assumed to be a square
            correlation/adjacency-style matrix - TODO confirm with the caller.
        embedding_dim: Passed to ``DeepWalk`` as ``dimensions``.

    Returns:
        The array returned by ``DeepWalk.get_embedding()``.
    """
    num_nodes = data.shape[0]

    # Build the graph structure. Every node is registered up front so that
    # nodes without any positive entry are still present: karateclub expects
    # node ids to be exactly 0..n-1, and this keeps embedding row i aligned
    # with matrix row i.
    G = nx.Graph()
    G.add_nodes_from(range(num_nodes))
    for i in range(num_nodes):
        for j in range(i + 1, num_nodes):
            if data[i, j] > 0:  # use the (positive) matrix entry as the edge weight
                G.add_edge(i, j, weight=data[i, j])

    # Learn the embedding with DeepWalk.
    model = DeepWalk(dimensions=embedding_dim)
    model.fit(G)
    return model.get_embedding()

# NOTE(review): apply_node2vec appears to have been disabled by commenting out
# only its `def` line, which left its body as unreachable statements after the
# `return` of apply_deepwalk_networkx. The whole function is commented out here.
#def apply_node2vec(data, embedding_dim):
#    G = nx.from_numpy_matrix(np.corrcoef(data.T))
#    model = Node2Vec(dimensions=embedding_dim)
#    model.fit(G)
#    embedding = model.get_embedding()
#    return np.array([embedding[i] for i in range(len(embedding))])

def apply_structure2vec(data, embedding_dim):
    """Embed the column-correlation graph of ``data`` with karateclub's Diff2Vec.

    Despite the function name, the model fitted here is ``Diff2Vec``. The
    graph is built from ``np.corrcoef(data.T)``, so it has one node per
    column of ``data``.

    Args:
        data: 2-D NumPy array.
        embedding_dim: Passed to ``Diff2Vec`` as ``dimensions``.

    Returns:
        NumPy array holding a copy of ``Diff2Vec.get_embedding()``.
    """
    # nx.from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array is the
    # replacement and is also available in the 2.x series.
    G = nx.from_numpy_array(np.corrcoef(data.T))
    model = Diff2Vec(dimensions=embedding_dim)
    model.fit(G)
    # np.array copies the embedding, matching the previous row-by-row rebuild.
    return np.array(model.get_embedding())

def main(file_prefix='Dynamicgraph_ACCESS', num_datasets=7, num_features=16, embedding_dim=16):
    """Compute and save embeddings for a numbered series of CSV datasets.

    For each ``i`` in ``1..num_datasets`` the file ``{file_prefix}{i}.csv`` is
    loaded, and three embeddings are trained and written to the current
    directory with ``np.save``: one from the GCN and two from the AERNN
    autoencoder (hidden sizes 32 and 64).

    Args:
        file_prefix: Common prefix of the input CSV file names.
        num_datasets: Number of consecutively numbered files to process.
        num_features: Number of feature columns left after ``load_data`` drops
            the first and last CSV columns.
        embedding_dim: Width of every embedding that is produced.
    """
    for i in range(1, num_datasets + 1):
        filename = f'{file_prefix}{i}.csv'
        data = load_data(filename)
        preprocessed_data = preprocess_data(data)

        print(f"Processing {filename}...")

        gcn_embedding = train_gcn(preprocessed_data, num_features, embedding_dim)
        aernn_h32_embedding = train_aernn(data, num_features, embedding_dim, hidden_dim=32)
        aernn_h64_embedding = train_aernn(data, num_features, embedding_dim, hidden_dim=64)

        # NOTE(review): the file-name suffixes below do not describe the model
        # that produced each array (GCN -> "deepwork", AERNN/32 -> "node2vec",
        # AERNN/64 -> "structure2vec"). They are kept unchanged because other
        # code may load these exact paths; the DeepWalk/Node2Vec/Diff2Vec
        # embeddings themselves are not computed here.
        np.save(f'Embeddings_ACCESS{i}_deepwork.npy', gcn_embedding)
        np.save(f'Embeddings_ACCESS{i}_node2vec.npy', aernn_h32_embedding)
        np.save(f'Embeddings_ACCESS{i}_structure2vec.npy', aernn_h64_embedding)

        print(f"Embeddings for {filename} saved.")

# Run the whole pipeline when this code is executed directly
# (as a script, or as a notebook cell where __name__ is "__main__").
if __name__ == "__main__":
    main()


Processing Dynamicgraph_ACCESS1.csv...
Embeddings for Dynamicgraph_ACCESS1.csv saved.
Processing Dynamicgraph_ACCESS2.csv...
Embeddings for Dynamicgraph_ACCESS2.csv saved.
Processing Dynamicgraph_ACCESS3.csv...
Embeddings for Dynamicgraph_ACCESS3.csv saved.
Processing Dynamicgraph_ACCESS4.csv...
Embeddings for Dynamicgraph_ACCESS4.csv saved.
Processing Dynamicgraph_ACCESS5.csv...
Embeddings for Dynamicgraph_ACCESS5.csv saved.
Processing Dynamicgraph_ACCESS6.csv...
Embeddings for Dynamicgraph_ACCESS6.csv saved.
Processing Dynamicgraph_ACCESS7.csv...
Embeddings for Dynamicgraph_ACCESS7.csv saved.
