In [None]:
# 필요한 모든 라이브러리 임포트
import torch
import torch_geometric
from torch_geometric.datasets import Planetoid
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
from gensim.models import Word2Vec
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
import optuna

# Load the Cora citation dataset (cached under /tmp/Cora) and take its single graph.
dataset = Planetoid(root='/tmp/Cora', name='Cora')
data = dataset[0]

# Convert the PyG edge list into an undirected NetworkX graph:
# edge_index is transposed so that each row is one (source, target) pair.
edge_index = data.edge_index
edges = edge_index.t().numpy()
G = nx.Graph()
G.add_edges_from(edges)

In [None]:
# Rebuild an undirected NetworkX graph from the PyG edge list.
edge_index = data.edge_index
edges = edge_index.t().numpy()
graph_nx = nx.Graph()
graph_nx.add_edges_from(edges)

# Centre node(s) whose neighbourhood is plotted (node 0 here).
center_nodes = [0]

# Direct neighbours of every centre node, in neighbour order ...
all_neighbors = [
    neighbor
    for selected_node in center_nodes
    for neighbor in graph_nx.neighbors(selected_node)
]
# ... followed by the centre nodes themselves.
all_neighbors += center_nodes

# Induced subgraph containing only the collected nodes.
subgraph_nx1 = graph_nx.subgraph(all_neighbors)

# Draw it with a spring layout using a fixed seed.
pos = nx.spring_layout(subgraph_nx1, seed=42)
nx.draw(subgraph_nx1, pos, with_labels=True, node_color="skyblue")

plt.show()

In [None]:
# Rebuild an undirected NetworkX graph from the PyG edge list.
edge_index = data.edge_index
edges = edge_index.t().numpy()
graph_nx = nx.Graph()
graph_nx.add_edges_from(edges)

# Centre node(s) whose neighbourhood is plotted (node 1666 here).
center_nodes = [1666]

# Collect the first hop (direct neighbours) and then the second hop
# (neighbours of those neighbours) around every centre node.
all_neighbors = []
for selected_node in center_nodes:
    first_hop = list(graph_nx.neighbors(selected_node))
    all_neighbors.extend(first_hop)
    all_neighbors.extend(
        second_hop
        for neighbor in first_hop
        for second_hop in graph_nx.neighbors(neighbor)
    )

# The centre nodes belong to the subgraph as well.
all_neighbors.extend(center_nodes)

# Drop duplicates by round-tripping through a set.
all_neighbors = list(set(all_neighbors))

# Induced subgraph containing only the collected nodes.
subgraph_nx2 = graph_nx.subgraph(all_neighbors)

# Draw it with a spring layout using a fixed seed.
pos = nx.spring_layout(subgraph_nx2, seed=42)
nx.draw(subgraph_nx2, pos, with_labels=True, node_color="skyblue")

plt.show()

In [None]:
# biased random walk 코드 

import networkx as nx
import numpy as np
import random

def biased_random_walk(G, start_node, walk_length, p=1, q=1):
    """Sample one node2vec-style second-order biased random walk.

    The first step is uniform over the start node's neighbours. Every later
    step weights each candidate neighbour by its relation to the previously
    visited node:

    * the previous node itself      -> weight ``1 / p`` (return)
    * adjacent to the previous node -> weight ``1``
    * anything else                 -> weight ``1 / q`` (move outward)

    All randomness comes from the standard ``random`` module, so a single
    ``random.seed(...)`` call makes the walk reproducible, and the nodes in
    the result are the graph's own node objects (no NumPy coercion), so any
    hashable node label works.

    Args:
        G: Graph-like object exposing ``neighbors(node)`` and
            ``has_edge(u, v)`` (e.g. a ``networkx.Graph``).
        start_node: Node the walk starts from.
        walk_length: Maximum number of nodes in the walk, start included.
        p: Return parameter; larger values make stepping back less likely.
        q: In-out parameter; smaller values favour moving away from the
            previous node.

    Returns:
        list: Visited nodes in order. It always contains ``start_node`` and
        is shorter than ``walk_length`` only when a node without neighbours
        is reached.

    Raises:
        ValueError: If ``p`` or ``q`` is not strictly positive.
    """
    if p <= 0 or q <= 0:
        raise ValueError("p and q must be positive")

    walk = [start_node]

    while len(walk) < walk_length:
        cur_node = walk[-1]
        cur_neighbors = list(G.neighbors(cur_node))

        # Dead end: nothing to step to, so the walk stops early.
        if not cur_neighbors:
            break

        # No previous node yet, so the first step is unbiased.
        if len(walk) == 1:
            walk.append(random.choice(cur_neighbors))
            continue

        prev_node = walk[-2]

        weights = []
        for neighbor in cur_neighbors:
            if neighbor == prev_node:
                # Return parameter
                weights.append(1 / p)
            elif G.has_edge(neighbor, prev_node):
                # Candidate is also adjacent to the previous node
                weights.append(1)
            else:
                # In-out parameter
                weights.append(1 / q)

        # random.choices accepts unnormalised weights and returns the original
        # node object, unlike np.random.choice which casts nodes to an array.
        walk.append(random.choices(cur_neighbors, weights=weights, k=1)[0])

    return walk

In [None]:
def generate_walks(G, num_walks, walk_length, p=1, q=1):
    """Generate biased random walks starting from every node of ``G``.

    The node list is reshuffled in place before each of the ``num_walks``
    passes, and one walk is started from every node per pass.

    Args:
        G: Graph whose ``nodes()`` are used as walk start points.
        num_walks: Number of passes over the node set.
        walk_length: Length passed through to ``biased_random_walk``.
        p: Return parameter passed through to ``biased_random_walk``.
        q: In-out parameter passed through to ``biased_random_walk``.

    Returns:
        list: All walks, in generation order.
    """
    start_order = list(G.nodes())
    collected = []
    for _ in range(num_walks):
        # A fresh visiting order for every pass.
        random.shuffle(start_order)
        collected.extend(
            biased_random_walk(G, start, walk_length, p, q)
            for start in start_order
        )
    return collected

In [None]:
# Quick demo on the small two-hop subgraph: 2 passes of walks with length 8.
generate_walks(subgraph_nx2, num_walks=2, walk_length=8, p=0.8, q=0.2)

In [None]:
# Sample random walks over the full graph and convert every node id to a
# string, because the walks are fed to Word2Vec as sentences of tokens.
walks = [
    list(map(str, walk))
    for walk in generate_walks(G, num_walks=10, walk_length=20, p=9, q=1)
]

# Train Word2Vec on the walks.
model = Word2Vec(
    walks,
    vector_size=128,
    window=5,
    min_count=0,
    hs=1,
    sg=1,
    workers=4,
    epochs=10,
)

# Stack the learned vectors into one matrix whose row i belongs to node i.
embeddings = np.array(
    [model.wv.get_vector(str(node_index)) for node_index in range(data.num_nodes)]
)

In [None]:
# Every node now has a 128-dimensional vector; look up a single one by its
# string id to inspect it.
node_id = '2'
vector = model.wv.get_vector(node_id)

In [None]:
# Show the vector that was looked up for `node_id` as the cell output.
vector

In [None]:
# Node labels plus the boolean masks of the dataset's train / test split.
labels = data.y.numpy()
idx_train, idx_test = data.train_mask.numpy(), data.test_mask.numpy()

# Slice the embeddings and labels with those masks.
X_train, X_test = embeddings[idx_train], embeddings[idx_test]
y_train, y_test = labels[idx_train], labels[idx_test]

# Fit a random forest on the training nodes (fit() returns the estimator).
rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Predict the test nodes and print the per-class report.
y_pred = rf.predict(X_test)
print(classification_report(y_test, y_pred))

In [None]:
def objective(trial):
    """Optuna objective: validation accuracy of a random forest on the embeddings.

    Trials are scored on the dataset's validation split (``data.val_mask``)
    instead of the test split. Selecting hyperparameters on the test nodes
    would leak test information into model selection and make the final test
    report optimistically biased.

    Args:
        trial: Optuna trial used to sample ``n_estimators`` and ``max_depth``.

    Returns:
        float: Accuracy of the fitted forest on the validation nodes.
    """
    n_estimators = trial.suggest_int('n_estimators', 10, 1000)
    max_depth = trial.suggest_int('max_depth', 1, 100)

    # Validation nodes, sliced the same way as the train / test splits.
    idx_val = data.val_mask.numpy()
    X_val, y_val = embeddings[idx_val], labels[idx_val]

    rf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
    rf.fit(X_train, y_train)

    y_pred = rf.predict(X_val)
    accuracy = accuracy_score(y_val, y_pred)

    return accuracy

# Seed the sampler so the hyperparameter search, and therefore the selected
# model, is the same on every Restart & Run All.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=100)

# Refit a forest with the best hyperparameters found by the study.
rf_best = RandomForestClassifier(n_estimators=study.best_params['n_estimators'],
                                 max_depth=study.best_params['max_depth'],
                                 random_state=42)
rf_best.fit(X_train, y_train)

# Final evaluation of the tuned model on the test nodes.
y_pred = rf_best.predict(X_test)
print(classification_report(y_test, y_pred))