In [4]:
import csv
import numpy as np
import networkx as nx
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split

weighted_edges = []

with open('directed.csv', 'r') as csvfile:
    reader = csv.reader(csvfile, delimiter=' ')
    for row in reader:
        weighted_edges.append((row[0], row[1], row[2]))

# 가중치가 있는 네트워크 생성
G = nx.Graph()
G.add_weighted_edges_from(weighted_edges)

In [38]:
# 네트워크 엣지를 리스트로 변환
edges = list(G.edges(data=True))
# 엣지와 아닌 경우를 생성
non_edges = list(nx.non_edges(G))

# 훈련 데이터와 테스트 데이터 분할
train_edges, test_edges = train_test_split(edges, test_size=0.3, random_state=42)

# 테스트용으로 선택된 엣지 제거
G_train = G.copy()
G_train.remove_edges_from([(u, v) for u, v, _ in test_edges])

In [6]:
print(edges)

[('#explore', '#travel', {'weight': '0.020833333333333332'}), ('#explore', '#nature', {'weight': '0.047619047619047616'}), ('#explore', '#photography', {'weight': '0.02702702702702703'}), ('#explore', '#food', {'weight': '0.023255813953488372'}), ('#explore', '#traveling', {'weight': '0.044444444444444446'}), ('#explore', '#adventure', {'weight': '0.08695652173913043'}), ('#explore', '#wanderlust', {'weight': '0.08695652173913043'}), ('#explore', '#landscape', {'weight': '0.045454545454545456'}), ('#explore', '#vacation', {'weight': '0.08695652173913043'}), ('#explore', '#foodie', {'weight': '0.013333333333333334'}), ('#explore', '#fitness', {'weight': '0.03773584905660377'}), ('#explore', '#fun', {'weight': '0.029411764705882353'}), ('#explore', '#trip', {'weight': '0.09090909090909091'}), ('#explore', '#travelgram', {'weight': '0.09090909090909091'}), ('#travel', '#trip', {'weight': '0.010416666666666666'}), ('#travel', '#vacation', {'weight': '0.020833333333333332'}), ('#travel', '#

In [7]:
print(non_edges)

[('#painting', '#adventure'), ('#painting', '#food'), ('#painting', '#foodporn'), ('#painting', '#running'), ('#painting', '#fitness'), ('#painting', '#dinner'), ('#painting', '#landscape'), ('#painting', '#workout'), ('#painting', '#friends'), ('#painting', '#beach'), ('#painting', '#love'), ('#painting', '#nutrition'), ('#painting', '#eat'), ('#painting', '#explore'), ('#painting', '#sunset'), ('#painting', '#memories'), ('#painting', '#yum'), ('#painting', '#foodie'), ('#painting', '#recipes'), ('#painting', '#travelgram'), ('#painting', '#fitfam'), ('#painting', '#fun'), ('#painting', '#delicious'), ('#painting', '#gym'), ('#painting', '#diet'), ('#painting', '#homemade'), ('#painting', '#chef'), ('#painting', '#happy'), ('#painting', '#motivation'), ('#painting', '#traveling'), ('#painting', '#health'), ('#painting', '#trip'), ('#painting', '#cooking'), ('#painting', '#training'), ('#painting', '#nature'), ('#painting', '#exercise'), ('#painting', '#smile'), ('#painting', '#vacati

In [41]:
def weighted_rwr(graph, start_node, restart_prob=0.15, max_iter=100, tol=1e-6):
    # 초기 확률 벡터 설정
    nodes = list(graph.nodes)
    n = len(nodes)
    idx = {node: i for i, node in enumerate(nodes)}
    p = np.zeros(n)
    p[idx[start_node]] = 1

    # 초기 분포 복사
    p0 = np.copy(p)
    adj_matrix = nx.to_numpy_array(graph, nodelist=nodes, weight='weight')

    # 정규화된 가중치 행렬
    row_sums = adj_matrix.sum(axis=1)
    norm_matrix = np.divide(adj_matrix, row_sums[:, np.newaxis], where=row_sums[:, np.newaxis] != 0)

    for _ in range(max_iter):
        p_new = (1 - restart_prob) * norm_matrix.T.dot(p) + restart_prob * p0
        if np.linalg.norm(p_new - p) < tol:
            break
        p = p_new
    return p


# 모든 노드에 대해 가중치를 반영한 RWR 적용
rwr_scores = {}
for node in G_train.nodes():
    rwr_scores[node] = weighted_rwr(G_train, node)

In [42]:
from sklearn.exceptions import UndefinedMetricWarning
import warnings

# 경고 무시 설정
warnings.filterwarnings("ignore", category=UndefinedMetricWarning)


def get_link_score(rwr_scores, u, v, idx):
    return rwr_scores[u][idx[v]] + rwr_scores[v][idx[u]]


# 테스트 데이터의 노드 인덱스 매핑
nodes = list(G_train.nodes)
idx = {node: i for i, node in enumerate(nodes)}

# 테스트 데이터 예측 점수 계산
y_true = []
y_scores = []

for u, v, data in test_edges:
    y_true.append(1)
    y_scores.append(get_link_score(rwr_scores, u, v, idx))

# Negative samples 예측 점수 계산
for u, v in non_edges:
    if (u, v) not in G.edges() and (v, u) not in G.edges():
        y_true.append(0)
        y_scores.append(get_link_score(rwr_scores, u, v, idx))

# 임계값 설정 및 예측 레이블 생성
threshold = np.median(y_scores)  # 임계값을 중간 값으로 설정 (적절하게 조정 가능)
y_pred = [1 if score > threshold else 0 for score in y_scores]

# 성능 평가 지표 계산
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, zero_division=0)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)
roc_auc = roc_auc_score(y_true, y_scores)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"ROC AUC: {roc_auc:.4f}")

Accuracy: 0.6121
Precision: 0.2236
Recall: 1.0000
F1 Score: 0.3655
ROC AUC: 0.9607


In [15]:
print(y_pred)

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [16]:
print(y_scores)

[0.03170890419191725, 0.057275240081233476, 0.05495711444290576, 0.04145039267760631, 0.059749344209383194, 0.030245154097628436, 0.0556531642569131, 0.027107847573019057, 0.04009883994013473, 0.039509579870615576, 0.025168165411364164, 0.0501984932127888, 0.026521266066028684, 0.050762017442930116, 0.043248587440343156, 0.04869789330279367, 0.03986840105331939, 0.03763176090943296, 0.041772613254003874, 0.03252261862954769, 0.03745802570923576, 0.03194488344247633, 0.03186935887396843, 0.02547987058198449, 0.03371358702206681, 0.0577599360881596, 0.04210178472053368, 0.05218367387191396, 0.038831170581877586, 0.03636057596672497, 0.05902987308573241, 0.029934053491401637, 0.06470229567509986, 0.031503970549064404, 0.05522207185205305, 0.04066592755194478, 0.06262970893888632, 0.04145437175565202, 0.0344969931167311, 0.07364979111242019, 0.041654297178444794, 0.028367351944060728, 0.04658617470543547, 0.033751959561226105, 0.02826070873133789, 0.05784480466816419, 0.028208903525662802,

In [17]:
print(y_true)

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [18]:
print(rwr_scores)

{'#explore': array([0.18929477, 0.02343598, 0.03202202, 0.05159041, 0.01976937,
       0.04566113, 0.0201375 , 0.05261324, 0.01907164, 0.03882637,
       0.03029278, 0.02913553, 0.03020983, 0.04111659, 0.04690696,
       0.00494707, 0.00775335, 0.00604943, 0.00687774, 0.00242779,
       0.00587231, 0.00823716, 0.00630296, 0.00752292, 0.00559672,
       0.01176357, 0.01532491, 0.00789203, 0.01470782, 0.01471179,
       0.02038271, 0.01539142, 0.01039211, 0.01326783, 0.00726873,
       0.00893653, 0.0115916 , 0.01174994, 0.00498847, 0.00635014,
       0.01036039, 0.00522918, 0.00738101, 0.0039828 , 0.00754113,
       0.01098407, 0.00759918, 0.00610971, 0.00390492, 0.008539  ,
       0.00595524, 0.00157316, 0.00437995, 0.01006907]), '#travel': array([0.0108035 , 0.18047897, 0.02050399, 0.04486156, 0.01732962,
       0.01781406, 0.0063378 , 0.01537263, 0.0233978 , 0.0084505 ,
       0.04116342, 0.01574459, 0.02529321, 0.00678956, 0.00805565,
       0.0126382 , 0.01781024, 0.01441111, 0.013