In [2]:
import os, sys
import numpy as np

import networkx as nx
from sklearn import metrics

### Calculating metrics only for predicted edges

In [33]:
# ROC-AUC for cases (a) and (b), scored only on the three candidate
# (missing) edges that the heuristic makes a prediction for.
edges_true_a = [1, 0, 0]   # case (a): one of the three candidates is a real edge
edges_true_b = [1, 1, 0]   # case (b): two of the three candidates are real edges
edges_pred = [1, 1, 1]     # the provided heuristic predicts every candidate as an edge

auc_missing_a = metrics.roc_auc_score(edges_true_a, edges_pred)
print("AUC score for Case A by computing only over missing edges is: ", auc_missing_a)

auc_missing_b = metrics.roc_auc_score(edges_true_b, edges_pred)
print("AUC score for Case B by computing only over missing edges is: ", auc_missing_b)

AUC score for Case A by computing only over missing edges is:  0.5
AUC score for Case B by computing only over missing edges is:  0.5


In [34]:
# F1 for cases (a) and (b), scored on the same three candidate (missing)
# edges and the same all-positive prediction vector as the AUC cell.
f1_missing_a = metrics.f1_score(edges_true_a, edges_pred)
print("F1 score for Case A by computing only over missing edges is: ", f1_missing_a)

f1_missing_b = metrics.f1_score(edges_true_b, edges_pred)
print("F1 score for Case B by computing only over missing edges is: ", f1_missing_b)

F1 score for Case A by computing only over missing edges is:  0.5
F1 score for Case B by computing only over missing edges is:  0.8


In [36]:
# Build the observed (training) graph and the two ground-truth graphs.
# All three share the same 12 nodes (0..11).
g_train = nx.Graph()
g_true1 = nx.Graph()
g_true2 = nx.Graph()

g_train.add_nodes_from(range(12))
g_true1.add_nodes_from(range(12))
g_true2.add_nodes_from(range(12))

# Observed graph: three disjoint stars centred on nodes 0, 4 and 8.
g_train.add_edges_from([
    (0, 1), (0, 2), (0, 3),
    (4, 5), (4, 6), (4, 7),
    (8, 9), (8, 10), (8, 11),
])

# True graph for (a): the observed edges plus (0, 4), which is missing in the input.
g_true1.add_edges_from([
    (0, 1), (0, 2), (0, 3),
    (0, 4),  # missing edge in the input
    (4, 5), (4, 6), (4, 7),
    (8, 9), (8, 10), (8, 11),
])

# True graph for (b): the observed edges plus (0, 4) and (4, 8), both missing in the input.
g_true2.add_edges_from([
    (0, 1), (0, 2), (0, 3),
    (0, 4),  # missing edge in the input
    (4, 5), (4, 6), (4, 7),
    (4, 8),  # missing edge in the input
    (8, 9), (8, 10), (8, 11),
])

### Calculating metrics for all edges

In [37]:
def overall_metrics(g_train, g_true, threshold=5):
    """Score a thresholded preferential-attachment link predictor.

    Every (u, v, score) triple yielded by ``nx.preferential_attachment(g_train)``
    is turned into one binary prediction (``score > threshold``) and one binary
    label (whether ``g_true`` contains the edge ``(u, v)``).

    Parameters
    ----------
    g_train : networkx graph
        Observed graph that the preferential-attachment scores are computed on.
    g_true : networkx graph
        Ground-truth graph used to label each scored pair.
    threshold : number, optional
        A pair is predicted to be an edge when its score is strictly greater
        than this value. Defaults to 5.

    Returns
    -------
    tuple
        ``(auc_score, f1_score)``: the ROC-AUC rounded to 4 decimals and the
        F1 score rounded to 9 decimals, both computed on the binary
        predictions.
    """
    # Materialise the generator once so labels and predictions are built
    # from exactly the same sequence of pairs.
    scored_pairs = list(nx.preferential_attachment(g_train))  # (u, v, score) triples

    true_edges = [g_true.has_edge(u, v) for u, v, _ in scored_pairs]
    # Strict comparison: a score equal to the threshold is NOT predicted as an edge.
    pred_edges = [score > threshold for _, _, score in scored_pairs]

    auc_score = np.round(metrics.roc_auc_score(true_edges, pred_edges), 4)
    f1_score = np.round(metrics.f1_score(true_edges, pred_edges), 9)

    return auc_score, f1_score

In [38]:
# AUC for (a) and (b) as returned by overall_metrics (first element of its
# result). Both labels use the "over all edges" wording, since both values
# come from the same all-pairs computation.
auc_all_a = overall_metrics(g_train, g_true1)[0]
print("AUC score for Case A by computing over all edges is: ", auc_all_a)

auc_all_b = overall_metrics(g_train, g_true2)[0]
print("AUC score for Case B by computing over all edges is: ", auc_all_b)

AUC score for Case A by computing over all edges is:  0.9821
AUC score for Case B by computing only over missing edges is:  0.9909


In [40]:
# F1 score for (a) and (b) as returned by overall_metrics (second element of
# its result), i.e. computed over all edges it scores rather than only the
# hand-picked missing edges.
f1_all_a = overall_metrics(g_train, g_true1)[1]
print("F1 score for Case A by computing over all edges is: ", f1_all_a)

f1_all_b = overall_metrics(g_train, g_true2)[1]
print("F1 score for Case B by computing over all edges is: ", f1_all_b)

F1 score for Case A by computing only over missing edges is:  0.5
F1 score for Case B by computing only over missing edges is:  0.8


In [13]:
# visualizing graph correctness
# Optional sanity check, left disabled: uncomment to draw the training graph
# and one of the ground-truth graphs with node labels.
# pos = nx.spring_layout(g_train)
# nx.draw(
#   g_train, pos, edge_color='black', width=1, linewidths=1,
#   node_size=300, node_color='pink', alpha=0.9,
#   labels={node: node for node in g_train.nodes()}
# )

# NOTE(review): this snippet originally referred to `g_test2`, which is not
# defined in the cells shown; `g_true2` is presumably the intended graph.
# pos = nx.spring_layout(g_true2)
# nx.draw(
#   g_true2, pos, edge_color='black', width=1, linewidths=1,
#   node_size=300, node_color='pink', alpha=0.9,
#   labels={node: node for node in g_true2.nodes()}
# )