In [1]:
import os
import torch
import math
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Input data
dpath = ".\\data\\"
gList = dict()
filenames = []

for root, dirs, files in os.walk(dpath):
    for file in files:
        file_path = os.path.join(root, file)
        if 'score' in file and "top" not in file: # Ignore Kadabra output
            filenames.append(file)
            gList[file] = dict()
            file_path = os.path.join(root,file) 
            with open(file_path,'r') as f:
                content = f.readlines()
                for line in content:
                    if 'com' not in file:
                        node_score = line[:-1].split('\t')
                    else:
                        continue # after finish all code run code with com
                        node_score = line[:-1].split(" ")
                    gList[file][int(node_score[0])] = float(node_score[1])

In [3]:
y = torch.tensor([list(gList['0_score.txt'].values())])
gt = torch.transpose(y,0,1)
_,gt_top = torch.topk(gt.view(-1),k=5)
gt_top

tensor([0, 5, 4, 1, 6])

In [4]:
kadabra = ".\\data\\"
kgList = dict()
kfilenames = []
for root, dirs, files in os.walk(dpath):
    for file in files:
        file_path = os.path.join(root, file)
        if "top" in file: # Only consider kadabra output
            if "com" in file: # Ignore com first
                continue
            kfilenames.append(file)
            kgList[file] = dict()
            kgList[file]['top'] = []

            file_path = os.path.join(root,file) 
            with open(file_path,'r') as f:
                content = f.readlines()
                kgList[file]['time'] = float(content[7][:-1].split()[2])
                for i in range(500):
                    if content[12+i][:-1].split()[0] != '?':
                        kgList[file]['top'].append(int(content[12+i][:-1].split()[1]))
                    else:
                        kgList[file]['top'].append(int(content[12+i][:-1].split()[2]))
                    

In [5]:
def topN(n,pred,gt):
    k = math.ceil(gt.size()[0]*10*n/100)
    pred_top = pred[:k]
    _,gt_top = torch.topk(gt.view(-1),k=k)
    intersect = torch.unique(torch.cat((pred_top,gt_top),0))
    acc = (2*k-len(intersect))/k
    return acc

In [10]:
acc_list = []
for i in range(30):
    f1 = str(i)+'_score.txt'
    f2 = str(i)+"_score_top500.txt"
    gt = torch.tensor([list(gList[f1].values())])
    gt = torch.transpose(y,0,1)
    pred = torch.tensor(kgList[f2]['top'])
    acc1 = topN(1,pred,gt)
    acc5 = topN(5,pred,gt)
    acc10 = topN(10,pred,gt)
    acc_list.append(acc)
    print("{} vs. {}: top-1% = {:.2f},top-5% = {:.2f},top-10% = {:.2f}".format(f1,f2,acc1,acc5,acc10))

0_score.txt vs. 0_score_top500.txt: top-1% = 0.90,top-5% = 1.00,top-10% = 1.00
1_score.txt vs. 1_score_top500.txt: top-1% = 0.60,top-5% = 0.99,top-10% = 1.00
2_score.txt vs. 2_score_top500.txt: top-1% = 0.60,top-5% = 0.99,top-10% = 1.00
3_score.txt vs. 3_score_top500.txt: top-1% = 0.56,top-5% = 0.99,top-10% = 1.00
4_score.txt vs. 4_score_top500.txt: top-1% = 0.59,top-5% = 0.99,top-10% = 1.00
5_score.txt vs. 5_score_top500.txt: top-1% = 0.57,top-5% = 0.99,top-10% = 1.00
6_score.txt vs. 6_score_top500.txt: top-1% = 0.57,top-5% = 0.99,top-10% = 1.00
7_score.txt vs. 7_score_top500.txt: top-1% = 0.58,top-5% = 0.99,top-10% = 1.00
8_score.txt vs. 8_score_top500.txt: top-1% = 0.58,top-5% = 0.99,top-10% = 1.00
9_score.txt vs. 9_score_top500.txt: top-1% = 0.59,top-5% = 0.99,top-10% = 1.00
10_score.txt vs. 10_score_top500.txt: top-1% = 0.60,top-5% = 0.99,top-10% = 1.00
11_score.txt vs. 11_score_top500.txt: top-1% = 0.57,top-5% = 0.99,top-10% = 1.00
12_score.txt vs. 12_score_top500.txt: top-1% = 0

In [7]:
torch.mean(torch.tensor(acc_list))

tensor(0.5949)

In [8]:
# Working on com
file_path = ".\\data\\youtube\\com-youtube_score.txt"
file = "com-youtube_score.txt"
with open(file_path,'r') as f:
    content = f.readlines()
    for line in content:
        node_score = line[:-1].split()
    gList[file][int(node_score[0][:-1])] = float(node_score[1])

In [33]:
file_path = ".\\data\\youtube\\com-youtube_score_top113489.txt"
file = "com-youtube_score_top113489.txt"
with open(file_path,'r') as f:
    content = f.readlines()
    kgList[file] = dict()
    kgList[file]['top'] = []
    kgList[file]['time'] = float(content[7][:-1].split()[2])
    for i in range(113489):
        if content[12+i][:-1].split()[0] != '?':
            kgList[file]['top'].append(int(content[12+i][:-1].split()[1]))
        else:
            kgList[file]['top'].append(int(content[12+i][:-1].split()[2]))

In [None]:
f1 = 'com-youtube_score.txt'
f2 = "com-youtube_score_top113489.txt"
gt = torch.tensor([list(gList[f1].values())])
gt = torch.transpose(y,0,1)
pred = torch.tensor(kgList[f2]['top'])
n = 10
acc = topN(n,pred,gt)
print("{} vs. {}: top{} = {}".format(f1,f2,n,acc))