In [6]:
%%capture
import json
import networkx as nx
from tqdm.notebook import tqdm

In [7]:
# Load the train and test splits from their JSON files.
# The encoding is pinned to UTF-8 (the encoding JSON is specified to use) so that
# decoding does not depend on the platform's locale default.
with open('./datasets/train_set_aan.json', encoding='utf-8') as f:
    train_set = json.load(f)

with open('./datasets/test_set_aan.json', encoding='utf-8') as f:
    test_set = json.load(f)

# Both splits concatenated, train entries first.
full_set = train_set + test_set

In [8]:
# Directed citation graph of the training split: one edge per
# (citing paper id -> referenced paper id) pair.
citation_graph = nx.DiGraph()
citation_graph.add_edges_from(
    (citing['id'], cited_id)
    for citing in train_set
    for cited_id in citing['references']
)

# Display the number of nodes in the graph.
citation_graph.number_of_nodes()

12420

In [12]:
# Display the number of papers currently held in test_set.
len(test_set)

3085

In [13]:
# Keep only the test papers that reference at least one paper of the train set.
train_ids_set = {paper['id'] for paper in train_set}

test_set = [
    paper
    for paper in test_set
    if any(ref_id in train_ids_set for ref_id in paper['references'])
]

# Display how many test papers remain.
len(test_set)

3030

In [28]:
# Number of recommendations kept for each test paper.
TOP_K = 100

# Expected references per test paper, and the ranked recommendations to fill in.
ground_truth = {paper['id']: paper['references'] for paper in test_set}
recommendation = {}
# Personalization vector: zero weight on every train paper; the paper being
# queried is given weight 1 for the duration of its own iteration.
personalization_dict = dict.fromkeys(train_ids_set, 0)

# NOTE(review): references that are in the train set are inserted into the graph
# below and are also part of ground_truth -- confirm this matches the intended
# evaluation protocol.
for input_paper in tqdm(test_set):
    paper_id = input_paper['id']
    known_refs = [ref_id for ref_id in input_paper['references'] if ref_id in train_ids_set]

    # Record exactly what this iteration is about to add, so that cleanup restores
    # the graph to its previous state instead of deleting nodes or edges that were
    # already there (e.g. when a train paper cites this test paper).
    added_nodes = {node for node in (paper_id, *known_refs) if node not in citation_graph}
    added_edges = [
        (paper_id, ref_id)
        for ref_id in known_refs
        if not citation_graph.has_edge(paper_id, ref_id)
    ]
    previous_weight = personalization_dict.get(paper_id)

    try:
        # update graph with references of input_paper
        citation_graph.add_edges_from(added_edges)

        # compute personalized page rank and get results
        personalization_dict[paper_id] = 1
        scores = nx.pagerank(citation_graph, personalization=personalization_dict)
        ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
        # Drop the query paper first, then truncate, so the list never exceeds TOP_K.
        recommendation[paper_id] = [
            candidate_id for candidate_id, _ in ranked if candidate_id != paper_id
        ][:TOP_K]
    finally:
        # clean graph and personalization dict, even if pagerank raised or the
        # cell was interrupted, so a re-run starts from the same train graph
        citation_graph.remove_edges_from(added_edges)
        citation_graph.remove_nodes_from(added_nodes)
        if previous_weight is None:
            personalization_dict.pop(paper_id, None)
        else:
            personalization_dict[paper_id] = previous_weight

# Number of test papers without a recommendation list (0 when every paper was processed).
len(ground_truth) - len(recommendation)

HBox(children=(FloatProgress(value=0.0, max=3030.0), HTML(value='')))




0

In [29]:
# Write the ground truth and the recommendations to their JSON files.
for out_path, payload in (
    ('./evaloffsets/base_personalizedpagerank_gt.json', ground_truth),
    ('./evaloffsets/base_personalizedpagerank_rec.json', recommendation),
):
    with open(out_path, 'w') as f:
        json.dump(payload, f)