In [1]:
%%capture
import json
import networkx as nx
from tqdm.notebook import tqdm

In [2]:
# Load the full paper collection. JSON is UTF-8 by specification, so the
# encoding is stated explicitly instead of relying on the platform default.
with open('./data/aan_full.json', encoding='utf-8') as f:
    full_set = json.load(f)

# One directed edge from every paper to each entry of its 'references' list.
# Nodes are created implicitly by the edges, so a paper that neither has
# references nor is referenced does not appear in the graph.
directed_citation_graph = nx.DiGraph()
directed_citation_graph.add_edges_from(
    (paper['id'], ref_id)
    for paper in full_set
    for ref_id in paper['references']
)

# Direction-less copy of the same graph, used for the undirected baselines.
undirected_citation_graph = directed_citation_graph.to_undirected()

In [3]:
# Sanity check: True means every node of the undirected citation graph can be
# reached from every other node (the graph is a single connected component).
nx.is_connected(undirected_citation_graph)

True

# aan_test_single.json

In [21]:
# Load the single-paper test queries. The encoding is explicit because JSON is
# UTF-8 by specification and open() otherwise uses the platform default.
with open('./data/aan_test_single.json', encoding='utf-8') as f:
    test_set = json.load(f)

In [22]:
# Personalized PageRank on the directed citation graph: for every test paper,
# restart the random walk at that paper and keep the 100 best-scoring other
# nodes as its recommendations.
results = []

for input_paper in tqdm(test_set):
    input_id = input_paper['id']

    # Only the query paper gets restart weight. Nodes missing from the
    # personalization dict are treated as weight 0 (NetworkX >= 2.0), so no
    # graph-wide dict has to be mutated and reset on every iteration.
    scores = nx.pagerank(directed_citation_graph, personalization={input_id: 1})

    # sorted() is stable, so equally scored nodes keep the order in which
    # pagerank returned them.
    ranked_ids = sorted(scores, key=scores.get, reverse=True)

    results.append({
        'input': [input_id],
        # The query paper itself is never recommended.
        'output': [paper_id for paper_id in ranked_ids if paper_id != input_id][:100],
    })

HBox(children=(FloatProgress(value=0.0, max=3000.0), HTML(value='')))




In [23]:
# Persist the directed single-paper recommendations as JSON.
with open('./results/single_base_personalizedpagerank_directed.json', mode='w') as results_file:
    json.dump(results, results_file)

In [24]:
# Personalized PageRank on the undirected citation graph: for every test paper,
# restart the random walk at that paper and keep the 100 best-scoring other
# nodes as its recommendations.
results = []

for input_paper in tqdm(test_set):
    input_id = input_paper['id']

    # Only the query paper gets restart weight. Nodes missing from the
    # personalization dict are treated as weight 0 (NetworkX >= 2.0), so no
    # graph-wide dict has to be mutated and reset on every iteration.
    scores = nx.pagerank(undirected_citation_graph, personalization={input_id: 1})

    # sorted() is stable, so equally scored nodes keep the order in which
    # pagerank returned them.
    ranked_ids = sorted(scores, key=scores.get, reverse=True)

    results.append({
        'input': [input_id],
        # The query paper itself is never recommended.
        'output': [paper_id for paper_id in ranked_ids if paper_id != input_id][:100],
    })

HBox(children=(FloatProgress(value=0.0, max=3000.0), HTML(value='')))




In [25]:
# Persist the undirected single-paper recommendations as JSON.
with open('./results/single_base_personalizedpagerank_undirected.json', mode='w') as results_file:
    json.dump(results, results_file)

# aan_test_triplet.json

In [3]:
# Load the multi-paper (triplet) test queries. The encoding is explicit because
# JSON is UTF-8 by specification and open() otherwise uses the platform default.
with open('./data/aan_test_triplet.json', encoding='utf-8') as f:
    test_set = json.load(f)

In [4]:
# Personalized PageRank on the directed citation graph: for every group of
# input papers, restart the random walk uniformly at those papers and keep the
# 100 best-scoring other nodes as recommendations.
results = []

for input_papers in tqdm(test_set):
    input_ids = [input_paper['id'] for input_paper in input_papers]
    input_id_set = set(input_ids)

    # Equal restart weight on every query paper. Nodes missing from the
    # personalization dict are treated as weight 0 (NetworkX >= 2.0), so no
    # graph-wide dict has to be mutated and reset on every iteration.
    seed_weights = dict.fromkeys(input_id_set, 1)
    scores = nx.pagerank(directed_citation_graph, personalization=seed_weights)

    # sorted() is stable, so equally scored nodes keep the order in which
    # pagerank returned them.
    ranked_ids = sorted(scores, key=scores.get, reverse=True)

    results.append({
        'input': input_ids,
        # Query papers themselves are never recommended.
        'output': [paper_id for paper_id in ranked_ids if paper_id not in input_id_set][:100],
    })

HBox(children=(FloatProgress(value=0.0, max=3000.0), HTML(value='')))




In [5]:
# Persist the directed triplet recommendations as JSON.
with open('./results/triplet_base_personalizedpagerank_directed.json', mode='w') as results_file:
    json.dump(results, results_file)

In [6]:
# Personalized PageRank on the undirected citation graph: for every group of
# input papers, restart the random walk uniformly at those papers and keep the
# 100 best-scoring other nodes as recommendations.
results = []

for input_papers in tqdm(test_set):
    input_ids = [input_paper['id'] for input_paper in input_papers]
    input_id_set = set(input_ids)

    # Equal restart weight on every query paper. Nodes missing from the
    # personalization dict are treated as weight 0 (NetworkX >= 2.0), so no
    # graph-wide dict has to be mutated and reset on every iteration.
    seed_weights = dict.fromkeys(input_id_set, 1)
    scores = nx.pagerank(undirected_citation_graph, personalization=seed_weights)

    # sorted() is stable, so equally scored nodes keep the order in which
    # pagerank returned them.
    ranked_ids = sorted(scores, key=scores.get, reverse=True)

    results.append({
        'input': input_ids,
        # Query papers themselves are never recommended.
        'output': [paper_id for paper_id in ranked_ids if paper_id not in input_id_set][:100],
    })

HBox(children=(FloatProgress(value=0.0, max=3000.0), HTML(value='')))




In [7]:
# Persist the undirected triplet recommendations as JSON.
with open('./results/triplet_base_personalizedpagerank_undirected.json', mode='w') as results_file:
    json.dump(results, results_file)

# aan_test_tripletfromref.json

In [4]:
# Load the "triplet from references" test queries. The encoding is explicit
# because JSON is UTF-8 by specification and open() otherwise uses the platform
# default.
with open('./data/aan_test_tripletfromref.json', encoding='utf-8') as f:
    test_set = json.load(f)

In [5]:
# Personalized PageRank on the directed citation graph: for every group of
# input papers, restart the random walk uniformly at those papers and keep the
# 100 best-scoring other nodes as recommendations.
results = []

# nx.pagerank_scipy was deprecated in NetworkX 2.6 (when nx.pagerank became the
# SciPy-backed implementation) and removed in 3.0. Use it when it still exists
# so older installs behave exactly as before; otherwise fall back to nx.pagerank.
pagerank = getattr(nx, 'pagerank_scipy', nx.pagerank)

for input_papers in tqdm(test_set):
    input_ids = [input_paper['id'] for input_paper in input_papers]
    input_id_set = set(input_ids)

    # Equal restart weight on every query paper. Nodes missing from the
    # personalization dict are treated as weight 0 (NetworkX >= 2.0), so no
    # graph-wide dict has to be mutated and reset on every iteration.
    seed_weights = dict.fromkeys(input_id_set, 1)
    scores = pagerank(directed_citation_graph, personalization=seed_weights)

    # sorted() is stable, so equally scored nodes keep the order in which
    # pagerank returned them.
    ranked_ids = sorted(scores, key=scores.get, reverse=True)

    results.append({
        'input': input_ids,
        # Query papers themselves are never recommended.
        'output': [paper_id for paper_id in ranked_ids if paper_id not in input_id_set][:100],
    })

HBox(children=(FloatProgress(value=0.0, max=3000.0), HTML(value='')))




In [6]:
# Persist the directed triplet-from-references recommendations as JSON.
with open('./results/tripletfromref_base_personalizedpagerank_directed.json', mode='w') as results_file:
    json.dump(results, results_file)

In [7]:
# Personalized PageRank on the undirected citation graph: for every group of
# input papers, restart the random walk uniformly at those papers and keep the
# 100 best-scoring other nodes as recommendations.
results = []

# nx.pagerank_scipy was deprecated in NetworkX 2.6 (when nx.pagerank became the
# SciPy-backed implementation) and removed in 3.0. Use it when it still exists
# so older installs behave exactly as before; otherwise fall back to nx.pagerank.
pagerank = getattr(nx, 'pagerank_scipy', nx.pagerank)

for input_papers in tqdm(test_set):
    input_ids = [input_paper['id'] for input_paper in input_papers]
    input_id_set = set(input_ids)

    # Equal restart weight on every query paper. Nodes missing from the
    # personalization dict are treated as weight 0 (NetworkX >= 2.0), so no
    # graph-wide dict has to be mutated and reset on every iteration.
    seed_weights = dict.fromkeys(input_id_set, 1)
    scores = pagerank(undirected_citation_graph, personalization=seed_weights)

    # sorted() is stable, so equally scored nodes keep the order in which
    # pagerank returned them.
    ranked_ids = sorted(scores, key=scores.get, reverse=True)

    results.append({
        'input': input_ids,
        # Query papers themselves are never recommended.
        'output': [paper_id for paper_id in ranked_ids if paper_id not in input_id_set][:100],
    })

HBox(children=(FloatProgress(value=0.0, max=3000.0), HTML(value='')))




In [8]:
# Persist the undirected triplet-from-references recommendations as JSON.
with open('./results/tripletfromref_base_personalizedpagerank_undirected.json', mode='w') as results_file:
    json.dump(results, results_file)