In [404]:
import pandas as pd
from py2neo import Graph

In [405]:
graph = Graph('bolt://127.0.0.1:7687/db/data')

In [406]:
def to_df(result):
    return pd.DataFrame([r.values() for r in result], columns=result.keys())

## Data exploration

In [408]:
to_df(graph.run("MATCH (n) RETURN distinct labels(n), count(*)"))

Unnamed: 0,labels(n),count(*)
0,[Publication],50000
1,[Category],160
2,[Author],94540


In [410]:
to_df(graph.run("MATCH (p:Publication) RETURN p.id AS id, p.title AS title"))

Unnamed: 0,id,title
0,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN
1,http://arxiv.org/abs/1906.02738v1,Conversing by Reading: Contentful Neural Conve...
2,http://arxiv.org/abs/1906.02736v1,DeepMDP: Learning Continuous Latent Space Mode...
3,http://arxiv.org/abs/1906.02735v1,Residual Flows for Invertible Generative Modeling
4,http://arxiv.org/abs/1906.02611v1,Improving Robustness Without Sacrificing Accur...
5,http://arxiv.org/abs/1906.02732v1,A Look at the Effect of Sample Design on Gener...
6,http://arxiv.org/abs/1906.02729v1,3D-RelNet: Joint Object and Relational Network...
7,http://arxiv.org/abs/1906.02728v1,Feature-level and Model-level Audiovisual Fusi...
8,http://arxiv.org/abs/1906.02719v1,Learning Gaussian Graphical Models with Ordere...
9,http://arxiv.org/abs/1906.02717v1,Adaptive Gradient-Based Meta-Learning Methods


## Query relationships

In [411]:
pub = 'http://arxiv.org/abs/1906.02739v1'

In [412]:
query = """
MATCH (p1:Publication)-[r]-(n)
WHERE p1.id = {pub}
RETURN p1.title, type(r), n.name
"""

to_df(graph.run(query, pub=pub))

Unnamed: 0,p1.title,type(r),n.name
0,Mesh R-CNN,WROTE,Justin Johnson
1,Mesh R-CNN,WROTE,Jitendra Malik
2,Mesh R-CNN,WROTE,Georgia Gkioxari
3,Mesh R-CNN,HAS_PRIMARY_CATEGORY,cs.CV
4,Mesh R-CNN,HAS_CATEGORY,cs.CV


In [413]:
query = """
MATCH (p1:Publication)-[]-(rn)-[]-(p2:Publication)
WHERE p1.id = {pub}
RETURN p1.id, p1.title, count(rn) as n_relations, p2.id, p2.title
ORDER BY n_relations DESC
LIMIT 10
"""

to_df(graph.run(query, pub=pub))

Unnamed: 0,p1.id,p1.title,n_relations,p2.id,p2.title
0,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,6,http://arxiv.org/abs/1903.09977v2,Joint Learning of Discriminative Low-dimension...
1,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,5,http://arxiv.org/abs/1904.03239v1,ShapeMask: Learning to Segment Novel Objects b...
2,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,5,http://arxiv.org/abs/1811.12373v1,Diverse Image Synthesis from Semantic Layouts ...
3,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,5,http://arxiv.org/abs/1901.01971v2,Learning Independent Object Motion from Unlabe...
4,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,5,http://arxiv.org/abs/1809.02882v1,Cost-Sensitive Active Learning for Intracrania...
5,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,5,http://arxiv.org/abs/1904.01201v1,Habitat: A Platform for Embodied AI Research
6,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,5,http://arxiv.org/abs/1812.00940v1,Visual Memory for Robust Path Following
7,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,5,http://arxiv.org/abs/1905.13214v1,On Network Design Spaces for Visual Recognition
8,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,5,http://arxiv.org/abs/1702.03920v3,Cognitive Mapping and Planning for Visual Navi...
9,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,5,http://arxiv.org/abs/1812.03982v2,SlowFast Networks for Video Recognition


## Compute similarities

In [414]:
query = """
MATCH (pub:Publication)-[]-(rel)
WITH {item: id(pub), categories: collect(id(rel))} as pubData
WITH collect(pubData) as data

CALL algo.similarity.jaccard(data, {concurrency:1, similarityCutoff: 0.1, write:false})
YIELD nodes, similarityPairs, write, writeRelationshipType, writeProperty, min, max, mean, stdDev, p25, p50, p75, p90, p95, p99, p999, p100

RETURN nodes, similarityPairs, write, writeRelationshipType, writeProperty, min, max, mean, p95
"""

to_df(graph.run(query, pub=pub))


Unnamed: 0,nodes,similarityPairs,write,writeRelationshipType,writeProperty,min,max,mean,p95
0,50000,126472142,False,SIMILAR,score,0.1,1.000007,0.197844,0.333333


In [416]:
query = """
MATCH (pub:Publication)-[]-(rel), (from:Publication {id: {pub}})
WITH {item: id(pub), categories: collect(id(rel))} as pubData, [id(from)] as sourceIds
WITH collect(pubData) as data, sourceIds

CALL algo.similarity.overlap.stream(data, {sourceIds: sourceIds, concurrency:1, similarityCutoff: 0.1})
YIELD item1, item2, similarity
WITH algo.getNodeById(item1) AS from, algo.getNodeById(item2) AS to, similarity

RETURN from.id, from.title, to.id, to.title, similarity
ORDER BY similarity DESC
LIMIT 100
"""

to_df(graph.run(query, pub=pub))


Unnamed: 0,from.id,from.title,to.id,to.title,similarity
0,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,http://arxiv.org/abs/1812.01601v3,Learning 3D Human Dynamics from Video,0.6
1,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,http://arxiv.org/abs/1811.12373v1,Diverse Image Synthesis from Semantic Layouts ...,0.6
2,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,http://arxiv.org/abs/1905.13214v1,On Network Design Spaces for Visual Recognition,0.6
3,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,http://arxiv.org/abs/1806.03265v2,PatchFCN for Intracranial Hemorrhage Detection,0.6
4,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,http://arxiv.org/abs/1904.03461v1,Embodied Question Answering in Photorealistic ...,0.6
5,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,http://arxiv.org/abs/1904.03239v1,ShapeMask: Learning to Segment Novel Objects b...,0.6
6,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,http://arxiv.org/abs/1809.02882v1,Cost-Sensitive Active Learning for Intracrania...,0.6
7,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,http://arxiv.org/abs/1905.07553v2,Which Tasks Should Be Learned Together in Mult...,0.6
8,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,http://arxiv.org/abs/1702.03920v3,Cognitive Mapping and Planning for Visual Navi...,0.6
9,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,http://arxiv.org/abs/1904.01201v1,Habitat: A Platform for Embodied AI Research,0.6


In [425]:
compared_to = [
    'http://arxiv.org/abs/1906.01792v1',
    'http://arxiv.org/abs/1906.02698v1',
    'http://arxiv.org/abs/1812.01601v3',
    'http://arxiv.org/abs/1906.02739v1',
]

query = """
MATCH (pub:Publication)-[]-(rel),
      (from:Publication),
      (to:Publication)
WHERE from.id = {pub} AND to.id IN {to}

WITH {item: id(pub), categories: collect(id(rel))} as pubData, [id(from)] as sourceIds, [id(to)] as targetIds
WITH collect(pubData) as data, sourceIds, targetIds

CALL algo.similarity.overlap.stream(data, {sourceIds: sourceIds, targetIds: targetIds, concurrency:1, similarityCutoff: 0.1})
YIELD item1, item2, similarity
WITH algo.getNodeById(item1) AS from, algo.getNodeById(item2) AS to, similarity

RETURN from.id, from.title, to.id, to.title, similarity
ORDER BY similarity DESC
LIMIT 100
"""

to_df(graph.run(query, pub=pub, to=compared_to))


Unnamed: 0,from.id,from.title,to.id,to.title,similarity
0,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,http://arxiv.org/abs/1812.01601v3,Learning 3D Human Dynamics from Video,0.6
1,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,http://arxiv.org/abs/1906.01792v1,PAC-GAN: An Effective Pose Augmentation Scheme...,0.4
2,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,http://arxiv.org/abs/1906.02698v1,Training large-scale ANNs on simulated resisti...,0.0
