In [387]:
import pandas as pd
from py2neo import Graph

In [388]:
# Bolt endpoint of the local Neo4j instance queried throughout this notebook.
NEO4J_URI = 'bolt://127.0.0.1:7687/db/data'
graph = Graph(NEO4J_URI)

In [389]:
def to_df(result):
    """Materialise a query result as a pandas DataFrame.

    `result` must be iterable, yielding records that expose `.values()`,
    and must itself expose `.keys()` giving the column names (the shape of
    the object returned by `graph.run`).
    """
    rows = []
    for record in result:
        rows.append(record.values())
    # Column names are read after the rows have been consumed, as before.
    column_names = result.keys()
    return pd.DataFrame(rows, columns=column_names)

## Data exploration

In [390]:
# Number of nodes per label combination: a quick inventory of the graph.
label_counts_query = "MATCH (n) RETURN distinct labels(n), count(*)"
to_df(graph.run(label_counts_query))

Unnamed: 0,labels(n),count(*)
0,[Publication],50000
1,[Category],2243
2,[Author],94540
3,[Affiliation],1495


In [391]:
# Preview a handful of publications. Without a LIMIT the server streams every
# Publication node (50,000 in this graph) over Bolt into a DataFrame just to
# look at the first rows, so the preview is bounded in Cypher instead.
to_df(graph.run("MATCH (p:Publication) RETURN p.id AS id, p.title AS title LIMIT 10"))

Unnamed: 0,id,title
0,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN
1,http://arxiv.org/abs/1906.02738v1,Conversing by Reading: Contentful Neural Conve...
2,http://arxiv.org/abs/1906.02736v1,DeepMDP: Learning Continuous Latent Space Mode...
3,http://arxiv.org/abs/1906.02735v1,Residual Flows for Invertible Generative Modeling
4,http://arxiv.org/abs/1906.02611v1,Improving Robustness Without Sacrificing Accur...
5,http://arxiv.org/abs/1906.02732v1,A Look at the Effect of Sample Design on Gener...
6,http://arxiv.org/abs/1906.02729v1,3D-RelNet: Joint Object and Relational Network...
7,http://arxiv.org/abs/1906.02728v1,Feature-level and Model-level Audiovisual Fusi...
8,http://arxiv.org/abs/1906.02719v1,Learning Gaussian Graphical Models with Ordere...
9,http://arxiv.org/abs/1906.02717v1,Adaptive Gradient-Based Meta-Learning Methods


## Query relationships

In [392]:
# Publication id passed to the queries below as the `pub` parameter.
pub = "http://arxiv.org/abs/1906.02739v1"

In [393]:
# Everything directly connected to the chosen publication, with the
# relationship type. The parameter uses the `$pub` form: the older `{pub}`
# syntax is deprecated and no longer accepted from Neo4j 4.0 onwards.
query = """
MATCH (p1:Publication)-[r]-(n)
WHERE p1.id = $pub
RETURN p1.title, type(r), n.name
"""

to_df(graph.run(query, pub=pub))

Unnamed: 0,p1.title,type(r),n.name
0,Mesh R-CNN,HAS_PRIMARY_CATEGORY,cs.CV
1,Mesh R-CNN,HAS_CATEGORY,cs.CV


In [394]:
# Publications reachable from the chosen one through a shared neighbour node.
# NOTE: count(rn) counts matched paths, not distinct shared nodes, so parallel
# relationships to the same neighbour (e.g. HAS_CATEGORY and
# HAS_PRIMARY_CATEGORY to one category) are each counted.
# The parameter uses `$pub`; the older `{pub}` form is deprecated and was
# removed in Neo4j 4.0.
query = """
MATCH (p1:Publication)-[]-(rn)-[]-(p2:Publication)
WHERE p1.id = $pub
RETURN p1.id, p1.title, count(rn) as n_relations, p2.id, p2.title
ORDER BY n_relations DESC
LIMIT 10
"""

to_df(graph.run(query, pub=pub))

Unnamed: 0,p1.id,p1.title,n_relations,p2.id,p2.title
0,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,6,http://arxiv.org/abs/1903.09977v2,Joint Learning of Discriminative Low-dimension...
1,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,4,http://arxiv.org/abs/0906.2716v1,Maximal digital straight segments and converge...
2,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,4,http://arxiv.org/abs/0906.2770v1,Combinatorial pyramids and discrete geometry f...
3,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,4,http://arxiv.org/abs/0905.2463v2,Generalized Kernel-based Visual Tracking
4,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,4,http://arxiv.org/abs/0906.4131v2,Automatic Spatially-Adaptive Balancing of Ener...
5,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,4,http://arxiv.org/abs/0906.3770v1,Automatic Defect Detection and Classification ...
6,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,4,http://arxiv.org/abs/0906.0434v1,"Total Variation, Adaptive Total Variation and ..."
7,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,4,http://arxiv.org/abs/0906.3068v1,Deformable Model with a Complexity Independent...
8,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,4,http://arxiv.org/abs/0906.3323v1,Adaptive Regularization of Ill-Posed Problems:...
9,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,4,http://arxiv.org/abs/0906.4789v1,Efficient IRIS Recognition through Improvement...


## Compute similarities

In [395]:
# Jaccard similarity over all publications, where each publication is
# described by the ids of every node it is connected to (the map key is
# named `categories` because that is the key the procedure's input format
# uses). With write:false nothing is stored; only summary statistics come back.
# Only the columns that are actually returned are YIELDed, and no parameters
# are passed because the query does not reference any.
query = """
MATCH (pub:Publication)-[]-(rel)
WITH {item: id(pub), categories: collect(id(rel))} as pubData
WITH collect(pubData) as data

CALL algo.similarity.jaccard(data, {concurrency:1, similarityCutoff: 0.1, write:false})
YIELD nodes, similarityPairs, write, writeRelationshipType, writeProperty, min, max, mean, p95

RETURN nodes, similarityPairs, write, writeRelationshipType, writeProperty, min, max, mean, p95
"""

to_df(graph.run(query))


Unnamed: 0,nodes,similarityPairs,write,writeRelationshipType,writeProperty,min,max,mean,p95
0,50000,102331790,False,SIMILAR,score,0.1,1.000007,0.203534,0.428571


In [397]:
# Overlap similarity between the chosen publication and all others.
# `sourceIds` restricts the comparison to pairs that start from the chosen
# publication (per the procedure's sourceIds option).
# NOTE: the overlap coefficient divides the intersection by the size of the
# smaller set, so a source publication with very few neighbours scores 1.0
# against anything that shares them; ties at 1.0 are then ordered arbitrarily.
# The parameter uses `$pub`; the older `{pub}` form is deprecated and was
# removed in Neo4j 4.0.
query = """
MATCH (pub:Publication)-[]-(rel), (from:Publication {id: $pub})
WITH {item: id(pub), categories: collect(id(rel))} as pubData, [id(from)] as sourceIds
WITH collect(pubData) as data, sourceIds

CALL algo.similarity.overlap.stream(data, {sourceIds: sourceIds, concurrency:1, similarityCutoff: 0.1})
YIELD item1, item2, similarity
WITH algo.getNodeById(item1) AS from, algo.getNodeById(item2) AS to, similarity

RETURN from.id, from.title, to.id, to.title, similarity
ORDER BY similarity DESC
LIMIT 10
"""

to_df(graph.run(query, pub=pub))


Unnamed: 0,from.id,from.title,to.id,to.title,similarity
0,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,http://arxiv.org/abs/1905.08748v2,RIU-Net: Embarrassingly simple semantic segmen...,1.0
1,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,http://arxiv.org/abs/1906.02728v1,Feature-level and Model-level Audiovisual Fusi...,1.0
2,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,http://arxiv.org/abs/1906.02729v1,3D-RelNet: Joint Object and Relational Network...,1.0
3,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,http://arxiv.org/abs/1901.07213v3,A Fully Convolutional Network for Rectal Cance...,1.0
4,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,http://arxiv.org/abs/1906.02659v1,Does Object Recognition Work for Everyone?,1.0
5,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,http://arxiv.org/abs/1906.02634v1,Scaling Autoregressive Video Models,1.0
6,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,http://arxiv.org/abs/1906.01675v2,4-D Scene Alignment in Surveillance Video,1.0
7,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,http://arxiv.org/abs/1905.02706v2,Learning Unsupervised Multi-View Stereopsis vi...,1.0
8,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,http://arxiv.org/abs/1906.02549v1,Weakly-Supervised Spatio-Temporally Grounding ...,1.0
9,http://arxiv.org/abs/1906.02739v1,Mesh R-CNN,http://arxiv.org/abs/1905.01235v2,Scaling and Benchmarking Self-Supervised Visua...,1.0
