In [2]:
import os

import IPython
import altair as alt
import numpy as np
import pandas as pd
from neo4j import GraphDatabase
from sklearn.manifold import TSNE
alt.renderers.enable('default')

# Connection settings can be overridden through environment variables so that
# real credentials never have to be written into the notebook. The fallbacks
# are the local development values this notebook has always used.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:1549")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "00000000")

# The driver is intentionally left open: later cells open their own sessions
# on this same `driver` object.
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

with driver.session(database="neo4j") as session:
    # Paintings without a stored embedding are filtered out in the query:
    # a null entry cannot be stacked into the matrix handed to t-SNE.
    result = session.run("""
    MATCH (p:Paintings)-[:IN_COLLECTION]->(collection)
    WHERE p.fastrpembedding IS NOT NULL
    RETURN p.name AS painting, p.fastrpembedding AS embeddings, collection.name AS collections
    """)
    # One row per (painting, collection) pair returned by the query.
    X = pd.DataFrame([dict(record) for record in result])

if X.empty:
    # Without this guard the next line fails with an opaque AttributeError,
    # because an empty frame has no `embeddings` column.
    raise ValueError(
        "The query returned no paintings with a 'fastrpembedding' property; "
        "check the connection settings and that the embeddings were written to the graph."
    )

# Project the embeddings to 2D. The fixed random_state keeps the layout
# reproducible between runs.
X_embedded = TSNE(n_components=2, random_state=6).fit_transform(list(X.embeddings))

collections = X.collections
df = pd.DataFrame(data = {
    "place": collections,
    # "country": X.country,  # not available: the query above does not return a country
    # .tolist() yields plain Python floats, matching what iterating the rows produced.
    "x": X_embedded[:, 0].tolist(),
    "y": X_embedded[:, 1].tolist()
})
print(df)

# Scatter plot of the 2D projection, coloured by the collection holding the painting.
c = alt.Chart(df).mark_circle(size=60).encode(
    x='x',
    y='y',
    color='place',
    # tooltip=['place']
).properties(width=700, height=400)
c.display()


                                     place          x          y
0                 Kunsthistorisches Museum  -1.397181  -3.982163
1    Department of Paintings of the Louvre -10.115881 -18.997402
2      Bavarian State Painting Collections  -9.578156   8.106401
3                       private collection  -9.244140  23.745750
4               Metropolitan Museum of Art -22.712250 -17.988382
..                                     ...        ...        ...
784    Bavarian State Painting Collections  -9.310609   8.044011
785                     private collection  -8.771601  23.860624
786               Residenzgalerie Salzburg   2.170697  -6.311157
787               Kunsthistorisches Museum  -1.395539  -3.983505
788                     private collection  -8.255746  27.410227

[789 rows x 3 columns]


In [None]:
'''
FastRP
dimensionality reduction while preserving most of the distance information
two nodes that have similar neighborhoods should be assigned similar embedding vectors


The embedding is built from (painting)-[in_collection]-(collection) triples.
The plot above shows the painting embeddings in 2D (the original embeddings are 10D).
However, when more types of nodes are added, such as material or genre, the embeddings become less reasonable, and many are equal to 0.
'''


'''
Node2vec
Similar to word2vec; random walks are used for sampling.
'''


'''
GraphSAGE
inductive learning
instead of learning embeddings for each node, it learns a function to generate embedding 
by sampling and aggregating features from a node’s local neighborhood. 

Pros: does not need to be retrained on the whole graph when new nodes are added
      can make use of the properties of nodes
    


Requires that every property used has a value; the data we collected from Wikidata cannot guarantee this.
'''



In [3]:
'''
Besides embedding methods, content-based similarity metrics could be another option for recommendation.

We can calculate the Jaccard index for sets of properties to determine how similar two paintings are.

Suitable for recommending entities of the same type.
'''
# Painting whose most similar paintings (by shared collection / genre /
# material nodes) are looked up.
painting_name = "King Caspar"

with driver.session(database="neo4j") as session:
    # The painting name is passed as the $name query parameter instead of being
    # written into the Cypher text, so the cell can be re-run for another
    # painting (including names that contain quotes) without editing the query.
    #
    # Query outline:
    #   1. intersection = number of nodes t linked to both paintings
    #   2. s1 / s2      = names of the nodes linked to each painting
    #   3. union        = s1 plus the entries of s2 that are not already in s1
    #   4. jaccard      = intersection / size(union), top 100 by score
    result = session.run("""
    MATCH (m:Paintings {name: $name})-[:IN_COLLECTION|GENRE|MATERIAL]-(t)-[:IN_COLLECTION|GENRE|MATERIAL]-(other:Paintings)
    WITH m, other, COUNT(t) AS intersection
    MATCH (m)-[:IN_COLLECTION|GENRE|MATERIAL]-(mt)
    WITH m, other, intersection, COLLECT(mt.name) AS s1
    MATCH (other)-[:IN_COLLECTION|GENRE|MATERIAL]-(ot)
    WITH m, other, intersection, s1, COLLECT(ot.name) AS s2

    WITH m, other, intersection, s1 + [x IN s2 WHERE NOT x IN s1] AS union, s1, s2

    RETURN m.name, other.name, s1, s2, ((1.0*intersection)/SIZE(union)) AS jaccard ORDER BY jaccard DESC LIMIT 100
    """, {"name": painting_name})
    # Materialise the records while the session is still open.
    X_jacard = pd.DataFrame([dict(record) for record in result])
X_jacard




Unnamed: 0,m.name,other.name,s1,s2,jaccard
0,King Caspar,Old Man with Fur Coat,"[oak, Gemäldegalerie, oil paint, portrait]","[oil paint, Gemäldegalerie, portrait, Berlin S...",0.6
1,King Caspar,Bust of a Young Jew,"[oak, Gemäldegalerie, oil paint, portrait]","[Gemäldegalerie, Berlin State Museums, portrai...",0.6
2,King Caspar,Man with a Beard,"[oak, Gemäldegalerie, oil paint, portrait]","[portrait, Berlin State Museums, Gemäldegaleri...",0.6
3,King Caspar,Head of Christ,"[oak, Gemäldegalerie, oil paint, portrait]","[portrait, Berlin State Museums, oil paint, Ge...",0.5
4,King Caspar,Portrait of Cornelis Claeszoon Anslo and his w...,"[oak, Gemäldegalerie, oil paint, portrait]","[Berlin State Museums, Gemäldegalerie, canvas,...",0.5
...,...,...,...,...,...
95,King Caspar,Portrait of a Rabbi,"[oak, Gemäldegalerie, oil paint, portrait]","[oil paint, portrait, Royal Collection]",0.4
96,King Caspar,Portrait of a Woman,"[oak, Gemäldegalerie, oil paint, portrait]","[oil paint, portrait, private collection]",0.4
97,King Caspar,Young Woman with a Gold Chain,"[oak, Gemäldegalerie, oil paint, portrait]","[Museo Nacional de Bellas Artes, oil paint, po...",0.4
98,King Caspar,Portrait of a Bearded Man in a Wide-Brimmed Ha...,"[oak, Gemäldegalerie, oil paint, portrait]","[Norton Simon Museum, portrait, oil paint]",0.4


In [None]:
'''
Besides Jaccard, other similarity algorithms are available in Neo4j:

Node Similarity

K-Nearest Neighbors

Approximate Nearest Neighbors

Cosine Similarity

Euclidean Similarity

Jaccard Similarity

Overlap Similarity

Pearson Similarity
'''


In [5]:
# Painting whose most similar paintings (by shared KEYWORD nodes) are looked up.
painting_name = "King Caspar"

with driver.session(database="neo4j") as session:
    # The painting name is passed as the $name query parameter instead of being
    # written into the Cypher text, so the cell can be re-run for another
    # painting (including names that contain quotes) without editing the query.
    #
    # Query outline:
    #   1. intersection = number of keyword nodes t linked to both paintings
    #   2. s1 / s2      = names of the keyword nodes linked to each painting
    #   3. union        = s1 plus the entries of s2 that are not already in s1
    #   4. jaccard      = intersection / size(union), top 100 by score
    result = session.run("""
    MATCH (m:Paintings {name: $name})-[:KEYWORD]-(t)-[:KEYWORD]-(other:Paintings)
    WITH m, other, COUNT(t) AS intersection
    MATCH (m)-[:KEYWORD]-(mt)
    WITH m, other, intersection, COLLECT(mt.name) AS s1
    MATCH (other)-[:KEYWORD]-(ot)
    WITH m, other, intersection, s1, COLLECT(ot.name) AS s2

    WITH m, other, intersection, s1 + [x IN s2 WHERE NOT x IN s1] AS union, s1, s2

    RETURN m.name, other.name, s1, s2, ((1.0*intersection)/SIZE(union)) AS jaccard ORDER BY jaccard DESC LIMIT 100
    """, {"name": painting_name})
    # Materialise the records while the session is still open.
    X_jacard = pd.DataFrame([dict(record) for record in result])
X_jacard

Unnamed: 0,m.name,other.name,s1,s2,jaccard
0,King Caspar,Two moors,"[Saint Caspar, black people]","[black people, man]",0.333333
1,King Caspar,A Moor in a Turban,"[Saint Caspar, black people]","[man, turban, black people]",0.25
