
# Work-through of [embeddings demo](https://neo4j.com/developer/graph-data-science/applied-graph-embeddings/)

### Setup looked like this (changed for beta version of node2vec)
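Note: I first had to add `Country` nodes and `IN_COUNTRY` relationships (derived from each place's `countryCode`) by running: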

// Create one Country node per distinct country code and link every Place to it.
// The node and the relationship are merged in two separate steps: a single
// MERGE over the whole pattern would create a fresh Country node for every
// Place that does not yet have the relationship, producing duplicates.
MATCH (p:Place)
MERGE (c:Country {code: p.countryCode})
MERGE (p)-[:IN_COUNTRY]->(c)
RETURN count(*);


//cypher
// Preview: stream node2vec embeddings for Place nodes over an anonymous
// projection that treats EROAD relationships as undirected, and return
// five rows of (place name, embedding).
CALL gds.beta.node2vec.stream({
  nodeProjection: 'Place',
  relationshipProjection: {
    eroad: {type: 'EROAD', orientation: 'UNDIRECTED'}
  },
  walkLength: 10,
  iterations: 10,
  embeddingDimension: 10
})
YIELD nodeId, embedding
WITH gds.util.asNode(nodeId) AS node, embedding
RETURN node.name AS place, embedding
LIMIT 5;

// Run node2vec over Place nodes (EROAD relationships treated as undirected)
// and store each node's embedding in the `embeddingNode2vec` property.
CALL gds.beta.node2vec.write({
  nodeProjection: 'Place',
  relationshipProjection: {
    eroad: {type: 'EROAD', orientation: 'UNDIRECTED'}
  },
  writeProperty: 'embeddingNode2vec',
  walkLength: 10,
  iterations: 10,
  embeddingDimension: 10
});

// shell
# The PyPI distribution is called scikit-learn (it is imported as `sklearn`);
# the `sklearn` name on PyPI is a deprecated placeholder.
pip install neo4j scikit-learn altair


In [2]:
from neo4j import GraphDatabase
from sklearn.manifold import TSNE
import numpy as np
import altair as alt
import pandas as pd
import os

# Read the password from the environment so it is never stored in the notebook,
# and fail early with a clear message instead of attempting to authenticate with None.
password = os.getenv("NEO4J_PWD")
if not password:
    raise RuntimeError("Set the NEO4J_PWD environment variable before running this cell")

driver = GraphDatabase.driver("bolt+s://cas-testlab-neo4j.co.uk:7687", auth=("scratchuser", password))
try:
    with driver.session(database="europeanroads") as session:
        # Fetch name, stored node2vec embedding and country code for every
        # Place linked to one of the listed country codes.
        result = session.run("""
    MATCH (p:Place)-[:IN_COUNTRY]->(country)
    WHERE country.code IN $countries
    RETURN p.name AS place, p.embeddingNode2vec AS embedding, country.code AS country
    """, {"countries": ["E", "GB", "F", "TR", "I", "D", "GR"]})
        # Materialise the records while the session is still open.
        X = pd.DataFrame([dict(record) for record in result])
finally:
    # Release the connection pool even if the query fails.
    driver.close()

if X.empty:
    # An empty frame has no `embedding` column; stop here with a readable error.
    raise RuntimeError("Query returned no places; check the IN_COUNTRY setup and the country codes")

print(X)

# Reduce each embedding to two dimensions for plotting; the fixed
# random_state keeps the layout the same between runs.
X_embedded = TSNE(n_components=2, random_state=6).fit_transform(list(X.embedding))

places = X.place
# One row per place: its name, country code and 2-D t-SNE coordinates.
df = pd.DataFrame(data = {
    "place": places,
    "country": X.country,
    "x": X_embedded[:, 0],
    "y": X_embedded[:, 1]
})

          place                                          embedding country
0         Larne  [1.7676339149475098, -1.009367823600769, -0.13...      GB
1       Belfast  [1.8640823364257812, -1.2050737142562866, -0.3...      GB
2     La Coruña  [3.548208236694336, -1.2316635847091675, -1.19...       E
3    Pontevedra  [3.2116541862487793, -1.0502612590789795, -1.6...       E
4        Huelva  [2.93184232711792, -0.4424632787704468, -0.283...       E
..          ...                                                ...     ...
416       Vólos  [-0.6353455781936646, -1.5761232376098633, 0.7...      GR
417     Trapani  [0.8817242980003357, -1.5106432437896729, -1.7...       I
418    Merzifon  [2.2353477478027344, -0.885501503944397, -0.20...      TR
419      Gíthio  [0.004741339012980461, -2.0016746520996094, 0....      GR
420       Thíva  [2.1730356216430664, -1.2309520244598389, 0.02...      GR

[421 rows x 3 columns]




In [3]:
# Scatter plot of the 2-D projection: one circle per place, coloured by
# country, with the place name and country shown on hover.
alt.Chart(df).mark_circle(size=60).encode(
    alt.X('x'),
    alt.Y('y'),
    alt.Color('country'),
    tooltip=['place', 'country'],
).properties(
    width=700,
    height=400,
)