In [1]:
import pandas as pd
import numpy as np
from neo4j import GraphDatabase
from raw_generation_nosplit import ArtGraphNoSplit
from raw_generation import ArtGraphWithSplit
import os

In [2]:
conf = {
    "uri": "bolt://localhost:7687",
    "username": "neo4j",
    "password": 'neo4j',
    "database": "recsys"
}

In [3]:
driver = GraphDatabase.driver(uri = "bolt://localhost:7687", auth = (conf['username'], conf['password']))#place here credentials

In [4]:
with driver.session(database=conf['database']) as session:#place here the database name (not project name)
    node_types = session.run("MATCH(n) RETURN  DISTINCT labels(n)[0] as typen")#getting all node types
    node_types = [record['typen'] for record in node_types]#extracting data into a list
mapping_queries = {node.lower(): f"MATCH (n:{node}) RETURN n.name as name" for node in node_types}#generating queries for node types
mapping_queries['rel'] = "MATCH (n)-[r]-(n2) RETURN DISTINCT toLower(type(r)) as rel_label"#generating queries for edge types
mapping_queries

{'artwork': 'MATCH (n:Artwork) RETURN n.name as name',
 'emotion': 'MATCH (n:Emotion) RETURN n.name as name',
 'media': 'MATCH (n:Media) RETURN n.name as name',
 'tag': 'MATCH (n:Tag) RETURN n.name as name',
 'artist': 'MATCH (n:Artist) RETURN n.name as name',
 'city': 'MATCH (n:City) RETURN n.name as name',
 'country': 'MATCH (n:Country) RETURN n.name as name',
 'gallery': 'MATCH (n:Gallery) RETURN n.name as name',
 'style': 'MATCH (n:Style) RETURN n.name as name',
 'genre': 'MATCH (n:Genre) RETURN n.name as name',
 'period': 'MATCH (n:Period) RETURN n.name as name',
 'serie': 'MATCH (n:Serie) RETURN n.name as name',
 'subject': 'MATCH (n:Subject) RETURN n.name as name',
 'field': 'MATCH (n:Field) RETURN n.name as name',
 'movement': 'MATCH (n:Movement) RETURN n.name as name',
 'training': 'MATCH (n:Training) RETURN n.name as name',
 'people': 'MATCH (n:People) RETURN n.name as name',
 'user': 'MATCH (n:User) RETURN n.name as name',
 'rel': 'MATCH (n)-[r]-(n2) RETURN DISTINCT toLower(

In [5]:
node_id_mapping_queries = {node.lower(): f"MATCH (n:{node}) RETURN n.name, ID(n) as nodeId" for node in node_types}
node_id_mapping_queries

{'artwork': 'MATCH (n:Artwork) RETURN n.name, ID(n) as nodeId',
 'emotion': 'MATCH (n:Emotion) RETURN n.name, ID(n) as nodeId',
 'media': 'MATCH (n:Media) RETURN n.name, ID(n) as nodeId',
 'tag': 'MATCH (n:Tag) RETURN n.name, ID(n) as nodeId',
 'artist': 'MATCH (n:Artist) RETURN n.name, ID(n) as nodeId',
 'city': 'MATCH (n:City) RETURN n.name, ID(n) as nodeId',
 'country': 'MATCH (n:Country) RETURN n.name, ID(n) as nodeId',
 'gallery': 'MATCH (n:Gallery) RETURN n.name, ID(n) as nodeId',
 'style': 'MATCH (n:Style) RETURN n.name, ID(n) as nodeId',
 'genre': 'MATCH (n:Genre) RETURN n.name, ID(n) as nodeId',
 'period': 'MATCH (n:Period) RETURN n.name, ID(n) as nodeId',
 'serie': 'MATCH (n:Serie) RETURN n.name, ID(n) as nodeId',
 'subject': 'MATCH (n:Subject) RETURN n.name, ID(n) as nodeId',
 'field': 'MATCH (n:Field) RETURN n.name, ID(n) as nodeId',
 'movement': 'MATCH (n:Movement) RETURN n.name, ID(n) as nodeId',
 'training': 'MATCH (n:Training) RETURN n.name, ID(n) as nodeId',
 'people':

In [6]:
with driver.session(database=conf['database']) as session:
    triplets = session.run("MATCH p=(a)-[r]->(b) RETURN DISTINCT labels(a)[0] as source, type(r) as relation,  labels(b)[0] as destination")
    triplets = [(t['source'], t['relation'], t['destination']) for t in triplets]
relation_queries = {str(tuple(map(lambda x:x.lower(),t))): f"MATCH (a:{t[0]})-[r:{t[1]}]->(b:{t[2]}) RETURN a.name as source_name, b.name as dest_name"
                    for t in triplets}
relation_queries

{"('artwork', 'hasgenre', 'genre')": 'MATCH (a:Artwork)-[r:hasGenre]->(b:Genre) RETURN a.name as source_name, b.name as dest_name',
 "('artwork', 'hasstyle', 'style')": 'MATCH (a:Artwork)-[r:hasStyle]->(b:Style) RETURN a.name as source_name, b.name as dest_name',
 "('artwork', 'locatedin', 'gallery')": 'MATCH (a:Artwork)-[r:locatedIn]->(b:Gallery) RETURN a.name as source_name, b.name as dest_name',
 "('artwork', 'locatedin', 'country')": 'MATCH (a:Artwork)-[r:locatedIn]->(b:Country) RETURN a.name as source_name, b.name as dest_name',
 "('artwork', 'locatedin', 'city')": 'MATCH (a:Artwork)-[r:locatedIn]->(b:City) RETURN a.name as source_name, b.name as dest_name',
 "('artwork', 'createdby', 'artist')": 'MATCH (a:Artwork)-[r:createdBy]->(b:Artist) RETURN a.name as source_name, b.name as dest_name',
 "('artwork', 'about', 'tag')": 'MATCH (a:Artwork)-[r:about]->(b:Tag) RETURN a.name as source_name, b.name as dest_name',
 "('artwork', 'madeof', 'media')": 'MATCH (a:Artwork)-[r:madeOf]->(b:M

In [7]:
#here is the change
relation_queries["('artwork', 'elicits', 'emotion')"] = """
match(a:Artwork)-[r]-(e:Emotion)
with a, sum(r.arousal) as sum_arousal, e
with a, max(sum_arousal) as max_arousal
match(a)-[r2]-(e2:Emotion)
with a, sum(r2.arousal) as sum2, e2, max_arousal
where sum2 = max_arousal
return a.name as source_name, collect(e2.name)[0] as dest_name
"""
relation_queries

{"('artwork', 'hasgenre', 'genre')": 'MATCH (a:Artwork)-[r:hasGenre]->(b:Genre) RETURN a.name as source_name, b.name as dest_name',
 "('artwork', 'hasstyle', 'style')": 'MATCH (a:Artwork)-[r:hasStyle]->(b:Style) RETURN a.name as source_name, b.name as dest_name',
 "('artwork', 'locatedin', 'gallery')": 'MATCH (a:Artwork)-[r:locatedIn]->(b:Gallery) RETURN a.name as source_name, b.name as dest_name',
 "('artwork', 'locatedin', 'country')": 'MATCH (a:Artwork)-[r:locatedIn]->(b:Country) RETURN a.name as source_name, b.name as dest_name',
 "('artwork', 'locatedin', 'city')": 'MATCH (a:Artwork)-[r:locatedIn]->(b:City) RETURN a.name as source_name, b.name as dest_name',
 "('artwork', 'createdby', 'artist')": 'MATCH (a:Artwork)-[r:createdBy]->(b:Artist) RETURN a.name as source_name, b.name as dest_name',
 "('artwork', 'about', 'tag')": 'MATCH (a:Artwork)-[r:about]->(b:Tag) RETURN a.name as source_name, b.name as dest_name',
 "('artwork', 'madeof', 'media')": 'MATCH (a:Artwork)-[r:madeOf]->(b:M

In [8]:
# here is the second change, in order to query also the weights

relation_queries["('user', 'rates', 'artwork')"] = 'MATCH (a:User)-[r:rates]->(b:Artwork) RETURN a.name as source_name, b.name as dest_name, r.rating as weight'

In [9]:
stat_queries = {
"node_labels_stats" : "MATCH (n) RETURN distinct labels(n) as node_label, count(*) as count",
"rel_labels_stats" : "MATCH (n)-[r]->(n2) RETURN distinct type(r) as rel_label, count(*) as count",
"triplet-type-list" : "MATCH (x)-[r]->(y) RETURN distinct HEAD(labels(x)) as head, type(r), head(labels(y)) as tail"
}

In [10]:
queries = {
    'mapping': mapping_queries,
    'relations': relation_queries,
    'stats': stat_queries
}

In [11]:
split_paths = {
  'train': os.path.join("split", "train.csv"),
  'valid':os.path.join("split", "valid.csv"),
  'test':os.path.join("split", "test.csv")
}

In [12]:
artgraph = ArtGraphWithSplit(root='artgraph2recsys', conf=conf, queries=queries, split_paths=None)
artgraph.build()
artgraph.write()

Getting mapping...
Getting split
{'name': 'train', '_instances': None, '_attributes': {}}


ValueError: No axis named 1 for object type Series

In [None]:
pd.read_csv('artgraph2recsys/raw/relations/artwork___elicits___emotion/edge.csv', index_col=None, header=None)

In [None]:
pd.read_csv('artgraph2recsys/raw/relations/user___rates___artwork/attributes.csv', index_col=None, header=None)