In [1]:
import pandas as pd
import numpy as np
from neo4j import GraphDatabase
from raw_generation_nosplit import ArtGraphNoSplit
from raw_generation import ArtGraphWithSplit
import os

In [2]:
# Connection settings for the Neo4j instance.
# Each value can be overridden through an environment variable so that real
# credentials never have to be committed with the notebook; the fallbacks are
# the values this notebook originally hard-coded.
conf = {
    "uri": os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    "username": os.environ.get("NEO4J_USERNAME", "neo4j"),
    "password": os.environ.get("NEO4J_PASSWORD", "neo4j"),
    "database": os.environ.get("NEO4J_DATABASE", "neo4j"),
}

In [3]:
# Open the Neo4j driver using the settings from `conf` (URI and credentials),
# so that editing `conf` is the single place to change the connection target.
driver = GraphDatabase.driver(uri=conf['uri'], auth=(conf['username'], conf['password']))

In [4]:
# Collect the first label of every node in the configured database
# (use the database name here, not the project name).
with driver.session(database=conf['database']) as session:
    label_records = session.run("MATCH(n) RETURN  DISTINCT labels(n)[0] as typen")
    # Materialise the records while the session is still open.
    node_types = [record['typen'] for record in label_records]

# One name-listing query per node label, keyed by the lower-cased label.
mapping_queries = {}
for label in node_types:
    mapping_queries[label.lower()] = f"MATCH (n:{label}) RETURN n.name as name"

# Extra entry listing the distinct (lower-cased) relationship types.
mapping_queries['rel'] = "MATCH (n)-[r]-(n2) RETURN DISTINCT toLower(type(r)) as rel_label"
mapping_queries

{'genre': 'MATCH (n:Genre) RETURN n.name as name',
 'style': 'MATCH (n:Style) RETURN n.name as name',
 'artist': 'MATCH (n:Artist) RETURN n.name as name',
 'media': 'MATCH (n:Media) RETURN n.name as name',
 'tag': 'MATCH (n:Tag) RETURN n.name as name',
 'artwork': 'MATCH (n:Artwork) RETURN n.name as name',
 'movement': 'MATCH (n:Movement) RETURN n.name as name',
 'training': 'MATCH (n:Training) RETURN n.name as name',
 'serie': 'MATCH (n:Serie) RETURN n.name as name',
 'subject': 'MATCH (n:Subject) RETURN n.name as name',
 'field': 'MATCH (n:Field) RETURN n.name as name',
 'people': 'MATCH (n:People) RETURN n.name as name',
 'period': 'MATCH (n:Period) RETURN n.name as name',
 'gallery': 'MATCH (n:Gallery) RETURN n.name as name',
 'city': 'MATCH (n:City) RETURN n.name as name',
 'country': 'MATCH (n:Country) RETURN n.name as name',
 'emotion': 'MATCH (n:Emotion) RETURN n.name as name',
 'user': 'MATCH (n:User) RETURN n.name as name',
 'rel': 'MATCH (n)-[r]-(n2) RETURN DISTINCT toLower(

In [5]:
# For every node label, a query returning each node's name together with its
# internal Neo4j id, keyed by the lower-cased label.
node_id_mapping_queries = {}
for label in node_types:
    node_id_mapping_queries[label.lower()] = f"MATCH (n:{label}) RETURN n.name, ID(n) as nodeId"
node_id_mapping_queries

{'genre': 'MATCH (n:Genre) RETURN n.name, ID(n) as nodeId',
 'style': 'MATCH (n:Style) RETURN n.name, ID(n) as nodeId',
 'artist': 'MATCH (n:Artist) RETURN n.name, ID(n) as nodeId',
 'media': 'MATCH (n:Media) RETURN n.name, ID(n) as nodeId',
 'tag': 'MATCH (n:Tag) RETURN n.name, ID(n) as nodeId',
 'artwork': 'MATCH (n:Artwork) RETURN n.name, ID(n) as nodeId',
 'movement': 'MATCH (n:Movement) RETURN n.name, ID(n) as nodeId',
 'training': 'MATCH (n:Training) RETURN n.name, ID(n) as nodeId',
 'serie': 'MATCH (n:Serie) RETURN n.name, ID(n) as nodeId',
 'subject': 'MATCH (n:Subject) RETURN n.name, ID(n) as nodeId',
 'field': 'MATCH (n:Field) RETURN n.name, ID(n) as nodeId',
 'people': 'MATCH (n:People) RETURN n.name, ID(n) as nodeId',
 'period': 'MATCH (n:Period) RETURN n.name, ID(n) as nodeId',
 'gallery': 'MATCH (n:Gallery) RETURN n.name, ID(n) as nodeId',
 'city': 'MATCH (n:City) RETURN n.name, ID(n) as nodeId',
 'country': 'MATCH (n:Country) RETURN n.name, ID(n) as nodeId',
 'emotion': 

In [6]:
# Discover every distinct (source label, relationship type, destination label)
# combination present in the graph.
with driver.session(database=conf['database']) as session:
    triplet_records = session.run("MATCH p=(a)-[r]->(b) RETURN DISTINCT labels(a)[0] as source, type(r) as relation,  labels(b)[0] as destination")
    triplets = [(row['source'], row['relation'], row['destination']) for row in triplet_records]

# One edge-listing query per triplet. The key is the string form of the
# lower-cased triplet, e.g. "('artist', 'trainedby', 'artist')".
relation_queries = {}
for src_label, rel_type, dst_label in triplets:
    triplet_key = str((src_label.lower(), rel_type.lower(), dst_label.lower()))
    relation_queries[triplet_key] = f"MATCH (a:{src_label})-[r:{rel_type}]->(b:{dst_label}) RETURN a.name as source_name, b.name as dest_name"
relation_queries

{"('artist', 'belongstomovement', 'movement')": 'MATCH (a:Artist)-[r:belongsToMovement]->(b:Movement) RETURN a.name as source_name, b.name as dest_name',
 "('artist', 'hassubject', 'subject')": 'MATCH (a:Artist)-[r:hasSubject]->(b:Subject) RETURN a.name as source_name, b.name as dest_name',
 "('artist', 'relatedtoschool', 'training')": 'MATCH (a:Artist)-[r:relatedToSchool]->(b:Training) RETURN a.name as source_name, b.name as dest_name',
 "('artist', 'belongstofield', 'field')": 'MATCH (a:Artist)-[r:belongsToField]->(b:Field) RETURN a.name as source_name, b.name as dest_name',
 "('artist', 'trainedby', 'artist')": 'MATCH (a:Artist)-[r:trainedBy]->(b:Artist) RETURN a.name as source_name, b.name as dest_name',
 "('artist', 'haspatron', 'people')": 'MATCH (a:Artist)-[r:hasPatron]->(b:People) RETURN a.name as source_name, b.name as dest_name',
 "('artwork', 'madeof', 'media')": 'MATCH (a:Artwork)-[r:madeOf]->(b:Media) RETURN a.name as source_name, b.name as dest_name',
 "('artwork', 'about

In [7]:
# First change: replace the auto-generated artwork -> emotion query.
# The query below works in two passes:
#   1. per artwork, sum `arousal` over its relationships to each Emotion and
#      keep the largest of those sums;
#   2. match the emotions again, keep those whose summed arousal equals that
#      maximum, and return a single emotion name per artwork (the first one
#      collected when several tie).
dominant_emotion_query = """
match(a:Artwork)-[r]-(e:Emotion)
with a, sum(r.arousal) as sum_arousal, e
with a, max(sum_arousal) as max_arousal
match(a)-[r2]-(e2:Emotion)
with a, sum(r2.arousal) as sum2, e2, max_arousal
where sum2 = max_arousal
return a.name as source_name, collect(e2.name)[0] as dest_name
"""
relation_queries["('artwork', 'elicits', 'emotion')"] = dominant_emotion_query
relation_queries

{"('artist', 'belongstomovement', 'movement')": 'MATCH (a:Artist)-[r:belongsToMovement]->(b:Movement) RETURN a.name as source_name, b.name as dest_name',
 "('artist', 'hassubject', 'subject')": 'MATCH (a:Artist)-[r:hasSubject]->(b:Subject) RETURN a.name as source_name, b.name as dest_name',
 "('artist', 'relatedtoschool', 'training')": 'MATCH (a:Artist)-[r:relatedToSchool]->(b:Training) RETURN a.name as source_name, b.name as dest_name',
 "('artist', 'belongstofield', 'field')": 'MATCH (a:Artist)-[r:belongsToField]->(b:Field) RETURN a.name as source_name, b.name as dest_name',
 "('artist', 'trainedby', 'artist')": 'MATCH (a:Artist)-[r:trainedBy]->(b:Artist) RETURN a.name as source_name, b.name as dest_name',
 "('artist', 'haspatron', 'people')": 'MATCH (a:Artist)-[r:hasPatron]->(b:People) RETURN a.name as source_name, b.name as dest_name',
 "('artwork', 'madeof', 'media')": 'MATCH (a:Artwork)-[r:madeOf]->(b:Media) RETURN a.name as source_name, b.name as dest_name',
 "('artwork', 'about

In [8]:
# Second change: the user -> artwork query additionally returns the
# relationship's `rating` property as a `weight` column.
rates_key = "('user', 'rates', 'artwork')"
rates_query = 'MATCH (a:User)-[r:rates]->(b:Artwork) RETURN a.name as source_name, b.name as dest_name, r.rating as weight'
relation_queries[rates_key] = rates_query

In [9]:
# Summary queries about the graph as a whole.
stat_queries = {}
# Number of nodes per label set.
stat_queries["node_labels_stats"] = "MATCH (n) RETURN distinct labels(n) as node_label, count(*) as count"
# Number of directed relationships per relationship type.
stat_queries["rel_labels_stats"] = "MATCH (n)-[r]->(n2) RETURN distinct type(r) as rel_label, count(*) as count"
# Distinct (head label, relationship type, tail label) combinations.
stat_queries["triplet-type-list"] = "MATCH (x)-[r]->(y) RETURN distinct HEAD(labels(x)) as head, type(r), head(labels(y)) as tail"

In [10]:
# Bundle the three query groups under the keys passed on to the graph builder.
queries = dict(
    mapping=mapping_queries,
    relations=relation_queries,
    stats=stat_queries,
)

In [11]:
# Location of the CSV file for each split, all inside the "split" directory.
split_paths = {
    split_name: os.path.join("split", f"{split_name}.csv")
    for split_name in ("train", "valid", "test")
}

In [12]:
# Build the dataset under the 'artgraph2recsys' root from the connection
# settings, the query bundle and the split files, then write it out.
# NOTE(review): the exact work done by build()/write() lives in
# raw_generation.ArtGraphWithSplit, which is not shown in this notebook.
artgraph = ArtGraphWithSplit(
    root='artgraph2recsys',
    conf=conf,
    queries=queries,
    split_paths=split_paths,
)
artgraph.build()
artgraph.write()

Getting mapping...
Getting split...
Getting relations...


INFO:root:Saving on the disk all nodes
INFO:root:Saving on the disk node mapping genre in path artgraph2recsys\mapping\genre_entidx2name.csv
INFO:root:Saving on the disk node mapping style in path artgraph2recsys\mapping\style_entidx2name.csv
INFO:root:Saving on the disk node mapping artist in path artgraph2recsys\mapping\artist_entidx2name.csv
INFO:root:Saving on the disk node mapping media in path artgraph2recsys\mapping\media_entidx2name.csv


Getting labels
Writing mapping...


INFO:root:Saving on the disk node mapping tag in path artgraph2recsys\mapping\tag_entidx2name.csv
INFO:root:Saving on the disk node mapping artwork in path artgraph2recsys\mapping\artwork_entidx2name.csv
INFO:root:Saving on the disk node mapping movement in path artgraph2recsys\mapping\movement_entidx2name.csv
INFO:root:Saving on the disk node mapping training in path artgraph2recsys\mapping\training_entidx2name.csv
INFO:root:Saving on the disk node mapping serie in path artgraph2recsys\mapping\serie_entidx2name.csv
INFO:root:Saving on the disk node mapping subject in path artgraph2recsys\mapping\subject_entidx2name.csv
INFO:root:Saving on the disk node mapping field in path artgraph2recsys\mapping\field_entidx2name.csv
INFO:root:Saving on the disk node mapping people in path artgraph2recsys\mapping\people_entidx2name.csv
INFO:root:Saving on the disk node mapping period in path artgraph2recsys\mapping\period_entidx2name.csv
INFO:root:Saving on the disk node mapping gallery in path artg

Writing labels...
Writing info...
Writing split...


In [14]:
# Inspect the edge list written for the artwork -> emotion relation
# (the file has no header row, so columns are numbered).
elicits_edge_csv = 'artgraph2recsys/raw/relations/artwork___elicits___emotion/edge.csv'
pd.read_csv(elicits_edge_csv, index_col=None, header=None)

Unnamed: 0,0,1
0,41688,0
1,3455,2
2,34851,2
3,31840,1
4,9086,1
...,...,...
65231,5333,8
65232,58425,8
65233,64208,8
65234,45083,8


In [15]:
# Inspect the attribute file written for the user -> artwork relation
# (presumably the rating weights requested by the query override; confirm
# against ArtGraphWithSplit). The file has no header row.
rates_attributes_csv = 'artgraph2recsys/raw/relations/user___rates___artwork/attributes.csv'
pd.read_csv(rates_attributes_csv, index_col=None, header=None)

Unnamed: 0,0
0,1
1,1
2,1
3,1
4,1
5,1
