## NOTEBOOK TO EXTRACT THE RAW FILES OF ARTGRAPH BY QUERYING THE DATABASE

In [1]:
import os

import numpy as np
import pandas as pd
from neo4j import GraphDatabase

### CONNECTION TO DATABASE

In [2]:
# Connection settings come from the environment so real credentials never have to be
# committed with the notebook. Set NEO4J_URI / NEO4J_USER / NEO4J_PASSWORD before starting
# the kernel; when a variable is unset the previous hard-coded value is used as a fallback.
driver = GraphDatabase.driver(
    uri=os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    auth=(
        os.environ.get("NEO4J_USER", "neo4j"),
        os.environ.get("NEO4J_PASSWORD", "neo4j"),
    ),
)

### LOADING ARTEMIS DATASET

In [2]:
# Load the ArtEmis emotion annotations from the CSV in the working directory.
# Later cells read the `painting` column of this frame to match artworks against the graph.
artemis = pd.read_csv('artemis_emotion_all.csv')
artemis

Unnamed: 0,painting,emotion,time
0,a.y.-jackson_algoma-in-november-1935.jpg,awe,1
1,a.y.-jackson_algoma-in-november-1935.jpg,fear,4
2,a.y.-jackson_barns-1926.jpg,amusement,1
3,a.y.-jackson_barns-1926.jpg,awe,2
4,a.y.-jackson_barns-1926.jpg,contentment,2
...,...,...,...
269286,zinaida-serebriakova_young-moroccan-1932.jpg,amusement,1
269287,zinaida-serebriakova_young-moroccan-1932.jpg,awe,1
269288,zinaida-serebriakova_young-moroccan-1932.jpg,contentment,1
269289,zinaida-serebriakova_young-moroccan-1932.jpg,excitement,1


### GETTING ALL ARTWORKS

In [4]:
# Pull the name of every Artwork node; the records are read while the session is still open.
with driver.session(database='test') as session:
    ans = session.run("MATCH (a:Artwork) RETURN a.name")
    result = [row['a.name'] for row in ans]
    # Quick sanity check on the first few names.
    print(result[:10])

['giuseppe-arcimboldo_portrait-of-eve-1578.jpg', 'giuseppe-arcimboldo_portrait-of-adam-1578.jpg', 'giuseppe-arcimboldo_the-vegetable-bowl.jpg', 'giuseppe-arcimboldo_vertumnus-emperor-rudolph-ii.jpg', 'giuseppe-arcimboldo_self-portrait.jpg', 'giuseppe-arcimboldo_the-dinner.jpg', 'frida-kahlo_a-few-small-nips-passionately-in-love-1935.jpg', 'frida-kahlo_coconuts-1951.jpg', 'frida-kahlo_congress-of-peoples-for-peace-1952.jpg', 'frida-kahlo_diego-and-i-1949.jpg']


### INTERSECTION BETWEEN ARTGRAPH AND ARTEMIS

In [5]:
# Distinct painting names annotated in ArtEmis.
artemis_artworks = artemis["painting"].unique().tolist()
# Names present both in the graph and in ArtEmis.
common = set(result) & set(artemis_artworks)
print(len(common))

65236


In [6]:
# Keep only the ArtEmis rows whose painting also exists in the graph.
in_artgraph = artemis["painting"].isin(common)
artgraph_artemis = artemis.loc[in_artgraph]
artgraph_artemis

Unnamed: 0,painting,emotion,time
188,abdullah-suriosubroto_air-terjun.jpg,awe,3
189,abdullah-suriosubroto_air-terjun.jpg,contentment,2
190,abdullah-suriosubroto_bamboo-forest.jpg,awe,2
191,abdullah-suriosubroto_bamboo-forest.jpg,contentment,1
192,abdullah-suriosubroto_bamboo-forest.jpg,excitement,2
...,...,...,...
269286,zinaida-serebriakova_young-moroccan-1932.jpg,amusement,1
269287,zinaida-serebriakova_young-moroccan-1932.jpg,awe,1
269288,zinaida-serebriakova_young-moroccan-1932.jpg,contentment,1
269289,zinaida-serebriakova_young-moroccan-1932.jpg,excitement,1


### CREATING EMOTION EDGES

In [7]:
def _cypher_string(value):
    """Return ``value`` as a double-quoted Cypher string literal.

    Backslashes and double quotes are escaped so that a name containing either
    character cannot end the literal early and corrupt the statement. Values
    without those characters are emitted unchanged between the quotes.
    """
    return '"' + value.replace('\\', '\\\\').replace('"', '\\"') + '"'


def _emotion_edge_statement(row):
    """Build the Cypher statement that links one artwork to one emotion.

    ``row`` is read by position: item 0 is the artwork name, item 1 the emotion
    name and item 2 the value stored in the ``arousal`` property of the edge.
    ``.iloc`` is used because plain integer keys on a label-indexed Series are
    deprecated as positional access in recent pandas releases.
    """
    artwork, emotion, arousal = row.iloc[0], row.iloc[1], row.iloc[2]
    return (
        f"MATCH (a:Artwork{{name:{_cypher_string(artwork)}}})\n"
        f"MERGE (e:Emotion{{name:{_cypher_string(emotion)}}})\n"
        f"CREATE (a)-[r: elicit{{arousal: {arousal!s}}}]->(e)"
    )


# One statement per annotation row: MATCH the artwork, MERGE the emotion node,
# CREATE the `elicit` edge carrying the arousal value.
creation_edges = artgraph_artemis.apply(_emotion_edge_statement, axis=1)
creation_edges

188       MATCH (a:Artwork{name:"abdullah-suriosubroto_a...
189       MATCH (a:Artwork{name:"abdullah-suriosubroto_a...
190       MATCH (a:Artwork{name:"abdullah-suriosubroto_b...
191       MATCH (a:Artwork{name:"abdullah-suriosubroto_b...
192       MATCH (a:Artwork{name:"abdullah-suriosubroto_b...
                                ...                        
269286    MATCH (a:Artwork{name:"zinaida-serebriakova_yo...
269287    MATCH (a:Artwork{name:"zinaida-serebriakova_yo...
269288    MATCH (a:Artwork{name:"zinaida-serebriakova_yo...
269289    MATCH (a:Artwork{name:"zinaida-serebriakova_yo...
269290    MATCH (a:Artwork{name:"zinaida-serebriakova_yo...
Length: 217131, dtype: object

### SAVING TO FILE

In [1]:
# Safety switch for the two cells below: they only write `emotion.cypher` and run the
# generated statements against the database when this flag is True.
save = False

In [9]:
if save:
    # Explicit UTF-8 keeps the dump independent of the platform's default encoding.
    with open('emotion.cypher', 'w', encoding='utf-8') as file:
        # Each statement is followed by a blank line, exactly as print(..., end='\n\n') did.
        file.writelines(f"{statement}\n\n" for statement in creation_edges)

### SAVING TO DB (requires 6h)

In [12]:
if save:
    with driver.session(database='test') as session:
        # Plain loop instead of Series.map: the statements are run purely for their side
        # effects, so there is no point in building a Series of result objects.
        for statement in creation_edges:
            # consume() drains the result so a failing statement raises here,
            # at the statement that caused it.
            session.run(statement).consume()

### EXTRACTING THE RAW FILES

In [3]:
# Collect the first label of every node to learn which node types exist.
with driver.session(database='test') as session:  # put the database name here (not the project name)
    label_records = session.run("MATCH(n) RETURN  DISTINCT labels(n)[0] as typen")
    node_types = [row['typen'] for row in label_records]

# One name-listing query per node type, keyed by the lower-cased label.
mapping_queries = dict(
    (label.lower(), f"MATCH (n:{label}) RETURN n.name as name")
    for label in node_types
)
# Extra entry listing the distinct, lower-cased relationship types.
mapping_queries['rel'] = "MATCH (n)-[r]-(n2) RETURN DISTINCT toLower(type(r)) as rel_label"
mapping_queries

{'genre': 'MATCH (n:Genre) RETURN n.name as name',
 'style': 'MATCH (n:Style) RETURN n.name as name',
 'artist': 'MATCH (n:Artist) RETURN n.name as name',
 'media': 'MATCH (n:Media) RETURN n.name as name',
 'tag': 'MATCH (n:Tag) RETURN n.name as name',
 'artwork': 'MATCH (n:Artwork) RETURN n.name as name',
 'movement': 'MATCH (n:Movement) RETURN n.name as name',
 'training': 'MATCH (n:Training) RETURN n.name as name',
 'serie': 'MATCH (n:Serie) RETURN n.name as name',
 'subject': 'MATCH (n:Subject) RETURN n.name as name',
 'field': 'MATCH (n:Field) RETURN n.name as name',
 'people': 'MATCH (n:People) RETURN n.name as name',
 'period': 'MATCH (n:Period) RETURN n.name as name',
 'gallery': 'MATCH (n:Gallery) RETURN n.name as name',
 'city': 'MATCH (n:City) RETURN n.name as name',
 'country': 'MATCH (n:Country) RETURN n.name as name',
 'emotion': 'MATCH (n:Emotion) RETURN n.name as name',
 'rel': 'MATCH (n)-[r]-(n2) RETURN DISTINCT toLower(type(r)) as rel_label'}

In [4]:
# Per node type, a query returning each node's name together with its internal id.
node_id_mapping_queries = dict(
    (label.lower(), f"MATCH (n:{label}) RETURN n.name, ID(n) as nodeId")
    for label in node_types
)
node_id_mapping_queries

{'genre': 'MATCH (n:Genre) RETURN n.name, ID(n) as nodeId',
 'style': 'MATCH (n:Style) RETURN n.name, ID(n) as nodeId',
 'artist': 'MATCH (n:Artist) RETURN n.name, ID(n) as nodeId',
 'media': 'MATCH (n:Media) RETURN n.name, ID(n) as nodeId',
 'tag': 'MATCH (n:Tag) RETURN n.name, ID(n) as nodeId',
 'artwork': 'MATCH (n:Artwork) RETURN n.name, ID(n) as nodeId',
 'movement': 'MATCH (n:Movement) RETURN n.name, ID(n) as nodeId',
 'training': 'MATCH (n:Training) RETURN n.name, ID(n) as nodeId',
 'serie': 'MATCH (n:Serie) RETURN n.name, ID(n) as nodeId',
 'subject': 'MATCH (n:Subject) RETURN n.name, ID(n) as nodeId',
 'field': 'MATCH (n:Field) RETURN n.name, ID(n) as nodeId',
 'people': 'MATCH (n:People) RETURN n.name, ID(n) as nodeId',
 'period': 'MATCH (n:Period) RETURN n.name, ID(n) as nodeId',
 'gallery': 'MATCH (n:Gallery) RETURN n.name, ID(n) as nodeId',
 'city': 'MATCH (n:City) RETURN n.name, ID(n) as nodeId',
 'country': 'MATCH (n:Country) RETURN n.name, ID(n) as nodeId',
 'emotion': 

In [5]:
# Discover every (source label, relationship type, destination label) combination in the graph.
with driver.session(database='test') as session:
    triplet_records = session.run("MATCH p=(a)-[r]->(b) RETURN DISTINCT labels(a)[0] as source, type(r) as relation,  labels(b)[0] as destination")
    triplets = [(rec['source'], rec['relation'], rec['destination']) for rec in triplet_records]

# Key: string form of the lower-cased triplet. Value: query listing the (source, destination)
# name pairs connected by that relationship.
relation_queries = dict(
    (
        str((src.lower(), rel.lower(), dst.lower())),
        f"MATCH (a:{src})-[r:{rel}]->(b:{dst}) RETURN a.name as source_name, b.name as dest_name",
    )
    for src, rel, dst in triplets
)
relation_queries

{"('artist', 'belongstomovement', 'movement')": 'MATCH (a:Artist)-[r:belongsToMovement]->(b:Movement) RETURN a.name as source_name, b.name as dest_name',
 "('artist', 'hassubject', 'subject')": 'MATCH (a:Artist)-[r:hasSubject]->(b:Subject) RETURN a.name as source_name, b.name as dest_name',
 "('artist', 'relatedtoschool', 'training')": 'MATCH (a:Artist)-[r:relatedToSchool]->(b:Training) RETURN a.name as source_name, b.name as dest_name',
 "('artist', 'belongstofield', 'field')": 'MATCH (a:Artist)-[r:belongsToField]->(b:Field) RETURN a.name as source_name, b.name as dest_name',
 "('artist', 'trainedby', 'artist')": 'MATCH (a:Artist)-[r:trainedBy]->(b:Artist) RETURN a.name as source_name, b.name as dest_name',
 "('artist', 'haspatron', 'people')": 'MATCH (a:Artist)-[r:hasPatron]->(b:People) RETURN a.name as source_name, b.name as dest_name',
 "('artwork', 'madeof', 'media')": 'MATCH (a:Artwork)-[r:madeOf]->(b:Media) RETURN a.name as source_name, b.name as dest_name',
 "('artwork', 'about

# Override the artwork -> emotion query so that each artwork is paired with a single emotion:
# one whose `elicit` edge carries the highest arousal for that artwork.
# NOTE(review): the following cell assigns this same key again, so this query is superseded
# before it is ever used.
relation_queries["('artwork', 'elicits', 'emotion')"] = "MATCH (a:Artwork)-[r1:elicit]->(e1:Emotion) with a, max(r1.arousal) as max_arousal match(a)-[r2:elicit]->(e2:Emotion) where r2.arousal = max_arousal RETURN a.name as source_name, collect(e2.name)[0] as dest_name"

In [None]:
# Final artwork -> emotion query. For every artwork it sums the arousal per emotion, takes the
# largest of those sums, and returns one emotion whose sum equals that maximum.
# `collect(e2.name)[0]` keeps a single emotion when several are tied; the query has no ORDER BY,
# so which tied emotion is kept is not pinned down by the query itself.
# NOTE(review): the generated keys of `relation_queries` are lower-cased relationship types and
# the edges built earlier in this notebook use the type `elicit`, whereas this key says
# 'elicits'. This assignment may therefore add a new entry next to the generated one instead of
# replacing it -- confirm which behaviour is intended.
relation_queries["('artwork', 'elicits', 'emotion')"] = """
match(a:Artwork)-[r]-(e:Emotion)
with a, sum(r.arousal) as sum_arousal, e
with a, max(sum_arousal) as max_arousal
match(a)-[r2]-(e2:Emotion)
with a, sum(r2.arousal) as sum2, e2, max_arousal
where sum2 = max_arousal
return a.name as source_name, collect(e2.name)[0] as dest_name
"""

In [9]:
relation_queries

{"('artist', 'belongstomovement', 'movement')": 'MATCH (a:Artist)-[r:belongsToMovement]->(b:Movement) RETURN a.name as source_name, b.name as dest_name',
 "('artist', 'hassubject', 'subject')": 'MATCH (a:Artist)-[r:hasSubject]->(b:Subject) RETURN a.name as source_name, b.name as dest_name',
 "('artist', 'relatedtoschool', 'training')": 'MATCH (a:Artist)-[r:relatedToSchool]->(b:Training) RETURN a.name as source_name, b.name as dest_name',
 "('artist', 'belongstofield', 'field')": 'MATCH (a:Artist)-[r:belongsToField]->(b:Field) RETURN a.name as source_name, b.name as dest_name',
 "('artist', 'trainedby', 'artist')": 'MATCH (a:Artist)-[r:trainedBy]->(b:Artist) RETURN a.name as source_name, b.name as dest_name',
 "('artist', 'haspatron', 'people')": 'MATCH (a:Artist)-[r:hasPatron]->(b:People) RETURN a.name as source_name, b.name as dest_name',
 "('artwork', 'madeof', 'media')": 'MATCH (a:Artwork)-[r:madeOf]->(b:Media) RETURN a.name as source_name, b.name as dest_name',
 "('artwork', 'about

In [10]:
# Descriptive statistics queries. The original author marked these as "useless", but the
# dictionary is still passed on under the 'stats' key of `queries`.
stat_queries = {
# number of nodes per label list
"node_labels_stats" : "MATCH (n) RETURN distinct labels(n) as node_label, count(*) as count",
# number of directed relationships per relationship type
"rel_labels_stats" : "MATCH (n)-[r]->(n2) RETURN distinct type(r) as rel_label, count(*) as count",
# distinct (first source label, relationship type, first destination label) combinations
"triplet-type-list" : "MATCH (x)-[r]->(y) RETURN distinct HEAD(labels(x)) as head, type(r), head(labels(y)) as tail"
}

In [11]:
from raw_generation import ArtGraphWithSplit

# Connection settings handed to ArtGraphWithSplit. They are read from the environment
# (NEO4J_URI / NEO4J_USER / NEO4J_PASSWORD) so real credentials are not stored in the
# notebook; when a variable is unset the previous hard-coded value is used as a fallback.
conf = {
    "uri": os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    "username": os.environ.get("NEO4J_USER", "neo4j"),
    "password": os.environ.get("NEO4J_PASSWORD", "neo4j"),
    "database": "test"
}

In [12]:
# Query groups handed to ArtGraphWithSplit, one entry per kind of query.
queries = dict(
    mapping=mapping_queries,
    relations=relation_queries,
    stats=stat_queries,
)

# No labels are supplied for this extraction.
labels = list()

In [13]:
import os

# Relative location of the CSV file of each split: split/<name>.csv
split_paths = {
    split_name: os.path.join("split", f"{split_name}.csv")
    for split_name in ("train", "valid", "test")
}

In [14]:
# Configure the extractor with the connection settings, the query groups, the labels and the
# split files, then build the dataset and write it under the 'artgraph2bestemotions' root.
artgraph = ArtGraphWithSplit(
    root='artgraph2bestemotions',
    conf=conf,
    queries=queries,
    labels=labels,
    split_paths=split_paths,
)
artgraph.build()
artgraph.write()

Getting mapping...
Getting split...
Getting relations...
             source_name                   dest_name
0             john-crome  Norwich School of painters
1        christo-coetzee        Neo-Baroque painting
2               otto-dix             New Objectivity
3        christian-schad             New Objectivity
4         adolf-dietrich             New Objectivity
...                  ...                         ...
1051    viktor-vasnetsov            History painting
1052  elizabeth-thompson            History painting
1053         jan-matejko            History painting
1054          paul-brach    Minimalism (visual arts)
1055        frank-stella    Minimalism (visual arts)

[1056 rows x 2 columns]
                     source_name  \
0               jeremy-henderson   
1               jeremy-henderson   
2               jeremy-henderson   
3      christian-ludwig-attersee   
4      christian-ludwig-attersee   
...                          ...   
21049           alexander-ivan

                                              source_name         dest_name
0                        antonio-lopez_dead-girl-1957.jpg     antonio-lopez
1                       antonio-lopez_artists-working.jpg     antonio-lopez
2             antonio-lopez_sinforoso-and-josefa-1955.jpg     antonio-lopez
3                anatoli-papian_portrait-of-anna-1983.jpg    anatoli-papian
4           hans-richter_portrait-of-dora-rukser-1927.jpg      hans-richter
...                                                   ...               ...
116470   alexander-ivanov_portrait-of-vittoria-marini.jpg  alexander-ivanov
116471                   alexander-ivanov_olive-trees.jpg  alexander-ivanov
116472             alexander-ivanov_annunciation-1824.jpg  alexander-ivanov
116473  alexander-ivanov_on-the-shore-of-the-bay-of-na...  alexander-ivanov
116474  alexander-ivanov_joseph-interpreting-dreams-to...  alexander-ivanov

[116475 rows x 2 columns]
                                            source_name  \
0 

INFO:root:Saving on the disk all nodes
INFO:root:Saving on the disk node mapping genre in path artgraph2bestemotions\mapping\genre_entidx2name.csv
INFO:root:Saving on the disk node mapping style in path artgraph2bestemotions\mapping\style_entidx2name.csv
INFO:root:Saving on the disk node mapping artist in path artgraph2bestemotions\mapping\artist_entidx2name.csv
INFO:root:Saving on the disk node mapping media in path artgraph2bestemotions\mapping\media_entidx2name.csv
INFO:root:Saving on the disk node mapping tag in path artgraph2bestemotions\mapping\tag_entidx2name.csv
INFO:root:Saving on the disk node mapping artwork in path artgraph2bestemotions\mapping\artwork_entidx2name.csv


                                      source_name      dest_name
0                          George Krevsky Gallery  San Francisco
1                                 Legion of Honor  San Francisco
2              Fine Arts Museums of San Francisco  San Francisco
3     San Francisco Museum of Modern Art (SFMOMA)  San Francisco
4                          Parma National Gallery          Parma
...                                           ...            ...
1104                           San Pietro Martire         Murano
1105        Novosibirsk State Museum of Fine Arts    Novosibirsk
1106           National Gallery of Victoria (NGV)      Melbourne
1107                    Tver Regional Art Gallery           Tver
1108                        Château de Versailles     Versailles

[1109 rows x 2 columns]
         source_name    dest_name
0             Skagen      Denmark
1          Humlebaek      Denmark
2         Copenhagen      Denmark
3            Randers      Denmark
4          Blackburn     

INFO:root:Saving on the disk node mapping movement in path artgraph2bestemotions\mapping\movement_entidx2name.csv
INFO:root:Saving on the disk node mapping training in path artgraph2bestemotions\mapping\training_entidx2name.csv
INFO:root:Saving on the disk node mapping serie in path artgraph2bestemotions\mapping\serie_entidx2name.csv
INFO:root:Saving on the disk node mapping subject in path artgraph2bestemotions\mapping\subject_entidx2name.csv
INFO:root:Saving on the disk node mapping field in path artgraph2bestemotions\mapping\field_entidx2name.csv
INFO:root:Saving on the disk node mapping people in path artgraph2bestemotions\mapping\people_entidx2name.csv
INFO:root:Saving on the disk node mapping period in path artgraph2bestemotions\mapping\period_entidx2name.csv
INFO:root:Saving on the disk node mapping gallery in path artgraph2bestemotions\mapping\gallery_entidx2name.csv
INFO:root:Saving on the disk node mapping city in path artgraph2bestemotions\mapping\city_entidx2name.csv
INFO:r

Writing labels...
Writing info...
Writing split...


INFO:root:Saving on the disk node mapping valid in path artgraph2bestemotions\raw\split\artwork\valid.csv
