In [1]:
import os

import pandas as pd
from neo4j import GraphDatabase
from tqdm import tqdm

### SETTING DB DRIVER

In [2]:
# Connection settings are read from the environment when available so that real
# credentials never have to be committed with the notebook; the fallbacks are
# the values this notebook originally hard-coded.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "neo4j")

driver = GraphDatabase.driver(uri=NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

In [3]:
# Load the captioning dataset from the CSV file and display it.
captions_csv = 'captioning_stratified_dataset.csv'
dataset = pd.read_csv(captions_csv)
dataset

Unnamed: 0,artwork_name,artwork,author,style,caption_0,caption_1,caption_2
0,felix-vallotton_jug-with-yellow-primroses-1915...,Jug with yellow daffodils,Felix Vallotton,magic realism,"""Jug with Yellow Daffodils"" is a painting crea...","""Jug with Yellow Daffodils"" is an oil painting...",Jug with Yellow Daffodils is a painting by Swi...
1,claude-monet_the-valley-of-falaise.jpg,The Valley Of Falaise,Claude Monet,impressionism,The Valley Of Falaise is a painting by the fam...,The Valley Of Falaise is an oil painting by Fr...,The Valley of Falaise is an oil on canvas land...
2,otto-dix_longing-self-portrait.jpg,"Longing, Self-Portrait",Otto Dix,expressionism,"Longing, Self-Portrait is a painting by German...","""Longing, Self-Portrait"" by Otto Dix is an oil...","Otto Dix’s painting Longing, Self-Portrait, is..."
3,koloman-moser_fabric-design-with-trout-dance-f...,Fabric design with trout dance for Backhausen,Koloman Moser,art nouveau (modern),Koloman Moser's Fabric Design with Trout Dance...,Koloman Moser's Fabric design with trout dance...,"This painting, titled “Fabric Design with Trou..."
4,maerten-van-heemskerck_calvary-1543.jpg,Calvary,Maerten van Heemskerck,mannerism (late renaissance),Calvary is a painting by the Dutch artist Maer...,Calvary is a painting made by Maerten van Heem...,"Maerten van Heemskerck's painting, Calvary, wa..."
...,...,...,...,...,...,...,...
995,frans-hals_portrait-of-a-young-man-1648.jpg,Portrait of a Young Man,Frans Hals,baroque,Portrait of a Young Man by Frans Hals is an oi...,Portrait of a Young Man is an oil painting on ...,"The painting ""Portrait of a Young Man"" is an o..."
996,paul-wunderlich_two-torsos-plakat.jpg,Two Torsos - Plakat,Paul Wunderlich,surrealism,Two Torsos - Plakat is a painting by German ar...,Two Torsos - Plakat is an abstract painting cr...,Two Torsos - Plakat is an abstract painting cr...
997,leo-schnug_presentation-of-a-young-woman-to-an...,Presentation of a young woman to an old bourgeois,Léo Schnug,art nouveau (modern),Presentation of a Young Woman to an Old Bourge...,Presentation of a Young Woman to an Old Bourge...,Presentation of a Young Woman to an Old Bourge...
998,roger-weik_red-pour-2016-2016.jpg,&quot;Red Pour&quot; 2016,Roger Weik,abstract expressionism,"""Red Pour"" is a painting by the artist Roger W...",Red Pour is a 2016 painting by the American ab...,"""Red Pour"" is a 2016 painting by American arti..."


In [4]:
def get_string_captions(raw, n_captions = 3):
    """Collect the ``caption_<i>`` entries of ``raw`` into a list.

    Parameters
    ----------
    raw : mapping-like
        Object indexable by the keys ``caption_0`` ... ``caption_{n_captions - 1}``.
    n_captions : int
        Number of caption entries to read, starting from index 0.

    Returns
    -------
    list
        The caption values in increasing index order.
    """
    collected = []
    for idx in range(n_captions):
        collected.append(raw[f'caption_{idx}'])
    return collected

In [5]:
def write_captions_to_db(raw, driver, db, n_captions=3):
    """Store the captions of one dataset row on the matching ``Artwork`` node.

    The node is looked up by ``name`` equal to ``raw['artwork_name']`` and its
    ``captions`` property is set to the list built by ``get_string_captions``.
    Both values are sent as query parameters instead of being interpolated into
    the Cypher text, so names or captions containing quotes or backslashes can
    neither break nor alter the statement.

    Parameters
    ----------
    raw : mapping-like
        Row providing ``artwork_name`` and the ``caption_<i>`` entries.
    driver :
        Object exposing ``session(database=...)`` usable as a context manager.
    db : str
        Name of the database the session is opened on.
    n_captions : int
        Number of captions to read from ``raw``.

    Returns
    -------
    None
    """
    query = """
    match (a:Artwork {name: $name})
    set a += {captions: $captions}
    """
    parameters = {
        'name': raw['artwork_name'],
        'captions': list(get_string_captions(raw, n_captions)),
    }
    with driver.session(database=db) as session:
        session.run(query, parameters)

In [None]:
# Write the captions of every dataset row onto its Artwork node.
# An explicit loop is used because the call is made only for its side effect;
# `DataFrame.apply` would additionally build (and display) a Series of `None`.
for _, row in tqdm(dataset.iterrows(), total=len(dataset)):
    write_captions_to_db(row, driver, db='neo4j')

### CHECK DATA

In [6]:
# Count the Artwork nodes that carry a `captions` property.
with driver.session(database='neo4j') as session:
    count_result = session.run("match (a:Artwork) where a.captions is not null return count(distinct a)")
    first_record = next(iter(count_result))
    print(first_record)

<Record count(distinct a)=1000>


In [7]:
# List every distinct caption-list length found on captioned Artwork nodes.
# All rows are collected (not only the first one) so that a node whose list
# length differs from the others shows up in the output.
with driver.session(database='neo4j') as session:
    size_result = session.run("match (a:Artwork) where a.captions is not null return distinct size(a.captions)")
    print([record[0] for record in size_result])

<Record size(a.captions)=3>


# MIGRATE DATA TO ANOTHER DB.
**N.B.** The target database (`captioning` in the cells below) must already exist in the same Neo4j **DBMS** as the source database.

### GET ARTWORKS INFORMATION

In [None]:
# Fetch every one-hop path that starts at a captioned Artwork node.
with driver.session(database='neo4j') as session:
    artwork_result = session.run("""match p = (a:Artwork)-[]->(n) where a.captions is not null
                                   return relationships(p) as rels, nodes(p) as nodes""")
    ans = [record for record in artwork_result]
    # Each path has a single hop, so its only relationship sits at index 0.
    rels = [record['rels'][0] for record in ans]
print(rels[0])

In [9]:
def _cypher_literal(value):
    """Render a Python value as the text of a Cypher literal."""
    if isinstance(value, str):
        # Backslashes first, otherwise the ones added for the quotes would be doubled.
        escaped = value.replace('\\', '\\\\').replace('"', '\\"')
        return f'"{escaped}"'
    if isinstance(value, bool):
        return 'true' if value else 'false'
    if value is None:
        return 'null'
    if isinstance(value, (list, tuple)):
        return '[' + ', '.join(_cypher_literal(item) for item in value) + ']'
    return f"{value}"


def get_properties(n):
    """Format a property mapping as the inside of a Cypher map literal.

    Produces ``key: value`` pairs joined by ``", "`` (without the surrounding
    braces). String values are double-quoted with embedded backslashes and
    double quotes escaped, so they cannot terminate the literal early; lists
    and tuples are rendered element by element; every other value is inserted
    using its default string formatting.

    Parameters
    ----------
    n : mapping-like
        Object exposing ``keys()`` and item access.

    Returns
    -------
    str
        The formatted pairs, or an empty string when ``n`` has no keys.

    Notes
    -----
    Keys are inserted verbatim (no back-tick quoting).
    """
    return ', '.join(f'{key}: {_cypher_literal(n[key])}' for key in n.keys())

In [10]:
def update_rel_to_db(rel, driver, db):
    """Merge one relationship and its two end nodes into database ``db``.

    A single Cypher statement with three ``merge`` clauses is built: one per
    end node (matched on one label and all of its properties) and one for the
    relationship between them (matched on its type and properties).

    Parameters
    ----------
    rel :
        Relationship object exposing ``nodes`` (a pair), ``type`` and mapping
        access to its properties; each node exposes ``labels`` and mapping
        access to its properties.
    driver :
        Object exposing ``session(database=...)`` usable as a context manager.
    db : str
        Name of the database the statement is run against.

    Returns
    -------
    str
        The Cypher text that was executed.
    """
    start_node, end_node = rel.nodes
    # Only one label per node is carried over: the first one the label set yields.
    start_label = list(start_node.labels)[0]
    end_label = list(end_node.labels)[0]
    # `dict(entity)` goes through the public mapping interface instead of the
    # private `_properties` attribute.
    query = f"""
    merge (a: {start_label} {{ {get_properties(dict(start_node))} }})
    merge (b: {end_label} {{ {get_properties(dict(end_node))} }})
    merge (a)-[:{rel.type} {{ {get_properties(dict(rel))} }}]->(b)
    """
    with driver.session(database = db) as session:
        session.run(query)
    return query

In [11]:
def update_graph(driver, db, rels):
    """Send every relationship in ``rels`` to ``update_rel_to_db``, showing a progress bar.

    Parameters
    ----------
    driver :
        Passed through unchanged to ``update_rel_to_db``.
    db : str
        Passed through unchanged to ``update_rel_to_db``.
    rels : iterable
        Relationships to process, in iteration order.
    """
    progress = tqdm(rels)
    for relationship in progress:
        update_rel_to_db(relationship, driver, db)

In [12]:
# Copy the artwork relationships into the `captioning` database.
update_graph(driver=driver, db='captioning', rels=rels)

100%|██████████| 10487/10487 [01:21<00:00, 128.95it/s]


### GET ARTIST INFORMATION

In [13]:
# Collect the outgoing relationships of every artist linked from a captioned artwork.
with driver.session(database='neo4j') as session:
    artist_result = session.run("""
    match (aw:Artwork)-->(a:Artist) where aw.captions is not null
    match p = (a)-->() return relationships(p) as rels, nodes(p) as nodes
    """)
    # Each path has a single hop, so its only relationship sits at index 0.
    artists_links = [record['rels'][0] for record in artist_result]
artists_links[0]

<Relationship id=65352 nodes=(<Node id=55 labels=frozenset({'Artist'}) properties={'dbpedia_url': 'https://dbpedia.org/resource/Christian_Attersee', 'gender': 'male', 'wikipedia_url': 'https://en.wikipedia.org/wiki/Christian_Attersee', 'image_url': 'https://uploads5.wikiart.org/00225/images/christian-ludwig-attersee/scaled-200x250-ischinn-attersee-christian-1.jpg!Portrait.jpg', 'birth_date': 'August 28, 1940', 'name': 'christian-ludwig-attersee', 'printed_name': 'Christian Attersee'}>, <Node id=129498 labels=frozenset({'Subject'}) properties={'name': '21st-century male artists'}>) type='hasSubject' properties={}>

In [14]:
# Copy the artist relationships into the `captioning` database.
update_graph(driver=driver, db='captioning', rels=artists_links)

100%|██████████| 13478/13478 [02:00<00:00, 112.13it/s]


### GET GALLERY LINKS

In [15]:
# Collect the outgoing relationships of every gallery linked from a captioned artwork.
with driver.session(database='neo4j') as session:
    gallery_result = session.run("""
    match (a:Artwork)-->(g:Gallery) where a.captions is not null
    match p = (g)-->() return relationships(p) as rels, nodes(p) as nodes
    """)
    # Each path has a single hop, so its only relationship sits at index 0.
    gallery_links = [record['rels'][0] for record in gallery_result]
gallery_links[0]

<Relationship id=177958 nodes=(<Node id=133292 labels=frozenset({'Gallery'}) properties={'name': 'Salvador Dali Museum'}>, <Node id=134490 labels=frozenset({'City'}) properties={'name': 'St. Petersburg'}>) type='inCity' properties={}>

In [16]:
# Copy the gallery relationships into the `captioning` database.
update_graph(driver=driver, db='captioning', rels=gallery_links)

100%|██████████| 358/358 [00:01<00:00, 180.64it/s]


### GET CITY LINKS

In [17]:
# Collect the outgoing relationships of every city linked from a captioned artwork.
with driver.session(database='neo4j') as session:
    city_result = session.run("""
    match (a:Artwork)-->(c:City) where a.captions is not null
    match p = (c)-->() return relationships(p) as rels, nodes(p) as nodes
    """)
    # Each path has a single hop, so its only relationship sits at index 0.
    city_links = [record['rels'][0] for record in city_result]
city_links[0]

<Relationship id=182818 nodes=(<Node id=134380 labels=frozenset({'City'}) properties={'name': 'Nizhny Novgorod'}>, <Node id=135026 labels=frozenset({'Country'}) properties={'name': 'Russia'}>) type='inCountry' properties={}>

In [18]:
# Copy the city relationships into the `captioning` database.
update_graph(driver=driver, db='captioning', rels=city_links)

100%|██████████| 211/211 [00:00<00:00, 300.30it/s]
