In [1]:
import json
import os
import copy
import pandas as pd
from raw_generation_nosplit import ArtGraphNoSplit
from ArtGraphOGM import  ArtGraphDBConnector
from sklearn.model_selection import train_test_split
from dotenv import load_dotenv, find_dotenv

In [2]:
# Locate a .env file and load the variables it defines (the database
# credentials read by the configuration below) into the process environment.
# NOTE(review): load_dotenv is called without override, so a variable that is
# already set in the environment presumably wins over the .env value - confirm
# this is intended for generic names such as "username".
dotenv_path = find_dotenv()
load_dotenv(dotenv_path=dotenv_path)

True

In [3]:
# Database connection settings, each read from the environment variable of the
# same name (None when the variable is not set).
conf = {key: os.environ.get(key) for key in ('uri', 'username', 'password', 'database')}

# Sub-folder of queries/ that holds the query files used by this notebook.
artgraph_flavor = 'artgraphf2_full'


def _read_queries(stem):
    """Return the parsed content of queries/<artgraph_flavor>/<stem>-queries.json."""
    with open(f"queries/{artgraph_flavor}/{stem}-queries.json") as query_file:
        return json.load(query_file)


mapping_queries = _read_queries('mapping')
relation_queries = _read_queries('relation')
stat_queries = _read_queries('stat')

# All query collections bundled under the keys passed on to ArtGraphNoSplit.
queries = dict(
    mapping=mapping_queries,
    relations=relation_queries,
    stats=stat_queries,
)

# presumably (source node type, relation, target node type) triples naming the
# relations used as labels - confirm against ArtGraphNoSplit.
labels = [
    ('artwork', 'author', 'artist'),
    ('artwork', 'style', 'style'),
    ('artwork', 'genre', 'genre'),
]

In [4]:
# Open the database connection from the configured settings, passed
# positionally in the order uri, username, password, database.
agdb = ArtGraphDBConnector(*(conf[key] for key in ('uri', 'username', 'password', 'database')))

# Run the stored 'genre' mapping query and keep its result as a DataFrame;
# its 'name' column is what the per-genre queries iterate over.
genre_query = mapping_queries['genre']
df_genre = agdb.df_from_query(genre_query)

In [5]:
# Collect, for every genre, up to N (artwork, genre) pairs as two parallel
# lists: artworks[i] is linked to genres[i].
N = 10000000  # per-genre row cap passed to LIMIT
artworks = []
genres = []
try:
    for genre in df_genre['name']:
        # Escape backslashes and single quotes so a genre name containing them
        # cannot end the Cypher string literal early and break the query.
        safe_genre = str(genre).replace("\\", "\\\\").replace("'", "\\'")
        query = f"MATCH (a:Artwork)-[r:Genre]->(g:Genre) WHERE g.name = '{safe_genre}' RETURN a.name as artwork, g.name as genre LIMIT {N}"
        df = agdb.df_from_query(query)
        # extend() appends in place instead of copying the whole list on
        # every iteration, as `artworks = artworks + ...` did.
        artworks.extend(df['artwork'].tolist())
        genres.extend(df['genre'].tolist())
finally:
    # Release the connection even when one of the queries raises.
    agdb.close()

In [6]:
# Pair every collected artwork name with its genre: one row per
# (artwork, genre) entry, columns 'artwork' and 'genre'.
df_artwork_genre = pd.DataFrame({'artwork': artworks, 'genre': genres})

In [7]:
# Display the artwork/genre table as the cell output (long frames are shown
# abbreviated, with the middle rows elided).
df_artwork_genre

Unnamed: 0,artwork,genre
0,eugene-delacroix_female-nude-killed-from-behin...,nude painting (nu)
1,helene-de-beauvoir_untitled-8.jpg,nude painting (nu)
2,helene-de-beauvoir_untitled-4.jpg,nude painting (nu)
3,helene-de-beauvoir_untitled-3.jpg,nude painting (nu)
4,helene-de-beauvoir_untitled-2.jpg,nude painting (nu)
...,...,...
116470,paul-gauguin_port-de-javel-1876.jpg,cityscape
116471,paul-gauguin_port-de-javel-1876-1.jpg,cityscape
116472,paul-gauguin_rouen-at-spring-1884.jpg,cityscape
116473,paul-gauguin_rue-jouvenet-rouen-1884.jpg,cityscape


In [8]:
def get_split(df_artwork_genre, label_column_name, holdout_size=0.3, test_share=0.5, random_state=42):
    """Split the artworks into stratified train / validation / test lists.

    The frame is first divided into a training part and a held-out part;
    the held-out part is then divided again into validation and test.
    Both splits are stratified on ``label_column_name``.

    Parameters
    ----------
    df_artwork_genre : pandas.DataFrame
        Must contain an 'artwork' column and the ``label_column_name`` column.
    label_column_name : str
        Column whose values are used for stratification.
    holdout_size : float, default 0.3
        ``test_size`` of the first split, i.e. the part of the rows kept out
        of training (validation + test together).
    test_share : float, default 0.5
        ``test_size`` of the second split, i.e. the part of the held-out rows
        that goes to the test list; the rest goes to validation.
    random_state : int, default 42
        Seed passed to both splits.

    Returns
    -------
    tuple of (list, list, list)
        The 'artwork' values of the train, validation and test parts.

    Notes
    -----
    Any error raised by ``train_test_split`` (for instance when stratification
    is not possible for the given labels) propagates to the caller.
    NOTE(review): rows are split, not distinct artworks - if the same artwork
    occurs in several rows it can land in more than one list; confirm that
    the input holds one row per artwork.
    """
    train, holdout = train_test_split(
        df_artwork_genre,
        test_size=holdout_size,
        random_state=random_state,
        stratify=df_artwork_genre[label_column_name],
    )
    # The second split works on the held-out rows only, so the training part
    # stays untouched.
    valid, test = train_test_split(
        holdout,
        test_size=test_share,
        random_state=random_state,
        stratify=holdout[label_column_name],
    )
    return train['artwork'].tolist(), valid['artwork'].tolist(), test['artwork'].tolist()

In [9]:
# Stratify on the 'genre' column and keep the artwork names of each part.
train_artworks, val_artworks, test_artworks = get_split(
    df_artwork_genre=df_artwork_genre,
    label_column_name='genre',
)

In [10]:
# Build one dataset per split and write it under its own root directory;
# the three runs differ only in the root and in the artwork subset.
for split_root, split_artworks in (
    ('artgraphf2_train', train_artworks),
    ('artgraphf2_valid', val_artworks),
    ('artgraphf2_test', test_artworks),
):
    artgraph = ArtGraphNoSplit(
        root=split_root,
        conf=conf,
        queries=queries,
        labels=labels,
        artwork_subset=split_artworks,
    )
    artgraph.build()
    artgraph.write()


Getting mapping...
Getting relations...


INFO:root:Saving on the disk all nodes
INFO:root:Saving on the disk node mapping artwork in path artgraphf2_train/mapping/artwork_entidx2name.csv
INFO:root:Saving on the disk node mapping artist in path artgraphf2_train/mapping/artist_entidx2name.csv
INFO:root:Saving on the disk node mapping style in path artgraphf2_train/mapping/style_entidx2name.csv
INFO:root:Saving on the disk node mapping genre in path artgraphf2_train/mapping/genre_entidx2name.csv
INFO:root:Saving on the disk node mapping tag in path artgraphf2_train/mapping/tag_entidx2name.csv
INFO:root:Saving on the disk node mapping media in path artgraphf2_train/mapping/media_entidx2name.csv
INFO:root:Saving on the disk node mapping movement in path artgraphf2_train/mapping/movement_entidx2name.csv
INFO:root:Saving on the disk node mapping field in path artgraphf2_train/mapping/field_entidx2name.csv
INFO:root:Saving on the disk node mapping gallery in path artgraphf2_train/mapping/gallery_entidx2name.csv
INFO:root:Saving on th

Getting labels
Writing mapping...


INFO:root:Saving on the disk node mapping subject in path artgraphf2_train/mapping/subject_entidx2name.csv
INFO:root:Saving on the disk node mapping training in path artgraphf2_train/mapping/training_entidx2name.csv
INFO:root:Saving on the disk node mapping people in path artgraphf2_train/mapping/people_entidx2name.csv
INFO:root:Saving on the disk node mapping serie in path artgraphf2_train/mapping/serie_entidx2name.csv
INFO:root:Saving on the disk node mapping country in path artgraphf2_train/mapping/country_entidx2name.csv
INFO:root:Saving on the disk node mapping period in path artgraphf2_train/mapping/period_entidx2name.csv
INFO:root:Saving on the disk node mapping rel in path artgraphf2_train/mapping/rel_entidx2name.csv
INFO:root:Saving on the disk all relations
INFO:root:Saving on the disk relation media in path artgraphf2_train/raw/relations/artwork___media___media
INFO:root:Saving on the disk relation about in path artgraphf2_train/raw/relations/artwork___about___tag
INFO:root:

Writing labels...
Writing info...
Getting mapping...
Getting relations...


INFO:root:Saving on the disk all nodes
INFO:root:Saving on the disk node mapping artwork in path artgraphf2_valid/mapping/artwork_entidx2name.csv
INFO:root:Saving on the disk node mapping artist in path artgraphf2_valid/mapping/artist_entidx2name.csv
INFO:root:Saving on the disk node mapping style in path artgraphf2_valid/mapping/style_entidx2name.csv
INFO:root:Saving on the disk node mapping genre in path artgraphf2_valid/mapping/genre_entidx2name.csv
INFO:root:Saving on the disk node mapping tag in path artgraphf2_valid/mapping/tag_entidx2name.csv
INFO:root:Saving on the disk node mapping media in path artgraphf2_valid/mapping/media_entidx2name.csv
INFO:root:Saving on the disk node mapping movement in path artgraphf2_valid/mapping/movement_entidx2name.csv
INFO:root:Saving on the disk node mapping field in path artgraphf2_valid/mapping/field_entidx2name.csv
INFO:root:Saving on the disk node mapping gallery in path artgraphf2_valid/mapping/gallery_entidx2name.csv
INFO:root:Saving on th

Getting labels
Writing mapping...


INFO:root:Saving on the disk relation genre in path artgraphf2_valid/raw/relations/artwork___genre___genre
INFO:root:Saving on the disk relation style in path artgraphf2_valid/raw/relations/artwork___style___style
INFO:root:Saving on the disk relation author in path artgraphf2_valid/raw/relations/artwork___author___artist
INFO:root:Saving on the disk relation movement in path artgraphf2_valid/raw/relations/artist___movement___movement
INFO:root:Saving on the disk relation field in path artgraphf2_valid/raw/relations/artist___field___field
INFO:root:Saving on the disk relation teacher in path artgraphf2_valid/raw/relations/artist___teacher___artist
INFO:root:Saving on the disk relation subject in path artgraphf2_valid/raw/relations/artist___subject___subject
INFO:root:Saving on the disk relation training in path artgraphf2_valid/raw/relations/artist___training___training
INFO:root:Saving on the disk relation patrons in path artgraphf2_valid/raw/relations/artist___patrons___people
INFO:r

Writing labels...
Writing info...
Getting mapping...
Getting relations...


INFO:root:Saving on the disk all nodes
INFO:root:Saving on the disk node mapping artwork in path artgraphf2_test/mapping/artwork_entidx2name.csv
INFO:root:Saving on the disk node mapping artist in path artgraphf2_test/mapping/artist_entidx2name.csv
INFO:root:Saving on the disk node mapping style in path artgraphf2_test/mapping/style_entidx2name.csv
INFO:root:Saving on the disk node mapping genre in path artgraphf2_test/mapping/genre_entidx2name.csv
INFO:root:Saving on the disk node mapping tag in path artgraphf2_test/mapping/tag_entidx2name.csv
INFO:root:Saving on the disk node mapping media in path artgraphf2_test/mapping/media_entidx2name.csv
INFO:root:Saving on the disk node mapping movement in path artgraphf2_test/mapping/movement_entidx2name.csv
INFO:root:Saving on the disk node mapping field in path artgraphf2_test/mapping/field_entidx2name.csv
INFO:root:Saving on the disk node mapping gallery in path artgraphf2_test/mapping/gallery_entidx2name.csv
INFO:root:Saving on the disk no

Getting labels
Writing mapping...


INFO:root:Saving on the disk relation genre in path artgraphf2_test/raw/relations/artwork___genre___genre
INFO:root:Saving on the disk relation style in path artgraphf2_test/raw/relations/artwork___style___style
INFO:root:Saving on the disk relation author in path artgraphf2_test/raw/relations/artwork___author___artist
INFO:root:Saving on the disk relation movement in path artgraphf2_test/raw/relations/artist___movement___movement
INFO:root:Saving on the disk relation field in path artgraphf2_test/raw/relations/artist___field___field
INFO:root:Saving on the disk relation teacher in path artgraphf2_test/raw/relations/artist___teacher___artist
INFO:root:Saving on the disk relation subject in path artgraphf2_test/raw/relations/artist___subject___subject
INFO:root:Saving on the disk relation training in path artgraphf2_test/raw/relations/artist___training___training
INFO:root:Saving on the disk relation patrons in path artgraphf2_test/raw/relations/artist___patrons___people
INFO:root:Savin

Writing labels...
Writing info...
