Developed by Chrisowalandis Deligio and Sebastian Gampe.

In [1]:
import sys
sys.path.append('../')
import pandas as pd
import random
import os
import numpy as np
from cnt.generate_re_data import AutoRE

from cnt.annotate import (annotate, annotate_single_design, 
                          annotate_designs, 
                          extract_string_from_annotation, labeling_eng)
from cnt.extract_relation import (path, NERTransformer, FeatureExtractor)
from cnt.evaluate import Metrics
from cnt.vectorize import (Doc2Str, Path2Str, Verbs2Str, AveragedPath2Vec, 
                           AveragedRest2Vec)
from cnt.io import (replace_left_right)
from cnt.io import  Database_Connection
from cnt.preprocess import Preprocess

import warnings
warnings.filterwarnings('ignore')

In [2]:
auto = AutoRE()

In [3]:
# Names that AutoRE may use as the subject of a generated design sentence
# (handed over via `auto.set_subjects`).
subjects = [
    "Athena",
    "Ares",
    "Apollo",
    "Zeus",
    "Nike",
    "Olybrius",
    "Seleucus I",
    "Domitianus",
    "Equitas",
    "Rhoemetalces I",
]

In [4]:
# Predicate -> objects that may follow it in a generated design sentence
# (handed over via `auto.set_relations`). An object may appear under several
# predicates.
relations = {
    "holding": [
        "sword", "shield", "paludamentum", "staff",
        "club", "stones", "jar", "bow",
    ],
    "wearing": [
        "belt", "crown", "helmet", "cuirass",
        "crown juwel", "ribbons", "boots",
    ],
    "seated_on": [
        "dolphin", "shield", "omphalos", "rock",
        "bull", "panther", "kline",
    ],
    "resting_on": [
        "serpent staff", "torch", "shield", "panther",
        "club", "water urn", "knee", "rock",
    ],
    "feeding": ["panther", "dolphin"],
    "receiving": ["apple", "agonistic crown", "crown", "shield"],
}

In [5]:
auto.set_subjects(subjects)

In [6]:
auto.set_relations(relations)

In [7]:
result = auto.generate(300)

In [8]:
result.head(5)

Unnamed: 0,design_en,y
0,Olybrius holding staff.,"(Olybrius, holding, staff)"
1,Apollo receiving agonistic crown.,"(Apollo, receiving, agonistic crown)"
2,Athena resting on torch.,"(Athena, resting_on, torch)"
3,Ares receiving crown.,"(Ares, receiving, crown)"
4,Seleucus I wearing crown.,"(Seleucus I, wearing, crown)"


In [9]:
# Build the frame that is later appended to the `nlp_training_designs` table:
# keep only the generated English sentence and fill the remaining columns with
# constants.
# The explicit .copy() makes X an independent frame, so the column assignments
# below never write to a selection of `result` (the chained-assignment pattern
# that pandas reports as SettingWithCopyWarning).
X = result[["design_en"]].copy()
X["design_de"] = ""
X["comment"] = "auto_generated"
X["design_en_changed"] = ""
X.head(2)

Unnamed: 0,design_en,design_de,comment,design_en_changed
0,Olybrius holding staff.,,auto_generated,
1,Apollo receiving agonistic crown.,,auto_generated,


In [10]:
X.shape

(300, 4)

In [11]:
# Open the database connection used by the queries and `to_sql` calls in this
# notebook. "root:YourConnection" is a placeholder; replace it with your own
# connection details so the URL reads
#   mysql+mysqlconnector://user:password@IP/Database
dc =  Database_Connection("mysql+mysqlconnector://root:YourConnection") # Format user:password@IP/Database

In [12]:
X.to_sql("nlp_training_designs",dc.mysql_connection,if_exists="append", index=False)

300

In [13]:
def get_id(design):
    """Return the database id of a training design, looked up by its text.

    Runs a SELECT against ``nlp_training_designs`` through the module-level
    connection ``dc`` and returns the ``id`` of the first row returned.

    Args:
        design: English design text to match against ``design_en``.

    Returns:
        The id of the first matching row, or the string ``"Null"`` when no
        row matches or the lookup fails for any other reason.
    """
    try:
        # The text is embedded in a SQL string literal, so escape backslashes
        # and single quotes; otherwise a design containing an apostrophe
        # breaks the statement and is silently reported as "Null".
        # NOTE(review): prefer a parameterized query if `create_own_query`
        # supports bind parameters.
        escaped = design.replace("\\", "\\\\").replace("'", "''")
        query = f"select id from nlp_training_designs where design_en='{escaped}';"
        # Positional access: take the first returned row regardless of the
        # index labels of the query result.
        return dc.create_own_query(query)["id"].iloc[0]
    except Exception:
        # Deliberate best effort: callers filter on the "Null" marker later.
        # `Exception` (not a bare except) lets KeyboardInterrupt propagate.
        return "Null"

In [14]:
# Fetch the database id of every design that was appended to
# `nlp_training_designs`. At this point `get_id` is the design-text lookup
# (the name is redefined for entity lookups further down); it runs one SELECT
# per row and yields the string "Null" when the lookup fails.
X["db_id"] = X.apply(lambda row: get_id(row.design_en), axis=1)

In [15]:
X.head(5)

Unnamed: 0,design_en,design_de,comment,design_en_changed,db_id
0,Olybrius holding staff.,,auto_generated,,27819
1,Apollo receiving agonistic crown.,,auto_generated,,27551
2,Athena resting on torch.,,auto_generated,,27563
3,Ares receiving crown.,,auto_generated,,27691
4,Seleucus I wearing crown.,,auto_generated,,27522


In [16]:
# Keep only the designs whose database id was found ("Null" marks a failed
# lookup). The selection is copied so that the column assignments that follow
# write to an independent frame rather than to a slice of the previous X.
X = X.loc[X.db_id != "Null"].copy()
X.shape

(300, 5)

In [17]:
X["y"] = result["y"]

In [18]:
X.head(5)

Unnamed: 0,design_en,design_de,comment,design_en_changed,db_id,y
0,Olybrius holding staff.,,auto_generated,,27819,"(Olybrius, holding, staff)"
1,Apollo receiving agonistic crown.,,auto_generated,,27551,"(Apollo, receiving, agonistic crown)"
2,Athena resting on torch.,,auto_generated,,27563,"(Athena, resting_on, torch)"
3,Ares receiving crown.,,auto_generated,,27691,"(Ares, receiving, crown)"
4,Seleucus I wearing crown.,,auto_generated,,27522,"(Seleucus I, wearing, crown)"


## Add relations to the database

In [19]:
entities = dc.create_own_query("select * from nlp_list_entities;")
entities[["id", "name_en", "alternativenames_en"]]

Unnamed: 0,id,name_en,alternativenames_en
0,1,Abundantia,
1,2,Actaeon,
2,3,Aemilian,
3,4,Aeneas,
4,5,Aequitas,Equitas
...,...,...,...
844,845,turning,
845,846,sailing,
846,847,escorted_by,escorted by
847,848,wearing,


In [20]:
# Wrap each (subject, predicate, object) triple in a one-element list: the
# relation loop further down iterates over `row.y` as a list of triples.
X["y"] = X.apply(lambda row: [row.y], axis=1)

In [21]:
# Map every entity id to one lower-cased, ", "-separated string made of the
# entity name followed by its alternative names (if any).
ent_dict = {}
for _, row in entities[["id", "name_en", "alternativenames_en"]].iterrows():
    names = row["name_en"]
    alternatives = row["alternativenames_en"]
    # Skip missing alternatives: concatenating None/NaN to a str raises
    # TypeError, and an empty value would leave a dangling ", " that turns
    # into an empty-string alias when the entry is split again.
    if not pd.isna(alternatives) and alternatives != "":
        names = names + ", " + alternatives
    # The ", None" clean-up is kept for alternatives stored as the text "None".
    ent_dict[row["id"]] = names.replace(", None", "").lower()

In [22]:
result = pd.DataFrame(columns=["design_id", "subject", "predicate", "object"])

In [23]:
def get_id(ent, lookup=None):
    """Resolve an entity name to its id.

    Args:
        ent: Entity name as it appears in a relation triple.
        lookup: Optional mapping ``id -> "name, alternative name, ..."``.
            When omitted, the module-level ``ent_dict`` is used.

    Returns:
        The key of the first mapping entry that has a ", "-separated name
        equal to ``ent`` or to ``ent.lower()``. When nothing matches, ``ent``
        itself is returned unchanged, so callers can detect a miss by type.
    """
    if lookup is None:
        lookup = ent_dict
    for ent_id, names in lookup.items():
        for name in names.split(", "):
            # Exact match first, then a match on the lower-cased input.
            if ent == name or ent.lower() == name:
                return ent_id
    return ent

In [24]:
# Translate every relation triple of every design into entity ids and collect
# the fully resolved ones as rows for the relation table. Rows are gathered in
# a list and concatenated once at the end instead of growing `result` with the
# private `DataFrame._append` on every iteration.
relation_rows = []
for _, row in X.iterrows():
    for y in row.y:
        subject_str, predicate_str, obj_str = y[0], y[1], y[2]
        subject = get_id(subject_str)
        predicate = get_id(predicate_str)
        obj = get_id(obj_str)
        # get_id hands back the original string when an entity is unknown, so
        # a triple is usable only if all three parts came back as integers
        # (Python or NumPy integers, depending on how the ids were read).
        resolved = all(
            isinstance(value, (int, np.integer))
            for value in (subject, predicate, obj)
        )
        if resolved:
            relation_rows.append({"design_id": row.db_id,
                                  "subject": subject,
                                  "subject_str": subject_str,
                                  "predicate": predicate,
                                  "predicate_str": predicate_str,
                                  "object": obj,
                                  "object_str": obj_str})
        else:
            # Report triples with at least one unresolved entity.
            print(subject, predicate, obj)

if relation_rows:
    # Existing columns of `result` keep their order; the *_str columns are
    # added after them.
    result = pd.concat([result, pd.DataFrame(relation_rows)], ignore_index=True)

In [25]:
result.shape

(300, 7)

In [26]:
result.to_sql("nlp_relation_extraction_en_v2",dc.mysql_connection,if_exists="append", index=False)

300