developed by Patricia Klinger, modified by Sebastian Gampe, Kerim Gencer, Chrisowalandis Deligio

In [90]:
import sys
sys.path.append('../')
import pandas as pd
import random
import os
import numpy as np
from cnt.model import (DesignEstimator, RelationExtractor, save_pipeline, load_pipeline, predict_re_single_sentence, 
relations_from_adjectives_df, relations_from_adjectives_single, concat_relations)
from cnt.annotate import (annotate, annotate_single_design, 
                          annotate_designs, 
                          extract_string_from_annotation, labeling_eng)
from cnt.extract_relation import (path, NERTransformer, FeatureExtractor)
from cnt.evaluate import Metrics
from cnt.vectorize import (Doc2Str, Path2Str, Verbs2Str, AveragedPath2Vec, 
                           AveragedRest2Vec)
from cnt.io import (replace_left_right)
from cnt.io import  Database_Connection
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline, make_pipeline, make_union
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import Normalizer
from sklearn.naive_bayes import MultinomialNB
from itertools import product
import warnings
warnings.filterwarnings('ignore')

### Define the column names for the id and design column 

In [91]:
id_col = "id"
design_col = "design_en"

### Load yaml file with annotated data

In [92]:
import yaml
import_path = "../data/English_RE_data.yaml"
with open(import_path, encoding='utf8') as f:
    dictionary = yaml.safe_load(f)
    d = {replace_left_right(key): value for key, value in dictionary.items()}

In [93]:
relation_counts = {}
labels = []
for sentence, relations in d.items():
    for rel in relations:
        rel_name = rel[1]
        if rel_name not in relation_counts:
            relation_counts[rel_name] = 1
        else:
            relation_counts[rel_name] += 1

sorted(relation_counts.items(), key= lambda x: (-x[1], x[0]))

[('holding', 1113),
 ('wearing', 781),
 ('resting_on', 238),
 ('seated_on', 88),
 ('grasping', 36),
 ('standing', 36),
 ('crowning', 14),
 ('feeding', 10),
 ('coiling', 7),
 ('breaking', 4),
 ('pushing', 3),
 ('flying_over', 2),
 ('receiving', 2),
 ('escorted_by', 1)]

In [94]:
dc =  Database_Connection("mysql+mysqlconnector://root:YourConnection") # Format user:password@IP/Database

In [96]:
language = "_en"
add_columns = ["id", "name"+language, "alternativenames"+language]

In [97]:
entities = {
    "PERSON": dc.load_entities_from_db("nlp_list_person", ["name", "alternativenames"], ["alternativenames"], ",", True),
    "OBJECT": dc.load_entities_from_db("nlp_list_obj", add_columns, [add_columns[1]], ",", True),
    "ANIMAL": dc.load_entities_from_db("nlp_list_animal", add_columns, [add_columns[1]], ",", True),
    "PLANT": dc.load_entities_from_db("nlp_list_plant", add_columns, [add_columns[1]], ",", True)}

In [98]:
X_list, y_list = labeling_eng(d, entities)

In [99]:
X = pd.DataFrame({design_col: X_list, "y" : y_list})

In [100]:
X.shape

(1029, 2)

In [101]:
X[id_col] = X.index

In [102]:
X

Unnamed: 0,design_en,y,id
0,Amphora with ribbed surface and crooked handle...,"[(Amphora, OBJECT, holding, poppy, PLANT)]",0
1,"Half-nude Aphrodite standing facing, head left...","[(Aphrodite, PERSON, holding, apple, PLANT)]",1
2,"Nude Aphrodite standing facing, head right, co...","[(Eros, PERSON, seated_on, dolphin, ANIMAL)]",2
3,"Nude Apollo standing facing, head left, left l...","[(Apollo, PERSON, holding, patera, OBJECT), (s...",3
4,"Artemis advancing right, wearing short flutter...","[(Artemis, PERSON, wearing, chiton, OBJECT), (...",4
...,...,...,...
1024,"Turreted Tyche standing facing, head left, hol...","[(Tyche, PERSON, holding, temple, OBJECT)]",1024
1025,"Turreted Tyche standing facing, head left, hol...","[(Tyche, PERSON, holding, temple, OBJECT)]",1025
1026,"Turreted Tyche standing facing, head left, hol...","[(Tyche, PERSON, holding, temple, OBJECT)]",1026
1027,"Turreted Tyche standing facing, head left, hol...","[(Tyche, PERSON, holding, temple, OBJECT)]",1027


In [103]:
design = X.iloc[0]["design_en"]

In [104]:
design

'Amphora with ribbed surface and crooked handles containing two ears of corn and poppy.'

In [105]:
def get_id(design):
    try:
        return dc.create_own_query("select id from nlp_training_designs where design_en='"+design+"';").id.item()
    except:
        return "Null"

In [106]:
%%capture
X["db_id"] = X.apply(lambda row: get_id(row.design_en), axis=1)

In [61]:
X.shape

(1029, 4)

In [107]:
X.head(2)

Unnamed: 0,design_en,y,id,db_id
0,Amphora with ribbed surface and crooked handle...,"[(Amphora, OBJECT, holding, poppy, PLANT)]",0,9
1,"Half-nude Aphrodite standing facing, head left...","[(Aphrodite, PERSON, holding, apple, PLANT)]",1,33


In [62]:
X = X.loc[X.db_id=="Null"]

In [63]:
X.shape

(34, 4)

In [89]:
X.head(5).style

Unnamed: 0,design_en,y,id,db_id,design_id
20,"Veiled and draped bust of Demeter, right, wearing corn wreath.","[('Demeter', 'PERSON', 'wearing', 'corn wreath', 'OBJECT')]",20,24667,24667
84,"Heracles advancing left, holding transverse lit torch in outstretched right hand and club and lion skin in left arm; in background, column with statue of standing female figure holding staff; two small Erotes, first at base of column, right, second at Heracles' left leg, left.","[('Heracles', 'PERSON', 'holding', 'torch', 'OBJECT'), ('Heracles', 'PERSON', 'holding', 'club', 'OBJECT'), ('Heracles', 'PERSON', 'holding', 'lion skin', 'OBJECT'), ('statue', 'OBJECT', 'holding', 'staff', 'OBJECT')]",84,24668,24668
103,"Nike standing right in biga, holding palm branch in left hand and reigns in both hands.","[('Nike', 'PERSON', 'holding', 'palm branch', 'OBJECT')]",103,24670,24670
107,"Perseus and Andromeda; at left, Andromeda standing right, raising right arm; to right, Perseus standing left, holding harpa and gorgon's head in left hand, releasing Andromeda from her chains with right hand; stepping with right foot on sea-monster Cetus turned to stone under Andromeda.","[('Perseus', 'PERSON', 'standing', 'Cetus', 'ANIMAL')]",107,24671,24671
110,"Prow with naval ram in shape of animal's head, left; on top, emperor (Marcus Aurelius) standing left, extending right hand, holding parazonium in left arm.","[('Marcus Aurelius', 'PERSON', 'holding', 'parazonium', 'OBJECT')]",110,24672,24672


### Testing rule based annotation

In [22]:
obj_list = {
"veiled": ("wearing", "Veil", "before"),
"draped": ("wearing", "Clothing", "before"),
"helmeted": ("wearing", "Helmet", "before"),
"diademed": ("wearing", "Diadem", "before"),
"turreted": ("wearing", "Mural crown", "before"),
"enthroned": ("seated_on", "Throne", "after"),

}

In [23]:
df = pd.DataFrame().from_dict(obj_list, orient="index").rename(columns={0:"relation", 1:"Object"})
df["To_Replace"] = df.index
df.reset_index(inplace=True, drop=True)
df = df[["To_Replace", "relation", "Object"]]

In [24]:
X.head(5)

Unnamed: 0,design_en,y,id,db_id
0,Amphora with ribbed surface and crooked handle...,"[(Amphora, OBJECT, holding, poppy, PLANT)]",0,9
1,"Half-nude Aphrodite standing facing, head left...","[(Aphrodite, PERSON, holding, apple, PLANT)]",1,33
2,"Nude Aphrodite standing facing, head right, co...","[(Eros, PERSON, seated_on, dolphin, ANIMAL)]",2,36
3,"Nude Apollo standing facing, head left, left l...","[(Apollo, PERSON, holding, patera, OBJECT), (s...",3,85
4,"Artemis advancing right, wearing short flutter...","[(Artemis, PERSON, wearing, chiton, OBJECT), (...",4,104


### Train the RE model

In [25]:
classifier = LogisticRegression(max_iter=1000)
string_converter = Path2Str(pos=True) 
vectorizer = CountVectorizer(ngram_range=(1,3))
feature = make_pipeline(string_converter, vectorizer)

In [26]:
X_train, X_test, y_train, y_test = train_test_split(X[[id_col, design_col]], X[[id_col, "y"]], test_size=0.25, random_state=33)

#### load pretrained NER-Model

In [27]:
ner_model_directory = "../cnt/trained_model/ner/english/"
ner_model_name = "english_cno"

#### define RE-Model path

In [28]:
re_model_directory = "../cnt/trained_model/re/"
re_model_name = "english_cno"

In [29]:
inner_pipeline = make_pipeline(feature, classifier)
pipeline = make_pipeline(NERTransformer(ner_model_directory, ner_model_name, id_col, design_col),
                         FeatureExtractor(ner_model_directory, ner_model_name, id_col, design_col),
                         RelationExtractor(inner_pipeline, re_model_directory, re_model_name, id_col))
pipeline.fit(X_train, y_train)

Pipeline(steps=[('nertransformer',
                 NERTransformer(design_col='design_en', id_col='id',
                                model_dir='../cnt/trained_model/ner/english/',
                                model_name='english_cno')),
                ('featureextractor',
                 FeatureExtractor(design_col='design_en', id_col='id',
                                  model_dir='../cnt/trained_model/ner/english/',
                                  model_name='english_cno')),
                ('relationextractor',
                 RelationExtractor(id_col='id', model_name='english_cno',
                                   output_dir='../cnt/trained_model/re/',
                                   pipeline=Pipeline(steps=[('pipeline',
                                                             Pipeline(steps=[('path2str',
                                                                              Path2Str(pos=True)),
                                                          

## Save and Load model

In [30]:
save_pipeline(pipeline, re_model_directory, re_model_name)

In [31]:
model = load_pipeline(re_model_directory, re_model_name)

## Predict

In [32]:
y_pred = model.predict(X_test)

In [33]:
metrics = Metrics()

In [34]:
precision, recall = metrics.score_precision_recall(y_test, y_pred)
F1 = (2*precision*recall) / (precision + recall)

In [35]:
print("Precision", round(precision*100,2))
print("Recall", round(recall*100,2))
print("F1", round(F1*100,2))

Precision 86.45
Recall 71.15
F1 78.06


In [36]:
y_pred["design_en"] = X_test.design_en

In [37]:
y_pred = relations_from_adjectives_df(y_pred, "design_en", "y", ner_model_directory, ner_model_name, id_col, design_col, obj_list, entities_to_consider=["PERSON"])

In [38]:
y_pred.loc[y_pred.design_en.str.contains("Veiled")]

Unnamed: 0,id,y,design_en
31,31,"[(Demeter, PERSON, seated_on, basket, OBJECT),...","Veiled Demeter seated left on basket, wearing ..."
539,539,"[(Hera, PERSON, wearing, kalathos, OBJECT), (H...","Veiled Hera (Samios) standing left, wearing ka..."
684,684,"[(Demeter, PERSON, seated_on, altar, OBJECT), ...","Veiled Demeter seated left on altar, wearing c..."
257,257,"[(Demeter, PERSON, wearing, corn wreath, OBJEC...",Veiled Demeter seated left cista mystica with ...
366,366,"[(Demeter, PERSON, wearing, garment, OBJECT), ...","Veiled Demeter standing facing, head left, wea..."
21,21,"[(Demeter, PERSON, wearing, corn wreath, OBJEC...","Veiled and draped bust of Demeter, right, wear..."
16,16,"[(Cybele, PERSON, holding, patera, OBJECT), (C...","Veiled and turreted Cybele enthroned left, hol..."
702,702,"[(Demeter, PERSON, seated_on, rock, OBJECT), (...","Veiled Demeter seated left on rock, holding ea..."
180,180,"[(Demeter, PERSON, wearing, corn wreath, OBJEC...","Veiled and draped bust of Demeter, right, wear..."
22,22,"[(Demeter, PERSON, wearing, corn wreath, OBJEC...","Veiled head of Demeter, right, wearing corn wr..."


In [39]:
design = "Diademed Alexander the Great to the left and helmeted Athena to the right."
auto_relations = relations_from_adjectives_single(design,ner_model_directory, ner_model_name, id_col, design_col, obj_list)
model_relations = predict_re_single_sentence(model, design, id_col, design_col)
concat_relations(auto_relations, model_relations)

[('Athena', 'PERSON', 'wearing', 'Helmet', 'OBJECT')]

## Prediction dataframe

In [40]:
pre_df = X_test.merge(y_pred, left_on=id_col, right_on =id_col)

In [41]:
pre_df.head(10)

Unnamed: 0,id,design_en_x,y,design_en_y
0,948,"Tyche standing facing, head left, wearing kala...","[(Tyche, PERSON, wearing, kalathos, OBJECT), (...","Tyche standing facing, head left, wearing kala..."
1,779,"Apollo standing left, holding plectrum and lyre.","[(Apollo, PERSON, holding, plectrum, OBJECT), ...","Apollo standing left, holding plectrum and lyre."
2,241,"Turreted Cybele seated, head right, on lion ju...","[(Cybele, PERSON, seated_on, lion, ANIMAL), (C...","Turreted Cybele seated, head right, on lion ju..."
3,347,"Heracles kneeling right, wearing lion skin, dr...",[],"Heracles kneeling right, wearing lion skin, dr..."
4,388,"Nude Apollo standing left, holding patera in o...","[(Apollo, PERSON, holding, patera, OBJECT)]","Nude Apollo standing left, holding patera in o..."
5,689,"Nude Hermes seated left on rock, holding caduc...","[(Hermes, PERSON, holding, caduceus, OBJECT), ...","Nude Hermes seated left on rock, holding caduc..."
6,4,"Artemis advancing right, wearing short flutter...","[(Artemis, PERSON, wearing, chiton, OBJECT), (...","Artemis advancing right, wearing short flutter..."
7,506,Nude Eros in attitude of Thanatos standing rig...,[],Nude Eros in attitude of Thanatos standing rig...
8,31,"Veiled Demeter seated left on basket, wearing ...","[(Demeter, PERSON, seated_on, basket, OBJECT),...","Veiled Demeter seated left on basket, wearing ..."
9,899,"Laureate bust of Apollo, left, wearing chlamys...","[(Apollo, PERSON, wearing, chlamys, OBJECT), (...","Laureate bust of Apollo, left, wearing chlamys..."


## Upload to mysql

In [42]:
upload = False

In [43]:
if upload ==True:
    dc =  Database_Connection("mysql+mysqlconnector://YourConnection")
    cnt_designs = dc.load_designs_from_db("designs", ["DesignID", "DesignEng"])
    cnt_designs.rename(columns={"DesignEng":"Design"}, inplace=True) # if english
    cnt_pred = pipeline.predict(cnt_designs)
    cnt_pipeline_output = pd.DataFrame([(str(designid), *relation) for  _, (designid, relation_list) in cnt_pred.iterrows()
                    for relation in relation_list],
            columns=["DesignID", "Person", "Label_Person", "Relation", "Object",
                     "Label_Object"])
    cnt_pipeline_output.to_sql("CNO.cnt_pipeline_output",dc.mysql_connection,if_exists="replace", index=False)