developed by Patricia Klinger, modified by Sebastian Gampe, Kerim Gencer, Chrisowalandis Deligio

In [1]:
import sys
sys.path.append('../')
import pandas as pd
import random
import os
import numpy as np
from cnt.model import (DesignEstimator, RelationExtractor, save_pipeline, load_pipeline, predict_re_single_sentence, 
relations_from_adjectives_df, relations_from_adjectives_single, concat_relations)
from cnt.annotate import (annotate, annotate_single_design, 
                          annotate_designs, 
                          extract_string_from_annotation, labeling_eng)
from cnt.extract_relation import (path, NERTransformer, FeatureExtractor)
from cnt.evaluate import Metrics
from cnt.vectorize import (Doc2Str, Path2Str, Verbs2Str, AveragedPath2Vec, 
                           AveragedRest2Vec)
from cnt.io import (replace_left_right)
from cnt.io import  Database_Connection
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline, make_pipeline, make_union
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import Normalizer
from sklearn.naive_bayes import MultinomialNB
from itertools import product
import warnings
warnings.filterwarnings('ignore')

### Define the column names for the id and design column 

In [2]:
id_col = "id"
design_col = "design_en"

### Load yaml file with annotated data

In [3]:
import yaml
import_path = "../data/English_RE_data.yaml"
with open(import_path, encoding='utf8') as f:
    dictionary = yaml.safe_load(f)
    d = {replace_left_right(key): value for key, value in dictionary.items()}

In [4]:
relation_counts = {}
labels = []
for sentence, relations in d.items():
    for rel in relations:
        rel_name = rel[1]
        if rel_name not in relation_counts:
            relation_counts[rel_name] = 1
        else:
            relation_counts[rel_name] += 1

sorted(relation_counts.items(), key= lambda x: (-x[1], x[0]))

[('holding', 1113),
 ('wearing', 781),
 ('resting_on', 238),
 ('seated_on', 88),
 ('grasping', 36),
 ('standing', 36),
 ('crowning', 14),
 ('feeding', 10),
 ('coiling', 7),
 ('breaking', 4),
 ('pushing', 3),
 ('flying_over', 2),
 ('receiving', 2),
 ('escorted_by', 1)]

In [5]:
dc =  Database_Connection("mysql+mysqlconnector://root:YourConnection") # Format user:password@IP/Database

In [7]:
language = "_en"
add_columns = ["id", "name"+language, "alternativenames"+language]

In [8]:
entities = {
    "PERSON": dc.load_entities_from_db("nlp_list_person", ["name", "alternativenames"], ["alternativenames"], ",", True),
    "OBJECT": dc.load_entities_from_db("nlp_list_obj", add_columns, [add_columns[1]], ",", True),
    "ANIMAL": dc.load_entities_from_db("nlp_list_animal", add_columns, [add_columns[1]], ",", True),
    "PLANT": dc.load_entities_from_db("nlp_list_plant", add_columns, [add_columns[1]], ",", True)}

In [9]:
X_list, y_list = labeling_eng(d, entities)

In [10]:
X = pd.DataFrame({design_col: X_list, "y" : y_list})

In [11]:
X.shape

(1029, 2)

In [12]:
X[id_col] = X.index

In [13]:
X

Unnamed: 0,design_en,y,id
0,Amphora with ribbed surface and crooked handle...,"[(Amphora, OBJECT, holding, poppy, PLANT)]",0
1,"Half-nude Aphrodite standing facing, head left...","[(Aphrodite, PERSON, holding, apple, PLANT)]",1
2,"Nude Aphrodite standing facing, head right, co...","[(Eros, PERSON, seated_on, dolphin, ANIMAL)]",2
3,"Nude Apollo standing facing, head left, left l...","[(Apollo, PERSON, holding, patera, OBJECT), (s...",3
4,"Artemis advancing right, wearing short flutter...","[(Artemis, PERSON, wearing, chiton, OBJECT), (...",4
...,...,...,...
1024,"Turreted Tyche standing facing, head left, hol...","[(Tyche, PERSON, holding, temple, OBJECT)]",1024
1025,"Turreted Tyche standing facing, head left, hol...","[(Tyche, PERSON, holding, temple, OBJECT)]",1025
1026,"Turreted Tyche standing facing, head left, hol...","[(Tyche, PERSON, holding, temple, OBJECT)]",1026
1027,"Turreted Tyche standing facing, head left, hol...","[(Tyche, PERSON, holding, temple, OBJECT)]",1027


In [14]:
design = X.iloc[0]["design_en"]

In [15]:
design

'Amphora with ribbed surface and crooked handles containing two ears of corn and poppy.'

In [16]:
dc.create_own_query("select id from nlp_training_designs where design_en='"+design+"';").id.item()

9

In [17]:
def get_id(design):
    try:
        return dc.create_own_query("select id from nlp_training_designs where design_en='"+design+"';").id.item()
    except:
        return "Null"

In [18]:
X["db_id"] = X.apply(lambda row: get_id(row.design_en), axis=1)

SQL query failed.
SQL query failed.
SQL query failed.
SQL query failed.
SQL query failed.
SQL query failed.
SQL query failed.
SQL query failed.
SQL query failed.
SQL query failed.
SQL query failed.
SQL query failed.
SQL query failed.
SQL query failed.
SQL query failed.
SQL query failed.
SQL query failed.


In [19]:
X = X.loc[X.db_id!="Null"]

In [42]:
X.shape

(877, 4)

### Testing rule based annotation

In [20]:
obj_list = {
"veiled": ("wearing", "Veil", "before"),
"draped": ("wearing", "Clothing", "before"),
"helmeted": ("wearing", "Helmet", "before"),
"diademed": ("wearing", "Diadem", "before"),
"turreted": ("wearing", "Mural crown", "before"),
"enthroned": ("seated_on", "Throne", "after"),

}

In [21]:
df = pd.DataFrame().from_dict(obj_list, orient="index").rename(columns={0:"relation", 1:"Object"})
df["To_Replace"] = df.index
df.reset_index(inplace=True, drop=True)
df = df[["To_Replace", "relation", "Object"]]

In [22]:
X.head(5)

Unnamed: 0,design_en,y,id,db_id
0,Amphora with ribbed surface and crooked handle...,"[(Amphora, OBJECT, holding, poppy, PLANT)]",0,9
1,"Half-nude Aphrodite standing facing, head left...","[(Aphrodite, PERSON, holding, apple, PLANT)]",1,33
2,"Nude Aphrodite standing facing, head right, co...","[(Eros, PERSON, seated_on, dolphin, ANIMAL)]",2,36
3,"Nude Apollo standing facing, head left, left l...","[(Apollo, PERSON, holding, patera, OBJECT), (s...",3,85
4,"Artemis advancing right, wearing short flutter...","[(Artemis, PERSON, wearing, chiton, OBJECT), (...",4,104


### Train the RE model

In [23]:
classifier = LogisticRegression(max_iter=1000)
string_converter = Path2Str(pos=True) 
vectorizer = CountVectorizer(ngram_range=(1,3))
feature = make_pipeline(string_converter, vectorizer)

In [24]:
X_train, X_test, y_train, y_test = train_test_split(X[[id_col, design_col]], X[[id_col, "y"]], test_size=0.25, random_state=33)

#### load pretrained NER-Model

In [25]:
ner_model_directory = "../cnt/trained_model/ner/english/"
ner_model_name = "english_cno"

#### define RE-Model path

In [26]:
re_model_directory = "../cnt/trained_model/re/"
re_model_name = "english_cno"

In [27]:
inner_pipeline = make_pipeline(feature, classifier)
pipeline = make_pipeline(NERTransformer(ner_model_directory, ner_model_name, id_col, design_col),
                         FeatureExtractor(ner_model_directory, ner_model_name, id_col, design_col),
                         RelationExtractor(inner_pipeline, re_model_directory, re_model_name, id_col))
pipeline.fit(X_train, y_train)

Pipeline(steps=[('nertransformer',
                 NERTransformer(design_col='design_en', id_col='id',
                                model_dir='../cnt/trained_model/ner/english/',
                                model_name='english_cno')),
                ('featureextractor',
                 FeatureExtractor(design_col='design_en', id_col='id',
                                  model_dir='../cnt/trained_model/ner/english/',
                                  model_name='english_cno')),
                ('relationextractor',
                 RelationExtractor(id_col='id', model_name='english_cno',
                                   output_dir='../cnt/trained_model/re/',
                                   pipeline=Pipeline(steps=[('pipeline',
                                                             Pipeline(steps=[('path2str',
                                                                              Path2Str(pos=True)),
                                                          

## Save and Load model

In [28]:
save_pipeline(pipeline, re_model_directory, re_model_name)

In [29]:
model = load_pipeline(re_model_directory, re_model_name)

## Predict

In [30]:
y_pred = model.predict(X_test)

In [31]:
metrics = Metrics()

In [32]:
precision, recall = metrics.score_precision_recall(y_test, y_pred)
F1 = (2*precision*recall) / (precision + recall)

In [33]:
print("Precision", round(precision*100,2))
print("Recall", round(recall*100,2))
print("F1", round(F1*100,2))

Precision 90.84
Recall 80.7
F1 85.47


In [34]:
y_pred["design_en"] = X_test.design_en

In [35]:
y_pred = relations_from_adjectives_df(y_pred, "design_en", "y", ner_model_directory, ner_model_name, id_col, design_col, obj_list, entities_to_consider=["PERSON"])

In [36]:
y_pred.loc[y_pred.design_en.str.contains("Veiled")]

Unnamed: 0,id,y,design_en
258,258,"[(Demeter, PERSON, seated_on, rock, OBJECT), (...","Veiled Demeter seated left on rock, holding tw..."
22,22,"[(Demeter, PERSON, wearing, corn wreath, OBJEC...","Veiled head of Demeter, right, wearing corn wr..."
765,765,"[(Demeter, PERSON, holding, torch, OBJECT), (D...","Veiled Demeter standing left, wearing double c..."
16,16,"[(Cybele, PERSON, holding, patera, OBJECT), (C...","Veiled and turreted Cybele enthroned left, hol..."
32,32,"[(Demeter, PERSON, wearing, Veil, OBJECT)]","Veiled Demeter standing facing, head left, hol..."
27,27,"[(Demeter, PERSON, resting_on, torch, OBJECT),...","Veiled Demeter enthroned left, holding two ear..."
221,221,"[(Demeter, PERSON, holding, torch, OBJECT), (D...","Veiled Demeter standing facing, head right, ho..."
23,23,"[(Demeter, PERSON, wearing, corn wreath, OBJEC...","Veiled and draped bust of Demeter, right, wear..."
28,28,"[(Demeter, PERSON, wearing, corn wreath, OBJEC...",Veiled Demeter seated left on cista mystica en...
288,288,"[(Demeter, PERSON, wearing, garment, OBJECT), ...","Veiled Demeter standing facing, head left, wea..."


In [37]:
design = "Diademed Alexander the Great to the left and helmeted Athena to the right."
auto_relations = relations_from_adjectives_single(design,ner_model_directory, ner_model_name, id_col, design_col, obj_list)
model_relations = predict_re_single_sentence(model, design, id_col, design_col)
concat_relations(auto_relations, model_relations)

[('Athena', 'PERSON', 'wearing', 'Helmet', 'OBJECT'),
 ('Alexander', 'PERSON', 'wearing', 'Diadem', 'OBJECT')]

## Prediction dataframe

In [38]:
pre_df = X_test.merge(y_pred, left_on=id_col, right_on =id_col)

In [39]:
pre_df.head(10)

Unnamed: 0,id,design_en_x,y,design_en_y
0,347,"Heracles kneeling right, wearing lion skin, dr...","[(Heracles, PERSON, wearing, lion skin, OBJECT)]","Heracles kneeling right, wearing lion skin, dr..."
1,232,"Nude athlete advancing left, head right, hair ...","[(athlete, PERSON, holding, wreath, OBJECT), (...","Nude athlete advancing left, head right, hair ..."
2,870,"Head of Demeter, right, wearing earrings and n...","[(Demeter, PERSON, wearing, earrings, OBJECT),...","Head of Demeter, right, wearing earrings and n..."
3,749,"Nude Apollo (Bonus Eventus) standing left, hol...","[(Apollo, PERSON, holding, patera, OBJECT)]","Nude Apollo (Bonus Eventus) standing left, hol..."
4,613,"Diademed head of Ptolemy III Euergetes, right,...","[(Ptolemy iii euergetes, PERSON, wearing, Diad...","Diademed head of Ptolemy III Euergetes, right,..."
5,846,"Eirene standing facing, head right, holding a ...",[],"Eirene standing facing, head right, holding a ..."
6,933,"Hermes standing right, wearing petasus and chl...","[(Hermes, PERSON, holding, petasus, OBJECT), (...","Hermes standing right, wearing petasus and chl..."
7,267,"Nike standing in galloping biga, right; holdin...","[(Nike, PERSON, standing, biga, OBJECT), (Nike...","Nike standing in galloping biga, right; holdin..."
8,209,"Nude bearded Heracles advancing left, wearing ...","[(Heracles, PERSON, wearing, lion skin, OBJECT...","Nude bearded Heracles advancing left, wearing ..."
9,258,"Veiled Demeter seated left on rock, holding tw...","[(Demeter, PERSON, seated_on, rock, OBJECT), (...","Veiled Demeter seated left on rock, holding tw..."


## Upload to mysql

In [40]:
upload = False

In [41]:
if upload ==True:
    dc =  Database_Connection("mysql+mysqlconnector://YourConnection")
    cnt_designs = dc.load_designs_from_db("designs", ["DesignID", "DesignEng"])
    cnt_designs.rename(columns={"DesignEng":"Design"}, inplace=True) # if english
    cnt_pred = pipeline.predict(cnt_designs)
    cnt_pipeline_output = pd.DataFrame([(str(designid), *relation) for  _, (designid, relation_list) in cnt_pred.iterrows()
                    for relation in relation_list],
            columns=["DesignID", "Person", "Label_Person", "Relation", "Object",
                     "Label_Object"])
    cnt_pipeline_output.to_sql("CNO.cnt_pipeline_output",dc.mysql_connection,if_exists="replace", index=False)