developed by Patricia Klinger, modified by Sebastian Gampe, Kerim Gencer, Chrisowalandis Deligio

In [1]:
import os
import sys
sys.path.append('../')
import pandas as pd
import random
import numpy as np
import spacy
import swifter
from cnt.model import DesignEstimator, save_ner_model, load_ner_model,save_ner_model_v2, load_ner_model_v2
from cnt.annotate import (annotate, annotate_single_design, 
                          annotate_designs, 
                          extract_string_from_annotation, split_alternativenames)
from cnt.evaluate import Metrics
#from cnt.stem_lemma_annotation import Stem_Lemma_Annotatizer
from cnt.preprocess import Preprocess
from cnt.io import  Database_Connection



import warnings
warnings.filterwarnings('ignore')

  from .autonotebook import tqdm as notebook_tqdm


### Define the column names for the id and design column 

In [2]:
# Name of the column holding the design identifier.
id_col = "id"
# Name of the column holding the English design text.
design_col = "design_en"
# Gates the optional Stem_Lemma_Annotatizer step further down; disabled here.
use_lemma_stem = False

In [3]:
# Suffix appended to the language-specific column names below.
language = "_en"
# Columns used when loading entities: name_en, alternativenames_en and class.
add_columns = ["name"+language, "alternativenames"+language, "class"]

In [4]:
# Database access in text file: "mysql+mysqlconnector://root:YourConnection" -> Format user:password@IP/Database
# Read the connection string inside a context manager so the file handle is
# closed as soon as the content has been read.
with open("/home/bigdatalab/Projects/D4N4/NLP_release_1.0/db_access.txt", "r") as f:
    access = f.read()
# Connection wrapper used for all database reads/writes in this notebook.
dc =  Database_Connection(access)

In [5]:
# Load the id and design text columns from the "nlp_training_designs" table.
designs = dc.load_designs_from_db("nlp_training_designs", [id_col, design_col])

In [6]:
# Preview the first five loaded designs.
designs.head(5)

Unnamed: 0,id,design_en
0,1,Diademed head of deified Alexander the Great w...
1,6,"Altar, lighted and garlanded."
2,8,Prize amphora on ornamental stand; within line...
3,9,Amphora with ribbed surface and crooked handle...
4,10,"Bust of youthful Anchialos, right, wearing tae..."


In [7]:
# (rows, columns) of the loaded designs.
designs.shape

(23164, 2)

## Load and annotate designs

In [8]:
# Entity lists per NER label, all loaded from the same table with identical
# options; only the label differs between the four calls.
entities = {
    label: dc.load_entities_from_db_v2(
        "thrakien_d4n4.nlp_list_entities", label, add_columns, [add_columns[1]], ",", True
    )
    for label in ("PERSON", "OBJECT", "ANIMAL", "PLANT")
}

In [9]:
# Annotate every design with the entity lists, then keep only the designs
# for which at least one annotation was found.
annotated_designs = annotate_designs(entities, designs, id_col, design_col)
annotated_designs = annotated_designs.loc[annotated_designs.annotations.map(len) > 0]

In [10]:
if use_lemma_stem:
    # Imported lazily: the module-level import of Stem_Lemma_Annotatizer is
    # commented out, so without this line enabling the flag raises NameError.
    from cnt.stem_lemma_annotation import Stem_Lemma_Annotatizer

    annotater = Stem_Lemma_Annotatizer() # parameter: method="lemma_stem", language="en", backbone="spacy_snowball"
    # Re-annotate the already annotated designs with the stem/lemma annotator.
    annotated_designs = annotater.annotate(annotated_designs, entities, id_col, design_col)

In [11]:
# Preview the first five annotated designs.
annotated_designs.head(5)

Unnamed: 0,design_en,id,annotations
0,Diademed head of deified Alexander the Great w...,1,"[(0, 8, OBJECT), (9, 13, OBJECT), (25, 44, PER..."
1,"Altar, lighted and garlanded.",6,"[(0, 5, OBJECT), (19, 28, OBJECT)]"
2,Prize amphora on ornamental stand; within line...,8,"[(6, 13, OBJECT)]"
3,Amphora with ribbed surface and crooked handle...,9,"[(0, 7, OBJECT), (40, 47, OBJECT), (63, 75, PL..."
4,"Bust of youthful Anchialos, right, wearing tae...",10,"[(0, 4, OBJECT), (17, 26, PERSON), (43, 49, OB..."


In [12]:
# (rows, columns) after dropping designs without annotations.
annotated_designs.shape

(22996, 3)

## Preprocessing

In [13]:
# Placeholder column; it is overwritten with the preprocessed text further down.
annotated_designs["design_en_changed"] = ""

In [14]:
# Load name, alternative names and class from "nlp_list_entities"; used to build the preprocessing rules.
df_entities = dc.load_from_db("nlp_list_entities", add_columns)

In [15]:
# Add rules for preprocessing
preprocess = Preprocess()
# Hand-written rules that split the compounds "horseman"/"horsemen".
preprocess.add_rule("horseman", "horse man")
preprocess.add_rule("horsemen", "horse men")

# Every alternative name of a non-VERB entity is mapped to the entity's
# standard name; entities without alternative names are skipped.
for index, row in df_entities.iterrows():
    alternatives = row["alternativenames_en"]
    if alternatives is None or row["class"] == "VERB":
        continue
    for alternative in alternatives.split(", "):
        preprocess.add_rule(alternative, row["name_en"])

#### Roman numerals still seem to cause minor problems, so they are handled manually in an extra step

In [16]:
# Remove every rule whose key contains a dotted Roman numeral (I. to V.).
# Iterate over a snapshot of the keys because entries are deleted in the loop.
dotted_numerals = (" I.", " II.", " III.", " IV.", " V.")
for rule in list(preprocess.rules):
    if any(numeral in rule for numeral in dotted_numerals):
        del preprocess.rules[rule]

In [17]:
# Strip the trailing dot from the Roman numerals I-V (e.g. " III." -> " III")
# in the design text that is preprocessed in the following cells.
#
# The previous row loop wrote its result into `designs`, which no later cell
# in this notebook reads, and every branch restarted from the unmodified text,
# so a later match overwrote an earlier replacement. The cleanup therefore
# never reached `annotated_designs`. A single regex substitution applies all
# five replacements cumulatively to the frame that is actually used.
annotated_designs["design_en"] = annotated_designs["design_en"].str.replace(
    r" (I{1,3}|IV|V)\.", r" \1", regex=True
)

In [18]:
# Apply defined rules
# preprocess_design is called per row with the design text and the design id;
# only the first element of its return value is stored.
annotated_designs["design_en_changed"] = annotated_designs.swifter.apply(lambda row: preprocess.preprocess_design(row.design_en, row.id)[0], axis=1)

Pandas Apply: 100%|███████████████████████| 22996/22996 [07:14<00:00, 52.93it/s]


In [19]:
# Deleting brackets and questionmarks
# A translation table that maps "?", "(" and ")" to nothing removes all three
# characters in one pass.
annotated_designs["design_en_changed"] = annotated_designs.swifter.apply(lambda row: row["design_en_changed"].translate(str.maketrans("", "", "?()")), axis=1)

Pandas Apply: 100%|███████████████████| 22996/22996 [00:00<00:00, 208771.48it/s]


In [20]:
# Keep the untouched text and annotations under *_orig names and promote the
# preprocessed text to the main "design_en" column.
annotated_designs = annotated_designs.rename(
    columns={
        "design_en": "design_en_orig",
        "design_en_changed": "design_en",
        "annotations": "annotations_orig",
    }
)

In [21]:
# Render the first 500 rows through the Styler so the design texts can be compared side by side.
annotated_designs.head(500).style

Unnamed: 0,design_en_orig,id,annotations_orig,design_en
0,"Diademed head of deified Alexander the Great with horn of Ammon, right. Border of dots.",1,"[(0, 8, 'OBJECT'), (9, 13, 'OBJECT'), (25, 44, 'PERSON'), (50, 54, 'OBJECT')]","Diadem head of deified Alexander the Great with horn of Ammon, right. Border of dots."
1,"Altar, lighted and garlanded.",6,"[(0, 5, 'OBJECT'), (19, 28, 'OBJECT')]","Altar, lighted and garland."
2,Prize amphora on ornamental stand; within linear square and incuse square.,8,"[(6, 13, 'OBJECT')]",Prize amphora on ornamental stand; within linear square and incuse square.
3,Amphora with ribbed surface and crooked handles containing two ears of corn and poppy.,9,"[(0, 7, 'OBJECT'), (40, 47, 'OBJECT'), (63, 75, 'PLANT'), (80, 85, 'PLANT')]",Amphora with ribbed surface and crooked handle containing two corn and poppy.
4,"Bust of youthful Anchialos, right, wearing taenia. Border of dots.",10,"[(0, 4, 'OBJECT'), (17, 26, 'PERSON'), (43, 49, 'OBJECT')]","Bust of youthful Anchialos, right, wearing taenia. Border of dots."
5,"Inverted anchor; under left fluke, crayfish, under right fluke, ethnicon.",11,"[(9, 15, 'OBJECT'), (35, 43, 'ANIMAL')]","Inverted anchor; under left fluke, crayfish, under right fluke, ethnicon."
6,"Inverted anchor; under left fluke, crayfish, under right fluke, ethnicon; all within circular incuse.",12,"[(9, 15, 'OBJECT'), (35, 43, 'ANIMAL')]","Inverted anchor; under left fluke, crayfish, under right fluke, ethnicon; all within circular incuse."
7,"Inverted anchor; under right fluke, crayfish, under left fluke, ethnicon.",14,"[(9, 15, 'OBJECT'), (37, 45, 'ANIMAL')]","Inverted anchor; under right fluke, crayfish, under left fluke, ethnicon."
8,"Inverted anchor; under right fluke, crayfish, under left fluke, ethnicon; all within shallow incuse square.",15,"[(9, 15, 'OBJECT'), (36, 44, 'ANIMAL')]","Inverted anchor; under right fluke, crayfish, under left fluke, ethnicon; all within shallow incuse square."
9,"Inverted anchor; under right fluke, crayfish.",17,"[(9, 15, 'OBJECT'), (36, 44, 'ANIMAL')]","Inverted anchor; under right fluke, crayfish."


In [22]:
# Annotate the preprocessed text again, since the rewriting changes the entity positions.
train_designs = annotate_designs(entities, annotated_designs[["id", "design_en"]], id_col, design_col)
# Drop designs that no longer contain any annotation.
train_designs = train_designs.loc[train_designs.annotations.map(len) > 0]
# Attach the new annotations by design id; designs without a match are dropped by the inner join.
annotated_designs = annotated_designs.merge(train_designs[["id", "annotations"]], on="id")

In [23]:
# Preview: original vs. preprocessed text with their respective annotations.
annotated_designs.head(5)

Unnamed: 0,design_en_orig,id,annotations_orig,design_en,annotations
0,Diademed head of deified Alexander the Great w...,1,"[(0, 8, OBJECT), (9, 13, OBJECT), (25, 44, PER...",Diadem head of deified Alexander the Great wit...,"[(0, 6, OBJECT), (7, 11, OBJECT), (23, 42, PER..."
1,"Altar, lighted and garlanded.",6,"[(0, 5, OBJECT), (19, 28, OBJECT)]","Altar, lighted and garland.","[(0, 5, OBJECT), (19, 26, OBJECT)]"
2,Prize amphora on ornamental stand; within line...,8,"[(6, 13, OBJECT)]",Prize amphora on ornamental stand; within line...,"[(6, 13, OBJECT)]"
3,Amphora with ribbed surface and crooked handle...,9,"[(0, 7, OBJECT), (40, 47, OBJECT), (63, 75, PL...",Amphora with ribbed surface and crooked handle...,"[(0, 7, OBJECT), (40, 46, OBJECT), (62, 66, PL..."
4,"Bust of youthful Anchialos, right, wearing tae...",10,"[(0, 4, OBJECT), (17, 26, PERSON), (43, 49, OB...","Bust of youthful Anchialos, right, wearing tae...","[(0, 4, OBJECT), (17, 26, PERSON), (43, 49, OB..."


In [24]:
# (rows, columns) after merging in the annotations of the preprocessed text.
annotated_designs.shape

(22994, 5)

## Train NER

In [25]:
from sklearn.model_selection import train_test_split
# 75/25 train/test split with a fixed seed so the split is reproducible.
# X_* carry id and design text, y_* carry id and annotations.
X_train, X_test, y_train, y_test = train_test_split(annotated_designs[[id_col, design_col]],
                                                    annotated_designs[[id_col, "annotations"]],
                                                    test_size=0.25, random_state = 12)
# The gold column is named "y", the same name the prediction frame shows in its output below.
y_test = y_test.rename(columns={"annotations": "y"})

In [26]:
# Give both test frames a fresh 0..n-1 index so they can be combined row by row later on.
X_test.index = list(range(len(X_test)))
y_test.index = list(range(len(y_test)))

#### output directory for the trained model

In [27]:
# Directory and name passed to DesignEstimator and load_ner_model_v2 below.
output_dir =  "../cnt/trained_model/ner/english_new_org/"
model_name = "english_cno"

In [28]:
# NOTE(review): the meaning of the first argument (4) is not visible in this
# notebook — presumably a number of training iterations; confirm in cnt.model.
my_estimator = DesignEstimator(4, output_dir, model_name, id_col, design_col)
# Register the four entity labels the model is trained on.
my_estimator.set_labels("PERSON", "OBJECT", "ANIMAL", "PLANT")
# Train on the training split; the cell output reports the model being saved to output_dir.
# NOTE(review): the purpose of the "cnt" argument is not visible here — confirm in cnt.model.
my_estimator.fit(X_train, y_train.annotations, "cnt")

../cnt/trained_model/ner/english_new_org
Saved model to ../cnt/trained_model/ner/english_new_org


## Load and evaluate model

In [29]:
# Load the model from output_dir/model_name, the location the training cell was configured with.
model = load_ner_model_v2(output_dir, model_name, id_col, design_col)

In [30]:
# Predict on the test designs; per the output below, as_doc=False returns a frame with the id and a list of (start, end, label) tuples in "y".
x_predict = model.predict(X_test,as_doc=False)

In [31]:
# Show the prediction frame (pandas truncates the display).
x_predict

Unnamed: 0,id,y
0,10613,"[(0, 8, PERSON), (13, 20, PERSON)]"
1,15462,"[(0, 6, OBJECT), (8, 12, OBJECT), (18, 24, OBJ..."
2,15911,"[(35, 43, OBJECT), (47, 55, OBJECT)]"
3,5460,"[(0, 4, OBJECT), (42, 46, OBJECT), (48, 52, OB..."
4,1852,"[(0, 6, PERSON), (24, 28, OBJECT), (49, 56, OB..."
...,...,...
5744,14904,"[(15, 21, OBJECT), (39, 43, ANIMAL), (53, 57, ..."
5745,16930,"[(0, 4, OBJECT), (8, 29, PERSON), (31, 37, OBJ..."
5746,7225,"[(0, 7, ANIMAL)]"
5747,18481,"[(0, 11, PERSON), (37, 47, OBJECT), (61, 65, O..."


In [32]:
# Helper object used for the score table and the precision/recall computation below.
metrics = Metrics()

In [33]:
# Per-label score table comparing gold annotations with predictions.
# The label list comes from my_estimator, so the training cell must have run in this kernel.
scores_frame = metrics.create_score_frame(y_test, x_predict, my_estimator.get_labels())
scores_frame

Unnamed: 0,Unnamed: 1,Total(TP+FN),Hits(TP),Wrongs(FP),%
0,PERSON,6334,6311,40,99.6
1,OBJECT,22801,22783,44,99.9
2,ANIMAL,1367,1356,7,99.2
3,PLANT,940,933,0,99.3


In [34]:
# Overall precision and recall; the cells below multiply them by 100, so they are fractions here.
precision, recall = metrics.score_precision_recall(y_test, x_predict)

In [35]:
# F1 is the harmonic mean of precision and recall. When both are 0 the
# denominator is 0, so F1 is defined as 0.0 instead of dividing by zero.
F1 = (2*precision*recall) / (precision + recall) if (precision + recall) else 0.0

In [36]:
# Report the three overall scores as percentages rounded to two decimals.
for score_name, score_value in (("Precision", precision), ("Recall", recall), ("F1", F1)):
    print(score_name, round(score_value*100,2))

Precision 99.71
Recall 99.81
F1 99.76


## Entity scores

In [37]:
# Gold annotations of the first five test designs.
y_test.head(5)

Unnamed: 0,id,y
0,10613,"[(0, 8, PERSON), (13, 20, PERSON)]"
1,15462,"[(0, 6, OBJECT), (8, 12, OBJECT), (18, 24, OBJ..."
2,15911,"[(35, 43, OBJECT), (47, 55, OBJECT)]"
3,5460,"[(0, 4, OBJECT), (42, 46, OBJECT), (48, 52, OB..."
4,1852,"[(0, 6, PERSON), (24, 28, OBJECT), (49, 56, OB..."


In [38]:
# Predictions for the first five test designs, for comparison with the gold annotations.
x_predict.head(5)

Unnamed: 0,id,y
0,10613,"[(0, 8, PERSON), (13, 20, PERSON)]"
1,15462,"[(0, 6, OBJECT), (8, 12, OBJECT), (18, 24, OBJ..."
2,15911,"[(35, 43, OBJECT), (47, 55, OBJECT)]"
3,5460,"[(0, 4, OBJECT), (42, 46, OBJECT), (48, 52, OB..."
4,1852,"[(0, 6, PERSON), (24, 28, OBJECT), (49, 56, OB..."


In [39]:
# Put gold annotations and predictions next to the test designs.
# Column assignment aligns on the index: X_test and y_test were re-indexed to
# 0..n-1 earlier; x_predict is assumed to carry the same index — TODO confirm.
X_test["annotation"] = y_test["y"]
X_test["prediction"] = x_predict["y"]
X_test.head(2)

Unnamed: 0,id,design_en,annotation,prediction
0,10613,Aurelian and Iupiter,"[(0, 8, PERSON), (13, 20, PERSON)]","[(0, 8, PERSON), (13, 20, PERSON)]"
1,15462,"Ensign, vase, and shield","[(0, 6, OBJECT), (8, 12, OBJECT), (18, 24, OBJ...","[(0, 6, OBJECT), (8, 12, OBJECT), (18, 24, OBJ..."


In [40]:
def get_text(design, ent_list):
    """Return the substrings of ``design`` covered by the entries of ``ent_list``.

    Each entry is indexed positionally: item 0 is the slice start and item 1
    the slice end; any further items (e.g. the label) are ignored. The result
    is a list with one substring per entry, in the same order.
    """
    return [design[span[0]:span[1]] for span in ent_list]

In [41]:
# Resolve the gold and the predicted spans to the text they cover.
for span_col, text_col in (("annotation", "annotation_str"), ("prediction", "prediction_str")):
    X_test[text_col] = X_test.apply(lambda row, col=span_col: get_text(row.design_en, row[col]), axis=1)

In [42]:
# Check the newly added text columns on the first two rows.
X_test.head(2)

Unnamed: 0,id,design_en,annotation,prediction,annotation_str,prediction_str
0,10613,Aurelian and Iupiter,"[(0, 8, PERSON), (13, 20, PERSON)]","[(0, 8, PERSON), (13, 20, PERSON)]","[Aurelian, Iupiter]","[Aurelian, Iupiter]"
1,15462,"Ensign, vase, and shield","[(0, 6, OBJECT), (8, 12, OBJECT), (18, 24, OBJ...","[(0, 6, OBJECT), (8, 12, OBJECT), (18, 24, OBJ...","[Ensign, vase, shield]","[Ensign, vase, shield]"


In [43]:
# Attach the gold annotations to the training designs; both frames come from the same split call, so the assignment aligns on their shared index.
X_train["annotation"] = y_train["annotations"]

In [44]:
# Resolve the training annotations to the text they cover (same helper as for the test set).
X_train["annotation_str"] = X_train.apply(lambda row: get_text(row.design_en, row.annotation), axis=1)

In [45]:
# One counter triple per distinct annotated string, taken from the test set
# first and then from the training set. The three slots are filled by the
# following cells; every key gets its own fresh list.
labels = {
    entity_text: [0,0,0]
    for frame in (X_test, X_train)
    for texts in frame.annotation_str
    for entity_text in texts
}

In [46]:
# Slot 0: how often the string is annotated in the test set.
# Slot 1: how often it also appears among the predicted strings of the same design.
for index, row in X_test.iterrows():
    predicted_texts = row.prediction_str
    for entity_text in row.annotation_str:
        counters = labels[entity_text]
        counters[0] += 1
        if entity_text in predicted_texts:
            counters[1] += 1

In [47]:
# Slot 2: how often the string is annotated in the training set.
for index, row in X_train.iterrows():
    for entity_text in row.annotation_str:
        labels[entity_text][2] += 1


In [48]:
# One row per annotated string; the three counter slots become named columns.
label_scores = (
    pd.DataFrame.from_dict(labels, orient="index")
    .rename(columns={0:"Annotation", 1:"Prediction", 2:"Total_in_train"})
)

In [49]:
# Share of test annotations of each string that were also predicted.
# Computed as a vectorised column division instead of a row-wise apply: the
# values are the same (strings that occur only in the training set have 0
# annotations and yield NaN), and it also works on an empty frame.
label_scores["Accuracy"] = label_scores["Prediction"] / label_scores["Annotation"]

In [50]:
# Spot check: scores of all annotated strings containing "Alexander".
label_scores.loc[label_scores.index.str.contains("Alexander")]

Unnamed: 0,Annotation,Prediction,Total_in_train,Accuracy
Severus Alexander,29,29,132,1.0
Domitius Alexander,1,1,3,1.0
Alexander the Great,1,1,4,1.0


In [51]:
# The ten strings with the lowest accuracy; Total_in_train shows how often each was seen during training.
label_scores.sort_values("Accuracy").head(10)

Unnamed: 0,Annotation,Prediction,Total_in_train,Accuracy
Via Trajana,2,0,0,0.0
Omphale,1,0,0,0.0
barley,1,0,2,0.0
figures,1,0,0,0.0
Moesia,2,0,0,0.0
bukephalion,1,0,0,0.0
Augean Stables,1,0,0,0.0
crab claw,4,0,0,0.0
visor,1,0,0,0.0
Goth,1,0,0,0.0


## Map result back

In [52]:
# Copy the gold annotations into a "y" column, which the map_result_ner call below reads as row.y.
X_test["y"] = y_test["y"]

In [53]:
# Show the combined test frame (pandas truncates the display).
X_test

Unnamed: 0,id,design_en,annotation,prediction,annotation_str,prediction_str,y
0,10613,Aurelian and Iupiter,"[(0, 8, PERSON), (13, 20, PERSON)]","[(0, 8, PERSON), (13, 20, PERSON)]","[Aurelian, Iupiter]","[Aurelian, Iupiter]","[(0, 8, PERSON), (13, 20, PERSON)]"
1,15462,"Ensign, vase, and shield","[(0, 6, OBJECT), (8, 12, OBJECT), (18, 24, OBJ...","[(0, 6, OBJECT), (8, 12, OBJECT), (18, 24, OBJ...","[Ensign, vase, shield]","[Ensign, vase, shield]","[(0, 6, OBJECT), (8, 12, OBJECT), (18, 24, OBJ..."
2,15911,"Fides, standing front, holding two standard; a...","[(35, 43, OBJECT), (47, 55, OBJECT)]","[(35, 43, OBJECT), (47, 55, OBJECT)]","[standard, standard]","[standard, standard]","[(35, 43, OBJECT), (47, 55, OBJECT)]"
3,5460,"Head of a goddess, right, wearing crossed band...","[(0, 4, OBJECT), (42, 46, OBJECT), (48, 52, OB...","[(0, 4, OBJECT), (42, 46, OBJECT), (48, 52, OB...","[Head, band, hair, bun, nape, head]","[Head, band, hair, bun, nape, head]","[(0, 4, OBJECT), (42, 46, OBJECT), (48, 52, OB..."
4,1852,"Athena standing facing, head right, wearing lo...","[(0, 6, PERSON), (24, 28, OBJECT), (49, 56, OB...","[(0, 6, PERSON), (24, 28, OBJECT), (49, 56, OB...","[Athena, head, garment, helmet, object, hand, ...","[Athena, head, garment, helmet, object, hand, ...","[(0, 6, PERSON), (24, 28, OBJECT), (49, 56, OB..."
...,...,...,...,...,...,...,...
5744,14904,"Dae Caelestis, draped, riding right on lion, h...","[(15, 21, OBJECT), (39, 43, ANIMAL), (53, 57, ...","[(15, 21, OBJECT), (39, 43, ANIMAL), (53, 57, ...","[draped, lion, drum, hand, scepter, hand, rock]","[draped, lion, drum, hand, scepter, hand, rock]","[(15, 21, OBJECT), (39, 43, ANIMAL), (53, 57, ..."
5745,16930,"Head of Constantine the Great, wreath, right; ...","[(0, 4, OBJECT), (8, 29, PERSON), (31, 37, OBJ...","[(0, 4, OBJECT), (8, 29, PERSON), (31, 37, OBJ...","[Head, Constantine the Great, wreath]","[Head, Constantine the Great, wreath]","[(0, 4, OBJECT), (8, 29, PERSON), (31, 37, OBJ..."
5746,7225,Panther walking right.,"[(0, 7, ANIMAL)]","[(0, 7, ANIMAL)]",[Panther],[Panther],"[(0, 7, ANIMAL)]"
5747,18481,"Liberalitas standing right, emptying cornucopi...","[(0, 11, PERSON), (37, 47, OBJECT), (61, 65, O...","[(0, 11, PERSON), (37, 47, OBJECT), (61, 65, O...","[Liberalitas, cornucopia, hand]","[Liberalitas, cornucopia, hand]","[(0, 11, PERSON), (37, 47, OBJECT), (61, 65, O..."


In [54]:
# Map the preprocessed text back for every design id found in preprocess.rules_applied; all other designs keep their text unchanged.
X_test["design_en_orig"] = X_test.swifter.apply(lambda row: preprocess.map_back_design(row.design_en, row.id) if row.id in preprocess.rules_applied else row.design_en, axis=1)

Pandas Apply: 100%|██████████████████████| 5749/5749 [00:00<00:00, 52468.96it/s]


In [55]:
# Map the annotations in "y" back as well (per the output below, the spans then match the original text); designs whose id is not in preprocess.rules_applied keep "y" unchanged.
X_test["y_orig"] = X_test.swifter.apply(lambda row: preprocess.map_result_ner(row.design_en,row.y, row.id) if row.id in preprocess.rules_applied else row.y, axis=1)

Pandas Apply: 100%|███████████████████████| 5749/5749 [00:00<00:00, 6038.00it/s]


In [56]:
# Spot check for designs mentioning "Alexander III" (no rows in the recorded output).
X_test.loc[X_test.design_en.str.contains("Alexander III")]

Unnamed: 0,id,design_en,annotation,prediction,annotation_str,prediction_str,y,design_en_orig,y_orig


In [57]:
# Spot check of the mapping back: "Veil" in the preprocessed text vs. "Veiled" in design_en_orig.
X_test.loc[X_test.design_en.str.contains("Veil")].head(5)

Unnamed: 0,id,design_en,annotation,prediction,annotation_str,prediction_str,y,design_en_orig,y_orig
249,1522,"Veil and draped bust of Demeter, right, wearin...","[(0, 4, OBJECT), (9, 15, OBJECT), (16, 20, OBJ...","[(0, 4, OBJECT), (9, 15, OBJECT), (16, 20, OBJ...","[Veil, draped, bust, Demeter, corn wreath, cor...","[Veil, draped, bust, Demeter, corn wreath, cor...","[(0, 4, OBJECT), (9, 15, OBJECT), (16, 20, OBJ...","Veiled and draped bust of Demeter, right, wear...","[(0, 6, OBJECT), (11, 17, OBJECT), (18, 22, OB..."
317,492,"Veil woman goddess standing facing, head left,...","[(0, 4, OBJECT), (5, 10, PERSON), (36, 40, OBJ...","[(0, 4, OBJECT), (5, 10, PERSON), (36, 40, OBJ...","[Veil, woman, head, object, hand, scepter, hand]","[Veil, woman, head, object, hand, scepter, hand]","[(0, 4, OBJECT), (5, 10, PERSON), (36, 40, OBJ...","Veiled female goddess standing facing, head le...","[(0, 6, OBJECT), (7, 13, PERSON), (39, 43, OBJ..."
341,7211,"Veil Demeter advancing left, holding two torch.","[(0, 4, OBJECT), (5, 12, PERSON), (41, 46, OBJ...","[(0, 4, OBJECT), (5, 12, PERSON), (41, 46, OBJ...","[Veil, Demeter, torch]","[Veil, Demeter, torch]","[(0, 4, OBJECT), (5, 12, PERSON), (41, 46, OBJ...","Veiled Demeter advancing left, holding two tor...","[(0, 6, OBJECT), (7, 14, PERSON), (43, 50, OBJ..."
880,1961,"Veil Demeter standing facing, head left, weari...","[(0, 4, OBJECT), (5, 12, PERSON), (30, 34, OBJ...","[(0, 4, OBJECT), (5, 12, PERSON), (30, 34, OBJ...","[Veil, Demeter, head, garment, corn, hand, cis...","[Veil, Demeter, head, garment, corn, hand, cis...","[(0, 4, OBJECT), (5, 12, PERSON), (30, 34, OBJ...","Veiled Demeter standing facing, head left, wea...","[(0, 6, OBJECT), (7, 14, PERSON), (32, 36, OBJ..."
1082,355,"Veil Demeter standing facing, head left, weari...","[(0, 4, OBJECT), (5, 12, PERSON), (30, 34, OBJ...","[(0, 4, OBJECT), (5, 12, PERSON), (30, 34, OBJ...","[Veil, Demeter, head, corn wreath, garment, co...","[Veil, Demeter, head, corn wreath, garment, co...","[(0, 4, OBJECT), (5, 12, PERSON), (30, 34, OBJ...","Veiled Demeter standing facing, head left, wea...","[(0, 6, OBJECT), (7, 14, PERSON), (32, 36, OBJ..."


# Visualize prediction

In [58]:
# Predict again with as_doc=True; the "y" column of the result is passed to displaCy below.
x_predict_as_doc = model.predict(X_test, as_doc=True)

In [None]:
from spacy import displacy
# Highlight colour per entity label. "salmom" was a typo and not a valid
# colour name, so PLANT entities did not get the intended colour.
colors = {'PERSON': 'mediumpurple','OBJECT': 'greenyellow', 'ANIMAL' : 'orange', 'PLANT': 'salmon', 'VERBS': 'skyblue'}
# displaCy's entity visualizer reads the label filter from the "ents" option;
# the former key "ent" is not an option it knows, so the filter was ignored.
options = {'ents': ['PERSON', 'OBJECT', 'ANIMAL', 'PLANT'], 'colors': colors}
# Render the predicted entities of the test designs inline in the notebook.
displacy.render(x_predict_as_doc.y, 
                style='ent', jupyter=True, options=options)

## Upload data to mysql

In [59]:
# Preprocessing
# Set to False to skip the prediction/upload cell below.
upload = True
id_col = "id"
design_col = "design_en"
# Read the connection string inside a context manager so the file handle is
# closed as soon as the content has been read.
with open("/home/bigdatalab/Projects/D4N4/NLP_release_1.0/db_access.txt", "r") as f:
    access = f.read()
dc =  Database_Connection(access)
# Load the designs to be labelled from "data_designs"; the id column is
# called design_id while the preprocessing runs.
cnt_designs = dc.load_designs_from_db("data_designs", [id_col, design_col])
cnt_designs = cnt_designs.rename(columns={"id": "design_id"})

# Same two steps as for the training data: apply the preprocessing rules,
# then delete question marks and brackets.
cnt_designs["design_en_changed"] = cnt_designs.swifter.apply(lambda row: preprocess.preprocess_design(row.design_en, row.design_id)[0], axis=1)
cnt_designs["design_en_changed"] = cnt_designs.swifter.apply(lambda row: row["design_en_changed"].replace("?", "").replace("(", "").replace(")", ""), axis=1)

Pandas Apply: 100%|█████████████████████████| 7618/7618 [02:22<00:00, 53.36it/s]
Pandas Apply: 100%|█████████████████████| 7618/7618 [00:00<00:00, 206277.69it/s]


In [60]:
# Keep the untouched text as design_en_org, promote the preprocessed text to
# design_en and restore the original id column name, all in one rename.
cnt_designs = cnt_designs.rename(
    columns={
        "design_en": "design_en_org",
        "design_en_changed": "design_en",
        "design_id": "id",
    }
)

In [61]:
if upload == True:
    # Predict the entities of all designs loaded from "data_designs".
    cnt_pred = model.predict_clear(cnt_designs)
    cnt_pred_predictions_only = cnt_pred["y"]

    # Flatten to one output row per predicted entity: the design id as a
    # string followed by the items of the prediction (entity and label).
    ner_rows = []
    for pred_index, (designid, relation_list) in cnt_pred.iterrows():
        for relation in relation_list:
            ner_rows.append((str(designid), *relation))
    cnt_ner_output = pd.DataFrame(ner_rows, columns=["DesignID", "Entity", "Label_Entity"])

    # Write the result table; an existing "cnt_pipeline_ner" table is replaced.
    cnt_ner_output.to_sql("cnt_pipeline_ner", dc.mysql_connection,
                          if_exists="replace", index=False)

In [35]:
# Compare original and preprocessed text of the first designs.
# NOTE(review): this cell's execution count (35) is out of sequence — re-run the notebook top to bottom before trusting the output shown.
cnt_designs.head().style

Unnamed: 0,id,design_en_org,design_en
0,1,"Diademed head of deified Alexander the Great with horn of Ammon, right. Border of dots.","Diadem head of deified Alexander the Great with horn of Ammon, right. Border of dots."
1,6,"Altar, lighted and garlanded.","Altar, lighted and garland."
2,8,Prize amphora on ornamental stand; within linear square and incuse square.,Prize amphora on ornamental stand; within linear square and incuse square.
3,9,Amphora with ribbed surface and crooked handles containing two ears of corn and poppy.,Amphora with ribbed surface and crooked handleholding two corn and poppy.
4,10,"Bust of youthful Anchialos, right, wearing taenia. Border of dots.","Bust of youthful Anchialos, right, wearing taenia. Border of dots."
