Developed by Patricia Klinger; modified by Sebastian Gampe, Kerim Gencer, and Chrisowalandis Deligio.

In [1]:
import sys
# Extend the module search path with the parent directory; presumably this is
# what makes the project-local `cnt` package imported below resolvable when
# the notebook is run from its own folder — TODO confirm.
sys.path.append('../')
import pandas as pd
import random
import os
import numpy as np
from cnt.model import DesignEstimator, load_ner_model_v2
from cnt.annotate import (annotate, annotate_single_design, 
                          annotate_designs, annotate_designs_german, 
                          extract_string_from_annotation)
from cnt.evaluate import Metrics
import spacy
from cnt.io import  Database_Connection
import warnings
# Suppress all warnings for the rest of the session. NOTE(review): this also
# hides deprecation and data-quality warnings from pandas/spaCy/sklearn.
warnings.filterwarnings('ignore')

In [None]:
# Database connection used by every load_*_from_db call below.
# Format of the part after "://": user:password@IP/Database
# The URL is taken from the CNT_DATABASE_URL environment variable when it is
# set, so that real credentials never have to be written into (and committed
# with) the notebook. The literal is only the original placeholder fallback.
dc =  Database_Connection(
    os.environ.get("CNT_DATABASE_URL", "mysql+mysqlconnector://root:YourConnection")
)

In [3]:
# Fetch the design id and design text columns from the "designs" table.
# Presumably returns a pandas DataFrame (it is later passed to
# annotate_designs and model.predict) — verify against cnt.io.
design_table = "designs"
design_columns = ["DesignID", "Design"]
designs = dc.load_designs_from_db(design_table, design_columns)

### Load and annotate designs

In [4]:
# Entity label -> database table holding the names for that label.
entity_tables = {
    "PERSON": "nlp_list_person",
    "OBJECT": "nlp_list_obj_ger",
    "ANIMAL": "nlp_list_animal_ger",
    "PLANT": "nlp_list_plant_ger",
}

# Load every entity list with identical arguments: the "name" and
# "alternativenames" columns, with "alternativenames" given as the column to
# split on ",". The meaning of the trailing True flag is not visible here —
# see cnt.io.Database_Connection.load_entities_from_db.
entities = {
    label: dc.load_entities_from_db(
        table, ["name", "alternativenames"], ["alternativenames"], ",", True
    )
    for label, table in entity_tables.items()
}

In [5]:
# Annotate the "Design" column of every design with the loaded entity lists,
# then keep only the rows that received at least one annotation.
all_annotations = annotate_designs(entities, designs, "Design")
has_annotation = all_annotations.annotations.map(len) > 0
annotated_designs = all_annotations[has_annotation]

In [6]:
# (rows, columns) of the designs that have at least one annotation.
annotated_designs.shape

(5366, 3)

In [7]:
# Preview the first five annotated designs.
annotated_designs.head(5)

Unnamed: 0,Design,DesignID,annotations
0,Kopf des vergöttlichten Alexander des Großen n...,1,"[(0, 4, OBJECT), (24, 44, PERSON), (61, 67, OB..."
1,Flammender und bekränzter Altar.,6,"[(26, 31, OBJECT)]"
3,Amphora mit gerippter Bauchoberfläche und gebo...,9,"[(0, 7, OBJECT), (72, 77, PLANT), (82, 90, PLA..."
4,Brustbild des jugendlichen Anchialos nach rech...,10,"[(0, 9, OBJECT), (27, 36, PERSON), (53, 59, OB..."
5,Umgekehrter Anker; unter der linken Ankerschau...,11,"[(12, 17, OBJECT), (51, 56, ANIMAL)]"


## Train NER

In [8]:
from sklearn.model_selection import train_test_split

# Inputs carry the design text, targets carry the annotations; both keep
# DesignID so rows can still be identified after the split.
design_texts = annotated_designs[["DesignID", "Design"]]
design_labels = annotated_designs[["DesignID", "annotations"]]

# 75 % train / 25 % test, with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    design_texts,
    design_labels,
    test_size=0.25,
    random_state=12,
)

# The held-out targets expose their annotations under the column name "y".
y_test = y_test.rename(columns={"annotations": "y"})

In [9]:
# Renumber the held-out frames 0..n-1 in place. Presumably the evaluation
# aligns targets and predictions by index — TODO confirm in cnt.evaluate.
for held_out in (X_test, y_test):
    held_out.index = list(range(held_out.shape[0]))

#### Output directory for the trained model

In [10]:
# Directory and model name handed to DesignEstimator for training and to
# load_ner_model_v2 for loading the trained model again.
output_dir =  "../cnt/trained_model/ner/german/"
model_name = "german_cno"

In [11]:
# Entity labels the NER model is trained to recognise.
ner_labels = ("PERSON", "OBJECT", "ANIMAL", "PLANT")

# Build the estimator on top of the German spaCy model, reading the text from
# the "Design" column. NOTE(review): the leading 4 is an opaque positional
# argument (possibly the number of training iterations) — confirm against
# cnt.model.DesignEstimator.
my_estimator = DesignEstimator(
    4, output_dir, model_name, "Design", spacy_model="de_core_news_sm"
)
my_estimator.set_labels(*ner_labels)

# Train on the training split; the cell output shows the model being saved
# to output_dir.
my_estimator.fit(X_train, y_train.annotations, "cnt")

..\cnt\trained_model\ner\german
Saved model to ..\cnt\trained_model\ner\german


## Load and evaluate model

In [12]:
# Load the model from the same directory and name used for training, instead
# of reusing the in-memory estimator.
model = load_ner_model_v2(output_dir, model_name)

In [13]:
# Predict on the held-out designs; the result is scored against y_test in the
# cells below.
x_predict = model.predict(X_test)

In [14]:
# Scoring helper that provides create_score_frame and score_precision_recall,
# both used below.
metrics = Metrics()

In [15]:
# Score the predictions against the held-out targets, one row per label the
# estimator was configured with.
label_set = my_estimator.get_labels()
scores_frame = metrics.create_score_frame(y_test, x_predict, label_set)
scores_frame

Unnamed: 0,Unnamed: 1,Total(TP+FN),Hits(TP),Wrongs(FP),%
0,PERSON,1154,1129,47,97.8
1,OBJECT,3243,3223,32,99.4
2,ANIMAL,401,395,6,98.5
3,PLANT,215,207,10,96.3


In [16]:
# Overall precision and recall on the held-out split, returned as a pair.
# They are multiplied by 100 when printed below, so presumably they are
# fractions in [0, 1] — confirm in cnt.evaluate.Metrics.
precision, recall = metrics.score_precision_recall(y_test, x_predict)

In [17]:
# F1 is the harmonic mean of precision and recall. When both are 0 the
# denominator is 0 (ZeroDivisionError for Python floats, NaN for NumPy
# floats), so F1 is defined as 0.0 in that case.
pr_sum = precision + recall
F1 = (2 * precision * recall) / pr_sum if pr_sum else 0.0

In [18]:
# Report each metric as a percentage rounded to two decimals.
for metric_name, metric_value in (
    ("Precision", precision),
    ("Recall", recall),
    ("F1", F1),
):
    print(metric_name, round(metric_value*100,2))

Precision 98.12
Recall 98.82
F1 98.47


## Visualize predictions

In [19]:
# Predict over all designs loaded from the database (not only the test
# split), requesting as_doc=True; the `.y` column of the result is passed to
# displaCy in the next cell.
x_predict_as_doc = model.predict(designs, as_doc=True)

In [20]:
from spacy import displacy

# Background colour per entity label (CSS colour names).
# Fix: 'salmom' was a typo and not a valid CSS colour, so PLANT entities were
# not rendered with the intended colour. The 'VERBS' entry is kept from the
# original although no such label is rendered here.
colors = {'PERSON': 'mediumpurple','OBJECT': 'greenyellow', 'ANIMAL' : 'orange', 'PLANT': 'salmon', 'VERBS': 'skyblue'}

# Fix: displaCy's entity visualizer reads the label filter from the 'ents'
# option. The previous key 'ent' is not a recognised option, so the filter
# was silently ignored.
options = {'ents': ['PERSON', 'OBJECT', 'ANIMAL', 'PLANT'], 'colors': colors}

# Render the predicted documents inline with highlighted entities.
displacy.render(x_predict_as_doc.y,
                style='ent', jupyter=True, options=options)