# Test following NERSpaCy from Carmen

* As Spacy is already installed, any installation is needed. All models is already setup.

## importing SpaCy model in french

In [1]:
import spacy
nlp = spacy.load("fr_core_news_md")

### Read and load existing text files.

In [2]:
def read_text(file_name):
    with open (file_name, "r", encoding="utf_8") as myfile:
        lines = list(line for line in (l.strip() for l in myfile) if line)
    return str(lines)  

data_bd = read_text("output_savoirs/BNU_01_Didier.txt")
data_bc = read_text("output_savoirs/BNU_02_Colin.txt")

### Applying the language model on files

In [3]:
doc_bd = nlp(data_bd)
doc_bc = nlp(data_bc)

### Highlighting the results
#### first file

In [4]:
from spacy import displacy
options = {"ents": ["LOC", "FAC", "GPE"]}
displacy.render(doc_bd, style = "ent",jupyter = True, options=options)

In [5]:
from spacy import displacy
options = {"ents": ["LOC", "FAC", "GPE"]}
displacy.render(doc_bc, style = "ent",jupyter = True, options=options)

### Display the results as a list
#### Function which brings all information concise

In [6]:
def read_save_ents(doc):
    ne_mentions = []
    if doc.ents: 
        for ent in doc.ents: 
            if (ent.label_ in ["LOC","FAC","GPE"] ):
                ne_mentions.append(ent.text)
                print(ent.text+' - ' +str(ent.start_char) +' - '+ str(ent.end_char) +' - '+ent.label_+ ' - '+str(spacy.explain(ent.label_)))             
    return ne_mentions

#### print the results for each file

In [7]:
print("--- Didier ---")
ne_mentions_bd = read_save_ents(doc_bd)

--- Didier ---
Strasbourg - 101 - 111 - LOC - Non-GPE locations, mountain ranges, bodies of water
Paris - 321 - 326 - LOC - Non-GPE locations, mountain ranges, bodies of water
Raspail - 384 - 391 - LOC - Non-GPE locations, mountain ranges, bodies of water
Paris - 401 - 406 - LOC - Non-GPE locations, mountain ranges, bodies of water
FRANCE - 410 - 416 - LOC - Non-GPE locations, mountain ranges, bodies of water
Strasbourg - 653 - 663 - LOC - Non-GPE locations, mountain ranges, bodies of water
\xa0Savoirs\xa0 - 735 - 750 - LOC - Non-GPE locations, mountain ranges, bodies of water
Paris - 904 - 909 - LOC - Non-GPE locations, mountain ranges, bodies of water
Métopes - 1341 - 1348 - LOC - Non-GPE locations, mountain ranges, bodies of water
Strasbourg - 1878 - 1888 - LOC - Non-GPE locations, mountain ranges, bodies of water
Bibliothèque impériale - 1973 - 1995 - LOC - Non-GPE locations, mountain ranges, bodies of water
place de l’Empereur - 2260 - 2279 - LOC - Non-GPE locations, mountain rang

In [8]:
print("--- Colin ---")
ne_mentions_bc = read_save_ents(doc_bc)

--- Colin ---
Strasbourg - 67 - 77 - LOC - Non-GPE locations, mountain ranges, bodies of water
Paris - 361 - 366 - LOC - Non-GPE locations, mountain ranges, bodies of water
Raspail - 424 - 431 - LOC - Non-GPE locations, mountain ranges, bodies of water
Paris - 441 - 446 - LOC - Non-GPE locations, mountain ranges, bodies of water
FRANCE - 450 - 456 - LOC - Non-GPE locations, mountain ranges, bodies of water
Strasbourg - 655 - 665 - LOC - Non-GPE locations, mountain ranges, bodies of water
\xa0Savoirs\xa0 - 833 - 848 - LOC - Non-GPE locations, mountain ranges, bodies of water
Paris - 1002 - 1007 - LOC - Non-GPE locations, mountain ranges, bodies of water
Métopes - 1439 - 1446 - LOC - Non-GPE locations, mountain ranges, bodies of water
Strasbourg - 1939 - 1949 - LOC - Non-GPE locations, mountain ranges, bodies of water
Bibliothèque - 2078 - 2090 - LOC - Non-GPE locations, mountain ranges, bodies of water
Strasbourg - 2124 - 2134 - LOC - Non-GPE locations, mountain ranges, bodies of water


### Function to count the NE

In [9]:
def count_ents(ne_mentions):
    sorted_count_occ = {}
    count_occ = {i:ne_mentions.count(i) for i in ne_mentions}
    #print(count_occ)
    sorted_count_occ = {k: v for k, v in sorted(count_occ.items(), key=lambda item: item[1])}
    #print(sorted_count_occ)
    for k in sorted_count_occ:
        print(k, sorted_count_occ[k])
    return sorted_count_occ

#### print the results for each file

In [10]:
print("--- Didier ---")
sorted_count_occ_bd = count_ents(ne_mentions_bd)

--- Didier ---
Raspail 1
FRANCE 1
\xa0Savoirs\xa0 1
Métopes 1
Bibliothèque impériale 1
place de l’Empereur 1
Nohlen\xa0 1
Neckelmann 1
Forêt-Noire 1
Badois 1
\xa0parlants\xa0 1
Allemagne 1
Metz 1
Aubette 1
Europe 1
Lessing 1
Erasme 1
Lorraine\xa0 1
Bucer\xa0 1
\xa0Bezirke\xa0 1
Basse-Alsace 1
Haute-Alsace 1
Herrade 1
Landsberg 1
bibliothèque de la Ville 1
Empire allemand 1
Moselle 1
Empire\xa0 1
Frédéric\xa0III 1
Guillaume\xa0II 1
France 1
Baupolitik 1
Berlin\xa0 1
Mann 1
Bauten 1
München\xa0 1
Kaiserplatz 2
palais de l’Empereur 2
Strassburg 2
Saint-Empire 2
Elsass 2
Lothringen 2
Reichsland 3
Paris 4
Lorraine 7
Alsace 9
Strasbourg 10


In [11]:
print("--- Colin ---")

sorted_count_occ_bc = count_ents(ne_mentions_bc)

--- Colin ---
Raspail 1
FRANCE 1
\xa0Savoirs\xa0 1
Métopes 1
Bibliothèque 1
Égypte ancienne\xa0 1
W.\xa0Clarysse 1
H.\xa0Verreth\xa0(éd 1
Guillaume\xa0II\xa0 1
Kaiserliche 1
Empire allemand 1
Deutsches 1
Musées royaux de Berlin 1
Dok 1
Sarapion 1
W.\xa0Spiegelberg 1
France 1
Université impériale de Strasbourg 1
Collège de France 1
Palais 1
Empire 1
Palais universitaire 1
Hanovre 1
\xa0Seminar\xa0 1
Nagada 1
hiver\xa01894/95 1
Ramesseum 1
année\xa0- 1
103\xa0873 1
Londres 1
facilement\xa0!\xa0 1
\xa0a 1
l’«\xa0ägyptologische Professur\xa0 1
Etat 1
München 1
Straßburg 1
Februar\xa01875 1
Université de Strasbourg 1
Faculté\xa0 1
AL\xa0103\xa0873 1
\xa0Dispositionsfonds\xa0 1
Altesse\xa0 1
\xa0Glasplattlieferungen\xa0 1
\xa0le 1
S.\xa0P.\xa0Vleeming 1
Papyruskartells\xa0 1
Doc.\xa06\xa0 1
Doc.\xa07\xa0 1
Égypte\xa0 1
Nefret 1
disponibles\xa0- 1
Occident 1
Hohenlohe 1
Langenburg 1
la Terre 1
-\xa0Berlin 1
Munich 1
Alsaciens 1
Allemands 1
Dominicains 1
Fr.\xa0Igersheim 1
Athènes 1
Allemagne\

### As the results are listed and some are classified wrongly, the NLP need to be ordered to avoid misclassification

#### print the new results without the misclassification for both files

In [12]:
#print(nlp.pipe_names)
print(nlp.config.to_str())
if ("entity_ruler" in nlp.pipe_names):
    nlp.remove_pipe("entity_ruler")
config = {"overwrite_ents": True }
ruler = nlp.add_pipe("entity_ruler", config=config)
#print(nlp.pipe_names)

placenames = ["Lorraine", "Berlin" , "München"]
for p in placenames:
    ruler.add_patterns([{"label": "GPE", "pattern": p}])

not_placenames = ["\xa0Savoirs\xa0", "Métopes", "Nohlen\xa0", "Neckelmann", "\xa0parlants\xa0",
                 "Erasme", "Lorraine\xa0","Bucer\xa0", "\xa0Bezirke\xa0", "Empire\xa0", 
                 "Frédéric\xa0III", "Guillaume\xa0II" , "Baupolitik", "Mann" , "Bauten"
                 ]
for p in not_placenames:
    ruler.add_patterns([{"label": "PER", "pattern": p}])  
     
doc_bd = nlp(data_bd)
ne_mentions = read_save_ents(doc_bd)

#print(nlp.config.to_str())

[paths]
train = "corpus/fr-dep-news/train.spacy"
dev = "corpus/fr-dep-news/dev.spacy"
vectors = "corpus/fr_vectors"
raw = null
init_tok2vec = null
vocab_data = null

[system]
gpu_allocator = null
seed = 0

[nlp]
lang = "fr"
pipeline = ["tok2vec","morphologizer","parser","senter","ner","attribute_ruler","lemmatizer"]
disabled = ["senter"]
before_creation = null
after_creation = null
after_pipeline_creation = null
batch_size = 256
tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}

[components]

[components.attribute_ruler]
factory = "attribute_ruler"
validate = false

[components.lemmatizer]
factory = "lemmatizer"
mode = "rule"
model = null
overwrite = false

[components.morphologizer]
factory = "morphologizer"

[components.morphologizer.model]
@architectures = "spacy.Tagger.v1"
nO = null

[components.morphologizer.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode:width}
upstream = "*"

[components.ner]
factory = "ner"
moves = null
update

In [13]:
#print(nlp.pipe_names)
print(nlp.config.to_str())
if ("entity_ruler" in nlp.pipe_names):
    nlp.remove_pipe("entity_ruler")
config = {"overwrite_ents": True }
ruler = nlp.add_pipe("entity_ruler", config=config)
#print(nlp.pipe_names)

placenames = ["Lorraine", "Berlin" , "München"]
for p in placenames:
    ruler.add_patterns([{"label": "GPE", "pattern": p}])

not_placenames = ["\xa0Savoirs\xa0", "Métopes", "Nohlen\xa0", "Neckelmann", "\xa0parlants\xa0",
                 "Erasme", "Lorraine\xa0","Bucer\xa0", "\xa0Bezirke\xa0", "Empire\xa0", 
                 "Frédéric\xa0III", "Guillaume\xa0II" , "Baupolitik", "Mann" , "Bauten"
                 ]
for p in not_placenames:
    ruler.add_patterns([{"label": "PER", "pattern": p}])  
     
doc_bc = nlp(data_bc)
ne_mentions = read_save_ents(doc_bc)

#print(nlp.config.to_str())

[paths]
train = "corpus/fr-dep-news/train.spacy"
dev = "corpus/fr-dep-news/dev.spacy"
vectors = "corpus/fr_vectors"
raw = null
init_tok2vec = null
vocab_data = null

[system]
gpu_allocator = null
seed = 0

[nlp]
lang = "fr"
pipeline = ["tok2vec","morphologizer","parser","senter","ner","attribute_ruler","lemmatizer","entity_ruler"]
disabled = ["senter"]
before_creation = null
after_creation = null
after_pipeline_creation = null
batch_size = 256
tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}

[components]

[components.attribute_ruler]
factory = "attribute_ruler"
validate = false

[components.entity_ruler]
factory = "entity_ruler"
ent_id_sep = "||"
overwrite_ents = true
phrase_matcher_attr = null
validate = false

[components.lemmatizer]
factory = "lemmatizer"
mode = "rule"
model = null
overwrite = false

[components.morphologizer]
factory = "morphologizer"

[components.morphologizer.model]
@architectures = "spacy.Tagger.v1"
nO = null

[components.morphologizer.model.tok2vec]
@architect

In [15]:
bd_ent_as_set = set(sorted_count_occ_bd.keys())
intersection = bd_ent_as_set.intersection(sorted_count_occ_bd.keys())
intersection_as_list = list(intersection)
common_places = intersection_as_list
print(common_places)

['FRANCE', 'Europe', 'Empire\\xa0', 'Erasme', 'bibliothèque de la Ville', 'Badois', 'Bibliothèque impériale', 'Aubette', 'Guillaume\\xa0II', 'Métopes', 'Kaiserplatz', 'Basse-Alsace', 'Neckelmann', 'Alsace', 'Metz', 'Frédéric\\xa0III', 'Lorraine', 'Saint-Empire', 'Herrade', 'Paris', '\\xa0parlants\\xa0', 'Moselle', 'place de l’Empereur', 'Lorraine\\xa0', '\\xa0Bezirke\\xa0', 'Mann', 'palais de l’Empereur', 'München\\xa0', '\\xa0Savoirs\\xa0', 'Allemagne', 'Lessing', 'Strassburg', 'Forêt-Noire', 'Bucer\\xa0', 'Haute-Alsace', 'Bauten', 'Raspail', 'Landsberg', 'Lothringen', 'Elsass', 'Nohlen\\xa0', 'Baupolitik', 'France', 'Berlin\\xa0', 'Reichsland', 'Empire allemand', 'Strasbourg']


In [16]:
def show_adj(doc):
    adjectives = []
    for token in doc:
        if token.pos_ == 'ADJ':
            adjectives.append(token)
    print(adjectives)
    return adjectives

In [17]:
print("--- Didier ---")
adjectives_bd = show_adj(doc_bd)

--- Didier ---
[nationale, no\xa01, Première, numérique, Mêmes, politique, nationale, numérique, différents, expérimentale, heuristiques, collaborative, géographique, supérieur, gestionnaire, nationale, nouveau, impériale, savantslieubibliothèque, symbolique, nouveau, allemandes, actuelle, régional, politique, allemand, régionaux, centrale, culturel, éducatif, politique, idéologique, nouveau, allemand, symbolique, techniques, régulières, régional, nouveau, dernier, même, mêmes, mêmes, vosgien, intérieurs, même, iconographique, tous, imagées, allégoriques, historiques, nouvel, politique, culturel, européen, particulières, tous, haut, régional, principales, princières, ministériels, riche, intéressant, principale, triangulaire, allégoriques, toutes, première, nord, autres, allégoriques, modernes, humain, décoratifs, actuelle, antiques, Tous, sûr, essentiels, générale, remarquable, programmatique, iconographique, grande, seuls, centraux, haut, antique, principale, autre, central, allemand

In [18]:
print("--- Colin ---")
adjectives_bc = show_adj(doc_bc)

# There are some missed matches, for instance, in "thriving place", thriving is categorised as verb.

--- Colin ---
[\xa0bibliothèque, précieux, de\xa01870, no\xa02, Première, numérique, Mêmes, précieux, numérique, différents, expérimentale, heuristiques, collaborative, géographique, supérieur, gestionnaire, \xa0bibliothèque, précieux, de\xa01870, précieuses, nationale, remarquable, ostraca, régulière, anecdotique, égyptien, hiéroglyphique, démotique, copte, arabe, nationale, autre, authentiques, exceptionnel, tous, susceptibles, académique, économique, inestimables, universitaires, culturel, future, connues, diplomatique, interne, allemand, étrangères, dernier, strasbourgeoise, financiers, allemandes, soucieuses, grecs, prussiennes, grecs, littéraires, royaux, vrai, classique, égyptien, scientifiques, long, papyrologiques, fondateur, inconnues, strasbourgeois, tous, impossible, autre, départementales, Bas-Rhin, relatifs, égyptiennes, impériale, original, culturelle, premiers, majeures, petite, académique, savant, grande, égyptiens, strasbourgeoises, seule, scientifique, prosaïque, aca

### Calculating concordances of NE in text, we also keep syntax information for the following step.

In [19]:
#see: https://spacy.io/api/token#attributes
#IOB code of named entity tag. 3 means the token begins an entity, 2 means it is outside an entity, 
#1 means it is inside an entity, and 0 means no entity tag is set.
#print(len(doc))
# Adapt to more than 3 mutitoken NE.
    
def calculate_concordances(doc, window):
    count = 0
    ent_context = []
    id_ent = 0
    right_context, left_context = [], []
    for token in doc:

        if (token.ent_type_ in ["LOC","FAC","GPE"]):

            if (token.ent_iob == 3 
                and doc[count+1].ent_iob == 2): #single token NE

                ne = token.text
                left_context = doc[count-window:count]
                right_context = doc[count+1:count+window+1]

                ent_context.append({"id":id_ent, "ent": ne, "left_context":left_context, "right_context":right_context})
                id_ent = id_ent + 1

            elif (token.ent_iob == 3  and doc[count+1].ent_iob == 1): #multi-token NE, begin
                ent_started = True
                ne = token.text
                left_context = doc[count-window:count]
                right_context = ""

            elif (ent_started == True and token.ent_iob == 1):  #multi-token NE, continuation
                ent_started = False
                ne = ne + " " + token.text
                right_context = doc[count+1:count+window+1]

                ent_context.append({"id":id_ent, "ent": ne, "left_context":left_context, "right_context":right_context})
                id_ent = id_ent + 1

        count = count + 1   
    return ent_context    

ent_context_bd = calculate_concordances(doc_bd, 20)
ent_context_bc = calculate_concordances(doc_bc, 20)

### What each author is saying about places?

#### Adjectives are good indicators for obtaining somehow the emotional sense of the text associated to the context of a place. We display concordances of NE with adjectives highlighted.


In [30]:
from termcolor import colored

def print_concordances(ent_context):
    for nec in ent_context:
        exc_msg = colored("Excerpt #"+str(nec["id"]), color='blue')
        print(exc_msg)
        print(' '.join(["_"+x.text+"_" if x.pos_=='ADJ' else x.text for x in nec["left_context"]]) 
          + " **" + nec["ent"] + "** " +
          ' '.join(["_"+x.text+"_" if x.pos_=='ADJ' else x.text for x in nec["right_context"]])) 
        print("")    

In [31]:
print("--- Didier ---")
print_concordances(ent_context_bd)

--- Didier ---
[34mExcerpt #0[0m
[ ' Science et politique\xa0 : le message de pierre de la ' , ' Bibliothèque _nationale_ et universitaire de **Strasbourg** ' , ' La Revue de la BNU _no\xa01_ ' , ' Christophe ' , ' Didier ' , '

[34mExcerpt #1[0m
' , ' supervision ' , ' Axel ' , ' Le Roy ' , ' _Première_ édition _numérique_ , **Paris** , 2020 dans le cadre du projet Savoirs . ' , ' EHESS ' , ' 54 bd Raspail '

[34mExcerpt #2[0m
, Paris , 2020 dans le cadre du projet Savoirs . ' , ' EHESS ' , ' 54 bd **Raspail** ' , ' 75006 Paris ' , ' FRANCE ' , ' Creative ' , ' Commons Attribution ' ,

[34mExcerpt #3[0m
le cadre du projet Savoirs . ' , ' EHESS ' , ' 54 bd Raspail ' , ' 75006 **Paris** ' , ' FRANCE ' , ' Creative ' , ' Commons Attribution ' , ' BNU ' , '

[34mExcerpt #4[0m
Savoirs . ' , ' EHESS ' , ' 54 bd Raspail ' , ' 75006 Paris ' , ' **FRANCE** ' , ' Creative ' , ' Commons Attribution ' , ' BNU ' , ' BY - NC -

[34mExcerpt #5[0m
, ' Science et _politique_ : le message d

In [32]:
print("--- Colin ---")
print_concordances(ent_context_bc)

--- Colin ---
[34mExcerpt #0[0m
 **Strasbourg** compensa la perte des manuscrits _précieux_ ' , ' brûlés dans le siège _de\xa01870_ ' , ' La Revue de

[34mExcerpt #1[0m
' , ' supervision ' , ' Axel ' , ' Le Roy ' , ' _Première_ édition _numérique_ , **Paris** , 2020 dans le cadre du projet Savoirs . ' , ' EHESS ' , ' 54 bd Raspail '

[34mExcerpt #2[0m
, Paris , 2020 dans le cadre du projet Savoirs . ' , ' EHESS ' , ' 54 bd **Raspail** ' , ' 75006 Paris ' , ' FRANCE ' , ' Creative ' , ' Commons Attribution ' ,

[34mExcerpt #3[0m
le cadre du projet Savoirs . ' , ' EHESS ' , ' 54 bd Raspail ' , ' 75006 **Paris** ' , ' FRANCE ' , ' Creative ' , ' Commons Attribution ' , ' BNU ' , '

[34mExcerpt #4[0m
Savoirs . ' , ' EHESS ' , ' 54 bd Raspail ' , ' 75006 Paris ' , ' **FRANCE** ' , ' Creative ' , ' Commons Attribution ' , ' BNU ' , ' BY - NC -

[34mExcerpt #5[0m
' les _Mêmes_ Conditions ' , ' Comment la création d\'une " bibliothèque de ' , ' papyrus " à **Strasbourg** compensa l

[34mExcerpt #299[0m
Gesellschaft ou des musées ' , ' allemands et _étrangers_ ( Berlin , Bonn , Liverpool et ' , ' **Bruxelles** ) . ' , ' Pour Spiegelberg , les deux ' , ' voyages sont l’ occasion de s’ immerger

[34mExcerpt #300[0m
voyages sont l’ occasion de s’ immerger dans le milieu _cosmopolite_ de ' , ' l’ égyptologie de terrain en **Égypte** . Au moment ' , ' de lancer sa carrière et de fonder sa crédibilité _locale_ sur un ' ,

[34mExcerpt #301[0m
quelques mois après la prise de Strasbourg par les ' , ' troupes _françaises_ , la publication des graffitis copiés **en\xa01895/96** . Lorsqu’ il signe les avant-propos , le ' , ' 7\xa0janvier 1919 -\xa0à cette ' , ' date ,

[34mExcerpt #302[0m
son expulsion vers l’ Allemagne\xa0- c’ est ' , ' vers l’ âge d’ or de son _premier_ voyage en **Égypte** que le ' , ' professeur _strasbourgeois_ tourne ses pensées\xa0 : « \xa0Aujourd’ _hui_ , au ' , ' moment

[34mExcerpt #303[0m
' , ' anglais\xa0!\xa0 » ' , ' W.\xa0Spiegelberg , 

### Functions to display place mentions and adjectives in the context of the mention.

In [35]:
from IPython.display import HTML, display
import tabulate

def create_display_table_ent_context(ent_context):
    table = []
    for nec in ent_context:
        table.append([[x.text for x in nec["left_context"] if x.pos_=='ADJ'], 
                      nec["ent"], 
                      [x.text for x in nec["right_context"] if x.pos_=='ADJ']])
       
    display(HTML(tabulate.tabulate(table, tablefmt='html')))
    return table

def create_display_table_ent_context_from_mention(ent_context, mention):
    table = []
    for nec in ent_context:
        if (nec["ent"] == mention):
            table.append([[x.text for x in nec["left_context"] if x.pos_=='ADJ'], 
                      nec["ent"], 
                      [x.text for x in nec["right_context"] if x.pos_=='ADJ']])
       
    display(HTML(tabulate.tabulate(table, tablefmt='html')))
    return table

In [36]:
print("--- Didier ---")
table_ent_cont_bd = create_display_table_ent_context(ent_context_bd)
print(len(table_ent_cont_bd))

--- Didier ---


0,1,2
['nationale'],Strasbourg,['no\\xa01']
"['Première', 'numérique']",Paris,[]
[],Raspail,[]
[],Paris,[]
[],FRANCE,[]
"['politique', 'nationale']",Strasbourg,[]
[],\xa0Savoirs\xa0,['numérique']
[],Paris,['différents']
['nationale'],Strasbourg,['nouveau']
['nouveau'],Bibliothèque impériale,['savantslieubibliothèque']


73


In [37]:
print("--- Colin ---")
table_ent_cont_bc = create_display_table_ent_context(ent_context_bc)
print(len(table_ent_cont_bc))

--- Colin ---


0,1,2
[],Strasbourg,"['précieux', 'de\\xa01870']"
"['Première', 'numérique']",Paris,[]
[],Raspail,[]
[],Paris,[]
[],FRANCE,[]
['Mêmes'],Strasbourg,['précieux']
[],\xa0Savoirs\xa0,['numérique']
[],Paris,['différents']
['\\xa0bibliothèque'],Strasbourg,"['précieux', 'de\\xa01870']"
"['de\\xa01870', 'précieuses']",Bibliothèque,"['nationale', 'remarquable', 'ostraca']"


376


### Display similar information for common places.


In [38]:
print("--- Didier ---")
for p in common_places:
    create_display_table_ent_context_from_mention(ent_context_bd, p)

--- Didier ---


0,1,2
[],FRANCE,[]


0,1,2
['grande'],Europe,"['seuls', 'centraux']"


0,1,2
"['disparues', 'ancien']",Empire\xa0,['nouvel']


0,1,2
"['intérieurs', 'même']",Badois,"['iconographique', 'tous']"


0,1,2
['nouveau'],Bibliothèque impériale,['savantslieubibliothèque']


0,1,2
[],Aubette,[]


0,1,2
['modernes'],Guillaume\xa0II,[]


0,1,2
['allemandes'],Kaiserplatz,"['actuelle', 'régional']"
"['manifeste', 'politique', 'dernier']",Kaiserplatz,[]


0,1,2
[],Basse-Alsace,[]


0,1,2
"['politique', 'idéologique']",Alsace,"['nouveau', 'allemand', 'symbolique']"
['régional'],Alsace,[]
['princières'],Alsace,['ministériels']
['grands'],Alsace,[]
[],Alsace,[]
[],Alsace,[]
"['dernier', 'actuels']",Alsace,[]
['directe'],Alsace,[]
[],Alsace,['germanophiles']


0,1,2
"['principales', 'princières']",Metz,['ministériels']


0,1,2
['modernes'],Frédéric\xa0III,[]


0,1,2
"['politique', 'idéologique']",Lorraine,"['nouveau', 'allemand', 'symbolique']"
['régional'],Lorraine,['principales']
[],Lorraine,['ministériels']
[],Lorraine,[]
[],Lorraine,['dernier']
['actuels'],Lorraine,[]
['directe'],Lorraine,[]


0,1,2
"['locales', 'XVe', 'XVIe\\xa0siècles']",Saint-Empire,"['unificatrice', 'incontestable', 'artistique']"
['dernier'],Saint-Empire,['actuels']


0,1,2
['dernier'],Herrade,[]


0,1,2
"['Première', 'numérique']",Paris,[]
[],Paris,[]
[],Paris,['différents']
[],Paris,[]


0,1,2
"['iconographique', 'tous']",\xa0parlants\xa0,"['imagées', 'allégoriques', 'historiques']"


0,1,2
[],Moselle,[]


0,1,2
['grands'],Lorraine\xa0,[]


0,1,2
[],\xa0Bezirke\xa0,[]


0,1,2
[],München\xa0,[]


0,1,2
[],\xa0Savoirs\xa0,['numérique']


0,1,2
"['principales', 'princières']",Allemagne,[]


0,1,2
"['central', 'allemande']",Lessing,['européennes']


0,1,2
"['autre', 'central', 'allemande']",Strassburg,['européennes']
[],Strassburg,[]


0,1,2
[],Bucer\xa0,[]


0,1,2
[],Haute-Alsace,['dernier']


0,1,2
[],Raspail,[]


0,1,2
['dernier'],Landsberg,[]


0,1,2
[],Lothringen,['véritable']
[],Lothringen,[]


0,1,2
[],Elsass,[]
[],Elsass,[]


0,1,2
[],Nohlen\xa0,['même']


0,1,2
[],France,"['germanophiles', 'français']"


0,1,2
[],Berlin\xa0,[]


0,1,2
[],Reichsland,[]


0,1,2
"['artistique', 'culturel', 'germaniques', 'nouvel']",Empire allemand,['gratuites']


0,1,2
['nationale'],Strasbourg,['no\\xa01']
"['politique', 'nationale']",Strasbourg,[]
['nationale'],Strasbourg,['nouveau']
['régional'],Strasbourg,"['politique', 'allemand', 'régionaux']"
"['intérieurs', 'même']",Strasbourg,"['iconographique', 'tous']"
"['principales', 'princières']",Strasbourg,[]
['ancienne'],Strasbourg,['toute']
['toute'],Strasbourg,"['prussiennes', 'ancienne']"
['local'],Strasbourg,['allemandes']
[],Strasbourg,[]


In [39]:
print("--- Colin ---")
for p in common_places:
    create_display_table_ent_context_from_mention(ent_context_bc, p)

--- Colin ---


0,1,2
[],FRANCE,[]


0,1,2
['diverses'],Europe,['nombreux']


0,1,2
"['universitaires', 'culturel']",Alsace,[]
['dernier'],Alsace,['strasbourgeoise']
[],Alsace,['autre']
"['original', 'culturelle']",Alsace,['premiers']
"['académique', 'égyptiennes', 'universitaire']",Alsace,"['politique', 'militaire', 'culturelle']"
[],Alsace,"['compétent', 'extraordinaires']"
['simple'],Alsace,[]
['impérial'],Alsace,['extraordinaire']
['budgétaire'],Alsace,[]
['seul'],Alsace,[]


0,1,2
"['universitaires', 'culturel']",Lorraine,[]
['dernier'],Lorraine,['strasbourgeoise']
[],Lorraine,['autre']
[],Lorraine,['jeune']
[],Lorraine,[]
[],Lorraine,"['compétent', 'extraordinaires']"
[],Lorraine,[]
['simple'],Lorraine,[]
['impérial'],Lorraine,['extraordinaire']
['budgétaire'],Lorraine,[]


0,1,2
"['Première', 'numérique']",Paris,[]
[],Paris,[]
[],Paris,['différents']
['célèbre'],Paris,[]


0,1,2
[],\xa0Savoirs\xa0,['numérique']


0,1,2
"['politique', 'militaire', 'culturelle']",Allemagne,['de\\xa01870']
"['seuls', 'premier', 'culturelles']",Allemagne,['complètes']


0,1,2
[],Raspail,[]


0,1,2
"['politique', 'militaire', 'culturelle']",France,['de\\xa01870']


0,1,2
"['diplomatique', 'interne', 'allemand']",Reichsland,"['étrangères', 'dernier']"
[],Reichsland,['24\\xa0mars']
['élégante'],Reichsland,"['culturel', 'augustes']"
[],Reichsland,['universitaires']
"['grand', 'papyrologique']",Reichsland,['interuniversitaire']


0,1,2
"['connues', 'diplomatique', 'interne']",Empire allemand,['étrangères']


0,1,2
[],Strasbourg,"['précieux', 'de\\xa01870']"
['Mêmes'],Strasbourg,['précieux']
['\\xa0bibliothèque'],Strasbourg,"['précieux', 'de\\xa01870']"
"['précieuses', 'nationale']",Strasbourg,"['remarquable', 'ostraca']"
['prussiennes'],Strasbourg,"['grecs', 'littéraires']"
['vrai'],Strasbourg,['classique']
"['académique', 'égyptiennes']",Strasbourg,"['universitaire', 'politique']"
[],Strasbourg,['naissants']
['égyptienne'],Strasbourg,"['nécessaire', 'bonne']"
['égyptologique'],Strasbourg,['probable']


### Display results side by side.

In [40]:
comparison_table = []
comparison_table.append(["place mention", "Didier", "Colin"])
for p in common_places:
    
    bd_left_context = [x[0] for x in table_ent_cont_bd if x[1]==p]
    bd_right_context = [x[2] for x in table_ent_cont_bd if x[1]==p]
    
    bc_left_context = [x[0] for x in table_ent_cont_bc if x[1]==p]
    bc_right_context = [x[0] for x in table_ent_cont_bc if x[1]==p]
    
    comparison_table.append((p, [item for sublist in bd_left_context + bd_right_context for item in sublist], 
                             [item for sublist in bc_left_context + bc_right_context for item in sublist]))
    
display(HTML(tabulate.tabulate(comparison_table, tablefmt='html')))

0,1,2
place mention,Didier,Colin
FRANCE,[],[]
Europe,"['grande', 'seuls', 'centraux']","['diverses', 'diverses']"
Empire\xa0,"['disparues', 'ancien', 'nouvel']",[]
Erasme,[],[]
bibliothèque de la Ville,[],[]
Badois,"['intérieurs', 'même', 'iconographique', 'tous']",[]
Bibliothèque impériale,"['nouveau', 'savantslieubibliothèque']",[]
Aubette,[],[]
Guillaume\xa0II,['modernes'],[]
