## **Part 1 :: Creating training labels from the XML files to train a custom spaCy model**



In [None]:
!mkdir xml_files #Create a directory for interacting with operating system

In [None]:
#Upload the .zip file into your working directory and adjust the file name in the cell below

In [None]:
!unzip Protokoll-Zionistenkongress-Basel_1897-0200.zip -d xml_files/ #unziü the zip file which contains all .xml files with the annotations of the Persons and Locations
                                

In [None]:
#Import the required packages - see also "requirements.txt" for the dependencies

In [None]:
import os
import xml.etree.ElementTree as ET

In [None]:
# Names (not full paths) of all entries in the xml_files/ folder; os.listdir returns them in arbitrary order
xml_files = os.listdir("xml_files")

In [None]:
# Sort by name so the files are processed in a reproducible order
xml_files = sorted(xml_files)

In [None]:
# Sanity check: number of entries found in xml_files/
len(xml_files)

202

**The cell below parses the XML files, extracts the person/place labels, and converts them to spaCy's training format.**

In [None]:
# Map the tag names found in the XML 'custom' attribute to the labels used for training.
TAG_TO_LABEL = {'person': 'PERSON', 'place': 'LOC'}


def _tag_number(token):
  """Return the integer value of a 'key:value' token such as '{offset:8;' or 'length:12;}'."""
  return int(token.split(":")[1].rstrip(";}"))


def _entities_from_tokens(tokens):
  """Convert the space-separated tokens of a 'custom' attribute into (start, end, label) tuples.

  Every 'person' / 'place' token is expected to be directly followed by an offset token
  and a length token, e.g. ['person', '{offset:0;', 'length:6;}'].  Further attributes
  after the length (for instance 'continued:true;}') are ignored, so each tag covers
  exactly offset .. offset+length on its own line.

  Raises IndexError / ValueError when a tag is not followed by well-formed offset and
  length tokens.
  """
  spans = []
  for pos, token in enumerate(tokens):
    label = TAG_TO_LABEL.get(token)
    if label is None:
      continue
    start = _tag_number(tokens[pos + 1])
    length = _tag_number(tokens[pos + 2])
    spans.append((start, start + length, label))  # single tuple in the format expected by spaCy
  return spans


def _drop_overlapping(spans):
  """Return the spans in their original order, skipping any span that overlaps one already kept.

  Spans are treated as half-open character ranges [start, end), so two spans that merely
  touch (one ends where the next starts) are both kept.
  """
  kept = []
  for span in spans:
    if not any(span[0] < other[1] and other[0] < span[1] for other in kept):
      kept.append(span)
  return kept


final_all_ents_tuple = []   # (sentence, {'entities': [...]}) tuples collected from all files
all_sentences_present = []  # every annotated sentence, in processing order

# loop over all the files
for file_name in xml_files:

  print("processing file=================================== ", file_name)

  myroot = ET.parse(os.path.join('xml_files', file_name)).getroot()

  # NOTE(review): only the children of myroot[1][1] are scanned - presumably the element
  # that holds the text lines of a page; confirm for files with a different layout.
  for x in myroot[1][1]:
    if not x.tag.endswith('TextLine'):
      continue

    custom = x.attrib['custom']
    if "person" not in custom and "place" not in custom:
      continue

    # the first two tokens are skipped; the remaining ones hold the tags
    ents = custom.split(" ")[2:]
    print(ents)

    sentence = x[-1][0].text
    if not sentence:
      # no text on this line: the offsets would point nowhere and training needs a string
      continue
    all_sentences_present.append(sentence)

    all_ents = _entities_from_tokens(ents)

    # some lines carry tags with overlapping ranges; keep only the first tag of each overlap
    final_tuple = (sentence, {'entities' : _drop_overlapping(all_ents)})
    print(final_tuple)
    final_all_ents_tuple.append(final_tuple)  # this variable holds all the tuples from all the files
    print("=="*50)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
['person', '{offset:0;', 'length:6;}', 'person', '{offset:8;', 'length:12;}', 'person', '{offset:22;', 'length:18;}', 'person', '{offset:42;', 'length:11;}', 'person', '{offset:55;', 'length:8;}']
('Philos, Ibn Gabirols, Jehuda ben Halewys, Ben Maimons, Spinozas,', {'entities': [(0, 6, 'PERSON'), (8, 20, 'PERSON'), (22, 40, 'PERSON'), (42, 53, 'PERSON'), (55, 63, 'PERSON')]})
['person', '{offset:0;', 'length:6;}']
('Heines kennt diese Geldprotzen nicht, die alles geringschätzen, was', {'entities': [(0, 6, 'PERSON')]})
['person', '{offset:39;', 'length:14;}']
('Präsident: Das Wort hat Herr Architekt Oscar Marmorek:', {'entities': [(39, 53, 'PERSON')]})
['person', '{offset:0;', 'length:14;}']
('Oscar Marmorek: Geliebte Brüder! Ein Wundertag ist der heutigé,', {'entities': [(0, 14, 'PERSON')]})
['person', '{offset:47;', 'length:16;}']
('werden noch andere Referate hören, als das des Herrn Dr. Nordau, die', {'entities': [(47,

In [None]:
# Store all training tuples in a text file, one tuple per line.
# The encoding is set explicitly because the sentences contain non-ASCII characters (e.g. umlauts).
with open("outfile.txt", "w", encoding="utf-8") as outfile:
    outfile.write("\n".join(str(item) for item in final_all_ents_tuple))

## **Part 2 :: Custom training with spaCy**

In [None]:
# Show which spaCy version is installed in this environment
!pip show spacy

Name: spacy
Version: 2.2.4
Summary: Industrial-strength Natural Language Processing (NLP) in Python
Home-page: https://spacy.io
Author: Explosion
Author-email: contact@explosion.ai
License: MIT
Location: /usr/local/lib/python3.7/dist-packages
Requires: catalogue, plac, tqdm, thinc, setuptools, requests, murmurhash, blis, wasabi, srsly, numpy, cymem, preshed
Required-by: fastai, en-core-web-sm


In [None]:
from __future__ import unicode_literals, print_function
import plac
import random
from pathlib import Path
import spacy
from tqdm import tqdm

In [None]:
!mkdir de_spacy_custom

In [None]:
# Name or path of an existing spaCy model to start from; None means a blank German model is created in the next cell
model = None
# NOTE(review): absolute Colab path - adjust when running in another environment
output_dir=Path("/content/de_spacy_custom") #output folder in which the trained model will be stored
n_iter=100 #number of training epochs

In [None]:
# Start from an existing pipeline when `model` names one, otherwise from an empty German pipeline.
if model is None:
    nlp = spacy.blank('de')
    print("Created blank 'de' model")
else:
    nlp = spacy.load(model)
    print("Loaded model '%s'" % model)

# Make sure the pipeline ends with an NER component and keep a handle on it in `ner`.
if 'ner' in nlp.pipe_names:
    ner = nlp.get_pipe('ner')
else:
    ner = nlp.create_pipe('ner')
    nlp.add_pipe(ner, last=True)

Created blank 'de' model


**The cell below contains the spaCy training code.**

**Training for 100 epochs took more than 2 hours on Google Colab.**

In [None]:
# Register every entity label that occurs in the training data with the NER component.
for _, annotations in final_all_ents_tuple:
    for ent in annotations.get('entities'):
        ner.add_label(ent[2])

# Shuffle a copy each epoch so the order of final_all_ents_tuple itself is left untouched
# (later cells slice it, and re-running this cell should not change what they see).
train_data = list(final_all_ents_tuple)

other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
with nlp.disable_pipes(*other_pipes):  # only train NER
    # A blank pipeline needs freshly initialised weights; a loaded model must keep the
    # weights it already has, so training is resumed instead of restarted.
    if model is None:
        optimizer = nlp.begin_training()
    else:
        optimizer = nlp.resume_training()

    for itn in range(n_iter):
        random.shuffle(train_data)
        losses = {}  # accumulated by nlp.update over the whole epoch
        for text, annotations in tqdm(train_data):
            nlp.update(
                [text],         # batch of one text
                [annotations],  # matching batch of one annotation dict
                drop=0.5,       # dropout rate
                sgd=optimizer,
                losses=losses)
        print(losses)

100%|██████████| 1702/1702 [00:50<00:00, 33.47it/s]


{'ner': 3468.15108129049}


100%|██████████| 1702/1702 [01:01<00:00, 27.49it/s]


{'ner': 2360.619252815709}


100%|██████████| 1702/1702 [01:07<00:00, 25.18it/s]


{'ner': 1947.1395176959447}


100%|██████████| 1702/1702 [01:07<00:00, 25.18it/s]


{'ner': 1717.754913793541}


100%|██████████| 1702/1702 [01:07<00:00, 25.17it/s]


{'ner': 1471.8697436801224}


100%|██████████| 1702/1702 [01:06<00:00, 25.54it/s]


{'ner': 1384.0757926612148}


100%|██████████| 1702/1702 [01:06<00:00, 25.78it/s]


{'ner': 1226.588851659266}


100%|██████████| 1702/1702 [01:06<00:00, 25.48it/s]


{'ner': 1119.9870202993018}


100%|██████████| 1702/1702 [01:07<00:00, 25.38it/s]


{'ner': 991.1724086730294}


100%|██████████| 1702/1702 [01:07<00:00, 25.22it/s]


{'ner': 1024.388499801159}


100%|██████████| 1702/1702 [01:06<00:00, 25.49it/s]


{'ner': 931.4536323289365}


100%|██████████| 1702/1702 [01:06<00:00, 25.46it/s]


{'ner': 912.1971778538739}


100%|██████████| 1702/1702 [01:06<00:00, 25.44it/s]


{'ner': 897.1510484181001}


100%|██████████| 1702/1702 [01:07<00:00, 25.07it/s]


{'ner': 851.7189457293396}


100%|██████████| 1702/1702 [01:07<00:00, 25.15it/s]


{'ner': 920.6304967238862}


100%|██████████| 1702/1702 [01:07<00:00, 25.14it/s]


{'ner': 759.2679428220757}


100%|██████████| 1702/1702 [01:07<00:00, 25.34it/s]


{'ner': 797.151085651704}


100%|██████████| 1702/1702 [01:07<00:00, 25.14it/s]


{'ner': 746.3818883417522}


100%|██████████| 1702/1702 [01:07<00:00, 25.37it/s]


{'ner': 776.0229107285609}


100%|██████████| 1702/1702 [01:06<00:00, 25.54it/s]


{'ner': 737.0022879721987}


100%|██████████| 1702/1702 [01:07<00:00, 25.18it/s]


{'ner': 684.7199429320273}


100%|██████████| 1702/1702 [01:07<00:00, 25.39it/s]


{'ner': 705.2386986229035}


100%|██████████| 1702/1702 [01:07<00:00, 25.36it/s]


{'ner': 644.0458322848128}


100%|██████████| 1702/1702 [01:07<00:00, 25.12it/s]


{'ner': 547.1092324327323}


100%|██████████| 1702/1702 [01:07<00:00, 25.08it/s]


{'ner': 597.355322959654}


100%|██████████| 1702/1702 [01:07<00:00, 25.07it/s]


{'ner': 716.4852499241568}


100%|██████████| 1702/1702 [01:09<00:00, 24.44it/s]


{'ner': 609.0132918573756}


100%|██████████| 1702/1702 [01:13<00:00, 23.22it/s]


{'ner': 578.2176435944083}


100%|██████████| 1702/1702 [01:17<00:00, 21.99it/s]


{'ner': 538.1235695028345}


100%|██████████| 1702/1702 [01:23<00:00, 20.29it/s]


{'ner': 632.8907937205723}


100%|██████████| 1702/1702 [01:23<00:00, 20.33it/s]


{'ner': 530.9479757322625}


100%|██████████| 1702/1702 [01:24<00:00, 20.13it/s]


{'ner': 618.2326081020908}


100%|██████████| 1702/1702 [01:23<00:00, 20.31it/s]


{'ner': 565.0738980894076}


100%|██████████| 1702/1702 [01:24<00:00, 20.13it/s]


{'ner': 524.627843880726}


100%|██████████| 1702/1702 [01:23<00:00, 20.34it/s]


{'ner': 473.8711305394477}


100%|██████████| 1702/1702 [01:23<00:00, 20.48it/s]


{'ner': 447.2505946501074}


100%|██████████| 1702/1702 [01:23<00:00, 20.35it/s]


{'ner': 499.07856695180953}


100%|██████████| 1702/1702 [01:23<00:00, 20.38it/s]


{'ner': 487.6598993627997}


100%|██████████| 1702/1702 [01:24<00:00, 20.16it/s]


{'ner': 481.3835944919976}


100%|██████████| 1702/1702 [01:23<00:00, 20.31it/s]


{'ner': 424.5856088870395}


100%|██████████| 1702/1702 [01:23<00:00, 20.44it/s]


{'ner': 437.94038499606455}


100%|██████████| 1702/1702 [01:23<00:00, 20.32it/s]


{'ner': 453.7315329342577}


100%|██████████| 1702/1702 [01:23<00:00, 20.32it/s]


{'ner': 478.14385248939874}


100%|██████████| 1702/1702 [01:23<00:00, 20.28it/s]


{'ner': 462.37957614752236}


100%|██████████| 1702/1702 [01:23<00:00, 20.42it/s]


{'ner': 492.00321732602663}


100%|██████████| 1702/1702 [01:24<00:00, 20.25it/s]


{'ner': 431.7331743652162}


100%|██████████| 1702/1702 [01:23<00:00, 20.34it/s]


{'ner': 475.5485141231905}


100%|██████████| 1702/1702 [01:24<00:00, 20.14it/s]


{'ner': 403.93615740366954}


100%|██████████| 1702/1702 [01:24<00:00, 20.24it/s]


{'ner': 395.047861309561}


100%|██████████| 1702/1702 [01:23<00:00, 20.26it/s]


{'ner': 351.8842592928839}


100%|██████████| 1702/1702 [01:24<00:00, 20.25it/s]


{'ner': 335.11962110408024}


100%|██████████| 1702/1702 [01:24<00:00, 20.22it/s]


{'ner': 369.52000124927736}


100%|██████████| 1702/1702 [01:24<00:00, 20.14it/s]


{'ner': 418.1503496214902}


100%|██████████| 1702/1702 [01:24<00:00, 20.18it/s]


{'ner': 380.6135555124138}


100%|██████████| 1702/1702 [01:24<00:00, 20.24it/s]


{'ner': 418.14243129032883}


100%|██████████| 1702/1702 [01:24<00:00, 20.16it/s]


{'ner': 437.9045959088474}


100%|██████████| 1702/1702 [01:24<00:00, 20.12it/s]


{'ner': 397.265760751853}


100%|██████████| 1702/1702 [01:24<00:00, 20.14it/s]


{'ner': 485.171899209421}


100%|██████████| 1702/1702 [01:25<00:00, 20.02it/s]


{'ner': 366.2781698601742}


100%|██████████| 1702/1702 [01:24<00:00, 20.16it/s]


{'ner': 405.80628432194476}


100%|██████████| 1702/1702 [01:24<00:00, 20.13it/s]


{'ner': 410.19144849182544}


100%|██████████| 1702/1702 [01:24<00:00, 20.16it/s]


{'ner': 362.2019985936909}


100%|██████████| 1702/1702 [01:24<00:00, 20.23it/s]


{'ner': 340.3654554465334}


100%|██████████| 1702/1702 [01:24<00:00, 20.24it/s]


{'ner': 334.2324176336094}


100%|██████████| 1702/1702 [01:24<00:00, 20.26it/s]


{'ner': 399.1312524436455}


100%|██████████| 1702/1702 [01:24<00:00, 20.21it/s]


{'ner': 350.4503894766992}


100%|██████████| 1702/1702 [01:22<00:00, 20.53it/s]


{'ner': 359.21478080965255}


100%|██████████| 1702/1702 [01:22<00:00, 20.53it/s]


{'ner': 369.47792937138126}


100%|██████████| 1702/1702 [01:22<00:00, 20.53it/s]


{'ner': 340.0310491814081}


100%|██████████| 1702/1702 [01:23<00:00, 20.38it/s]


{'ner': 350.1190883619593}


100%|██████████| 1702/1702 [01:24<00:00, 20.24it/s]


{'ner': 360.3201410024016}


100%|██████████| 1702/1702 [01:22<00:00, 20.62it/s]


{'ner': 400.1845326939402}


100%|██████████| 1702/1702 [01:23<00:00, 20.41it/s]


{'ner': 340.9035726159918}


100%|██████████| 1702/1702 [01:24<00:00, 20.19it/s]


{'ner': 341.2930963920799}


100%|██████████| 1702/1702 [01:24<00:00, 20.11it/s]


{'ner': 376.9326543856417}


100%|██████████| 1702/1702 [01:23<00:00, 20.29it/s]


{'ner': 363.94732764564867}


100%|██████████| 1702/1702 [01:24<00:00, 20.12it/s]


{'ner': 335.8740189801051}


100%|██████████| 1702/1702 [01:25<00:00, 20.02it/s]


{'ner': 314.43082027123825}


100%|██████████| 1702/1702 [01:25<00:00, 19.81it/s]


{'ner': 368.7731339145922}


100%|██████████| 1702/1702 [01:26<00:00, 19.71it/s]


{'ner': 338.62769714789414}


100%|██████████| 1702/1702 [01:24<00:00, 20.11it/s]


{'ner': 313.5989588274188}


100%|██████████| 1702/1702 [01:25<00:00, 19.98it/s]


{'ner': 344.9352509328601}


100%|██████████| 1702/1702 [01:24<00:00, 20.24it/s]


{'ner': 324.33745068965254}


100%|██████████| 1702/1702 [01:25<00:00, 19.97it/s]


{'ner': 344.0685668492738}


100%|██████████| 1702/1702 [01:23<00:00, 20.28it/s]


{'ner': 326.6935771427598}


100%|██████████| 1702/1702 [01:24<00:00, 20.16it/s]


{'ner': 355.6414888000163}


100%|██████████| 1702/1702 [01:24<00:00, 20.12it/s]


{'ner': 416.95576866888825}


100%|██████████| 1702/1702 [01:24<00:00, 20.21it/s]


{'ner': 340.77157709065773}


100%|██████████| 1702/1702 [01:24<00:00, 20.13it/s]


{'ner': 347.14524792841377}


100%|██████████| 1702/1702 [01:24<00:00, 20.03it/s]


{'ner': 337.35735132708174}


100%|██████████| 1702/1702 [01:25<00:00, 19.99it/s]


{'ner': 315.1031168289445}


100%|██████████| 1702/1702 [01:24<00:00, 20.13it/s]


{'ner': 342.30502633556165}


100%|██████████| 1702/1702 [01:24<00:00, 20.12it/s]


{'ner': 343.5321754172211}


100%|██████████| 1702/1702 [01:25<00:00, 19.90it/s]


{'ner': 333.51127601882996}


100%|██████████| 1702/1702 [01:25<00:00, 20.00it/s]


{'ner': 295.5495145064173}


100%|██████████| 1702/1702 [01:24<00:00, 20.10it/s]


{'ner': 314.7067001100969}


100%|██████████| 1702/1702 [01:25<00:00, 19.84it/s]


{'ner': 335.8166325418103}


100%|██████████| 1702/1702 [01:25<00:00, 19.82it/s]


{'ner': 360.7215248082787}


100%|██████████| 1702/1702 [01:24<00:00, 20.14it/s]


{'ner': 317.2809933649003}


100%|██████████| 1702/1702 [01:25<00:00, 20.02it/s]

{'ner': 330.6965194955011}





In [None]:
#save the trained model to output_dir (skipped when no output folder is configured)
if output_dir is not None:
    output_dir = Path(output_dir)
    # parents=True also creates missing parent folders; exist_ok=True makes re-runs safe
    output_dir.mkdir(parents=True, exist_ok=True)
    nlp.to_disk(output_dir)
    print("Saved model to", output_dir)

Saved model to /content/de_spacy_custom


In [None]:
#quick check: run the trained model on the first five training sentences and show what it finds
for sample in final_all_ents_tuple[:5]:
    parsed = nlp(sample[0])
    found = [(span.text, span.label_) for span in parsed.ents]
    print('Entities', found)

Entities [('London', 'LOC')]
Entities [('Waissmann', 'PERSON'), ('Moritz Guttmann', 'PERSON'), ('Josef Herzler, Elias Schwarz', 'PERSON')]
Entities [('Lamprecht', 'PERSON')]
Entities [('Türkei', 'LOC')]
Entities [('Daniel Spielmann', 'PERSON'), ('Julius Rischou', 'PERSON'), ('Alois Eisenstein', 'PERSON'), ('Max Fuchs', 'PERSON')]


## **Part 3 :: Inference with the custom-trained model on test data - change the txt file and/or path below to set the test dataset**

In [None]:
import pandas as pd

# Read the test document line by line: every non-empty line becomes one row of the "text" column.
# The file is plain text, not CSV - read_csv with delimiter="\n" is rejected by newer pandas
# versions and would also apply CSV quoting rules to quote characters inside the text.
with open('03_Protokoll-Zionistenkongress-Basel_1899.txt', encoding='utf-8') as test_file:
    test_lines = [line.rstrip('\r\n') for line in test_file]
test_df = pd.DataFrame({'text': [line for line in test_lines if line]})

In [None]:
# Check the row count and dtype of the loaded test lines
test_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14740 entries, 0 to 14739
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    14740 non-null  object
dtypes: object(1)
memory usage: 115.3+ KB


In [None]:
# Preview the first 30 test lines
test_df.head(30)

In [None]:
# Run the model over every test line and collect the recognised names, one list per line.
all_persons = []
all_locations = []

for line in test_df['text']:
  found = nlp(line).ents
  all_persons.append([span.text for span in found if span.label_ == 'PERSON'])
  all_locations.append([span.text for span in found if span.label_ == 'LOC'])

In [None]:
# Attach the per-line predictions of the custom model as two new columns.
custom_columns = (
    ('Custom-trained_Spacy_Person', all_persons),
    ('Custom-trained_Spacy_Location', all_locations),
)
for column_name, values in custom_columns:
    test_df[column_name] = pd.Series(values)

In [None]:
# Confirm that both prediction columns were added for every row
test_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14740 entries, 0 to 14739
Data columns (total 3 columns):
 #   Column                         Non-Null Count  Dtype 
---  ------                         --------------  ----- 
 0   text                           14740 non-null  object
 1   Custom-trained_Spacy_Person    14740 non-null  object
 2   Custom-trained_Spacy_Location  14740 non-null  object
dtypes: object(3)
memory usage: 345.6+ KB


In [None]:
# Preview the predictions for the first 30 lines
test_df.head(30)

In [None]:
# Preview the predictions for the last 30 lines
test_df.tail(30)

In [None]:
## **These are the results of the custom-trained model. They are saved as "Custom_NER_inference_results.csv" - change the directory and/or file name if needed.**

In [None]:
test_df.to_csv('Custom_NER_inference_results.csv',index=False) #save the inference results of the custom-trained model; this file is read back in Part 4


## **Part 4 :: Using the pre-trained large German spaCy model to detect entities**

In [None]:
!pip install -U spacy

Collecting spacy
  Downloading spacy-3.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.2 MB)
[K     |████████████████████████████████| 6.2 MB 23.1 MB/s 
Collecting typer<0.5.0,>=0.3.0
  Downloading typer-0.4.1-py3-none-any.whl (27 kB)
Collecting typing-extensions<4.0.0.0,>=3.7.4
  Downloading typing_extensions-3.10.0.2-py3-none-any.whl (26 kB)
Collecting thinc<8.1.0,>=8.0.14
  Downloading thinc-8.0.15-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (653 kB)
[K     |████████████████████████████████| 653 kB 50.4 MB/s 
Collecting langcodes<4.0.0,>=3.2.0
  Downloading langcodes-3.3.0-py3-none-any.whl (181 kB)
[K     |████████████████████████████████| 181 kB 42.4 MB/s 
[?25hCollecting spacy-loggers<2.0.0,>=1.0.0
  Downloading spacy_loggers-1.0.2-py3-none-any.whl (7.2 kB)
Collecting pathy>=0.3.5
  Downloading pathy-0.6.1-py3-none-any.whl (42 kB)
[K     |████████████████████████████████| 42 kB 1.5 MB/s 
Collecting spacy-legacy<3.1.0,>=3.0.9
  Downloading spacy_

In [None]:
# Download and install the pre-trained large German spaCy pipeline
!python -m spacy download de_core_news_lg

Collecting de-core-news-lg==3.3.0
  Downloading https://github.com/explosion/spacy-models/releases/download/de_core_news_lg-3.3.0/de_core_news_lg-3.3.0-py3-none-any.whl (567.8 MB)
[K     |████████████████████████████████| 567.8 MB 25 kB/s 
Installing collected packages: de-core-news-lg
Successfully installed de-core-news-lg-3.3.0
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('de_core_news_lg')


In [None]:
import spacy

In [None]:
import pandas as pd

In [None]:
# Load the pre-trained large German pipeline installed above.
# NOTE: this rebinds `nlp`, which earlier in the notebook refers to the custom-trained model.
nlp = spacy.load("de_core_news_lg")

In [None]:
import ast  # used only here, to turn the stored list representations back into real lists

# The two prediction columns were written to CSV as text such as "['Jerusalem']";
# parse them back so they hold lists again, like the columns added further below.
LIST_COLUMNS = ["Custom-trained_Spacy_Person", "Custom-trained_Spacy_Location"]
df = pd.read_csv(
    "Custom_NER_inference_results.csv",
    converters={column: ast.literal_eval for column in LIST_COLUMNS},
    dtype={"text": str},
    keep_default_na=False,  # keep text lines such as "NA" as strings instead of turning them into NaN
)

In [None]:
# Run the pre-trained model over every line and collect the recognised names, one list per line.
# Persons are matched on the label 'PER' here.
all_persons = []
all_locations = []

for line in df['text']:
  found = nlp(line).ents
  all_persons.append([span.text for span in found if span.label_ == 'PER'])
  all_locations.append([span.text for span in found if span.label_ == 'LOC'])

In [None]:
# Attach the per-line predictions of the pre-trained model as two new columns.
pretrained_columns = (
    ('Pre-trained_Spacy_Person', all_persons),
    ('Pre-trained_Spacy_Location', all_locations),
)
for column_name, values in pretrained_columns:
    df[column_name] = pd.Series(values)

In [None]:
# Confirm that the two pre-trained prediction columns were added for every row
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14740 entries, 0 to 14739
Data columns (total 5 columns):
 #   Column                         Non-Null Count  Dtype 
---  ------                         --------------  ----- 
 0   text                           14740 non-null  object
 1   Custom-trained_Spacy_Person    14740 non-null  object
 2   Custom-trained_Spacy_Location  14740 non-null  object
 3   Pre-trained_Spacy_Person       14740 non-null  object
 4   Pre-trained_Spacy_Location     14740 non-null  object
dtypes: object(5)
memory usage: 575.9+ KB


In [None]:
# Compare the custom and pre-trained predictions on the last 50 lines
df.tail(50)

Unnamed: 0,text,Custom-trained_Spacy_Person,Custom-trained_Spacy_Location,Pre-trained_Spacy_Person,Pre-trained_Spacy_Location
14690,asui. Verein „Jerusalem.,[],['Jerusalem'],[],[]
14691,Russland.,[],['Russland'],[],[]
14692,Aus 174 Orten liefen 256 Zuschriften und Teleg...,[],['Telegramme'],[],[]
14693,personen und 213 Gruppen und Corporationen ein.,[],['Gruppen'],[],[]
14694,Schweiz.,[],['Schweiz'],[],[]
14695,Bern. Zionistische Studierende.,[],['Bern'],[],[]
14696,Zionistische Ortsgruppe.,[],[],[],[]
14697,Genf. Bernard Mochenson.,['Genf. Bernard Mochenson'],[],[Bernard Mochenson.],[Genf]
14698,Zionistische Ortsgruppe.,[],[],[],[]
14699,Locle. Dr. Beredka.,['Dr. Beredka'],['Locle'],[Locle.],[]


In [None]:
df.to_csv("Custom_NER_All_Inference_results.csv", index=False) #save the final results: predictions of both the custom model and the pre-trained large spaCy model