In [2]:
# Loading required libraries
!pip install -U spacy
!pip install -U langdetect

# Loading small english models from: https://spacy.io/models
# For the test question, I'm supporting 4 languages:
# 1. English
# 2. Spanish
# 3. Portuguese
# 4. Russian
!python -m spacy download en_core_web_sm
!python -m spacy download es_core_news_sm
!python -m spacy download pt_core_news_sm
!python -m spacy download ru_core_news_sm

Collecting en-core-web-sm==3.3.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.3.0/en_core_web_sm-3.3.0-py3-none-any.whl (12.8 MB)
[K     |████████████████████████████████| 12.8 MB 1.0 MB/s eta 0:00:01


[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
Collecting es-core-news-sm==3.3.0
  Downloading https://github.com/explosion/spacy-models/releases/download/es_core_news_sm-3.3.0/es_core_news_sm-3.3.0-py3-none-any.whl (12.9 MB)
[K     |████████████████████████████████| 12.9 MB 477 kB/s eta 0:00:01
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('es_core_news_sm')
Collecting pt-core-news-sm==3.3.0
  Downloading https://github.com/explosion/spacy-models/releases/download/pt_core_news_sm-3.3.0/pt_core_news_sm-3.3.0-py3-none-any.whl (13.0 MB)


[K     |████████████████████████████████| 13.0 MB 215 kB/s eta 0:00:01
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('pt_core_news_sm')
Collecting ru-core-news-sm==3.3.0
  Downloading https://github.com/explosion/spacy-models/releases/download/ru_core_news_sm-3.3.0/ru_core_news_sm-3.3.0-py3-none-any.whl (15.3 MB)
[K     |████████████████████████████████| 15.3 MB 464 kB/s eta 0:00:01


[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('ru_core_news_sm')


In [3]:
# Importing the language libraries
import spacy
from langdetect import detect

In [4]:
# Build a dictionary mapping each language code to its loaded small spaCy model.
# English uses the "web" model name; every other language uses the "news" one.
nlp = {}
for lang in ("en", "es", "pt", "ru"):  # add further codes here once their models are installed
    nlp[lang] = spacy.load(lang + ("_core_web_sm" if lang == "en" else "_core_news_sm"))


In [11]:
# Named-entity extraction:
# 1. Pick the language: the caller's language code, or the detected one if none is given
# 2. Look up the already-loaded spaCy model for that language
# 3. Return the list of entities if the language is supported
# 4. If the language is not supported, raise an exception.
def entites(text, language_code=None):
    """Return the named entities found in ``text``.

    Args:
        text: The text to analyse. It is converted with ``str()`` before
            being passed to the model.
        language_code: Key into the module-level ``nlp`` dictionary
            (for example ``"en"``). When omitted, the code returned by
            ``detect(text)`` is used instead.

    Returns:
        A list with one dict per entity, each holding ``"text"``,
        ``"type"`` (the entity label), ``"start_pos"`` and ``"end_pos"``
        (character offsets into the text).

    Raises:
        ValueError: If no model is loaded for the selected language code.
    """
    text = str(text)

    # Detection is only needed when the caller did not choose a language;
    # skipping it otherwise avoids detection errors on inputs whose language
    # was supplied explicitly.
    if language_code is None:
        language_code = detect(text)

    try:
        model = nlp[language_code]
    except KeyError:
        # Raise (not return) so callers cannot mistake the error for a result.
        raise ValueError(f"{language_code} model is not loaded") from None

    return [
        {
            "text": str(ent),
            "type": ent.label_,
            "start_pos": ent.start_char,
            "end_pos": ent.end_char,
        }
        for ent in model(text).ents
    ]

In [12]:
# Sample English sentence used to try out the entity extraction.
text = (
    'Tony Stark owns the company StarkEnterprises . '
    'Emily Clark works at Microsoft and lives in Manchester. '
    'She loves to read the Bible and learn French'
)


In [13]:
# The output is a list of entities.
# Each entity is a Python dictionary with four fields:
# 1. text: the identified named entity
# 2. type: type of the entity
# 3. start_pos: starting position of the entity in the text
# 4. end_pos: ending position of the entity in the text
ents = entites(text=text, language_code="en")
print(ents)


[{'text': 'Tony Stark', 'type': 'PERSON', 'start_pos': 0, 'end_pos': 10}, {'text': 'StarkEnterprises', 'type': 'ORG', 'start_pos': 28, 'end_pos': 44}, {'text': 'Emily Clark', 'type': 'PERSON', 'start_pos': 47, 'end_pos': 58}, {'text': 'Microsoft', 'type': 'ORG', 'start_pos': 68, 'end_pos': 77}, {'text': 'Manchester', 'type': 'LOC', 'start_pos': 91, 'end_pos': 101}, {'text': 'Bible', 'type': 'WORK_OF_ART', 'start_pos': 125, 'end_pos': 130}, {'text': 'French', 'type': 'NORP', 'start_pos': 141, 'end_pos': 147}]


In [17]:
# Ask the user for the text to analyse.
sample_text = input(
    "Enter the text here on which you want to run the NER system:"
)


Enter the text here on which you want to run the NER system:Tony Stark owns the company StarkEnterprises


In [18]:
# Ask the user for the language code. The loaded models are keyed by
# lowercase codes ("en", "es", "pt", "ru"), so normalise stray whitespace
# and upper-case input before it is used as a lookup key.
sample_lang = input("Enter the language code here:").strip().lower()


Enter the language code here:en


In [20]:
# Run the entity extraction on the text and language code entered above.
ents = entites(text=sample_text, language_code=sample_lang)
print(ents)

[{'text': 'Tony Stark', 'type': 'PERSON', 'start_pos': 0, 'end_pos': 10}, {'text': 'StarkEnterprises', 'type': 'ORG', 'start_pos': 28, 'end_pos': 44}]
