# Named Entity Recognition

## Libraries 

In [158]:
from collections import Counter
import en_core_web_lg as english_large
import en_core_web_md as english_medium
import en_core_web_sm as english_small
from spacy import displacy
import warnings
warnings.filterwarnings("ignore")


### Example texts

In [159]:
# Example paragraphs used as NER input. Each entry is one destination
# description; the parsing loop further down runs every loaded model over
# every entry via get_ner().
TEXTS = [
    "New Zealand's South Island brims with majestic landscapes at every turn, from dramatic mountains to fjords to glaciers. Here, you can explore Fiordland National Park, a UNESCO World Heritage Area, or gaze at the starry skies at Mount John Observatory. You can also indulge your inner daredevil in Queenstown, explore two of the most accessible glaciers in the world on the country's west coast or sample delicious food and wine in the Marlborough region.",
    "Every day, the magnetic City of Light draws visitors from around the globe who come to see iconic attractions like the Eiffel Tower, the Louvre and the Arc de Triomphe. But what travelers really fall in love with are the city's quaint cafes, vibrant markets, trendy shopping districts and unmistakable je ne sais quoi charm. ",
    "London is a world unto itself. The eclectic neighborhoods, which house a blend of historical landmarks and modern-day attractions, can keep you occupied for days. If it's your first time in London, save time for a visit to the Tower of London, Tate Modern, Buckingham Palace or the British Museum before sitting down to a classic afternoon tea. The best time to travel to London is during the warmer months, but be warned that this is also the busiest and most expensive time of year.",
    "What this small island may lack in size it makes up for in sheer tropical beauty. Here, you'll find picturesque beaches, lush jungles and luxurious resorts. The island's dormant volcano, Mount Otemanu, makes for a great photo-op or challenging hike, and the friendly Bora Bora locals can help you catch a glimpse of the island's best sights during an off-road excursion. To relax, head to Matira Beach for crystal-clear water and soft sand. Although a trip to Bora Bora is very expensive, most travelers say it's worth every penny.",
    "Snow-capped peaks and azure lakes are just two reasons why Glacier National Park is one of the most-visited parks in the United States. There are more than 700 miles of hiking trails in this Montana park, plus 13 designated areas for camping. In winter, travelers can enjoy snowshoeing and skiing, while  summer is when vacationers can go swimming and whitewater rafting, among other popular activities. For those who'd rather admire their surroundings from their car, a scenic drive along Going-to-the-Sun Road can't be missed."
]

### Getting Background Named Entities from text

In [160]:
def get_ner(nlp, text):
    """Parse ``text`` with ``nlp`` and print an entity summary.

    Two things are printed, followed by a blank separator:
      * the ``(text, label)`` pairs of the entities accepted by
        ``is_background_label``;
      * a ``Counter`` of the labels of *all* entities found in the text.

    Args:
        nlp: callable that turns a string into a document exposing ``.ents``.
        text: the string to analyse.

    Returns:
        None; the function only prints.
    """
    parsed = nlp(text)

    background_pairs = [
        (entity.text, entity.label_)
        for entity in parsed.ents
        if is_background_label(entity)
    ]
    print(" Background entities:\n", background_pairs)

    # The count deliberately covers every entity, not only the background ones.
    label_counts = Counter(entity.label_ for entity in parsed.ents)
    print(" Entities count:\n", label_counts)

    print("\n")

### Background labels description

In [161]:
# Entity labels treated as "background" (place / facility / event / date
# information) by the helpers in this notebook.
background_labels = ['GPE', 'LOC', 'FAC', 'EVENT', 'DATE']

def is_background_label(X):
    """Tell whether an entity carries one of the background labels.

    Args:
        X: any object with a ``label_`` string attribute (e.g. an item of
            ``doc.ents``).

    Returns:
        bool: ``True`` if ``X.label_`` is listed in ``background_labels``,
        ``False`` otherwise. The result is suitable as a ``filter`` predicate.
    """
    # Direct membership test instead of counting occurrences; yields a real
    # bool (which still compares equal to the former 1 / 0 results).
    return X.label_ in background_labels

## Working with models

### Differences in parsing of different models

In [162]:
# Load the three English pipelines once, ordered small -> medium -> large,
# so later cells can pick one by index.
nlps = [english_small.load(), english_medium.load(), english_large.load()]

for nlp in nlps:
    # Read the model name through the public `meta` accessor rather than the
    # private `_meta` attribute.
    print('Parsing text by nlp: ', nlp.meta["name"])
    for text in TEXTS:
        get_ner(nlp, text)
    print('=======================')

Parsing text by nlp:  core_web_sm
 Background entities:
 [("New Zealand's", 'GPE'), ('South Island', 'LOC'), ('Mount John Observatory', 'FAC'), ('Queenstown', 'GPE'), ('west coast', 'LOC'), ('Marlborough', 'GPE')]
 Entities count:
 Counter({'GPE': 3, 'LOC': 2, 'ORG': 2, 'FAC': 1, 'CARDINAL': 1})


 Background entities:
 [('Every day', 'DATE'), ('the Eiffel Tower', 'FAC'), ('the Arc de Triomphe', 'FAC'), ('sais', 'DATE')]
 Entities count:
 Counter({'DATE': 2, 'FAC': 2})


 Background entities:
 [('London', 'GPE'), ('modern-day', 'DATE'), ('days', 'DATE'), ('London', 'GPE'), ('the Tower of London', 'FAC'), ('Buckingham Palace', 'FAC'), ('the British Museum', 'FAC'), ('London', 'GPE'), ('the warmer months', 'DATE')]
 Entities count:
 Counter({'GPE': 3, 'DATE': 3, 'FAC': 3, 'ORDINAL': 1, 'PERSON': 1, 'TIME': 1})


 Background entities:
 [('Matira Beach', 'GPE')]
 Entities count:
 Counter({'PERSON': 4, 'GPE': 1})


 Background entities:
 [('Glacier National Park', 'FAC'), ('the United State

### Comparing named and unnamed entities
    

In [163]:
# Sentences that mention a place by its proper name.
# NOTE(review): the recorded outputs further down show "Hogwarts Beach" for
# the third sentence, so they were not produced from this cell as written;
# re-run the notebook to refresh them.
named_sentences = [
    "Go on vacation with your family on the Black Sea.",
    "The Swiss Alps are one of the most popular resorts in the world.",
    "How beautiful is the sunset on Miami Beach."
]

# The same sentences with the proper name replaced by a generic description.
unnamed_sentences = [
    "Go on vacation with your family to the sea.",
    "The mountains in Switzerland are one of the most popular resorts in the world.",
    "How beautiful is the sunset on the beach."
]

# zip() silently drops trailing items when the lists differ in length, so
# fail loudly if the two lists ever get out of step.
assert len(named_sentences) == len(unnamed_sentences), \
    "named_sentences and unnamed_sentences must be the same length"

# List of (named, unnamed) tuples, one per sentence pair.
sentences = list(zip(named_sentences, unnamed_sentences))

In [164]:
def compare_entities(nlp, text_1, text_2):
    """Print the background entities of two texts, one after the other.

    Both texts are parsed with ``nlp``; for each, the ``(text, label)`` pairs
    of the entities accepted by ``is_background_label`` are printed, with an
    empty line between the two reports.

    Args:
        nlp: callable that turns a string into a document exposing ``.ents``.
        text_1: first text to analyse.
        text_2: second text to analyse.

    Returns:
        None; the function only prints.
    """
    parsed_first, parsed_second = nlp(text_1), nlp(text_2)

    print(
        "Background entities for first text:\n",
        [(entity.text, entity.label_)
         for entity in parsed_first.ents
         if is_background_label(entity)],
    )
    print()
    print(
        "Background entities for second text:\n",
        [(entity.text, entity.label_)
         for entity in parsed_second.ents
         if is_background_label(entity)],
    )
    
def show_entities(nlp, text):
    """Print ``text`` and render its entities with displaCy in the notebook.

    Args:
        nlp: callable that turns a string into a document displaCy can render.
        text: the string to parse and display.

    Returns:
        None.
    """
    parsed = nlp(text)
    print("Showing named entities for text:\n ", text)
    # style='ent' selects the entity visualisation; jupyter=True asks displaCy
    # to display the result in the notebook.
    displacy.render(parsed, style='ent', jupyter=True)


In [165]:
# Use the first loaded pipeline (the small model) for the comparisons.
nlp = nlps[0]

# Each item is a (named, unnamed) pair; unpack it straight into the call.
for sentence in sentences:
    compare_entities(nlp, *sentence)
    print('=======================')

Background entities for first text:
 [('the Black Sea', 'LOC')]

Background entities for second text:
 []
Background entities for first text:
 []

Background entities for second text:
 [('Switzerland', 'GPE')]
Background entities for first text:
 [('Hogwarts Beach', 'GPE')]

Background entities for second text:
 []


In [166]:
# First pair, named variant (same string as sentences[0][0]).
sentence = named_sentences[0]
show_entities(nlp, sentence)

Showing named entities for text:
  Go on vacation with your family on the Black Sea.


In [167]:
# First pair, unnamed variant (same string as sentences[0][1]).
sentence = unnamed_sentences[0]
show_entities(nlp, sentence)

Showing named entities for text:
  Go on vacation with your family to the sea.


In [168]:
# Second pair, named variant (same string as sentences[1][0]).
sentence = named_sentences[1]
show_entities(nlp, sentence)

Showing named entities for text:
  The Swiss Alps are one of the most popular resorts in the world.


In [169]:
# Second pair, unnamed variant (same string as sentences[1][1]).
sentence = unnamed_sentences[1]
show_entities(nlp, sentence)

Showing named entities for text:
  The mountains in Switzerland are one of the most popular resorts in the world.


In [170]:
# Third pair, named variant (same string as sentences[2][0]).
sentence = named_sentences[2]
show_entities(nlp, sentence)

Showing named entities for text:
  How beautiful is the sunset on Hogwarts Beach.


In [171]:
# Third pair, unnamed variant (same string as sentences[2][1]).
sentence = unnamed_sentences[2]
show_entities(nlp, sentence)

Showing named entities for text:
  How beautiful is the sunset on the beach.
