# Named Entity Recognition (NER)

#### Imports

In [1]:
from typing import Dict, List, Tuple, Optional
from collections import namedtuple
import os

In [2]:
from functional import seq, pseq
from functional.streams import Sequence
from IPython.core.display import HTML

def display_seq(sequence:Sequence,rows:int)-> None:
    """Display ``sequence`` as an HTML table limited to ``rows`` rows.

    The instance's ``_repr_html_`` hook is temporarily replaced so that the
    notebook's ``display`` renders ``sequence.tabulate(rows, tablefmt='html')``.
    Afterwards the hook is set back to a 10-row HTML table.

    Args:
        sequence: the sequence to render; must provide ``tabulate``.
        rows: number of rows passed to ``sequence.tabulate`` for this display.
    """
    sequence._repr_html_ = lambda: sequence.tabulate(rows, tablefmt='html')
    try:
        display(sequence)
    finally:
        # Always put the 10-row repr back, even if rendering fails, so the
        # sequence is never left stuck with the temporary row count.
        sequence._repr_html_ = lambda: sequence.tabulate(10, tablefmt='html')

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [4]:
import functools
def compose(*functions):
    """Compose unary callables right-to-left.

    ``compose(f, g, h)(x)`` evaluates ``f(g(h(x)))``. Called with no
    arguments, the result is the identity function.
    """
    def chain(outer, inner):
        # Build a callable that feeds inner's result into outer.
        def chained(x):
            return outer(inner(x))
        return chained

    # Start from identity and wrap each function around the pipeline in turn.
    composed = lambda x: x
    for func in functions:
        composed = chain(composed, func)
    return composed

## spaCy

In [13]:
! pip3 install spacy nltk --user

Collecting nltk
[?25l  Downloading https://files.pythonhosted.org/packages/f6/1d/d925cfb4f324ede997f6d47bea4d9babba51b49e87a767c170b77005889d/nltk-3.4.5.zip (1.5MB)
[K     |████████████████████████████████| 1.5MB 605kB/s eta 0:00:01
Installing collected packages: nltk
  Running setup.py install for nltk ... [?25ldone
[?25hSuccessfully installed nltk-3.4.5


In [98]:
! python -m spacy download en_core_web_lg --user

Collecting en_core_web_lg==2.1.0 from https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-2.1.0/en_core_web_lg-2.1.0.tar.gz#egg=en_core_web_lg==2.1.0
[?25l  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-2.1.0/en_core_web_lg-2.1.0.tar.gz (826.9MB)
[K     |████████████████████████████████| 826.9MB 2.2MB/s eta 0:00:01     |█████▎                          | 136.1MB 2.1MB/s eta 0:05:30     |██████                          | 155.7MB 2.5MB/s eta 0:04:29     |███████████████▎                | 396.1MB 1.7MB/s eta 0:04:15
[?25hInstalling collected packages: en-core-web-lg
  Running setup.py install for en-core-web-lg ... [?25ldone
[?25hSuccessfully installed en-core-web-lg-2.1.0
[38;5;2m✔ Download and installation successful[0m
You can now load the model via spacy.load('en_core_web_lg')


In [100]:
import spacy
from spacy import displacy
from collections import Counter
import en_core_web_lg
# Load the en_core_web_lg pipeline once; the cells below call `nlp(text)` to get entities.
nlp = en_core_web_lg.load()

In [101]:
# Run the pipeline on a sample news sentence; adjacent literals are joined
# by the parser into one string.
doc = nlp(
    'European authorities fined Google a record $5.1 billion on Wednesday '
    'for abusing its power in the mobile phone market and ordered the company to alter its practices'
)

In [102]:
# List every recognised entity as a (text, label) pair.
print([(ent.text, ent.label_) for ent in doc.ents])

[('European', 'NORP'), ('Google', 'ORG'), ('$5.1 billion', 'MONEY'), ('Wednesday', 'DATE')]


In [104]:
def print_ners(text:str):
    """Print ``text``, then its entities as ``(text, label)`` pairs, then a blank line.

    Uses the notebook-level ``nlp`` pipeline to analyse ``text``.
    """
    entities = [(ent.text, ent.label_) for ent in nlp(text).ents]
    # One call, three newline-separated items: the text, the pairs, an empty line.
    print(text, entities, "", sep="\n")

In [107]:
# Sample utterances mixing person names, numbers, places and durations.
names = seq(
    [
        "Hello, I'm Alan Turing.",
        "My pesel number is 900303124",
        "I'd love to get a loan. My plan is to go on holiday to Russia.",
        "We will be fighting him in two weeks",
        " Hello. I'm John De Goes. I am 5 years old. My Address is Buckingham Street 5 Moscow.",
        "I want to take this loan for two years.",
    ]
)

# print_ners takes a single text argument, so it can be handed to for_each directly.
names.for_each(print_ners)

Hello, I'm Alan Turing.
[('Alan Turing', 'PERSON')]

My pesel number is 900303124
[('900303124', 'CARDINAL')]

I was born on 1st of September 2019.
[('1st of September 2019', 'DATE')]

I'd love to get a loan. My plan is to go on holiday to Russia.
[('Russia', 'GPE')]

We will be fighting him in two weeks
[('two weeks', 'DATE')]

 Hello. I'm John De Goes. I am 5 years old. My Address is Buckingham Street 5 Moscow.
[('John De Goes', 'PERSON'), ('5 years old', 'DATE'), ('Buckingham Street 5', 'FAC'), ('Moscow', 'GPE')]

I want to take this loan for two years.
[('two years', 'DATE')]



In [109]:
# Two ways of writing a birth date: numeric (dd.mm.yyyy) and spelled out.
birth_dates = seq(
    [
        "I was born on 19.03.1990 in Kraków.",
        "I was born on 1st of September 2019.",
    ]
)
birth_dates.for_each(print_ners)

I was born on 19.03.1990 in Kraków.
[('19.03.1990', 'CARDINAL'), ('Kraków', 'FAC')]

I was born on 1st of September 2019.
[('1st of September 2019', 'DATE')]



In [None]:
# Address-like utterances, one per list element.
# Every literal needs its trailing comma: adjacent string literals without one
# are silently concatenated by Python into a single element.
addresses = seq([
    "I live in Warsaw, street is Nałęczowska 8.",
    "Hello here, I'd like to take a loan. I'm Eve Munish, I live in Cracow on Rodakowskiego street.",
    "Living on Brodway Street 8 is the best thing that happened to me.",
    "My address is Baker Street 10, London.",
    "My address is Baker Street 10 London.",
    "My address is Warszawska 10 Moscow.",
])
addresses.for_each(lambda text: print_ners(text))

In [116]:
# Loan amounts written with different currency notations ($, euro, PLN, ZŁ).
money = seq(
    [
        "I wat to take loan for 100$.",
        "I wat to take loan for 100 $ .",
        "I wat to take loan for 5500 euro.",
        "I wat to take loan for 1234 PLN.",
        "I wat to take loan for 1234 ZŁ.",
        "Please lend me 100 euro.",
    ]
)
money.for_each(print_ners)

I wat to take loan for 100$.
[('100$.', 'CARDINAL')]

I wat to take loan for 100 $ .
[('100 $', 'MONEY')]

I wat to take loan for 5500 euro.
[('5500 euro', 'MONEY')]

I wat to take loan for 1234 PLN.
[('1234 PLN', 'MONEY')]

I wat to take loan for 1234 ZŁ.
[('1234', 'CARDINAL')]

Please lend me 100 euro.
[('100 euro', 'MONEY')]



### Regex for:
1. Birth date
2. PESEL
3. Dowód (ID card) / passport

In [61]:
# Keep the first entity of the sample document for inspection in the next cells.
entity = doc.ents[0]

In [79]:
# Show the entity's label together with its text.
entity.label_, entity.text

('NORP', 'European')

In [81]:
# Check which Python type the label attribute has.
type(entity.label_)

<class 'str'>

In [64]:
# Check which Python type an individual element of doc.ents has.
type(doc.ents[1])

<class 'spacy.tokens.span.Span'>

In [None]:
# NOTE(review): `C` is not defined anywhere in the visible notebook and this cell
# was never executed; it looks like a stray keystroke and would raise NameError
# on Restart & Run All — confirm and delete the cell.
C