In [1]:
# Load Packages
from __future__ import unicode_literals, print_function

import plac #  wrapper over argparse
import random
from pathlib import Path
import spacy
from tqdm import tqdm, tqdm_notebook # loading bar 
import pandas as pd
import re
from pprint import pprint
from nltk.tokenize import sent_tokenize
import numpy as np

from spacy.util import minibatch, compounding

# Abrindo e gerando Datasets

### Entidade logradouro

In [2]:
# Obtendo dados crus de endereço

dataset = pd.read_csv("201906AGENCIAS.CSV")
dset = dataset.iloc[:-2,4:10].values  # tipo nparray object
pd_dset = pd.DataFrame(dset) # caso queira visualizar

pd_dset['Join'] = pd_dset[pd_dset.columns[0:]].apply(
    lambda x: ';'.join(x.dropna().astype(str)),
    axis=1
)
pd_dset.head(5)

Unnamed: 0,0,1,2,3,4,5,Join
0,"R.GUILHERME MOREIRA,315","SUBLOJA,LOJA E 2.ANDAR ...",CENTRO,69005-300,MANAUS ...,AM,"R.GUILHERME MOREIRA,315 ;SUBLO..."
1,"AV.PRES.VARGAS,248",1.E 2.ANDARES ...,CAMPINA,66010-900,BELEM ...,PA,"AV.PRES.VARGAS,248 ;1.E 2..."
2,"R.QUINZE DE NOVEMBRO,195",...,CENTRO,11010-908,SANTOS ...,SP,"R.QUINZE DE NOVEMBRO,195 ; ..."
3,"PCA.DAS QUATRO JORNADAS,11",MEZANINO ...,CENTRO,28010-000,CAMPOS DOS GOYTACAZES ...,RJ,"PCA.DAS QUATRO JORNADAS,11 ;MEZAN..."
4,"SEXTA AVENIDA,600",SECRETARIA DA EDUCACAO-TERREO ...,CAB,41745-002,SALVADOR ...,BA,"SEXTA AVENIDA,600 ;SECRE..."


In [3]:
# Tratando endereço completo do DataFrame
dset = np.array(pd_dset)
end_lista = []

for i in range(len(dset)):
    str_raw = dset[i][6]
    str_tratada = re.sub(r'[ ]{2,}', "",str_raw) # Tirando espaços excedentes no final do endereço
    str_tratada = re.sub(r'[;]{1,}', "; ", str_tratada) # Para complementos vazios, para não ter 2 ";"
    str_tratada = str_tratada.lower()
    
    end_lista.append(str_tratada)

end_lista[:3]

['r.guilherme moreira,315; subloja,loja e 2.andar; centro; 69005-300; manaus; am',
 'av.pres.vargas,248; 1.e 2.andares; campina; 66010-900; belem; pa',
 'r.quinze de novembro,195; centro; 11010-908; santos; sp']

In [4]:
# Lógica para extrair posição do logradouro:
# Dividir a String inteira por ";", pegar o len do primeiro split
# len de LOGRA é de 0 até len do primeiro split

print(end_lista[0])
split = end_lista[0].split(";")
print(split)
print(split[0])
print(len(split[0]))

r.guilherme moreira,315; subloja,loja e 2.andar; centro; 69005-300; manaus; am
['r.guilherme moreira,315', ' subloja,loja e 2.andar', ' centro', ' 69005-300', ' manaus', ' am']
r.guilherme moreira,315
23


In [5]:
# Extraindo iob com entidade apenas de logradouro

iob = []

for i in range(len(end_lista)):
    split = end_lista[i].split(";")
    len_logra = len(split[0])
    iob_dict = {"entities": [(0, len_logra-1,'LOGRA')]}
    end_lista[i] = end_lista[i].replace(";", " ")
    end_lista[i] = end_lista[i].replace(",", " ")
    end_lista[i] = re.sub(r'[ ]{2,}', " ", end_lista[i])

    
    tupla = (end_lista[i], iob_dict)
    iob.append(tupla)

FULL_DATA = iob

In [6]:
print(FULL_DATA[0])
print(FULL_DATA[1])
print(FULL_DATA[2])
print(FULL_DATA[5:15])

('r.guilherme moreira 315 subloja loja e 2.andar centro 69005-300 manaus am', {'entities': [(0, 22, 'LOGRA')]})
('av.pres.vargas 248 1.e 2.andares campina 66010-900 belem pa', {'entities': [(0, 17, 'LOGRA')]})
('r.quinze de novembro 195 centro 11010-908 santos sp', {'entities': [(0, 23, 'LOGRA')]})
[('av.rio branco 240 1.andar recife antigo 50030-310 recife pe', {'entities': [(0, 16, 'LOGRA')]}), ('av.santos dumont 2828 5.andar aldeota 60150-162 fortaleza ce', {'entities': [(0, 20, 'LOGRA')]}), ('pca.tiradentes 410 1.andar centro 80020-100 curitiba pr', {'entities': [(0, 17, 'LOGRA')]}), ('r.uruguai 185 5.andar centro 90010-901 porto alegre rs', {'entities': [(0, 12, 'LOGRA')]}), ('pca.1817 129 1.andar centro 58013-010 joao pessoa pb', {'entities': [(0, 11, 'LOGRA')]}), ('pca.odilon resende andrade 76 centro 37410-000 tres coracoes mg', {'entities': [(0, 28, 'LOGRA')]}), ('av fernandes lima 2591 terreo farol 57057-972 maceio al', {'entities': [(0, 22, 'LOGRA')]}), ('r.treze de junho 91

In [7]:
# Criação da base de teste e treinamento

n_test= 0.1 # Porcentagem para base de teste
test_n = round(len(FULL_DATA) * n_test)

# Divisao em Train Test Val

def gerador_bases(dataset, n):
    indices_random = random.sample(range(0,len(dataset)-1), n)
    base_teste_n = []
    base_treinamento_n = []
    
    for i in range(n):
        base_teste_n.append(dataset[indices_random[i]])

    for j in range(len(dataset)):
        if(j not in indices_random):
            base_treinamento_n.append(dataset[j])
            
    return base_teste_n, base_treinamento_n


base_teste, base_treinamento = gerador_bases(FULL_DATA, test_n)

random.shuffle(base_treinamento)
random.shuffle(base_teste)

print("Treinamento: " + str(len(base_treinamento)), "\nTeste: " + str(len(base_teste)), "\nTotal: " + str(len(FULL_DATA)))

Treinamento: 19202 
Teste: 2134 
Total: 21336


In [8]:
for i in range(5):
    print(base_treinamento[i])

('avenida �lvaro guimaraes 493 planalto 09890-002 sao bernardo do campo sp', {'entities': [(0, 28, 'LOGRA')]})
('rua 1. de maio 161 centro 85875-000 santa terezinha de itaipu pr', {'entities': [(0, 18, 'LOGRA')]})
('rua senador melo viana 158 centro 35740-000 esmeraldas mg', {'entities': [(0, 26, 'LOGRA')]})
('r.da conceicao 1270 centro 55540-000 palmares pe', {'entities': [(0, 18, 'LOGRA')]})
('r.jurubatuba 122 centro 09725-620 sao bernardo do campo sp', {'entities': [(0, 15, 'LOGRA')]})


# Carregando o modelo

In [13]:
# Define our variables

model = None
output_dir=Path(".")
n_iter= 100 # número de épocas
batch_size = 128

In [10]:
# Setting up the pipeline and entity recognizer.
if model is not None:
    nlp = spacy.load(model)  # load existing spacy model
    print("Loaded model '%s'" % model)
else:
    lan = 'pt'
    nlp = spacy.blank(lan)  # create blank Language class
    print("Created blank '%s' model" % lan)
    
if 'ner' not in nlp.pipe_names:
    ner = nlp.create_pipe('ner')
    nlp.add_pipe(ner)
    print('Added new NER')
else:
    ner = nlp.get_pipe('ner')
    print('Got an old NER')

Created blank 'pt' model
Added new NER


In [11]:
# create the built-in pipeline components and add them to the pipeline
    # nlp.create_pipe works for built-ins that are registered with spaCy
if 'ner' not in nlp.pipe_names:
    ner = nlp.create_pipe('ner')
    nlp.add_pipe(ner, last=True)
# otherwise, get it so we can add labels
else:
    ner = nlp.get_pipe('ner')

## Treinamento

In [14]:
print("Batch size: ", batch_size)
print("Épocas: ", n_iter)
print()

# add labels
for _, annotations in base_treinamento:
    for ent in annotations.get('entities'):
        ner.add_label(ent[2])


# get names of other pipes to disable them during training
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
with nlp.disable_pipes(*other_pipes):  # only train NER
    optimizer = nlp.begin_training()
    for itn in range(n_iter):
        random.shuffle(base_treinamento)
        losses = {}
        batches = minibatch(base_treinamento, size=batch_size)
        
        for batch in batches:
            texts, annotations = zip(*batch)
            try:
                nlp.update(texts, annotations, sgd=optimizer, drop=0.2,
                       losses=losses)
            except:
                pass     
        
        print(itn+1, ' Losses', losses)        

Batch size:  128
Épocas:  100

1  Losses {'ner': 606.939372012857}
2  Losses {'ner': 638.5511046016755}
3  Losses {'ner': 766.4913897201557}
4  Losses {'ner': 609.2352539531776}
5  Losses {'ner': 559.8214193022287}
6  Losses {'ner': 588.6410193874871}
7  Losses {'ner': 547.6508409419387}
8  Losses {'ner': 455.7239492321868}
9  Losses {'ner': 413.08838508261584}
10  Losses {'ner': 601.2567512689601}
11  Losses {'ner': 730.5993479874415}
12  Losses {'ner': 643.234945708369}
13  Losses {'ner': 649.6897828269329}
14  Losses {'ner': 442.4402852816947}
15  Losses {'ner': 565.8306506229433}
16  Losses {'ner': 565.1473455251715}
17  Losses {'ner': 541.1816364927284}
18  Losses {'ner': 489.57522128769125}
19  Losses {'ner': 408.41610793219195}
20  Losses {'ner': 541.9054432752819}
21  Losses {'ner': 535.6802834143369}
22  Losses {'ner': 442.6166852567742}
23  Losses {'ner': 584.8445779486016}
24  Losses {'ner': 537.4697011536322}
25  Losses {'ner': 507.19692543824726}
26  Losses {'ner': 542.165

In [15]:
# test the trained model
for text, _ in base_teste:
    doc = nlp(text)
    print('Entities', [(ent.text, ent.label_) for ent in doc.ents])
    print('Tokens', [(t.text, t.ent_type_, t.ent_iob) for t in doc])
    print()

Entities [('av. ipiranga loja 149', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('ipiranga', 'LOGRA', 1), ('loja', 'LOGRA', 1), ('149', 'LOGRA', 1), ('jardim', '', 2), ('botanico', '', 2), ('90610-000', '', 2), ('porto', '', 2), ('alegre', '', 2), ('rs', '', 2)]

Entities [('avenida presidentegetuilio vargas bela', 'LOGRA')]
Tokens [('avenida', 'LOGRA', 3), ('presidentegetuilio', 'LOGRA', 1), ('vargas', 'LOGRA', 1), ('bela', 'LOGRA', 1), ('vista', '', 2), ('ii', '', 2), ('45996-000', '', 2), ('teixeira', '', 2), ('de', '', 2), ('freitas', '', 2), ('ba', '', 2)]

Entities [('rua f quadra 128 - lote', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('f', 'LOGRA', 1), ('quadra', 'LOGRA', 1), ('128', 'LOGRA', 1), ('-', 'LOGRA', 1), ('lote', 'LOGRA', 1), ('13', '', 2), ('uniao', '', 2), ('68515-000', '', 2), ('parauapebas', '', 2), ('pa', '', 2)]

Entities [('rua brigadeiro franco 2300', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('brigadeiro', 'LOGRA', 1), ('franco', 'LOGRA', 1), ('2300', 

Entities [('pca.dr.henrique sampaio 150', 'LOGRA')]
Tokens [('pca.dr.henrique', 'LOGRA', 3), ('sampaio', 'LOGRA', 1), ('150', 'LOGRA', 1), ('centro', '', 2), ('45745-000', '', 2), ('ibicarai', '', 2), ('ba', '', 2)]

Entities [('av. expedito garcia 75', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('expedito', 'LOGRA', 1), ('garcia', 'LOGRA', 1), ('75', 'LOGRA', 1), ('campo', '', 2), ('grande', '', 2), ('29140-000', '', 2), ('cariacica', '', 2), ('es', '', 2)]

Entities [('avenida antonio cunha 700', 'LOGRA')]
Tokens [('avenida', 'LOGRA', 3), ('antonio', 'LOGRA', 1), ('cunha', 'LOGRA', 1), ('700', 'LOGRA', 1), ('centro', '', 2), ('84200-000', '', 2), ('jaguariaiva', '', 2), ('pr', '', 2)]

Entities [('alameda dos maracatins 634/636', 'LOGRA')]
Tokens [('alameda', 'LOGRA', 3), ('dos', 'LOGRA', 1), ('maracatins', 'LOGRA', 1), ('634/636', 'LOGRA', 1), ('indian', '', 2), ('�', '', 2), ('polis', '', 2), ('04089-001', '', 2), ('sao', '', 2), ('paulo', '', 2), ('sp', '', 2)]

Enti

Entities [('rua dona chiquinha de mattos', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('dona', 'LOGRA', 1), ('chiquinha', 'LOGRA', 1), ('de', 'LOGRA', 1), ('mattos', 'LOGRA', 1), ('centro', '', 2), ('12020-010', '', 2), ('taubate', '', 2), ('sp', '', 2)]

Entities [('av. em�lio bosco 201', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('em', 'LOGRA', 1), ('�', 'LOGRA', 1), ('lio', 'LOGRA', 1), ('bosco', 'LOGRA', 1), ('201', 'LOGRA', 1), ('condom', '', 2), ('�', '', 2), ('nio', '', 2), ('coronel', '', 2), ('jardim', '', 2), ('morumbi', '', 2), ('13180-000', '', 2), ('sumare', '', 2), ('sp', '', 2)]

Entities [('avenida das na��', 'LOGRA')]
Tokens [('avenida', 'LOGRA', 3), ('das', 'LOGRA', 1), ('na', 'LOGRA', 1), ('�', 'LOGRA', 1), ('�', 'LOGRA', 1), ('es', '', 2), ('unidas', '', 2), ('12559', '', 2), ('brooklin', '', 2), ('paulista', '', 2), ('04578-903', '', 2), ('sao', '', 2), ('paulo', '', 2), ('sp', '', 2)]

Entities [('pca.pref.mario carneiro 23', 'LOGRA')]
Tokens [('pca.pre

Entities [('rua carapinima num 2200', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('carapinima', 'LOGRA', 1), ('num', 'LOGRA', 1), ('2200', 'LOGRA', 1), ('loja', '', 2), ('115a', '', 2), ('benfica', '', 2), ('60015-290', '', 2), ('fortaleza', '', 2), ('ce', '', 2)]

Entities [('r rubens pagani', 'LOGRA')]
Tokens [('r', 'LOGRA', 3), ('rubens', 'LOGRA', 1), ('pagani', 'LOGRA', 1), ('/56', '', 2), ('jd', '', 2), ('estoril', '', 2), ('iv', '', 2), ('17016-210', '', 2), ('bauru', '', 2), ('sp', '', 2)]

Entities [('al. mamore 17.andar', 'LOGRA')]
Tokens [('al', 'LOGRA', 3), ('.', 'LOGRA', 1), ('mamore', 'LOGRA', 1), ('17.andar', 'LOGRA', 1), ('ed.crystal', '', 2), ('tower', '', 2), ('a', '', 2), ('industrial', '', 2), ('06454-040', '', 2), ('barueri', '', 2), ('sp', '', 2)]

Entities [('av. dr ademar de barros 800', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('dr', 'LOGRA', 1), ('ademar', 'LOGRA', 1), ('de', 'LOGRA', 1), ('barros', 'LOGRA', 1), ('800', 'LOGRA', 1), ('centro', '', 2

Entities [('rua nhonho do livramento', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('nhonho', 'LOGRA', 1), ('do', 'LOGRA', 1), ('livramento', 'LOGRA', 1), ('centro', '', 2), ('15910-000', '', 2), ('monte', '', 2), ('alto', '', 2), ('sp', '', 2)]

Entities [('rua do catete 355-a', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('do', 'LOGRA', 1), ('catete', 'LOGRA', 1), ('355-a', 'LOGRA', 1), ('e', '', 2), ('357', '', 2), ('catete', '', 2), ('22220-001', '', 2), ('rio', '', 2), ('de', '', 2), ('janeiro', '', 2), ('rj', '', 2)]

Entities [('av. brasil 92', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('brasil', 'LOGRA', 1), ('92', 'LOGRA', 1), ('centro', '', 2), ('83850-000', '', 2), ('agudos', '', 2), ('do', '', 2), ('sul', '', 2), ('pr', '', 2)]

Entities [('av.jeronimo de albuquerque s/n', 'LOGRA')]
Tokens [('av.jeronimo', 'LOGRA', 3), ('de', 'LOGRA', 1), ('albuquerque', 'LOGRA', 1), ('s', 'LOGRA', 1), ('/', 'LOGRA', 1), ('n', 'LOGRA', 1), ('.', '', 2), ('cohabanili', '', 2), ('65051-2

Entities [('avenida maravilha n.� 1.059', 'LOGRA')]
Tokens [('avenida', 'LOGRA', 3), ('maravilha', 'LOGRA', 1), ('n.', 'LOGRA', 1), ('�', 'LOGRA', 1), ('1.059', 'LOGRA', 1), ('n', '', 2), ('quadra', '', 2), ('r11', '', 2), ('centro', '', 2), ('78885-000', '', 2), ('feliz', '', 2), ('natal', '', 2), ('mt', '', 2)]

Entities [('av das nacoes 451', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('das', 'LOGRA', 1), ('nacoes', 'LOGRA', 1), ('451', 'LOGRA', 1), ('vila', '', 2), ('sampaio', '', 2), ('centro', '', 2), ('17201-300', '', 2), ('jau', '', 2), ('sp', '', 2)]

Entities [('avenida antonio afonso de lima', 'LOGRA')]
Tokens [('avenida', 'LOGRA', 3), ('antonio', 'LOGRA', 1), ('afonso', 'LOGRA', 1), ('de', 'LOGRA', 1), ('lima', 'LOGRA', 1), ('centro', '', 2), ('07400-560', '', 2), ('aruja', '', 2), ('sp', '', 2)]

Entities [('rua valdomiro lins', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('valdomiro', 'LOGRA', 1), ('lins', 'LOGRA', 1), ('centro', '', 2), ('44600-000', '', 2), ('ipira', '', 2), ('ba

Entities [('rua bernardino de campos 2858', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('bernardino', 'LOGRA', 1), ('de', 'LOGRA', 1), ('campos', 'LOGRA', 1), ('2858', 'LOGRA', 1), ('centro', '', 2), ('15015-300', '', 2), ('sao', '', 2), ('jose', '', 2), ('do', '', 2), ('rio', '', 2), ('preto', '', 2), ('sp', '', 2)]

Entities [('av. couto de magalh�es 2.815', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('couto', 'LOGRA', 1), ('de', 'LOGRA', 1), ('magalh', 'LOGRA', 1), ('�', 'LOGRA', 1), ('es', 'LOGRA', 1), ('2.815', 'LOGRA', 1), ('centro', '', 2), ('norte', '', 2), ('78110-400', '', 2), ('varzea', '', 2), ('grande', '', 2), ('mt', '', 2)]

Entities [('alameda xingu 350', 'LOGRA')]
Tokens [('alameda', 'LOGRA', 3), ('xingu', 'LOGRA', 1), ('350', 'LOGRA', 1), ('21', '', 2), ('�', '', 2), ('andar', '', 2), ('-', '', 2), ('sala', '', 2), ('2103', '', 2), ('alphaville', '', 2), ('industrial', '', 2), ('06455-030', '', 2), ('barueri', '', 2), ('sp', '', 2)]

Entities [('av j 473 - s

Entities [('pc luiz nogueira', 'LOGRA')]
Tokens [('pc', 'LOGRA', 3), ('luiz', 'LOGRA', 1), ('nogueira', 'LOGRA', 1), ('centro', '', 2), ('48700-000', '', 2), ('serrinha', '', 2), ('ba', '', 2)]

Entities [('r.mal.floriano 340', 'LOGRA')]
Tokens [('r.mal.floriano', 'LOGRA', 3), ('340', 'LOGRA', 1), ('terreo', '', 2), ('centro', '', 2), ('28460-000', '', 2), ('miracema', '', 2), ('rj', '', 2)]

Entities [('rua do r�cio', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('do', 'LOGRA', 1), ('r', 'LOGRA', 1), ('�', 'LOGRA', 1), ('cio', 'LOGRA', 1), ('8', '', 2), ('�', '', 2), ('andar', '', 2), ('-', '', 2), ('conjuntos', '', 2), ('81', '', 2), ('e', '', 2), ('82', '', 2), ('-', '', 2), ('ed', '', 2), ('.', '', 2), ('atrium', '', 2), ('ix', '', 2), ('vila', '', 2), ('ol', '', 2), ('�', '', 2), ('mpia', '', 2), ('04552-000', '', 2), ('sao', '', 2), ('paulo', '', 2), ('sp', '', 2)]

Entities [('r.rio de janeiro 1292', 'LOGRA')]
Tokens [('r.rio', 'LOGRA', 3), ('de', 'LOGRA', 1), ('janeiro', 'LOGRA', 1),

Entities [('av. guapira 2440', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('guapira', 'LOGRA', 1), ('2440', 'LOGRA', 1), ('jacana', '', 2), ('22650-020', '', 2), ('sao', '', 2), ('paulo', '', 2), ('sp', '', 2)]

Entities [('pra�a da rep�blica 134', 'LOGRA')]
Tokens [('pra', 'LOGRA', 3), ('�', 'LOGRA', 1), ('a', 'LOGRA', 1), ('da', 'LOGRA', 1), ('rep', 'LOGRA', 1), ('�', 'LOGRA', 1), ('blica', 'LOGRA', 1), ('134', 'LOGRA', 1), ('centro', '', 2), ('87490-000', '', 2), ('nova', '', 2), ('olimpia', '', 2), ('pr', '', 2)]

Entities [('avenida minas gerais', 'LOGRA')]
Tokens [('avenida', 'LOGRA', 3), ('minas', 'LOGRA', 1), ('gerais', 'LOGRA', 1), ('centro', '', 2), ('35010-151', '', 2), ('governador', '', 2), ('valadares', '', 2), ('mg', '', 2)]

Entities [('avenida do cinquentenario', 'LOGRA')]
Tokens [('avenida', 'LOGRA', 3), ('do', 'LOGRA', 1), ('cinquentenario', 'LOGRA', 1), ('centro', '', 2), ('45600-083', '', 2), ('itabuna', '', 2), ('ba', '', 2)]

Entities [('av. afonso 

Entities [('r.pe.cacique 785', 'LOGRA')]
Tokens [('r.pe.cacique', 'LOGRA', 3), ('785', 'LOGRA', 1), ('centro', '', 2), ('98910-000', '', 2), ('tres', '', 2), ('de', '', 2), ('maio', '', 2), ('rs', '', 2)]

Entities [('safs qd.6 lt.1', 'LOGRA')]
Tokens [('safs', 'LOGRA', 3), ('qd.6', 'LOGRA', 1), ('lt.1', 'LOGRA', 1), ('zona', '', 2), ('civico-administrativa', '', 2), ('70095-900', '', 2), ('brasilia', '', 2), ('df', '', 2)]

Entities [('al.jose maria alkimin 1800', 'LOGRA')]
Tokens [('al.jose', 'LOGRA', 3), ('maria', 'LOGRA', 1), ('alkimin', 'LOGRA', 1), ('1800', 'LOGRA', 1), ('centro', '', 2), ('de', '', 2), ('convivencia', '', 2), ('serra', '', 2), ('verde', '', 2), ('31630-060', '', 2), ('belo', '', 2), ('horizonte', '', 2), ('mg', '', 2)]

Entities [('rua jacaranda 231', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('jacaranda', 'LOGRA', 1), ('231', 'LOGRA', 1), ('esq.c', '', 2), ('/', '', 2), ('rua', '', 2), ('inglaterra', '', 2), ('250', '', 2), ('eucaliptos', '', 2), ('83820-000', '',

Entities [('rodovia camilo calazans 951', 'LOGRA')]
Tokens [('rodovia', 'LOGRA', 3), ('camilo', 'LOGRA', 1), ('calazans', 'LOGRA', 1), ('951', 'LOGRA', 1), ('centro', '', 2), ('49250-000', '', 2), ('indiaroba', '', 2), ('se', '', 2)]

Entities [('r.hermenegildo alves 228', 'LOGRA')]
Tokens [('r.hermenegildo', 'LOGRA', 3), ('alves', 'LOGRA', 1), ('228', 'LOGRA', 1), ('pca.magalhaes', '', 2), ('barata', '', 2), ('centro', '', 2), ('68640-000', '', 2), ('ourem', '', 2), ('pa', '', 2)]

Entities [('avenida dom pedro i 3336', 'LOGRA')]
Tokens [('avenida', 'LOGRA', 3), ('dom', 'LOGRA', 1), ('pedro', 'LOGRA', 1), ('i', 'LOGRA', 1), ('3336', 'LOGRA', 1), ('vila', '', 2), ('pires', '', 2), ('09130-400', '', 2), ('santo', '', 2), ('andre', '', 2), ('sp', '', 2)]

Entities [('pca silvio romero 156', 'LOGRA')]
Tokens [('pca', 'LOGRA', 3), ('silvio', 'LOGRA', 1), ('romero', 'LOGRA', 1), ('156', 'LOGRA', 1), ('cidade', '', 2), ('mae', '', 2), ('do', '', 2), ('ceu', '', 2), ('03323-000', '', 2), ('sa

Entities [('avenida paulista 1776', 'LOGRA')]
Tokens [('avenida', 'LOGRA', 3), ('paulista', 'LOGRA', 1), ('1776', 'LOGRA', 1), ('loja', '', 2), ('e', '', 2), ('mezanino', '', 2), ('01310-200', '', 2), ('sao', '', 2), ('paulo', '', 2), ('sp', '', 2)]

Entities [('rua santa catarina num 361', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('santa', 'LOGRA', 1), ('catarina', 'LOGRA', 1), ('num', 'LOGRA', 1), ('361', 'LOGRA', 1), ('centro', '', 2), ('95770-000', '', 2), ('feliz', '', 2), ('rs', '', 2)]

Entities [('av. 24de outubro 1310', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('24de', 'LOGRA', 1), ('outubro', 'LOGRA', 1), ('1310', 'LOGRA', 1), ('campinas', '', 2), ('74505-010', '', 2), ('goiania', '', 2), ('go', '', 2)]

Entities [('rua arara quadra 55 lote 4a', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('arara', 'LOGRA', 1), ('quadra', 'LOGRA', 1), ('55', 'LOGRA', 1), ('lote', 'LOGRA', 1), ('4a', 'LOGRA', 1), ('centro', '', 2), ('68473-000', '', 2), ('novo', '', 2), ('repartimento

Entities [('r.sete de setembro 100', 'LOGRA')]
Tokens [('r.sete', 'LOGRA', 3), ('de', 'LOGRA', 1), ('setembro', 'LOGRA', 1), ('100', 'LOGRA', 1), ('centro', '', 2), ('14300-000', '', 2), ('batatais', '', 2), ('sp', '', 2)]

Entities [('rua volunt�rios da p�tria 180', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('volunt', 'LOGRA', 1), ('�', 'LOGRA', 1), ('rios', 'LOGRA', 1), ('da', 'LOGRA', 1), ('p', 'LOGRA', 1), ('�', 'LOGRA', 1), ('tria', 'LOGRA', 1), ('180', 'LOGRA', 1), ('loja', '', 2), ('a', '', 2), ('botafogo', '', 2), ('22270-010', '', 2), ('rio', '', 2), ('de', '', 2), ('janeiro', '', 2), ('rj', '', 2)]

Entities [('av. brigadeiro faria lima s/n', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('brigadeiro', 'LOGRA', 1), ('faria', 'LOGRA', 1), ('lima', 'LOGRA', 1), ('s', 'LOGRA', 1), ('/', 'LOGRA', 1), ('n', 'LOGRA', 1), ('�', '', 2), ('cocaia', '', 2), ('07130-000', '', 2), ('guarulhos', '', 2), ('sp', '', 2)]

Entities [('av. dorival caymi 14193', 'LOGRA')]
Tokens [('av',

Entities [('av ipiranga', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('ipiranga', 'LOGRA', 1), ('12', '', 2), ('�', '', 2), ('andar', '', 2), ('republica', '', 2), ('01046-010', '', 2), ('sao', '', 2), ('paulo', '', 2), ('sp', '', 2)]

Entities [('rua alfredo gualandi da silva s/n', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('alfredo', 'LOGRA', 1), ('gualandi', 'LOGRA', 1), ('da', 'LOGRA', 1), ('silva', 'LOGRA', 1), ('s', 'LOGRA', 1), ('/', 'LOGRA', 1), ('n', 'LOGRA', 1), ('centro', '', 2), ('29580-000', '', 2), ('dores', '', 2), ('do', '', 2), ('rio', '', 2), ('preto', '', 2), ('es', '', 2)]

Entities [('r.vinte e oito de outubro 483', 'LOGRA')]
Tokens [('r.vinte', 'LOGRA', 3), ('e', 'LOGRA', 1), ('oito', 'LOGRA', 1), ('de', 'LOGRA', 1), ('outubro', 'LOGRA', 1), ('483', 'LOGRA', 1), ('centro', '', 2), ('14960-000', '', 2), ('novo', '', 2), ('horizonte', '', 2), ('sp', '', 2)]

Entities [('avenida presidente m�dice 138/142', 'LOGRA')]
Tokens [('avenida', 'LOGRA', 3), ('presidente', 'LOGRA', 1)

Entities [('av. angelica 1946', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('angelica', 'LOGRA', 1), ('1946', 'LOGRA', 1), ('s.', '', 2), ('cecilia', '', 2), ('01228-200', '', 2), ('sao', '', 2), ('paulo', '', 2), ('sp', '', 2)]

Entities [('av. ataulfo de paiva', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('ataulfo', 'LOGRA', 1), ('de', 'LOGRA', 1), ('paiva', 'LOGRA', 1), ('lj', '', 2), ('b', '', 2), ('e', '', 2), ('ssl', '', 2), ('leblon', '', 2), ('22440-033', '', 2), ('rio', '', 2), ('de', '', 2), ('janeiro', '', 2), ('rj', '', 2)]

Entities [('av. jabaquara 1398', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('jabaquara', 'LOGRA', 1), ('1398', 'LOGRA', 1), ('mirandopolis', '', 2), ('04046-200', '', 2), ('sao', '', 2), ('paulo', '', 2), ('sp', '', 2)]

Entities [('rua sampaio marques', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('sampaio', 'LOGRA', 1), ('marques', 'LOGRA', 1), ('8', '', 2), ('�', '', 2), ('andar', '', 2), ('-', '', 2), ('conjunto

Entities [('rua tupinambas 330', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('tupinambas', 'LOGRA', 1), ('330', 'LOGRA', 1), ('centro', '', 2), ('30120-070', '', 2), ('belo', '', 2), ('horizonte', '', 2), ('mg', '', 2)]

Entities [('avenida rio das pedras', 'LOGRA')]
Tokens [('avenida', 'LOGRA', 3), ('rio', 'LOGRA', 1), ('das', 'LOGRA', 1), ('pedras', 'LOGRA', 1), ('jardim', '', 2), ('aricanduva', '', 2), ('�', '', 2), ('03453-000', '', 2), ('sao', '', 2), ('paulo', '', 2), ('sp', '', 2)]

Entities [('rua joaquim romao de melo 17', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('joaquim', 'LOGRA', 1), ('romao', 'LOGRA', 1), ('de', 'LOGRA', 1), ('melo', 'LOGRA', 1), ('17', 'LOGRA', 1), ('centro', '', 2), ('58500-000', '', 2), ('monteiro', '', 2), ('pb', '', 2)]

Entities [('r.visc.de nacar 1440', 'LOGRA')]
Tokens [('r.visc.de', 'LOGRA', 3), ('nacar', 'LOGRA', 1), ('1440', 'LOGRA', 1), ('24.andar', '', 2), ('centro', '', 2), ('80410-201', '', 2), ('curitiba', '', 2), ('pr', '', 2)]

Entities [('ave

Entities [('rua manoelito de ornellas', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('manoelito', 'LOGRA', 1), ('de', 'LOGRA', 1), ('ornellas', 'LOGRA', 1), ('forum', '', 2), ('central', '', 2), ('predio', '', 2), ('ii', '', 2), ('praia', '', 2), ('de', '', 2), ('belas', '', 2), ('90110-160', '', 2), ('porto', '', 2), ('alegre', '', 2), ('rs', '', 2)]

Entities [('praca xv de novembro 298', 'LOGRA')]
Tokens [('praca', 'LOGRA', 3), ('xv', 'LOGRA', 1), ('de', 'LOGRA', 1), ('novembro', 'LOGRA', 1), ('298', 'LOGRA', 1), ('centro', '', 2), ('88010-400', '', 2), ('florianopolis', '', 2), ('sc', '', 2)]

Entities [('av. michel zaine 390', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('michel', 'LOGRA', 1), ('zaine', 'LOGRA', 1), ('390', 'LOGRA', 1), ('centro', '', 2), ('13540-000', '', 2), ('corumbatai', '', 2), ('sp', '', 2)]

Entities [('avenida ayrton senna', 'LOGRA')]
Tokens [('avenida', 'LOGRA', 3), ('ayrton', 'LOGRA', 1), ('senna', 'LOGRA', 1), ('lj', '', 2), ('1113', '', 2), ('b

Entities [('pra�a marechal hermes 2', 'LOGRA')]
Tokens [('pra', 'LOGRA', 3), ('�', 'LOGRA', 1), ('a', 'LOGRA', 1), ('marechal', 'LOGRA', 1), ('hermes', 'LOGRA', 1), ('2', 'LOGRA', 1), ('loja', '', 2), ('santo', '', 2), ('cristo', '', 2), ('20220-430', '', 2), ('rio', '', 2), ('de', '', 2), ('janeiro', '', 2), ('rj', '', 2)]

Entities [('r tenente silveira 225', 'LOGRA')]
Tokens [('r', 'LOGRA', 3), ('tenente', 'LOGRA', 1), ('silveira', 'LOGRA', 1), ('225', 'LOGRA', 1), ('2.andar', '', 2), ('centro', '', 2), ('88010-300', '', 2), ('florianopolis', '', 2), ('sc', '', 2)]

Entities [('r amazonas 2901', 'LOGRA')]
Tokens [('r', 'LOGRA', 3), ('amazonas', 'LOGRA', 1), ('2901', 'LOGRA', 1), ('garcia', '', 2), ('89022-002', '', 2), ('blumenau', '', 2), ('sc', '', 2)]

Entities [('av.raul souto maior 338', 'LOGRA')]
Tokens [('av.raul', 'LOGRA', 3), ('souto', 'LOGRA', 1), ('maior', 'LOGRA', 1), ('338', 'LOGRA', 1), ('goytacazes', '', 2), ('28110-000', '', 2), ('campos', '', 2), ('dos', '', 2), ('g

Entities [('rua padre alfredo', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('padre', 'LOGRA', 1), ('alfredo', 'LOGRA', 1), ('loja', '', 2), ('e', '', 2), ('sobreloja', '', 2), ('centro', '', 2), ('27123-130', '', 2), ('barra', '', 2), ('do', '', 2), ('pirai', '', 2), ('rj', '', 2)]

Entities [('pca.pedro gomes 274', 'LOGRA')]
Tokens [('pca.pedro', 'LOGRA', 3), ('gomes', 'LOGRA', 1), ('274', 'LOGRA', 1), ('terreo', '', 2), ('centro', '', 2), ('42600-000', '', 2), ('madre', '', 2), ('de', '', 2), ('deus', '', 2), ('ba', '', 2)]

Entities [('av brasil 8050 / 8060', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('brasil', 'LOGRA', 1), ('8050', 'LOGRA', 1), ('/', 'LOGRA', 1), ('8060', 'LOGRA', 1), ('salas', '', 2), ('1', '', 2), ('e', '', 2), ('2', '', 2), ('qd', '', 2), ('0347', '', 2), ('lt', '', 2), ('1r-a', '', 2), ('centro', '', 2), ('85810-001', '', 2), ('cascavel', '', 2), ('pr', '', 2)]

Entities [('cj c 08 lote 16 parte taguatinga', 'LOGRA')]
Tokens [('cj', 'LOGRA', 3), ('c', 'LOGRA', 1), ('08

Entities [('rua s 1 quadra', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('s', 'LOGRA', 1), ('1', 'LOGRA', 1), ('quadra', 'LOGRA', 1), ('s', '', 2), ('4', '', 2), ('-', '', 2), ('lotes', '', 2), ('17', '', 2), ('/18', '', 2), ('setor', '', 2), ('bela', '', 2), ('vista', '', 2), ('74823-420', '', 2), ('goiania', '', 2), ('go', '', 2)]

Entities [('rua antonio campos 108', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('antonio', 'LOGRA', 1), ('campos', 'LOGRA', 1), ('108', 'LOGRA', 1), ('centro', '', 2), ('37275-000', '', 2), ('cristais', '', 2), ('mg', '', 2)]

Entities [('ruario verde qd 03 lt 03', 'LOGRA')]
Tokens [('ruario', 'LOGRA', 3), ('verde', 'LOGRA', 1), ('qd', 'LOGRA', 1), ('03', 'LOGRA', 1), ('lt', 'LOGRA', 1), ('03', 'LOGRA', 1), ('centro', '', 2), ('75915-000', '', 2), ('montividiu', '', 2), ('go', '', 2)]

Entities [('av. portugal', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('portugal', 'LOGRA', 1), ('centro', '', 2), ('09040-001', '', 2), ('santo', '', 2), ('andre', '

Entities [('av.getulio vargas 533-n', 'LOGRA')]
Tokens [('av.getulio', 'LOGRA', 3), ('vargas', 'LOGRA', 1), ('533-n', 'LOGRA', 1), ('esq.c', '', 2), ('/', '', 2), ('mal.deodoro', '', 2), ('centro', '', 2), ('89802-000', '', 2), ('chapeco', '', 2), ('sc', '', 2)]

Entities [('rua territ�rio do amapa 455', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('territ', 'LOGRA', 1), ('�', 'LOGRA', 1), ('rio', 'LOGRA', 1), ('do', 'LOGRA', 1), ('amapa', 'LOGRA', 1), ('455', 'LOGRA', 1), ('pituba', '', 2), ('41830-540', '', 2), ('salvador', '', 2), ('ba', '', 2)]

Entities [('av.primo alberto bodanese 786', 'LOGRA')]
Tokens [('av.primo', 'LOGRA', 3), ('alberto', 'LOGRA', 1), ('bodanese', 'LOGRA', 1), ('786', 'LOGRA', 1), ('sala', '', 2), ('centro', '', 2), ('89850-000', '', 2), ('quilombo', '', 2), ('sc', '', 2)]

Entities [('av. andre araujo', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('andre', 'LOGRA', 1), ('araujo', 'LOGRA', 1), ('aleixo', '', 2), ('69060-000', '', 2), ('manaus', '', 2),

Entities [('rua marechal deodoro', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('marechal', 'LOGRA', 1), ('deodoro', 'LOGRA', 1), ('centro', '', 2), ('84940-000', '', 2), ('siqueira', '', 2), ('campos', '', 2), ('pr', '', 2)]

Entities [('rua dos expedicionarios 144', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('dos', 'LOGRA', 1), ('expedicionarios', 'LOGRA', 1), ('144', 'LOGRA', 1), ('centro', '', 2), ('18500-000', '', 2), ('laranjal', '', 2), ('paulista', '', 2), ('sp', '', 2)]

Entities [('av.ragueb chohfi 3039/3047', 'LOGRA')]
Tokens [('av.ragueb', 'LOGRA', 3), ('chohfi', 'LOGRA', 1), ('3039/3047', 'LOGRA', 1), ('pq.boa', '', 2), ('esperanca', '', 2), ('08341-420', '', 2), ('sao', '', 2), ('paulo', '', 2), ('sp', '', 2)]

Entities [('rod.br-316 861 km 8', 'LOGRA')]
Tokens [('rod.br-316', 'LOGRA', 3), ('861', 'LOGRA', 1), ('km', 'LOGRA', 1), ('8', 'LOGRA', 1), ('esq.c', '', 2), ('/', '', 2), ('r.ana', '', 2), ('cristina', '', 2), ('centro', '', 2), ('67033-000', '', 2), ('ananindeua', '', 2)

Entities [('av.papa paulo sexto 566', 'LOGRA')]
Tokens [('av.papa', 'LOGRA', 3), ('paulo', 'LOGRA', 1), ('sexto', 'LOGRA', 1), ('566', 'LOGRA', 1), ('jd.do', '', 2), ('trevo', '', 2), ('13040-000', '', 2), ('campinas', '', 2), ('sp', '', 2)]

Entities [('al.sampaio s/', 'LOGRA')]
Tokens [('al.sampaio', 'LOGRA', 3), ('s', 'LOGRA', 1), ('/', 'LOGRA', 1), ('n', '', 2), ('predio', '', 2), ('premoldado', '', 2), ('centro', '', 2), ('44830-000', '', 2), ('piritiba', '', 2), ('ba', '', 2)]

Entities [('avenida antonio carlos comitre', 'LOGRA')]
Tokens [('avenida', 'LOGRA', 3), ('antonio', 'LOGRA', 1), ('carlos', 'LOGRA', 1), ('comitre', 'LOGRA', 1), ('parque', '', 2), ('campolim', '', 2), ('18047-620', '', 2), ('sorocaba', '', 2), ('sp', '', 2)]

Entities [('av.pastor martin luther king junior bloco 1 loja 211', 'LOGRA')]
Tokens [('av.pastor', 'LOGRA', 3), ('martin', 'LOGRA', 1), ('luther', 'LOGRA', 1), ('king', 'LOGRA', 1), ('junior', 'LOGRA', 1), ('bloco', 'LOGRA', 1), ('1', 'LOGRA', 1), ('

Entities [('av.independencia quadra 4 lote 21', 'LOGRA')]
Tokens [('av.independencia', 'LOGRA', 3), ('quadra', 'LOGRA', 1), ('4', 'LOGRA', 1), ('lote', 'LOGRA', 1), ('21', 'LOGRA', 1), ('cidade', '', 2), ('livre', '', 2), ('74970-295', '', 2), ('aparecida', '', 2), ('de', '', 2), ('goiania', '', 2), ('go', '', 2)]

Entities [('av. manoel dias da silva 1502', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('manoel', 'LOGRA', 1), ('dias', 'LOGRA', 1), ('da', 'LOGRA', 1), ('silva', 'LOGRA', 1), ('1502', 'LOGRA', 1), ('pituba', '', 2), ('41830-001', '', 2), ('salvador', '', 2), ('ba', '', 2)]

Entities [('av. paulista 302', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('paulista', 'LOGRA', 1), ('302', 'LOGRA', 1), ('bela', '', 2), ('vista', '', 2), ('01310-000', '', 2), ('sao', '', 2), ('paulo', '', 2), ('sp', '', 2)]

Entities [('sdsbloco p lojas 14', 'LOGRA')]
Tokens [('sdsbloco', 'LOGRA', 3), ('p', 'LOGRA', 1), ('lojas', 'LOGRA', 1), ('14', 'LOGRA', 1), ('e', '', 2

Entities [('av.duque de caxias 527', 'LOGRA')]
Tokens [('av.duque', 'LOGRA', 3), ('de', 'LOGRA', 1), ('caxias', 'LOGRA', 1), ('527', 'LOGRA', 1), ('centro', '', 2), ('95750-000', '', 2), ('salvador', '', 2), ('do', '', 2), ('sul', '', 2), ('rs', '', 2)]

Entities [('pca conego alcindino', 'LOGRA')]
Tokens [('pca', 'LOGRA', 3), ('conego', 'LOGRA', 1), ('alcindino', 'LOGRA', 1), ('centro', '', 2), ('historico', '', 2), ('83203-330', '', 2), ('paranagua', '', 2), ('pr', '', 2)]

Entities [('rua conselheiro franco', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('conselheiro', 'LOGRA', 1), ('franco', 'LOGRA', 1), ('centro', '', 2), ('44002-272', '', 2), ('feira', '', 2), ('de', '', 2), ('santana', '', 2), ('ba', '', 2)]

Entities [('rua getulio vargas 1133', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('getulio', 'LOGRA', 1), ('vargas', 'LOGRA', 1), ('1133', 'LOGRA', 1), ('centro', '', 2), ('98600-000', '', 2), ('tres', '', 2), ('passos', '', 2), ('rs', '', 2)]

Entities [('av. maria amalia lopes de a

Entities [('av.dr.couto junior 248', 'LOGRA')]
Tokens [('av.dr.couto', 'LOGRA', 3), ('junior', 'LOGRA', 1), ('248', 'LOGRA', 1), ('centro', '', 2), ('17430-000', '', 2), ('alvinlandia', '', 2), ('sp', '', 2)]

Entities [('rua dinamarca quadra 104', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('dinamarca', 'LOGRA', 1), ('quadra', 'LOGRA', 1), ('104', 'LOGRA', 1), ('lotes', '', 2), ('6', '', 2), ('7', '', 2), ('8', '', 2), ('e', '', 2), ('9', '', 2), ('jardim', '', 2), ('europa', '', 2), ('74330-050', '', 2), ('goiania', '', 2), ('go', '', 2)]

Entities [('r.do comercio s/', 'LOGRA')]
Tokens [('r.do', 'LOGRA', 3), ('comercio', 'LOGRA', 1), ('s', 'LOGRA', 1), ('/', 'LOGRA', 1), ('n', '', 2), ('complexo', '', 2), ('da', '', 2), ('pref.municipal', '', 2), ('centro', '', 2), ('68148-000', '', 2), ('brasil', '', 2), ('novo', '', 2), ('pa', '', 2)]

Entities [('av. prudente de morais', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('prudente', 'LOGRA', 1), ('de', 'LOGRA', 1), ('morais', 

Entities [('rua cel eugenio motta', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('cel', 'LOGRA', 1), ('eugenio', 'LOGRA', 1), ('motta', 'LOGRA', 1), ('centro', '', 2), ('18550-000', '', 2), ('boituva', '', 2), ('sp', '', 2)]

Entities [('av.barao do rio branco 2108', 'LOGRA')]
Tokens [('av.barao', 'LOGRA', 3), ('do', 'LOGRA', 1), ('rio', 'LOGRA', 1), ('branco', 'LOGRA', 1), ('2108', 'LOGRA', 1), ('terreo', '', 2), ('centro', '', 2), ('68743-050', '', 2), ('castanhal', '', 2), ('pa', '', 2)]

Entities [('r.dr.silva martins 210', 'LOGRA')]
Tokens [('r.dr.silva', 'LOGRA', 3), ('martins', 'LOGRA', 1), ('210', 'LOGRA', 1), ('centro', '', 2), ('65520-000', '', 2), ('brejo', '', 2), ('ma', '', 2)]

Entities [('rua bezerra paes 309', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('bezerra', 'LOGRA', 1), ('paes', 'LOGRA', 1), ('309', 'LOGRA', 1), ('centro', '', 2), ('13690-000', '', 2), ('descalvado', '', 2), ('sp', '', 2)]

Entities [('rua padre pedro pinto 825', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('p

Entities [('avenida conego joao lima', 'LOGRA')]
Tokens [('avenida', 'LOGRA', 3), ('conego', 'LOGRA', 1), ('joao', 'LOGRA', 1), ('lima', 'LOGRA', 1), ('setor', '', 2), ('central', '', 2), ('77804-010', '', 2), ('araguaina', '', 2), ('to', '', 2)]

Entities [('rua padre rolim', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('padre', 'LOGRA', 1), ('rolim', 'LOGRA', 1), ('centro', '', 2), ('58900-000', '', 2), ('cajazeiras', '', 2), ('pb', '', 2)]

Entities [('r.joao batista 12', 'LOGRA')]
Tokens [('r.joao', 'LOGRA', 3), ('batista', 'LOGRA', 1), ('12', 'LOGRA', 1), ('centro', '', 2), ('55750-000', '', 2), ('surubim', '', 2), ('pe', '', 2)]

Entities [('rua jose cardoso de lima', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('jose', 'LOGRA', 1), ('cardoso', 'LOGRA', 1), ('de', 'LOGRA', 1), ('lima', 'LOGRA', 1), ('mimoso', '', 2), ('do', '', 2), ('oeste', '', 2), ('47850-000', '', 2), ('luis', '', 2), ('eduardo', '', 2), ('magalhaes', '', 2), ('ba', '', 2)]

Entities [('pca n sra da conceicao', 'LOGRA')

Entities [('r.cardoso de almeida 299', 'LOGRA')]
Tokens [('r.cardoso', 'LOGRA', 3), ('de', 'LOGRA', 1), ('almeida', 'LOGRA', 1), ('299', 'LOGRA', 1), ('perdizes', '', 2), ('05013-000', '', 2), ('sao', '', 2), ('paulo', '', 2), ('sp', '', 2)]

Entities [('rua padre albuquerque 54', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('padre', 'LOGRA', 1), ('albuquerque', 'LOGRA', 1), ('54', 'LOGRA', 1), ('centro', '', 2), ('18200-220', '', 2), ('itapetininga', '', 2), ('sp', '', 2)]

Entities [('rua xv de novembro 2351', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('xv', 'LOGRA', 1), ('de', 'LOGRA', 1), ('novembro', 'LOGRA', 1), ('2351', 'LOGRA', 1), ('esq.c', '', 2), ('/', '', 2), ('rua', '', 2), ('goncalves', '', 2), ('dias', '', 2), ('centro', '', 2), ('83601-030', '', 2), ('campo', '', 2), ('largo', '', 2), ('pr', '', 2)]

Entities [('av. feira de santana 984', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('feira', 'LOGRA', 1), ('de', 'LOGRA', 1), ('santana', 'LOGRA', 1), ('984', 'LOGRA',

Entities [('rua marechal deodoro 1192', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('marechal', 'LOGRA', 1), ('deodoro', 'LOGRA', 1), ('1192', 'LOGRA', 1), ('higienopolis', '', 2), ('14100-000', '', 2), ('ribeirao', '', 2), ('preto', '', 2), ('sp', '', 2)]

Entities [('av. ester 253', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('ester', 'LOGRA', 1), ('253', 'LOGRA', 1), ('centro', '', 2), ('13150-000', '', 2), ('cosmopolis', '', 2), ('sp', '', 2)]

Entities [('rua s�o paulo patrim', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('s', 'LOGRA', 1), ('�', 'LOGRA', 1), ('o', 'LOGRA', 1), ('paulo', 'LOGRA', 1), ('patrim', 'LOGRA', 1), ('�', '', 2), ('nio', '', 2), ('novo', '', 2), ('15500-010', '', 2), ('votuporanga', '', 2), ('sp', '', 2)]

Entities [('av. ism�nia batista ribeiro velho', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('ism', 'LOGRA', 1), ('�', 'LOGRA', 1), ('nia', 'LOGRA', 1), ('batista', 'LOGRA', 1), ('ribeiro', 'LOGRA', 1), ('velho', 'LOGRA', 1), ('centro', 

Entities [('av sao paulo s/', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('sao', 'LOGRA', 1), ('paulo', 'LOGRA', 1), ('s', 'LOGRA', 1), ('/', 'LOGRA', 1), ('n', '', 2), ('lt', '', 2), ('13', '', 2), ('q', '', 2), ('35', '', 2), ('a', '', 2), ('vila', '', 2), ('brasilia', '', 2), ('74905-770', '', 2), ('aparecida', '', 2), ('de', '', 2), ('goiania', '', 2), ('go', '', 2)]

Entities [('av.ataulfo de paiva 23', 'LOGRA')]
Tokens [('av.ataulfo', 'LOGRA', 3), ('de', 'LOGRA', 1), ('paiva', 'LOGRA', 1), ('23', 'LOGRA', 1), ('lojas', '', 2), ('a', '', 2), ('/', '', 2), ('b', '', 2), ('leblon', '', 2), ('22440-032', '', 2), ('rio', '', 2), ('de', '', 2), ('janeiro', '', 2), ('rj', '', 2)]

Entities [('avenida antonio abrahao caram 820', 'LOGRA')]
Tokens [('avenida', 'LOGRA', 3), ('antonio', 'LOGRA', 1), ('abrahao', 'LOGRA', 1), ('caram', 'LOGRA', 1), ('820', 'LOGRA', 1), ('lojas', '', 2), ('04', '', 2), ('05', '', 2), ('06', '', 2), ('07', '', 2), ('11', '', 2), ('12', '', 2), ('13', '', 2), ('e', ''

Entities [('r.do comercio 239', 'LOGRA')]
Tokens [('r.do', 'LOGRA', 3), ('comercio', 'LOGRA', 1), ('239', 'LOGRA', 1), ('centro', '', 2), ('23900-565', '', 2), ('angra', '', 2), ('dos', '', 2), ('reis', '', 2), ('rj', '', 2)]

Entities [('r heraclito villar sn', 'LOGRA')]
Tokens [('r', 'LOGRA', 3), ('heraclito', 'LOGRA', 1), ('villar', 'LOGRA', 1), ('sn', 'LOGRA', 1), ('centro', '', 2), ('59570-000', '', 2), ('ceara-mirim', '', 2), ('rn', '', 2)]

Entities [('rua comendador parada 137', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('comendador', 'LOGRA', 1), ('parada', 'LOGRA', 1), ('137', 'LOGRA', 1), ('centro', '', 2), ('18170-000', '', 2), ('piedade', '', 2), ('sp', '', 2)]

Entities [('avenida dos italianos 340', 'LOGRA')]
Tokens [('avenida', 'LOGRA', 3), ('dos', 'LOGRA', 1), ('italianos', 'LOGRA', 1), ('340', 'LOGRA', 1), ('rocha', '', 2), ('miranda', '', 2), ('21510-104', '', 2), ('rio', '', 2), ('de', '', 2), ('janeiro', '', 2), ('rj', '', 2)]

Entities [('rua capit�o frederico teixei

Entities [('q acsv se 22 lote 23 av lo 05 (206', 'LOGRA')]
Tokens [('q', 'LOGRA', 3), ('acsv', 'LOGRA', 1), ('se', 'LOGRA', 1), ('22', 'LOGRA', 1), ('lote', 'LOGRA', 1), ('23', 'LOGRA', 1), ('av', 'LOGRA', 1), ('lo', 'LOGRA', 1), ('05', 'LOGRA', 1), ('(', 'LOGRA', 1), ('206', 'LOGRA', 1), ('s', '', 2), ('-', '', 2), ('a', '', 2), ('sn', '', 2), ('centro', '', 2), ('77020-504', '', 2), ('palmas', '', 2), ('to', '', 2)]

Entities [('avenida cavalheiro paschoal innechi 1460', 'LOGRA')]
Tokens [('avenida', 'LOGRA', 3), ('cavalheiro', 'LOGRA', 1), ('paschoal', 'LOGRA', 1), ('innechi', 'LOGRA', 1), ('1460', 'LOGRA', 1), ('independ', '', 2), ('�', '', 2), ('ncia', '', 2), ('14076-010', '', 2), ('ribeirao', '', 2), ('preto', '', 2), ('sp', '', 2)]

Entities [('rua maria freitas 103 a', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('maria', 'LOGRA', 1), ('freitas', 'LOGRA', 1), ('103', 'LOGRA', 1), ('a', 'LOGRA', 1), ('madureira', '', 2), ('21351-010', '', 2), ('rio', '', 2), ('de', '', 2), ('janeiro

Entities [('rodovia amaral peixoto 5251', 'LOGRA')]
Tokens [('rodovia', 'LOGRA', 3), ('amaral', 'LOGRA', 1), ('peixoto', 'LOGRA', 1), ('5251', 'LOGRA', 1), ('novo', '', 2), ('rio', '', 2), ('das', '', 2), ('ostras', '', 2), ('28890-000', '', 2), ('rio', '', 2), ('das', '', 2), ('ostras', '', 2), ('rj', '', 2)]

Entities [('av. ernani do amaral peixoto 363', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('ernani', 'LOGRA', 1), ('do', 'LOGRA', 1), ('amaral', 'LOGRA', 1), ('peixoto', 'LOGRA', 1), ('363', 'LOGRA', 1), ('centro', '', 2), ('24020-072', '', 2), ('niteroi', '', 2), ('rj', '', 2)]

Entities [('rua engenheiro antunes 222', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('engenheiro', 'LOGRA', 1), ('antunes', 'LOGRA', 1), ('222', 'LOGRA', 1), ('360305', '', 2), ('centro', '', 2), ('39800-019', '', 2), ('teofilo', '', 2), ('otoni', '', 2), ('mg', '', 2)]

Entities [('rua julio de castilhos 370', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('julio', 'LOGRA', 1), ('de', 'LOGRA', 1), (

Entities [('av mister hull padre andrade', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('mister', 'LOGRA', 1), ('hull', 'LOGRA', 1), ('padre', 'LOGRA', 1), ('andrade', 'LOGRA', 1), ('antonio', '', 2), ('bezerra', '', 2), ('60356-412', '', 2), ('fortaleza', '', 2), ('ce', '', 2)]

Entities [('avenida ministro geraldo barreto sobral', 'LOGRA')]
Tokens [('avenida', 'LOGRA', 3), ('ministro', 'LOGRA', 1), ('geraldo', 'LOGRA', 1), ('barreto', 'LOGRA', 1), ('sobral', 'LOGRA', 1), ('jardins', '', 2), ('49026-010', '', 2), ('aracaju', '', 2), ('se', '', 2)]

Entities [('rua olavo barreto viana', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('olavo', 'LOGRA', 1), ('barreto', 'LOGRA', 1), ('viana', 'LOGRA', 1), ('moinhos', '', 2), ('de', '', 2), ('vento', '', 2), ('90570-070', '', 2), ('porto', '', 2), ('alegre', '', 2), ('rs', '', 2)]

Entities [('av julio borella 1085', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('julio', 'LOGRA', 1), ('borella', 'LOGRA', 1), ('1085', 'LOGRA', 1), ('centro', '', 2), ('99150-000

In [20]:
phrase = "QNE 20 12  teste de endereço pedro leonardo"

doc = nlp(phrase)
print('Entities', [(ent.text, ent.label_) for ent in doc.ents])
print('Tokens', [(t.text, t.ent_type_, t.ent_iob) for t in doc])

Entities [('QNE 20 12  ', 'LOGRA')]
Tokens [('QNE', 'LOGRA', 3), ('20', 'LOGRA', 1), ('12', 'LOGRA', 1), (' ', 'LOGRA', 1), ('teste', '', 2), ('de', '', 2), ('endereço', '', 2), ('pedro', '', 2), ('leonardo', '', 2)]


In [None]:
from spacy.gold import GoldParse
from spacy.scorer import Scorer

def evaluate(ner_model, examples):
    scorer = Scorer()
    for input_, annot in examples:
        doc_gold_text = ner_model.make_doc(input_)
        gold = GoldParse(doc_gold_text, entities=annot)
        pred_value = ner_model(input_)
        scorer.score(pred_value, gold)
    return scorer.scores

In [None]:
# save model to output directory
if output_dir is not None:
    output_dir = Path(output_dir)
    if not output_dir.exists():
        output_dir.mkdir()
    nlp.to_disk(output_dir)
    print("Saved model to", output_dir)

In [None]:
loaded_model = spacy.load(output_dir)
evaluate(loaded_model, base_teste_final)