In [1]:
# Load Packages
from __future__ import unicode_literals, print_function

import plac #  wrapper over argparse
import random
from pathlib import Path
import spacy
from tqdm import tqdm, tqdm_notebook # loading bar 
import pandas as pd
import re
from pprint import pprint
from nltk.tokenize import sent_tokenize
import numpy as np

from spacy.util import minibatch, compounding

# Abrindo e gerando Datasets

### Entidade logradouro

In [2]:
# Obtendo dados crus de endereço

dataset = pd.read_csv("201906AGENCIAS.CSV")
dset = dataset.iloc[:-2,4:10].values  # tipo nparray object
pd_dset = pd.DataFrame(dset) # caso queira visualizar

pd_dset['Join'] = pd_dset[pd_dset.columns[0:]].apply(
    lambda x: ';'.join(x.dropna().astype(str)),
    axis=1
)
pd_dset.head(5)

Unnamed: 0,0,1,2,3,4,5,Join
0,"R.GUILHERME MOREIRA,315","SUBLOJA,LOJA E 2.ANDAR ...",CENTRO,69005-300,MANAUS ...,AM,"R.GUILHERME MOREIRA,315 ;SUBLO..."
1,"AV.PRES.VARGAS,248",1.E 2.ANDARES ...,CAMPINA,66010-900,BELEM ...,PA,"AV.PRES.VARGAS,248 ;1.E 2..."
2,"R.QUINZE DE NOVEMBRO,195",...,CENTRO,11010-908,SANTOS ...,SP,"R.QUINZE DE NOVEMBRO,195 ; ..."
3,"PCA.DAS QUATRO JORNADAS,11",MEZANINO ...,CENTRO,28010-000,CAMPOS DOS GOYTACAZES ...,RJ,"PCA.DAS QUATRO JORNADAS,11 ;MEZAN..."
4,"SEXTA AVENIDA,600",SECRETARIA DA EDUCACAO-TERREO ...,CAB,41745-002,SALVADOR ...,BA,"SEXTA AVENIDA,600 ;SECRE..."


In [3]:
# Tratando endereço completo do DataFrame
dset = np.array(pd_dset)
end_lista = []

for i in range(len(dset)):
    str_raw = dset[i][6]
    str_tratada = re.sub(r'[ ]{2,}', "",str_raw) # Tirando espaços excedentes no final do endereço
    str_tratada = re.sub(r'[;]{1,}', "; ", str_tratada) # Para complementos vazios, para não ter 2 ";"
    str_tratada = str_tratada.lower()
    
    end_lista.append(str_tratada)

end_lista[:3]

['r.guilherme moreira,315; subloja,loja e 2.andar; centro; 69005-300; manaus; am',
 'av.pres.vargas,248; 1.e 2.andares; campina; 66010-900; belem; pa',
 'r.quinze de novembro,195; centro; 11010-908; santos; sp']

In [4]:
# Lógica para extrair posição do logradouro:
# Dividir a String inteira por ";", pegar o len do primeiro split
# len de LOGRA é de 0 até len do primeiro split

print(end_lista[0])
split = end_lista[0].split(";")
print(split)
print(split[0])
print(len(split[0]))

r.guilherme moreira,315; subloja,loja e 2.andar; centro; 69005-300; manaus; am
['r.guilherme moreira,315', ' subloja,loja e 2.andar', ' centro', ' 69005-300', ' manaus', ' am']
r.guilherme moreira,315
23


In [5]:
# Extraindo iob com entidade apenas de logradouro

iob = []

for i in range(len(end_lista)):
    split = end_lista[i].split(";")
    len_logra = len(split[0])
    iob_dict = {"entities": [(0, len_logra-1,'LOGRA')]}
    end_lista[i] = end_lista[i].replace(";", " ")
    end_lista[i] = end_lista[i].replace(",", " ")
    end_lista[i] = re.sub(r'[ ]{2,}', " ", end_lista[i])

    
    tupla = (end_lista[i], iob_dict)
    iob.append(tupla)

FULL_DATA = iob

In [6]:
print(FULL_DATA[0])
print(FULL_DATA[1])
print(FULL_DATA[2])
print(FULL_DATA[5:15])

('r.guilherme moreira 315 subloja loja e 2.andar centro 69005-300 manaus am', {'entities': [(0, 22, 'LOGRA')]})
('av.pres.vargas 248 1.e 2.andares campina 66010-900 belem pa', {'entities': [(0, 17, 'LOGRA')]})
('r.quinze de novembro 195 centro 11010-908 santos sp', {'entities': [(0, 23, 'LOGRA')]})
[('av.rio branco 240 1.andar recife antigo 50030-310 recife pe', {'entities': [(0, 16, 'LOGRA')]}), ('av.santos dumont 2828 5.andar aldeota 60150-162 fortaleza ce', {'entities': [(0, 20, 'LOGRA')]}), ('pca.tiradentes 410 1.andar centro 80020-100 curitiba pr', {'entities': [(0, 17, 'LOGRA')]}), ('r.uruguai 185 5.andar centro 90010-901 porto alegre rs', {'entities': [(0, 12, 'LOGRA')]}), ('pca.1817 129 1.andar centro 58013-010 joao pessoa pb', {'entities': [(0, 11, 'LOGRA')]}), ('pca.odilon resende andrade 76 centro 37410-000 tres coracoes mg', {'entities': [(0, 28, 'LOGRA')]}), ('av fernandes lima 2591 terreo farol 57057-972 maceio al', {'entities': [(0, 22, 'LOGRA')]}), ('r.treze de junho 91

In [7]:
# Criação da base de teste e treinamento

n_test= 0.1 # Porcentagem para base de teste
test_n = round(len(FULL_DATA) * n_test)

# Divisao em Train Test Val

def gerador_bases(dataset, n):
    indices_random = random.sample(range(0,len(dataset)-1), n)
    base_teste_n = []
    base_treinamento_n = []
    
    for i in range(n):
        base_teste_n.append(dataset[indices_random[i]])

    for j in range(len(dataset)):
        if(j not in indices_random):
            base_treinamento_n.append(dataset[j])
            
    return base_teste_n, base_treinamento_n


base_teste, base_treinamento = gerador_bases(FULL_DATA, test_n)

random.shuffle(base_treinamento)
random.shuffle(base_teste)

print("Treinamento: " + str(len(base_treinamento)), "\nTeste: " + str(len(base_teste)), "\nTotal: " + str(len(FULL_DATA)))

Treinamento: 19202 
Teste: 2134 
Total: 21336


In [8]:
for i in range(5):
    print(base_treinamento[i])

('rua fernando luzzatto 208 95320-000 nova prata rs', {'entities': [(0, 25, 'LOGRA')]})
('av.olinto meireles milionarios 30620-330 belo horizonte mg', {'entities': [(0, 17, 'LOGRA')]})
('av.maj.felizardo pinho pessoa 101 terreo centro 62300-000 vicosa do ceara ce', {'entities': [(0, 32, 'LOGRA')]})
('av.paschoal ardito 726 sao vito 13472-130 americana sp', {'entities': [(0, 21, 'LOGRA')]})
('rua do catete 117 parte gloria 22220-000 rio de janeiro rj', {'entities': [(0, 17, 'LOGRA')]})


# Carregando o modelo

In [14]:
# Define our variables

model = None
output_dir=Path(".")
n_iter= 100 # número de épocas
batch_size = 64

In [10]:
# Setting up the pipeline and entity recognizer.
if model is not None:
    nlp = spacy.load(model)  # load existing spacy model
    print("Loaded model '%s'" % model)
else:
    lan = 'pt'
    nlp = spacy.blank(lan)  # create blank Language class
    print("Created blank '%s' model" % lan)
    
if 'ner' not in nlp.pipe_names:
    ner = nlp.create_pipe('ner')
    nlp.add_pipe(ner)
    print('Added new NER')
else:
    ner = nlp.get_pipe('ner')
    print('Got an old NER')

Created blank 'pt' model
Added new NER


In [11]:
# create the built-in pipeline components and add them to the pipeline
    # nlp.create_pipe works for built-ins that are registered with spaCy
if 'ner' not in nlp.pipe_names:
    ner = nlp.create_pipe('ner')
    nlp.add_pipe(ner, last=True)
# otherwise, get it so we can add labels
else:
    ner = nlp.get_pipe('ner')

## Treinamento

In [15]:
print("Batch size: ", batch_size)
print("Épocas: ", n_iter)
print()

# add labels
for _, annotations in base_treinamento:
    for ent in annotations.get('entities'):
        ner.add_label(ent[2])


# get names of other pipes to disable them during training
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
with nlp.disable_pipes(*other_pipes):  # only train NER
    optimizer = nlp.begin_training()
    for itn in range(n_iter):
        random.shuffle(base_treinamento)
        losses = {}
        batches = minibatch(base_treinamento, size=batch_size)
        
        for batch in batches:
            texts, annotations = zip(*batch)
            try:
                nlp.update(texts, annotations, sgd=optimizer, drop=0.2,
                       losses=losses)
            except:
                pass     
        
        print(itn+1, ' Losses', losses)        

Batch size:  64
Épocas:  100

1  Losses {'ner': 760.4642904412153}
2  Losses {'ner': 687.9467526326686}
3  Losses {'ner': 571.2996361747396}
4  Losses {'ner': 584.0007234295372}
5  Losses {'ner': 709.6346988221127}
6  Losses {'ner': 677.8147405720304}
7  Losses {'ner': 751.0372388633491}
8  Losses {'ner': 654.6342283573957}
9  Losses {'ner': 807.3264651178127}
10  Losses {'ner': 648.1604003574575}
11  Losses {'ner': 725.218250008531}
12  Losses {'ner': 710.8368854340899}
13  Losses {'ner': 694.51988705475}
14  Losses {'ner': 640.6430908711286}
15  Losses {'ner': 613.810349366108}
16  Losses {'ner': 655.73425533827}
17  Losses {'ner': 590.9776571386733}
18  Losses {'ner': 563.9133672648928}
19  Losses {'ner': 445.7783034450096}
20  Losses {'ner': 662.9166887719142}
21  Losses {'ner': 563.531455903086}
22  Losses {'ner': 450.82243060089115}
23  Losses {'ner': 477.7843401953881}
24  Losses {'ner': 502.37731827908556}
25  Losses {'ner': 484.23391088189686}
26  Losses {'ner': 534.8800630784

In [16]:
# test the trained model
for text, _ in base_teste:
    doc = nlp(text)
    print('Entities', [(ent.text, ent.label_) for ent in doc.ents])
    print('Tokens', [(t.text, t.ent_type_, t.ent_iob) for t in doc])
    print()

Entities [('av. mal rondon', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('mal', 'LOGRA', 1), ('rondon', 'LOGRA', 1), ('/', '', 2), ('rua', '', 2), ('princesa', '', 2), ('isabel', '', 2), ('centro', '', 2), ('78968-000', '', 2), ('pimenta', '', 2), ('bueno', '', 2), ('ro', '', 2)]

Entities [('r frei caneca 569', 'LOGRA')]
Tokens [('r', 'LOGRA', 3), ('frei', 'LOGRA', 1), ('caneca', 'LOGRA', 1), ('569', 'LOGRA', 1), ('bela', '', 2), ('vista', '', 2), ('01307-001', '', 2), ('sao', '', 2), ('paulo', '', 2), ('sp', '', 2)]

Entities [('av. governador carlos de lima cavalcante num 4100', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('governador', 'LOGRA', 1), ('carlos', 'LOGRA', 1), ('de', 'LOGRA', 1), ('lima', 'LOGRA', 1), ('cavalcante', 'LOGRA', 1), ('num', 'LOGRA', 1), ('4100', 'LOGRA', 1), ('casa', '', 2), ('caiada', '', 2), ('53040-000', '', 2), ('olinda', '', 2), ('pe', '', 2)]

Entities [('av. paulista 1429', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA

Entities [('praca eleoterio g andrade 537', 'LOGRA')]
Tokens [('praca', 'LOGRA', 3), ('eleoterio', 'LOGRA', 1), ('g', 'LOGRA', 1), ('andrade', 'LOGRA', 1), ('537', 'LOGRA', 1), ('sala', '', 2), ('3', '', 2), ('centro', '', 2), ('87250-000', '', 2), ('peabiru', '', 2), ('pr', '', 2)]

Entities [('av. brigadeiro lima e silva 1685', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('brigadeiro', 'LOGRA', 1), ('lima', 'LOGRA', 1), ('e', 'LOGRA', 1), ('silva', 'LOGRA', 1), ('1685', 'LOGRA', 1), ('centro', '', 2), ('25070-235', '', 2), ('duque', '', 2), ('de', '', 2), ('caxias', '', 2), ('rj', '', 2)]

Entities [('praca costa pereira', 'LOGRA')]
Tokens [('praca', 'LOGRA', 3), ('costa', 'LOGRA', 1), ('pereira', 'LOGRA', 1), ('53', '', 2), ('-', '', 2), ('salas', '', 2), ('501/505', '', 2), ('(', '', 2), ('parte', '', 2), (')', '', 2), ('centro', '', 2), ('29010-080', '', 2), ('vitoria', '', 2), ('es', '', 2)]

Entities [('av japao', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('japao', 'LOG

Entities [('r do humaita', 'LOGRA')]
Tokens [('r', 'LOGRA', 3), ('do', 'LOGRA', 1), ('humaita', 'LOGRA', 1), ('humaita', '', 2), ('22261-005', '', 2), ('rio', '', 2), ('de', '', 2), ('janeiro', '', 2), ('rj', '', 2)]

Entities [('campus 1 - s/n', 'LOGRA')]
Tokens [('campus', 'LOGRA', 3), ('1', 'LOGRA', 1), ('-', 'LOGRA', 1), ('s', 'LOGRA', 1), ('/', 'LOGRA', 1), ('n', 'LOGRA', 1), ('cidade', '', 2), ('universitaria', '', 2), ('58051-970', '', 2), ('joao', '', 2), ('pessoa', '', 2), ('pb', '', 2)]

Entities [('tv.mal.deodoro da fonseca 22', 'LOGRA')]
Tokens [('tv.mal.deodoro', 'LOGRA', 3), ('da', 'LOGRA', 1), ('fonseca', 'LOGRA', 1), ('22', 'LOGRA', 1), ('centro', '', 2), ('49600-000', '', 2), ('nossa', '', 2), ('senhora', '', 2), ('das', '', 2), ('dores', '', 2), ('se', '', 2)]

Entities [('praca cel. pedro osorio 164', 'LOGRA')]
Tokens [('praca', 'LOGRA', 3), ('cel', 'LOGRA', 1), ('.', 'LOGRA', 1), ('pedro', 'LOGRA', 1), ('osorio', 'LOGRA', 1), ('164', 'LOGRA', 1), ('centro', '', 2), 

Entities [('rua edgar raimundo da costa 108', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('edgar', 'LOGRA', 1), ('raimundo', 'LOGRA', 1), ('da', 'LOGRA', 1), ('costa', 'LOGRA', 1), ('108', 'LOGRA', 1), ('centro', '', 2), ('16800-000', '', 2), ('mirandopolis', '', 2), ('sp', '', 2)]

Entities [('rua leopoldo freiberger 567', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('leopoldo', 'LOGRA', 1), ('freiberger', 'LOGRA', 1), ('567', 'LOGRA', 1), ('centro', '', 2), ('88160-000', '', 2), ('biguacu', '', 2), ('sc', '', 2)]

Entities [('av.general flores da cunha 1738', 'LOGRA')]
Tokens [('av.general', 'LOGRA', 3), ('flores', 'LOGRA', 1), ('da', 'LOGRA', 1), ('cunha', 'LOGRA', 1), ('1738', 'LOGRA', 1), ('vila', '', 2), ('city', '', 2), ('velha', '', 2), ('94910-003', '', 2), ('cachoeirinha', '', 2), ('rs', '', 2)]

Entities [('r.bernardo sayao 440', 'LOGRA')]
Tokens [('r.bernardo', 'LOGRA', 3), ('sayao', 'LOGRA', 1), ('440', 'LOGRA', 1), ('rod.br-010', '', 2), ('centro', '', 2), ('68675-000', '', 2), ('

Entities [('shcs qd.504 bl.b lj.55/56', 'LOGRA')]
Tokens [('shcs', 'LOGRA', 3), ('qd.504', 'LOGRA', 1), ('bl.b', 'LOGRA', 1), ('lj.55/56', 'LOGRA', 1), ('terreo', '', 2), ('e', '', 2), ('subsolo', '', 2), ('asa', '', 2), ('sul', '', 2), ('70331-525', '', 2), ('brasilia', '', 2), ('df', '', 2)]

Entities [('rua dr brandao 54', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('dr', 'LOGRA', 1), ('brandao', 'LOGRA', 1), ('54', 'LOGRA', 1), ('360305', '', 2), ('centro', '', 2), ('37400-000', '', 2), ('campanha', '', 2), ('mg', '', 2)]

Entities [('av.central 625/635', 'LOGRA')]
Tokens [('av.central', 'LOGRA', 3), ('625/635', 'LOGRA', 1), ('nuc', '', 2), ('bandeirante', '', 2), ('71710-520', '', 2), ('brasilia', '', 2), ('(', '', 2), ('nucleo', '', 2), ('bandeirante', '', 2), (')', '', 2), ('df', '', 2)]

Entities [('av. general canabarro 1103', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('general', 'LOGRA', 1), ('canabarro', 'LOGRA', 1), ('1103', 'LOGRA', 1), ('centro', '', 2), ('9901

Entities [('av. nilo pe�anha 12', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('nilo', 'LOGRA', 1), ('pe', 'LOGRA', 1), ('�', 'LOGRA', 1), ('anha', 'LOGRA', 1), ('12', 'LOGRA', 1), ('lj', '', 2), ('slj', '', 2), ('ssl', '', 2), ('centro', '', 2), ('20020-100', '', 2), ('rio', '', 2), ('de', '', 2), ('janeiro', '', 2), ('rj', '', 2)]

Entities [('estrada do coqueiro grande 2025', 'LOGRA')]
Tokens [('estrada', 'LOGRA', 3), ('do', 'LOGRA', 1), ('coqueiro', 'LOGRA', 1), ('grande', 'LOGRA', 1), ('2025', 'LOGRA', 1), ('fazenda', '', 2), ('grande', '', 2), ('ii', '', 2), ('cajazeiras', '', 2), ('41340-050', '', 2), ('salvador', '', 2), ('ba', '', 2)]

Entities [('rua frei matias tevis 280', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('frei', 'LOGRA', 1), ('matias', 'LOGRA', 1), ('tevis', 'LOGRA', 1), ('280', 'LOGRA', 1), ('lojas', '', 2), ('02', '', 2), ('03', '', 2), ('04', '', 2), ('07', '', 2), ('ilha', '', 2), ('do', '', 2), ('leite', '', 2), ('50070-450', '', 2), ('recife', '', 

Entities [('r.dr.pedro cortes s/', 'LOGRA')]
Tokens [('r.dr.pedro', 'LOGRA', 3), ('cortes', 'LOGRA', 1), ('s', 'LOGRA', 1), ('/', 'LOGRA', 1), ('n', '', 2), ('centro', '', 2), ('44340-000', '', 2), ('muritiba', '', 2), ('ba', '', 2)]

Entities [('av capitao gomes 231', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('capitao', 'LOGRA', 1), ('gomes', 'LOGRA', 1), ('231', 'LOGRA', 1), ('237', '', 2), ('boa', '', 2), ('vista', '', 2), ('37505-028', '', 2), ('itajuba', '', 2), ('mg', '', 2)]

Entities [('setor com. norte', 'LOGRA')]
Tokens [('setor', 'LOGRA', 3), ('com', 'LOGRA', 1), ('.', 'LOGRA', 1), ('norte', 'LOGRA', 1), ('qd', '', 2), ('01', '', 2), ('bl', '', 2), ('c', '', 2), ('modulo', '', 2), ('b', '', 2), ('ed', '', 2), ('brasilia', '', 2), ('asa', '', 2), ('norte', '', 2), ('70711-902', '', 2), ('brasilia', '', 2), ('df', '', 2)]

Entities [('r gomes de carvalho 1195', 'LOGRA')]
Tokens [('r', 'LOGRA', 3), ('gomes', 'LOGRA', 1), ('de', 'LOGRA', 1), ('carvalho', 'LOGRA', 1), ('1195', 'LOGR

Entities [('av. senador lemos 1268', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('senador', 'LOGRA', 1), ('lemos', 'LOGRA', 1), ('1268', 'LOGRA', 1), ('telegrafo', '', 2), ('66050-000', '', 2), ('belem', '', 2), ('pa', '', 2)]

Entities [('avenida vereador jo�o batista fitipaldi', 'LOGRA')]
Tokens [('avenida', 'LOGRA', 3), ('vereador', 'LOGRA', 1), ('jo', 'LOGRA', 1), ('�', 'LOGRA', 1), ('o', 'LOGRA', 1), ('batista', 'LOGRA', 1), ('fitipaldi', 'LOGRA', 1), ('vila', '', 2), ('maluf', '', 2), ('08685-000', '', 2), ('suzano', '', 2), ('sp', '', 2)]

Entities []
Tokens [(' ', '', 2), ('est', '', 2), ('m', '', 2), ('boi', '', 2), ('mirim', '', 2), ('909/913', '', 2), ('jardim', '', 2), ('das', '', 2), ('flores', '', 2), ('04905-020', '', 2), ('sao', '', 2), ('paulo', '', 2), ('sp', '', 2)]

Entities [('praca coronel anibal castanha 168', 'LOGRA')]
Tokens [('praca', 'LOGRA', 3), ('coronel', 'LOGRA', 1), ('anibal', 'LOGRA', 1), ('castanha', 'LOGRA', 1), ('168', 'LOGRA', 1), ('ce

Entities [('rua sao clemente 206', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('sao', 'LOGRA', 1), ('clemente', 'LOGRA', 1), ('206', 'LOGRA', 1), ('loja', '', 2), ('a', '', 2), ('esq.c', '', 2), ('/', '', 2), ('r.eduardo', '', 2), ('guinle', '', 2), ('centro', '', 2), ('22260-000', '', 2), ('rio', '', 2), ('de', '', 2), ('janeiro', '', 2), ('rj', '', 2)]

Entities [('estrada francisco da cruz nunes 9.121', 'LOGRA')]
Tokens [('estrada', 'LOGRA', 3), ('francisco', 'LOGRA', 1), ('da', 'LOGRA', 1), ('cruz', 'LOGRA', 1), ('nunes', 'LOGRA', 1), ('9.121', 'LOGRA', 1), ('lojas', '', 2), ('101/102', '', 2), ('e', '', 2), ('201', '', 2), ('piratininga', '', 2), ('24350-310', '', 2), ('niteroi', '', 2), ('rj', '', 2)]

Entities [('rua santa iria 242', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('santa', 'LOGRA', 1), ('iria', 'LOGRA', 1), ('242', 'LOGRA', 1), ('centro', '', 2), ('14815-000', '', 2), ('ibate', '', 2), ('sp', '', 2)]

Entities [('rua vinte e um de abril 120/160', 'LOGRA')]
Tokens [('rua', '

Entities [('av. dr. joao candido 266', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('dr', 'LOGRA', 1), ('.', 'LOGRA', 1), ('joao', 'LOGRA', 1), ('candido', 'LOGRA', 1), ('266', 'LOGRA', 1), ('centro', '', 2), ('83280-000', '', 2), ('guaratuba', '', 2), ('pr', '', 2)]

Entities [('av.penha de franca 549', 'LOGRA')]
Tokens [('av.penha', 'LOGRA', 3), ('de', 'LOGRA', 1), ('franca', 'LOGRA', 1), ('549', 'LOGRA', 1), ('penha', '', 2), ('de', '', 2), ('franca', '', 2), ('03606-010', '', 2), ('sao', '', 2), ('paulo', '', 2), ('sp', '', 2)]

Entities [('rua frederico michaelsen 443', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('frederico', 'LOGRA', 1), ('michaelsen', 'LOGRA', 1), ('443', 'LOGRA', 1), ('centro', '', 2), ('95150-000', '', 2), ('nova', '', 2), ('petropolis', '', 2), ('rs', '', 2)]

Entities [('rua waldemar cartarozzi 9', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('waldemar', 'LOGRA', 1), ('cartarozzi', 'LOGRA', 1), ('9', 'LOGRA', 1), ('centro', '', 2), ('13920-000', '', 2), 

Entities [('av. porto seguro 510', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('porto', 'LOGRA', 1), ('seguro', 'LOGRA', 1), ('510', 'LOGRA', 1), ('eunapolis', '', 2), ('45825-000', '', 2), ('eunapolis', '', 2), ('ba', '', 2)]

Entities [('av.comendador alexandrino garcia 1610', 'LOGRA')]
Tokens [('av.comendador', 'LOGRA', 3), ('alexandrino', 'LOGRA', 1), ('garcia', 'LOGRA', 1), ('1610', 'LOGRA', 1), ('marta', '', 2), ('helena', '', 2), ('38405-323', '', 2), ('uberlandia', '', 2), ('mg', '', 2)]

Entities [('av. couto de magalh�es 2.815', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('couto', 'LOGRA', 1), ('de', 'LOGRA', 1), ('magalh', 'LOGRA', 1), ('�', 'LOGRA', 1), ('es', 'LOGRA', 1), ('2.815', 'LOGRA', 1), ('centro', '', 2), ('norte', '', 2), ('78110-400', '', 2), ('varzea', '', 2), ('grande', '', 2), ('mt', '', 2)]

Entities [('rua. itingussu 69', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('.', 'LOGRA', 1), ('itingussu', 'LOGRA', 1), ('69', 'LOGRA', 1), ('vil


Entities [('rua eleonora cintra 111', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('eleonora', 'LOGRA', 1), ('cintra', 'LOGRA', 1), ('111', 'LOGRA', 1), ('vila', '', 2), ('regente', '', 2), ('feij', '', 2), ('�', '', 2), ('03337-000', '', 2), ('sao', '', 2), ('paulo', '', 2), ('sp', '', 2)]

Entities [('avenida santos dumont n. 2750', 'LOGRA')]
Tokens [('avenida', 'LOGRA', 3), ('santos', 'LOGRA', 1), ('dumont', 'LOGRA', 1), ('n.', 'LOGRA', 1), ('2750', 'LOGRA', 1), ('aldeota', '', 2), ('60150-161', '', 2), ('fortaleza', '', 2), ('ce', '', 2)]

Entities [('pca.min.salgado filho s/', 'LOGRA')]
Tokens [('pca.min.salgado', 'LOGRA', 3), ('filho', 'LOGRA', 1), ('s', 'LOGRA', 1), ('/', 'LOGRA', 1), ('n', '', 2), ('terreo', '', 2), ('portao', '', 2), ('a2', '', 2), ('aeroporto', '', 2), ('imbiribeira', '', 2), ('51210-902', '', 2), ('recife', '', 2), ('pe', '', 2)]

Entities [('rua guilherme silva 183', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('guilherme', 'LOGRA', 1), ('silva', 'LOGRA', 1), ('183'

Entities [('av.municipal 254 256 e 266', 'LOGRA')]
Tokens [('av.municipal', 'LOGRA', 3), ('254', 'LOGRA', 1), ('256', 'LOGRA', 1), ('e', 'LOGRA', 1), ('266', 'LOGRA', 1), ('jardim', '', 2), ('silveira', '', 2), ('06433-000', '', 2), ('barueri', '', 2), ('sp', '', 2)]

Entities [('av.quatro de julho 431', 'LOGRA')]
Tokens [('av.quatro', 'LOGRA', 3), ('de', 'LOGRA', 1), ('julho', 'LOGRA', 1), ('431', 'LOGRA', 1), ('centro', '', 2), ('64260-000', '', 2), ('piripiri', '', 2), ('pi', '', 2)]

Entities [('av inglaterra 680', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('inglaterra', 'LOGRA', 1), ('680', 'LOGRA', 1), ('centro', '', 2), ('86181-000', '', 2), ('cambe', '', 2), ('pr', '', 2)]

Entities [('r.das flores 3', 'LOGRA')]
Tokens [('r.das', 'LOGRA', 3), ('flores', 'LOGRA', 1), ('3', 'LOGRA', 1), ('centro', '', 2), ('73860-000', '', 2), ('sao', '', 2), ('domingos', '', 2), ('go', '', 2)]

Entities [('rua do a��car 363', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('do', 'LOGRA', 1), ('a', 'LOGRA', 

Entities [('quadra qnm 34 area especial 1 s/', 'LOGRA')]
Tokens [('quadra', 'LOGRA', 3), ('qnm', 'LOGRA', 1), ('34', 'LOGRA', 1), ('area', 'LOGRA', 1), ('especial', 'LOGRA', 1), ('1', 'LOGRA', 1), ('s', 'LOGRA', 1), ('/', 'LOGRA', 1), ('n', '', 2), ('lj', '', 2), ('100/101', '', 2), ('taguatinga', '', 2), ('norte', '', 2), ('72145-450', '', 2), ('brasilia', '', 2), ('(', '', 2), ('taguatinga', '', 2), (')', '', 2), ('df', '', 2)]

Entities [('av. nossa sra.dos navegantes 675', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('nossa', 'LOGRA', 1), ('sra.dos', 'LOGRA', 1), ('navegantes', 'LOGRA', 1), ('675', 'LOGRA', 1), ('lotes', '', 2), ('1', '', 2), ('e', '', 2), ('2', '', 2), ('enseada', '', 2), ('da', '', 2), ('sua', '', 2), ('29050-420', '', 2), ('vitoria', '', 2), ('es', '', 2)]

Entities [('r.barao do rio branco 390-a', 'LOGRA')]
Tokens [('r.barao', 'LOGRA', 3), ('do', 'LOGRA', 1), ('rio', 'LOGRA', 1), ('branco', 'LOGRA', 1), ('390-a', 'LOGRA', 1), ('centro', '', 2), ('1

Entities [('qne 27 lote 10 loja 1', 'LOGRA')]
Tokens [('qne', 'LOGRA', 3), ('27', 'LOGRA', 1), ('lote', 'LOGRA', 1), ('10', 'LOGRA', 1), ('loja', 'LOGRA', 1), ('1', 'LOGRA', 1), ('taguatinga', '', 2), ('norte', '', 2), ('72125-270', '', 2), ('brasilia', '', 2), ('(', '', 2), ('taguatinga', '', 2), (')', '', 2), ('df', '', 2)]

Entities [('av santos dumont 1� andar', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('santos', 'LOGRA', 1), ('dumont', 'LOGRA', 1), ('1', 'LOGRA', 1), ('�', 'LOGRA', 1), ('andar', 'LOGRA', 1), ('aldeota', '', 2), ('60150-165', '', 2), ('fortaleza', '', 2), ('ce', '', 2)]

Entities [('r miguel calmon', 'LOGRA')]
Tokens [('r', 'LOGRA', 3), ('miguel', 'LOGRA', 1), ('calmon', 'LOGRA', 1), ('sobreloja', '', 2), ('comercio', '', 2), ('40015-901', '', 2), ('salvador', '', 2), ('ba', '', 2)]

Entities [('avenida stara 519', 'LOGRA')]
Tokens [('avenida', 'LOGRA', 3), ('stara', 'LOGRA', 1), ('519', 'LOGRA', 1), ('sala', '', 2), ('a', '', 2), ('99470-000', '', 2), ('nao-me-toque'

Entities [('av. major amarantes', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('major', 'LOGRA', 1), ('amarantes', 'LOGRA', 1), ('centro', '', 2), ('76980-000', '', 2), ('vilhena', '', 2), ('ro', '', 2)]

Entities [('r visconde do rio branco sn', 'LOGRA')]
Tokens [('r', 'LOGRA', 3), ('visconde', 'LOGRA', 1), ('do', 'LOGRA', 1), ('rio', 'LOGRA', 1), ('branco', 'LOGRA', 1), ('sn', 'LOGRA', 1), ('centro', '', 2), ('46500-000', '', 2), ('macaubas', '', 2), ('ba', '', 2)]

Entities [('rua prudente de morais 332', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('prudente', 'LOGRA', 1), ('de', 'LOGRA', 1), ('morais', 'LOGRA', 1), ('332', 'LOGRA', 1), ('centro', '', 2), ('15900-000', '', 2), ('taquaritinga', '', 2), ('sp', '', 2)]

Entities [('av professor roberto frade monte', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('professor', 'LOGRA', 1), ('roberto', 'LOGRA', 1), ('frade', 'LOGRA', 1), ('monte', 'LOGRA', 1), ('marieta', '', 2), ('14783-226', '', 2), ('barretos', '', 2), ('sp', '', 2)]


Entities [('rua dr. alfredo backer', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('dr', 'LOGRA', 1), ('.', 'LOGRA', 1), ('alfredo', 'LOGRA', 1), ('backer', 'LOGRA', 1), ('alcantara', '', 2), ('24452-005', '', 2), ('sao', '', 2), ('goncalo', '', 2), ('rj', '', 2)]

Entities [('av. imperatriz leopoldina 640', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('imperatriz', 'LOGRA', 1), ('leopoldina', 'LOGRA', 1), ('640', 'LOGRA', 1), ('vila', '', 2), ('nova', '', 2), ('13073-010', '', 2), ('campinas', '', 2), ('sp', '', 2)]

Entities [('praca dr. souto soares s/n', 'LOGRA')]
Tokens [('praca', 'LOGRA', 3), ('dr', 'LOGRA', 1), ('.', 'LOGRA', 1), ('souto', 'LOGRA', 1), ('soares', 'LOGRA', 1), ('s', 'LOGRA', 1), ('/', 'LOGRA', 1), ('n', 'LOGRA', 1), ('.', '', 2), ('centro', '', 2), ('46930-000', '', 2), ('palmeiras', '', 2), ('ba', '', 2)]

Entities [('av.dom pedro segundo s/', 'LOGRA')]
Tokens [('av.dom', 'LOGRA', 3), ('pedro', 'LOGRA', 1), ('segundo', 'LOGRA', 1), ('s', 'LOGRA', 1), ('/'

Entities [('r.otilia barichelo zardo 97', 'LOGRA')]
Tokens [('r.otilia', 'LOGRA', 3), ('barichelo', 'LOGRA', 1), ('zardo', 'LOGRA', 1), ('97', 'LOGRA', 1), ('centro', '', 2), ('89590-000', '', 2), ('arroio', '', 2), ('trinta', '', 2), ('sc', '', 2)]

Entities [('av.goias 980', 'LOGRA')]
Tokens [('av.goias', 'LOGRA', 3), ('980', 'LOGRA', 1), ('2.andar', '', 2), ('st', '', 2), ('central', '', 2), ('74010-010', '', 2), ('goiania', '', 2), ('go', '', 2)]

Entities [('rua deputado ribeiro de resende 287', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('deputado', 'LOGRA', 1), ('ribeiro', 'LOGRA', 1), ('de', 'LOGRA', 1), ('resende', 'LOGRA', 1), ('287', 'LOGRA', 1), ('centro', '', 2), ('37002-100', '', 2), ('varginha', '', 2), ('mg', '', 2)]

Entities [('av. dr. epitacio pessoa 110', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('dr', 'LOGRA', 1), ('.', 'LOGRA', 1), ('epitacio', 'LOGRA', 1), ('pessoa', 'LOGRA', 1), ('110', 'LOGRA', 1), ('boqueir', '', 2), ('�', '', 2), ('o', '', 2), ('1

Entities [('r sete de setembro', 'LOGRA')]
Tokens [('r', 'LOGRA', 3), ('sete', 'LOGRA', 1), ('de', 'LOGRA', 1), ('setembro', 'LOGRA', 1), ('centro', '', 2), ('13465-320', '', 2), ('americana', '', 2), ('sp', '', 2)]

Entities [('rua bom jesus 18 and cj 1.805', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('bom', 'LOGRA', 1), ('jesus', 'LOGRA', 1), ('18', 'LOGRA', 1), ('and', 'LOGRA', 1), ('cj', 'LOGRA', 1), ('1.805', 'LOGRA', 1), ('ed', '', 2), ('ar', '', 2), ('3000', '', 2), ('cabral', '', 2), ('corp', '', 2), ('&', '', 2), ('office', '', 2), ('juvev', '', 2), ('�', '', 2), ('80035-010', '', 2), ('curitiba', '', 2), ('pr', '', 2)]

Entities [('r. sete de setembro', 'LOGRA')]
Tokens [('r.', 'LOGRA', 3), ('sete', 'LOGRA', 1), ('de', 'LOGRA', 1), ('setembro', 'LOGRA', 1), ('lote', '', 2), ('26', '', 2), ('a', '', 2), ('29', '', 2), ('qd', '', 2), ('85', '', 2), ('centro', '', 2), ('68540-000', '', 2), ('conceicao', '', 2), ('do', '', 2), ('araguaia', '', 2), ('pa', '', 2)]

Entities [('r.laure

Entities [('rua da assembleia ss', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('da', 'LOGRA', 1), ('assembleia', 'LOGRA', 1), ('ss', 'LOGRA', 1), ('lj', '', 2), ('a', '', 2), ('slj', '', 2), ('centro', '', 2), ('20011-000', '', 2), ('rio', '', 2), ('de', '', 2), ('janeiro', '', 2), ('rj', '', 2)]

Entities [('rua alvaro gomes de castro', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('alvaro', 'LOGRA', 1), ('gomes', 'LOGRA', 1), ('de', 'LOGRA', 1), ('castro', 'LOGRA', 1), ('loja', '', 2), ('j', '', 2), ('porto', '', 2), ('seco', '', 2), ('piraj', '', 2), ('�', '', 2), ('00000-000', '', 2), ('salvador', '', 2), ('ba', '', 2)]

Entities [('rua moretti foggia', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('moretti', 'LOGRA', 1), ('foggia', 'LOGRA', 1), ('qd', '', 2), ('.', '', 2), ('10', '', 2), ('lt', '', 2), ('.', '', 2), ('13', '', 2), ('n', '', 2), ('�', '', 2), ('07', '', 2), ('setor', '', 2), ('01', '', 2), ('centro', '', 2), ('76600-000', '', 2), ('goias', '', 2), ('go', '', 2)]

Entities [('av vi

Entities [('av marechal floriano peixoto', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('marechal', 'LOGRA', 1), ('floriano', 'LOGRA', 1), ('peixoto', 'LOGRA', 1), ('lj', '', 2), ('5', '', 2), ('hauer', '', 2), ('81630-000', '', 2), ('curitiba', '', 2), ('pr', '', 2)]

Entities [('r.portela salles 13', 'LOGRA')]
Tokens [('r.portela', 'LOGRA', 3), ('salles', 'LOGRA', 1), ('13', 'LOGRA', 1), ('centro', '', 2), ('28250-000', '', 2), ('italva', '', 2), ('rj', '', 2)]

Entities [('av.leandro de almeida', 'LOGRA')]
Tokens [('av.leandro', 'LOGRA', 3), ('de', 'LOGRA', 1), ('almeida', 'LOGRA', 1), ('centro', '', 2), ('96750-000', '', 2), ('butia', '', 2), ('rs', '', 2)]

Entities [('rua nossa senhora da lapa 432', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('nossa', 'LOGRA', 1), ('senhora', 'LOGRA', 1), ('da', 'LOGRA', 1), ('lapa', 'LOGRA', 1), ('432', 'LOGRA', 1), ('lapa', '', 2), ('05072-000', '', 2), ('sao', '', 2), ('paulo', '', 2), ('sp', '', 2)]

Entities [('rua maranh�o pinheiro', 'LOGRA')]
Tokens

Entities [('av. champagnat 1089', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('champagnat', 'LOGRA', 1), ('1089', 'LOGRA', 1), ('360305', '', 2), ('centro', '', 2), ('29100-011', '', 2), ('vila', '', 2), ('velha', '', 2), ('es', '', 2)]

Entities [('rua alceu amoroso lima', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('alceu', 'LOGRA', 1), ('amoroso', 'LOGRA', 1), ('lima', 'LOGRA', 1), ('loja', '', 2), ('01', '', 2), ('caminho', '', 2), ('das', '', 2), ('arvores', '', 2), ('41820-770', '', 2), ('salvador', '', 2), ('ba', '', 2)]

Entities [('av. julio antonio thruler 260', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('julio', 'LOGRA', 1), ('antonio', 'LOGRA', 1), ('thruler', 'LOGRA', 1), ('260', 'LOGRA', 1), ('lojas', '', 2), ('3/8', '', 2), ('olaria', '', 2), ('28620-000', '', 2), ('nova', '', 2), ('friburgo', '', 2), ('rj', '', 2)]

Entities [('estrada do mendanha lot', 'LOGRA')]
Tokens [('estrada', 'LOGRA', 3), ('do', 'LOGRA', 1), ('mendanha', 'LOGRA', 1), ('lo

Entities [('avenida dos bragas 115', 'LOGRA')]
Tokens [('avenida', 'LOGRA', 3), ('dos', 'LOGRA', 1), ('bragas', 'LOGRA', 1), ('115', 'LOGRA', 1), ('centro', '', 2), ('39700-000', '', 2), ('pecanha', '', 2), ('mg', '', 2)]

Entities [('av.sao benedito s/', 'LOGRA')]
Tokens [('av.sao', 'LOGRA', 3), ('benedito', 'LOGRA', 1), ('s', 'LOGRA', 1), ('/', 'LOGRA', 1), ('n', '', 2), ('centro', '', 2), ('68300-970', '', 2), ('gurupa', '', 2), ('pa', '', 2)]

Entities [('av. 9 de julho 1249', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('9', 'LOGRA', 1), ('de', 'LOGRA', 1), ('julho', 'LOGRA', 1), ('1249', 'LOGRA', 1), ('centro', '', 2), ('15690-000', '', 2), ('indiapora', '', 2), ('sp', '', 2)]

Entities [('rua chapot prevost 158', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('chapot', 'LOGRA', 1), ('prevost', 'LOGRA', 1), ('158', 'LOGRA', 1), ('centro', '', 2), ('28500-000', '', 2), ('cantagalo', '', 2), ('rj', '', 2)]

Entities [('rua aristides lobo 243', 'LOGRA')]
Tokens [('rua', 'LOGRA

Entities [('av. borges leal s/n�', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('borges', 'LOGRA', 1), ('leal', 'LOGRA', 1), ('s', 'LOGRA', 1), ('/', 'LOGRA', 1), ('n', 'LOGRA', 1), ('�', 'LOGRA', 1), ('aparecida', '', 2), ('68040-080', '', 2), ('santarem', '', 2), ('pa', '', 2)]

Entities [('pca.dr.jose goncalves 210', 'LOGRA')]
Tokens [('pca.dr.jose', 'LOGRA', 3), ('goncalves', 'LOGRA', 1), ('210', 'LOGRA', 1), ('centro', '', 2), ('48970-000', '', 2), ('senhor', '', 2), ('do', '', 2), ('bonfim', '', 2), ('ba', '', 2)]

Entities [('rua sete de setembro', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('sete', 'LOGRA', 1), ('de', 'LOGRA', 1), ('setembro', 'LOGRA', 1), ('centro', '', 2), ('90010-191', '', 2), ('porto', '', 2), ('alegre', '', 2), ('rs', '', 2)]

Entities [('rua antonio jos� dos santos 541', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('antonio', 'LOGRA', 1), ('jos', 'LOGRA', 1), ('�', 'LOGRA', 1), ('dos', 'LOGRA', 1), ('santos', 'LOGRA', 1), ('541', 'LOGRA', 1), ('ceu', 

Entities [('av.doutor joao pinheiro 2911', 'LOGRA')]
Tokens [('av.doutor', 'LOGRA', 3), ('joao', 'LOGRA', 1), ('pinheiro', 'LOGRA', 1), ('2911', 'LOGRA', 1), ('santo', '', 2), ('antonio', '', 2), ('34800-000', '', 2), ('caete', '', 2), ('mg', '', 2)]

Entities [('av. c 255 - quadra 586 - lote 6 s/n', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('c', 'LOGRA', 1), ('255', 'LOGRA', 1), ('-', 'LOGRA', 1), ('quadra', 'LOGRA', 1), ('586', 'LOGRA', 1), ('-', 'LOGRA', 1), ('lote', 'LOGRA', 1), ('6', 'LOGRA', 1), ('s', 'LOGRA', 1), ('/', 'LOGRA', 1), ('n', 'LOGRA', 1), ('.', '', 2), ('nova', '', 2), ('suica', '', 2), ('74280-010', '', 2), ('goiania', '', 2), ('go', '', 2)]

Entities [('av. duque de caxias 1874', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('duque', 'LOGRA', 1), ('de', 'LOGRA', 1), ('caxias', 'LOGRA', 1), ('1874', 'LOGRA', 1), ('vila', '', 2), ('brasil', '', 2), ('86010-200', '', 2), ('londrina', '', 2), ('pr', '', 2)]

Entities [('av.castro alves 465',

Entities [('av. jo�o gomes vieira 28', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('jo', 'LOGRA', 1), ('�', 'LOGRA', 1), ('o', 'LOGRA', 1), ('gomes', 'LOGRA', 1), ('vieira', 'LOGRA', 1), ('28', 'LOGRA', 1), ('centro', '', 2), ('35277-000', '', 2), ('sao', '', 2), ('joao', '', 2), ('do', '', 2), ('manteninha', '', 2), ('mg', '', 2)]

Entities [('alameda joaquim eugenio de lima 133', 'LOGRA')]
Tokens [('alameda', 'LOGRA', 3), ('joaquim', 'LOGRA', 1), ('eugenio', 'LOGRA', 1), ('de', 'LOGRA', 1), ('lima', 'LOGRA', 1), ('133', 'LOGRA', 1), ('bela', '', 2), ('vista', '', 2), ('01403-001', '', 2), ('sao', '', 2), ('paulo', '', 2), ('sp', '', 2)]

Entities [('av. vicente ferreira 539', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('vicente', 'LOGRA', 1), ('ferreira', 'LOGRA', 1), ('539', 'LOGRA', 1), ('centro', '', 2), ('75555-000', '', 2), ('vicentinopolis', '', 2), ('go', '', 2)]

Entities [('rua padre francisco', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('padre', 'LO

Entities [('r.da rodoviaria s/n', 'LOGRA')]
Tokens [('r.da', 'LOGRA', 3), ('rodoviaria', 'LOGRA', 1), ('s', 'LOGRA', 1), ('/', 'LOGRA', 1), ('n', 'LOGRA', 1), ('terminal', '', 2), ('rodoviario', '', 2), ('centro', '', 2), ('42800-400', '', 2), ('camacari', '', 2), ('ba', '', 2)]

Entities [('av.sao carlos 1040', 'LOGRA')]
Tokens [('av.sao', 'LOGRA', 3), ('carlos', 'LOGRA', 1), ('1040', 'LOGRA', 1), ('centro', '', 2), ('13560-010', '', 2), ('sao', '', 2), ('carlos', '', 2), ('sp', '', 2)]

Entities [('estrada da gavea 899 lj.103', 'LOGRA')]
Tokens [('estrada', 'LOGRA', 3), ('da', 'LOGRA', 1), ('gavea', 'LOGRA', 1), ('899', 'LOGRA', 1), ('lj.103', 'LOGRA', 1), ('sao', '', 2), ('conrado', '', 2), ('22619-900', '', 2), ('rio', '', 2), ('de', '', 2), ('janeiro', '', 2), ('rj', '', 2)]

Entities [('av.santa marina 2651', 'LOGRA')]
Tokens [('av.santa', 'LOGRA', 3), ('marina', 'LOGRA', 1), ('2651', 'LOGRA', 1), ('conjunto', '', 2), ('2', '', 2), ('vila', '', 2), ('albertina', '', 2), ('02732-0

Entities [('av. ver.dr.jose marques penteado 446', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('ver.dr.jose', 'LOGRA', 1), ('marques', 'LOGRA', 1), ('penteado', 'LOGRA', 1), ('446', 'LOGRA', 1), ('centro', '', 2), ('18560-000', '', 2), ('ipero', '', 2), ('sp', '', 2)]

Entities [('av portugal 2949', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('portugal', 'LOGRA', 1), ('2949', 'LOGRA', 1), ('sta', '', 2), ('amelia', '', 2), ('31555-000', '', 2), ('belo', '', 2), ('horizonte', '', 2), ('mg', '', 2)]

Entities [('rua dos andradas 449', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('dos', 'LOGRA', 1), ('andradas', 'LOGRA', 1), ('449', 'LOGRA', 1), ('centro', '', 2), ('97573-001', '', 2), ('santana', '', 2), ('do', '', 2), ('livramento', '', 2), ('rs', '', 2)]

Entities [('av.principal 479 cj.joao alves filho', 'LOGRA')]
Tokens [('av.principal', 'LOGRA', 3), ('479', 'LOGRA', 1), ('cj.joao', 'LOGRA', 1), ('alves', 'LOGRA', 1), ('filho', 'LOGRA', 1), ('taicoca', '', 2), ('49160-000', '', 2

Entities [('av.nove de abril 2246', 'LOGRA')]
Tokens [('av.nove', 'LOGRA', 3), ('de', 'LOGRA', 1), ('abril', 'LOGRA', 1), ('2246', 'LOGRA', 1), ('centro', '', 2), ('11510-003', '', 2), ('cubatao', '', 2), ('sp', '', 2)]

Entities [('av academia de sao paulo 310 e 312', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('academia', 'LOGRA', 1), ('de', 'LOGRA', 1), ('sao', 'LOGRA', 1), ('paulo', 'LOGRA', 1), ('310', 'LOGRA', 1), ('e', 'LOGRA', 1), ('312', 'LOGRA', 1), ('jardim', '', 2), ('camargo', '', 2), ('novo', '', 2), ('08121-400', '', 2), ('sao', '', 2), ('paulo', '', 2), ('sp', '', 2)]

Entities [('r.duque de caxias 438', 'LOGRA')]
Tokens [('r.duque', 'LOGRA', 3), ('de', 'LOGRA', 1), ('caxias', 'LOGRA', 1), ('438', 'LOGRA', 1), ('centro', '', 2), ('19300-000', '', 2), ('presidente', '', 2), ('bernardes', '', 2), ('sp', '', 2)]

Entities [('rua prudente de morais', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('prudente', 'LOGRA', 1), ('de', 'LOGRA', 1), ('morais', 'LOGRA', 1), ('abadia', '', 2), ('

Entities [('av.sapopemba 2969', 'LOGRA')]
Tokens [('av.sapopemba', 'LOGRA', 3), ('2969', 'LOGRA', 1), ('v.regente', '', 2), ('feijo', '', 2), ('03345-001', '', 2), ('sao', '', 2), ('paulo', '', 2), ('sp', '', 2)]

Entities [('rua floriano peixoto 131', 'LOGRA')]
Tokens [('rua', 'LOGRA', 3), ('floriano', 'LOGRA', 1), ('peixoto', 'LOGRA', 1), ('131', 'LOGRA', 1), ('sto', '', 2), ('antonio', '', 2), ('50020-060', '', 2), ('recife', '', 2), ('pe', '', 2)]

Entities [('av.vinte e cinco de abril 787', 'LOGRA')]
Tokens [('av.vinte', 'LOGRA', 3), ('e', 'LOGRA', 1), ('cinco', 'LOGRA', 1), ('de', 'LOGRA', 1), ('abril', 'LOGRA', 1), ('787', 'LOGRA', 1), ('sala', '', 2), ('1', '', 2), ('centro', '', 2), ('99370-000', '', 2), ('fontoura', '', 2), ('xavier', '', 2), ('rs', '', 2)]

Entities [('av. iguacu 2345', 'LOGRA')]
Tokens [('av', 'LOGRA', 3), ('.', 'LOGRA', 1), ('iguacu', 'LOGRA', 1), ('2345', 'LOGRA', 1), ('agua', '', 2), ('verde', '', 2), ('80240-030', '', 2), ('curitiba', '', 2), ('pr', '',

In [20]:
phrase = "SHVP Rua 12 435 casa 22 lote 1"

doc = nlp(phrase)
print('Entities', [(ent.text, ent.label_) for ent in doc.ents])
print('Tokens', [(t.text, t.ent_type_, t.ent_iob) for t in doc])

Entities [('SHVP Rua 12 435', 'LOGRA')]
Tokens [('SHVP', 'LOGRA', 3), ('Rua', 'LOGRA', 1), ('12', 'LOGRA', 1), ('435', 'LOGRA', 1), ('casa', '', 2), ('22', '', 2), ('lote', '', 2), ('1', '', 2)]


In [None]:
# save model to output directory
if output_dir is not None:
    output_dir = Path(output_dir)
    if not output_dir.exists():
        output_dir.mkdir()
    nlp.to_disk(output_dir)
    print("Saved model to", output_dir)

In [None]:
loaded_model = spacy.load(output_dir)
evaluate(loaded_model, base_teste_final)