Converte arquivos em .PDF para texto puro .TXT

In [45]:
%%writefile HelperFunctions/TextMining/TextMining.py
            
# 1. Extração de texto dos PDFs das sentenças baixadas
#
# Cria Corpus (DatasetClassification.CSV) das Sentenças do TRT1
# Le os arquivos .txt e armazena-os em uma lista e depois persiste-o em disco no formato .csv.
#
# Ver notebook CriaCorpus.ipynb
#
# corpusTRT1 = Corpus()
# corpusTRT1.setDsFolder("\home\info\MyNotebooks\SentencasTRT1\Corpus")
# corpusTRT1.createTXT()  # Salva na subpasta \txt. Podemos criar um txt para cada arquivo .pdf ou um .txt gigante com o conteudo de todos os .pdf mas isso pode gerar problemas de memoria
# corpusTRT1.createCSV()  # Salva na subpasta \csv
# corpusTRT1.createDSDict() # Salva na subpasta \DS. Pega um .csv ou uma pasta com txt e cria o DatasetDict.
#
from pdfminer3.layout import LAParams, LTTextBox
from pdfminer3.pdfpage import PDFPage
from pdfminer3.pdfinterp import PDFResourceManager
from pdfminer3.pdfinterp import PDFPageInterpreter
from pdfminer3.converter import PDFPageAggregator
from pdfminer3.converter import TextConverter
import io
import pathlib
import pandas as pd
from datasets import Dataset
from os import listdir
from os.path import isfile, join
import re
from datasets import load_dataset
    
class TM():
    """Convert the PDF files of a corpus folder into cleaned plain-text files.

    The folder layout is derived from ``FOLDER_BASE``: PDFs are read from
    ``PDFs_FOLDER`` and one ``.txt`` file per PDF is written to
    ``TXTs_FOLDER``. The CSV/DS attributes are only defined here; no method
    of this class reads them.
    """

    # Maps every whitespace control character to a plain space in one pass.
    _WHITESPACE_TO_SPACE = str.maketrans("\n\t\r\f\v", " " * 5)

    # Anything outside this whitelist is dropped from the extracted text.
    # "à"/"À" are included so the Portuguese crasis is not silently deleted.
    _DISALLOWED_CHARS = re.compile(r'[^a-zA-Z0-9áéíóúÁÉÍÓÚâêîôÂÊÎÔãõÃÕçÇàÀ: ]')

    def __init__(self):
        """Set the folder and file names used by the conversion methods."""
        self.FOLDER_BASE = "/home/info/MyNotebooks/Datasets/SentencasTRT1/"
        self.PDFs_FOLDER = self.FOLDER_BASE + "PDFs/"
        self.TXTs_FOLDER = self.FOLDER_BASE + "TXTs/"
        self.CSV_FOLDER  = self.FOLDER_BASE + "CSV/"
        self.DS_FOLDER   = self.FOLDER_BASE + "DS/"
        self.ARQ_CSV     = "Corpus_Sentencas.csv"
        self.ARQ_DS      = "Corpus_Sentencas"

    def reading_pdf(self, PDF_FILE):
        """Return the text of every page of the PDF at ``PDF_FILE``.

        Args:
            PDF_FILE: Path of the PDF file to read.

        Returns:
            The text produced by the pdfminer ``TextConverter`` for all pages.
        """
        resource_manager = PDFResourceManager()
        fake_file_handle = io.StringIO()
        # LAParams is used with its defaults; detect_vertical=False and
        # line_margin=0.4 were noted by the author as options to try.
        converter = TextConverter(resource_manager, fake_file_handle, laparams=LAParams())
        page_interpreter = PDFPageInterpreter(resource_manager, converter)

        try:
            with open(PDF_FILE, 'rb') as fh:
                for page in PDFPage.get_pages(fh):
                    page_interpreter.process_page(page)

            # Read the buffer before the ``finally`` clause closes it.
            return fake_file_handle.getvalue()
        finally:
            # Release the handles even when a page fails to parse.
            converter.close()
            fake_file_handle.close()

    def getFileList(self):
        """Return the names of the PDF files directly inside ``PDFs_FOLDER``.

        Only regular files are returned (sub-folders are skipped) and the
        extension check is case-insensitive, so ``.pdf`` and ``.PDF`` both
        match. Names are returned without the folder, in ``listdir`` order.
        """
        return [
            name
            for name in listdir(self.PDFs_FOLDER)
            if isfile(join(self.PDFs_FOLDER, name))
            and pathlib.Path(name).suffix.lower() == ".pdf"
        ]

    def replacingExt(self, file_in):
        """Return the output ``.txt`` path in ``TXTs_FOLDER`` for a PDF name.

        Only a trailing ``.pdf`` extension (any letter case) is swapped for
        ``.txt``; other occurrences of ".pdf" inside the name are kept. A
        name without that extension is joined to ``TXTs_FOLDER`` unchanged.

        Args:
            file_in: File name of the PDF, without its folder.

        Returns:
            The path of the text file to write, as a string.
        """
        # Progress output: source path, bare name, then the target path.
        arq = join(self.PDFs_FOLDER, file_in)
        print(arq)
        print(file_in)

        if file_in.lower().endswith(".pdf"):
            out_name = file_in[:-len(".pdf")] + ".txt"
        else:
            out_name = file_in

        file_out = join(self.TXTs_FOLDER, out_name)
        print(file_out)

        return file_out

    def removingSpeCarac(self, text_in):
        """Flatten ``text_in`` to one line and strip non-whitelisted characters.

        Newlines, tabs, carriage returns, form feeds and vertical tabs become
        spaces. Afterwards only ASCII letters, digits, the accented letters
        listed in ``_DISALLOWED_CHARS``, ``:`` and spaces are kept.

        Args:
            text_in: Raw text to clean.

        Returns:
            The cleaned text.
        """
        flattened = text_in.translate(self._WHITESPACE_TO_SPACE)
        return self._DISALLOWED_CHARS.sub('', flattened)

    def createTXT(self):
        """Write one cleaned ``.txt`` file per PDF found in ``PDFs_FOLDER``.

        The text files are written to ``TXTs_FOLDER`` (created when missing)
        as UTF-8, so the accented characters do not depend on the locale.
        The resulting files can also be used to train tokenizers.
        """
        pathlib.Path(self.TXTs_FOLDER).mkdir(parents=True, exist_ok=True)

        for file in self.getFileList():
            arq = join(self.PDFs_FOLDER, file)

            text     = self.removingSpeCarac(self.reading_pdf(arq))
            file_txt = self.replacingExt(file)

            with open(file_txt, 'w', encoding='utf-8') as f:
                f.write(text)
                

Overwriting HelperFunctions/TextMining/TextMining.py


In [46]:
# 2. Create the object
# NOTE(review): TM ends up defined twice here - once by the import and once by
# %run, which executes the module file written by the %%writefile cell above
# (with -i, in the interactive namespace). Presumably the %run is there to pick
# up edits without restarting the kernel - confirm whether both are needed.
from HelperFunctions.TextMining.TextMining import TM
%run -i HelperFunctions/TextMining/TextMining.py
tm = TM()

In [47]:
%%time
# 3. Convert the .PDF files to plain-text .TXT files.
# createTXT() prints three lines per file (PDF path, file name, TXT path),
# which is the output listed below this cell.
tm.createTXT()

/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01012895920185010432-DEJT-13-05-2019.pdf
01012895920185010432-DEJT-13-05-2019.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01012895920185010432-DEJT-13-05-2019.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01005865120175010081-DEJT-01-12-2017.pdf
01005865120175010081-DEJT-01-12-2017.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01005865120175010081-DEJT-01-12-2017.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01003254520185010342-DEJT-06-06-2018.pdf
01003254520185010342-DEJT-06-06-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01003254520185010342-DEJT-06-06-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01001941520185010522-DOERJ-03-04-2018.pdf
01001941520185010522-DOERJ-03-04-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01001941520185010522-DOERJ-03-04-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01013825720175010561-DEJT-08-12-2018.pdf
01013

/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/00104983120155010050-DOERJ-26-07-2016.pdf
00104983120155010050-DOERJ-26-07-2016.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/00104983120155010050-DOERJ-26-07-2016.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/00106407920145010079-DEJT-06-08-2019.pdf
00106407920145010079-DEJT-06-08-2019.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/00106407920145010079-DEJT-06-08-2019.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01007224620175010017-DOERJ-18-04-2018.pdf
01007224620175010017-DOERJ-18-04-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01007224620175010017-DOERJ-18-04-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01006356020215010014-DEJT-27-10-2021.pdf
01006356020215010014-DEJT-27-10-2021.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01006356020215010014-DEJT-27-10-2021.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01003224520185010066-DEJT-15-05-2018.pdf
01

/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01001609820185010341-DEJT-13-03-2018.pdf
01001609820185010341-DEJT-13-03-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01001609820185010341-DEJT-13-03-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01002450920185010075-DEJT-25-07-2018.pdf
01002450920185010075-DEJT-25-07-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01002450920185010075-DEJT-25-07-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01008721020185010561-DEJT-28-05-2019.pdf
01008721020185010561-DEJT-28-05-2019.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01008721020185010561-DEJT-28-05-2019.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01001367020185010341-DEJT-22-03-2018.pdf
01001367020185010341-DEJT-22-03-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01001367020185010341-DEJT-22-03-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01002255020185010226-DEJT-16-06-2018.pdf
01002255

/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01005931120195010069-DEJT-02-08-2019.pdf
01005931120195010069-DEJT-02-08-2019.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01005931120195010069-DEJT-02-08-2019.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01001997720185010056-DEJT-15-05-2018.pdf
01001997720185010056-DEJT-15-05-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01001997720185010056-DEJT-15-05-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01003960720185010226-DEJT-04-07-2018.pdf
01003960720185010226-DEJT-04-07-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01003960720185010226-DEJT-04-07-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01003822020185010421-DEJT-15-06-2018.pdf
01003822020185010421-DEJT-15-06-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01003822020185010421-DEJT-15-06-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01003246020185010342-DOERJ-10-05-2018.pdf
0100324

/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01006946720215010040-DEJT-09-03-2022.pdf
01006946720215010040-DEJT-09-03-2022.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01006946720215010040-DEJT-09-03-2022.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01002601020185010032-DEJT-14-04-2018.pdf
01002601020185010032-DEJT-14-04-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01002601020185010032-DEJT-14-04-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01002037720185010036-DEJT-03-04-2018.pdf
01002037720185010036-DEJT-03-04-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01002037720185010036-DEJT-03-04-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01003094720185010001-DEJT-25-04-2018.pdf
01003094720185010001-DEJT-25-04-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01003094720185010001-DEJT-25-04-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01003727320185010421-DEJT-05-11-2018.pdf
01003727

/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01002364220185010012-DEJT-09-04-2018.pdf
01002364220185010012-DEJT-09-04-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01002364220185010012-DEJT-09-04-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01016407620185010482-DEJT-15-05-2019.pdf
01016407620185010482-DEJT-15-05-2019.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01016407620185010482-DEJT-15-05-2019.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01002044020185010302-DEJT-06-08-2018.pdf
01002044020185010302-DEJT-06-08-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01002044020185010302-DEJT-06-08-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01002670220185010323-DEJT-25-09-2018.pdf
01002670220185010323-DEJT-25-09-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01002670220185010323-DEJT-25-09-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01014866920175010037-DEJT-03-04-2018.pdf
01014866

/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01000623620195010032-DEJT-31-08-2019.pdf
01000623620195010032-DEJT-31-08-2019.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01000623620195010032-DEJT-31-08-2019.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01001654820185010074-DEJT-16-07-2018.pdf
01001654820185010074-DEJT-16-07-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01001654820185010074-DEJT-16-07-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/00750000420095010045-DOERJ-02-02-2018.pdf
00750000420095010045-DOERJ-02-02-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/00750000420095010045-DOERJ-02-02-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01012003920185010431-DEJT-04-06-2019.pdf
01012003920185010431-DEJT-04-06-2019.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01012003920185010431-DEJT-04-06-2019.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01003156120185010031-DOERJ-15-05-2018.pdf
0100

/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01000506820175010201-DEJT-12-02-2017.pdf
01000506820175010201-DEJT-12-02-2017.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01000506820175010201-DEJT-12-02-2017.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01003133120185010051-DEJT-08-08-2018.pdf
01003133120185010051-DEJT-08-08-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01003133120185010051-DEJT-08-08-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01004415920185010501-DEJT-03-07-2018.pdf
01004415920185010501-DEJT-03-07-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01004415920185010501-DEJT-03-07-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01009473220185010017-DEJT-12-02-2019.pdf
01009473220185010017-DEJT-12-02-2019.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01009473220185010017-DEJT-12-02-2019.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/00090003520035010044-12-08-2016.pdf
0009000352003

/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01002148120185010012-DEJT-09-04-2018.pdf
01002148120185010012-DEJT-09-04-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01002148120185010012-DEJT-09-04-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01001822220185010030-DEJT-24-06-2019.pdf
01001822220185010030-DEJT-24-06-2019.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01001822220185010030-DEJT-24-06-2019.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01013949320165010080-DEJT-26-05-2017.pdf
01013949320165010080-DEJT-26-05-2017.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01013949320165010080-DEJT-26-05-2017.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/00116443720145010020-DOERJ-21-07-2016.pdf
00116443720145010020-DOERJ-21-07-2016.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/00116443720145010020-DOERJ-21-07-2016.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01695004820025010032-DEJT-19-06-2019.pdf
01695

/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01003143620185010012-DEJT-25-04-2018.pdf
01003143620185010012-DEJT-25-04-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01003143620185010012-DEJT-25-04-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01003079020185010029-DEJT-25-04-2018.pdf
01003079020185010029-DEJT-25-04-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01003079020185010029-DEJT-25-04-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01011009820195010027-DEJT-30-04-2020.pdf
01011009820195010027-DEJT-30-04-2020.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01011009820195010027-DEJT-30-04-2020.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01003167020185010023-DEJT-30-07-2018.pdf
01003167020185010023-DEJT-30-07-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01003167020185010023-DEJT-30-07-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01002175820185010522-DEJT-09-04-2018.pdf
01002175

/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01003545220185010421-DEJT-23-07-2018.pdf
01003545220185010421-DEJT-23-07-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01003545220185010421-DEJT-23-07-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01001481020185010301-DOERJ-01-05-2018.pdf
01001481020185010301-DOERJ-01-05-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01001481020185010301-DOERJ-01-05-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01003421920165010062-DEJT-09-04-2018.pdf
01003421920165010062-DEJT-09-04-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01003421920165010062-DEJT-09-04-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/00014912420115010060-26-01-2016.pdf
00014912420115010060-26-01-2016.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/00014912420115010060-26-01-2016.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/00115861520155010015-DEJT-07-02-2017.pdf
00115861520155010015

/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01003064820175010221-DEJT-11-04-2017.pdf
01003064820175010221-DEJT-11-04-2017.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01003064820175010221-DEJT-11-04-2017.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01009332620165010047-DEJT-02-12-2019.pdf
01009332620165010047-DEJT-02-12-2019.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01009332620165010047-DEJT-02-12-2019.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01011295020185010265-DEJT-19-03-2019.pdf
01011295020185010265-DEJT-19-03-2019.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01011295020185010265-DEJT-19-03-2019.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01007383920195010046-DEJT-03-09-2019.pdf
01007383920195010046-DEJT-03-09-2019.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01007383920195010046-DEJT-03-09-2019.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01002529520185010076-DEJT-31-07-2018.pdf
01002529

/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01003352220185010041-DEJT-24-10-2018.pdf
01003352220185010041-DEJT-24-10-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01003352220185010041-DEJT-24-10-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01002167020185010038-DEJT-09-07-2018.pdf
01002167020185010038-DEJT-09-07-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01002167020185010038-DEJT-09-07-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01014856920185010451-DEJT-17-09-2019.pdf
01014856920185010451-DEJT-17-09-2019.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01014856920185010451-DEJT-17-09-2019.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/00021819220125010262-DEJT-16-11-2018.pdf
00021819220125010262-DEJT-16-11-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/00021819220125010262-DEJT-16-11-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01007224620175010017-DEJT-09-04-2018.pdf
01007224

/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01018364920165010245-DOERJ-25-08-2017.pdf
01018364920165010245-DOERJ-25-08-2017.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01018364920165010245-DOERJ-25-08-2017.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01001955520185010342-DEJT-09-04-2019.pdf
01001955520185010342-DEJT-09-04-2019.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01001955520185010342-DEJT-09-04-2019.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01015428320175010011-DEJT-05-04-2018.pdf
01015428320175010011-DEJT-05-04-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01015428320175010011-DEJT-05-04-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01001114920175010064-DEJT-19-03-2019.pdf
01001114920175010064-DEJT-19-03-2019.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01001114920175010064-DEJT-19-03-2019.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/00008176320105010001-DOERJ-30-01-2017.pdf
0000

/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01002541220185010029-DOERJ-18-04-2018.pdf
01002541220185010029-DOERJ-18-04-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01002541220185010029-DOERJ-18-04-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/00543005220005010035-DEJT-16-12-2019.pdf
00543005220005010035-DEJT-16-12-2019.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/00543005220005010035-DEJT-16-12-2019.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/00374002220095010441-DEJT-09-08-2019.pdf
00374002220095010441-DEJT-09-08-2019.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/00374002220095010441-DEJT-09-08-2019.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01002861920185010481-DEJT-29-10-2018.pdf
01002861920185010481-DEJT-29-10-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01002861920185010481-DEJT-29-10-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01002106320185010038-DEJT-28-08-2018.pdf
01002

/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01001888920185010010-DEJT-03-08-2018.pdf
01001888920185010010-DEJT-03-08-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01001888920185010010-DEJT-03-08-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01003553320185010002-DEJT-22-10-2018.pdf
01003553320185010002-DEJT-22-10-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01003553320185010002-DEJT-22-10-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01001882920185010030-DEJT-27-09-2018.pdf
01001882920185010030-DEJT-27-09-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01001882920185010030-DEJT-27-09-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01002272320185010031-DEJT-25-04-2018.pdf
01002272320185010031-DEJT-25-04-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01002272320185010031-DEJT-25-04-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01003761320185010421-DEJT-31-07-2018.pdf
01003761

/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/00115369020145010025-DEJT-27-05-2020.pdf
00115369020145010025-DEJT-27-05-2020.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/00115369020145010025-DEJT-27-05-2020.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01501002520075010080-DEJT-30-08-2019.pdf
01501002520075010080-DEJT-30-08-2019.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01501002520075010080-DEJT-30-08-2019.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01007340220215010282-DEJT-23-11-2021.pdf
01007340220215010282-DEJT-23-11-2021.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01007340220215010282-DEJT-23-11-2021.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01008764720185010561-DEJT-28-05-2019.pdf
01008764720185010561-DEJT-28-05-2019.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01008764720185010561-DEJT-28-05-2019.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01007557320195010079-DEJT-07-10-2020.pdf
01007557

/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01002982720185010483-DEJT-29-03-2019.pdf
01002982720185010483-DEJT-29-03-2019.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01002982720185010483-DEJT-29-03-2019.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01015831020175010283-DEJT-12-07-2018.pdf
01015831020175010283-DEJT-12-07-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01015831020175010283-DEJT-12-07-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01004697320185010421-DEJT-25-10-2018.pdf
01004697320185010421-DEJT-25-10-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01004697320185010421-DEJT-25-10-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01003321820185010025-DEJT-17-06-2019.pdf
01003321820185010025-DEJT-17-06-2019.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01003321820185010025-DEJT-17-06-2019.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01002208720185010077-DOERJ-08-05-2018.pdf
0100220

/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01003985820175010081-DEJT-21-03-2019.pdf
01003985820175010081-DEJT-21-03-2019.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01003985820175010081-DEJT-21-03-2019.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01003143620185010012-DOERJ-25-04-2018.pdf
01003143620185010012-DOERJ-25-04-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01003143620185010012-DOERJ-25-04-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01002710720185010075-DEJT-06-12-2018.pdf
01002710720185010075-DEJT-06-12-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01002710720185010075-DEJT-06-12-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01012913920175010246-DEJT-11-10-2018.pdf
01012913920175010246-DEJT-11-10-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01012913920175010246-DEJT-11-10-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01012790620175010511-DOERJ-08-03-2018.pdf
0101

/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01016363920185010482-DEJT-23-05-2019.pdf
01016363920185010482-DEJT-23-05-2019.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01016363920185010482-DEJT-23-05-2019.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01013255820185010511-DEJT-22-05-2019.pdf
01013255820185010511-DEJT-22-05-2019.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01013255820185010511-DEJT-22-05-2019.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01006942720195010076-DEJT-20-10-2021.pdf
01006942720195010076-DEJT-20-10-2021.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01006942720195010076-DEJT-20-10-2021.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01002538720185010203-DEJT-02-08-2018.pdf
01002538720185010203-DEJT-02-08-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01002538720185010203-DEJT-02-08-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01002366220185010070-DEJT-13-07-2018.pdf
01002366

/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01014016320175010561-DEJT-16-08-2019.pdf
01014016320175010561-DEJT-16-08-2019.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01014016320175010561-DEJT-16-08-2019.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01002989520185010040-DEJT-25-04-2018.pdf
01002989520185010040-DEJT-25-04-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01002989520185010040-DEJT-25-04-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01013111920195010421-DEJT-20-01-2020.pdf
01013111920195010421-DEJT-20-01-2020.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01013111920195010421-DEJT-20-01-2020.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01003133820185010081-DEJT-17-07-2018.pdf
01003133820185010081-DEJT-17-07-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01003133820185010081-DEJT-17-07-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01002583320185010002-DOERJ-10-04-2018.pdf
0100258

/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01008026320205010030-DEJT-23-05-2022.pdf
01008026320205010030-DEJT-23-05-2022.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01008026320205010030-DEJT-23-05-2022.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01000144320195010011-DEJT-24-07-2019.pdf
01000144320195010011-DEJT-24-07-2019.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01000144320195010011-DEJT-24-07-2019.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01002192820185010522-DOERJ-10-04-2018.pdf
01002192820185010522-DOERJ-10-04-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01002192820185010522-DOERJ-10-04-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01000700320185010079-DOERJ-09-03-2018.pdf
01000700320185010079-DOERJ-09-03-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01000700320185010079-DOERJ-09-03-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01002011320185010035-DEJT-27-11-2018.pdf
01

/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01010567920165010061-DEJT-23-09-2017.pdf
01010567920165010061-DEJT-23-09-2017.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01010567920165010061-DEJT-23-09-2017.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01002295620215010461-DEJT-04-08-2021.pdf
01002295620215010461-DEJT-04-08-2021.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01002295620215010461-DEJT-04-08-2021.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01018707820175010248-DEJT-13-03-2019.pdf
01018707820175010248-DEJT-13-03-2019.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01018707820175010248-DEJT-13-03-2019.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01011127120185010246-DEJT-21-01-2019.pdf
01011127120185010246-DEJT-21-01-2019.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01011127120185010246-DEJT-21-01-2019.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01002237820185010065-DEJT-30-07-2018.pdf
01002237

/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01015338520175010411-DEJT-21-03-2019.pdf
01015338520175010411-DEJT-21-03-2019.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01015338520175010411-DEJT-21-03-2019.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01003570720185010421-DEJT-27-06-2018.pdf
01003570720185010421-DEJT-27-06-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01003570720185010421-DEJT-27-06-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01009794520175010058-DEJT-29-09-2018.pdf
01009794520175010058-DEJT-29-09-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01009794520175010058-DEJT-29-09-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/00112481620145010067-DOERJ-10-08-2017.pdf
00112481620145010067-DOERJ-10-08-2017.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/00112481620145010067-DOERJ-10-08-2017.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01002133120185010066-DEJT-25-04-2018.pdf
01002

/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01001692020185010031-DEJT-25-04-2018.pdf
01001692020185010031-DEJT-25-04-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01001692020185010031-DEJT-25-04-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01002739820185010261-DEJT-31-10-2018.pdf
01002739820185010261-DEJT-31-10-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01002739820185010261-DEJT-31-10-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01009088120165010283-DEJT-06-03-2018.pdf
01009088120165010283-DEJT-06-03-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01009088120165010283-DEJT-06-03-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01004093520195010302-DEJT-23-09-2021.pdf
01004093520195010302-DEJT-23-09-2021.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01004093520195010302-DEJT-23-09-2021.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01013996520175010053-DEJT-21-03-2019.pdf
01013996

/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01017885920175010244-DEJT-02-03-2021.pdf
01017885920175010244-DEJT-02-03-2021.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01017885920175010244-DEJT-02-03-2021.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01016775320185010531-DEJT-05-04-2019.pdf
01016775320185010531-DEJT-05-04-2019.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01016775320185010531-DEJT-05-04-2019.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01003325620185010077-DEJT-18-10-2018.pdf
01003325620185010077-DEJT-18-10-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01003325620185010077-DEJT-18-10-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01015616220175010020-DEJT-15-12-2018.pdf
01015616220175010020-DEJT-15-12-2018.pdf
/home/info/MyNotebooks/Datasets/SentencasTRT1/TXTs/01015616220175010020-DEJT-15-12-2018.txt
/home/info/MyNotebooks/Datasets/SentencasTRT1/PDFs/01007271020195010046-DEJT-26-09-2019.pdf
01007271