In [34]:
import urllib.request, urllib.error, urllib.parse
import os
import sys
OEUVRES = list()
from bs4 import BeautifulSoup

In [35]:
class Ouvrage():
    """ 
    Classe permettant de définir 
    les caractéristiques d'un ouvrage
    """
    
    def __init__(self, author, title, identifier, firstpage, lastpage):

        self.author = author
        self.title = title
        self.identifier = identifier
        self.firstpage = int(firstpage)
        self.lastpage = int(lastpage)

In [36]:
def data_reader(filename):

    """
    Créé un objet ouvrage composé des 
    informations de chaque ouvrage depuis fichier filename
    """

    with open(filename, "r") as f:

        for line in f:
            # extrait les informations du fichier
            (author, title, identifier, firstpage, lastpage) = line.split(",")
            # créer un object ouvrage par ligne
            ouvrage = Ouvrage(author, title, identifier, firstpage, lastpage)
            # ajout à liste de tous les ouvrages
            OEUVRES.append(ouvrage)

In [47]:
def make_dir(path_, dir_):

        """ 
        créer un dossier dir_ dans path_ 
        s'il n'existe pas 
        """
        
        new_path = os.path.join(path_, dir_)
        
        if not os.path.exists(new_path):

            os.mkdir(new_path)
        return(new_path)


In [38]:
def make_metadonnees(path_ouvrage, identifier, name='metadonnees.xml'):

    """ 
    télécharge depuis gallica le xml dublin core des metadonnees 
    d'un ouvrage nommé depuis un identifiant ark 
    dans le dossier path_
    """ 

    metadonnee_path = os.path.join(path_ouvrage, name)

    if not os.path.isfile(metadonnee_path):

        url = 'https://gallica.bnf.fr/services/OAIRecord?ark='+identifier[7:]
        urllib.request.urlretrieve(url, metadonnee_path)

    return metadonnee_path

In [39]:
def isocr(path_xml_file):

    """ 
    cherche dans balise des métadonnées d'un ouvrage gallica 
    si une version océrisée de l'ouvrage existe
    """
    
    
    if path_xml_file.endswith(".xml"):

        xml_file = open(path_xml_file, 'r')
        doc = xml_file.read()
        soup = BeautifulSoup(doc, 'lxml')

        ocr = soup.find('nqamoyen') # balise responsable de l'ocr
        ocr = ocr.contents
        ocr = float(ocr[0])

        if ocr >= 50:

            return True

        return False

In [40]:
def scrapper(ouvrage, path_, ext):

    """ 
    récupère les images ou l'xml d'un ouvrage depuis gallica 
    et les dépose dans le répertoire correspondant
    """

    listpage = range(ouvrage.firstpage, ouvrage.lastpage + 1)
    len_last = len(str(ouvrage.lastpage))

    for page in listpage:

        size = len_last - len(str(page))
        zero = "0" * size

        file_ = zero + str(page) + ext 

        path_file = os.path.join(path_, file_)
        
        if not os.path.isfile(path_file):

            if ext == '.jpg':
                # requête pour les jpg
                url = 'http://gallica.bnf.fr/iiif/ark:' + ouvrage.identifier + '/f' + str(page) + '/full/5000/0/native.jpg'
            
            if ext == '.xml':
            # requête pour les xml
                url = 'https://gallica.bnf.fr/RequestDigitalElement?O='+ouvrage.identifier[7:]+'&E=ALTO&Deb='+str(page)

            try:

                urllib.request.urlretrieve(url, path_file)

            except urllib.error.HTTPError as err:

                print('Error -> ({})'.format(err))
                
        # les lignes suivantes gèrent juste un affichage pour suivre l'evolution des requêtes
        print_path = path_.replace('/home/lf/Bureau/Mémoire/Corpus/','')
        percent = round((page / ouvrage.lastpage)*100)
        sys.stdout.write("\r"+ print_path + " : " + str(percent) + "% " + zero+str(page)+"/"+str(ouvrage.lastpage)+ext)
        sys.stdout.flush()
    sys.stdout.write("\r\n")

In [41]:
def xml_alto_to_txt(xml_path, txt_path):

    """ 
    Transforme les xml téléchargés depuis gallica en plein texte txt
    
    f : nom du fichier xml
    xml_dir : dossier du fichier xml
    txt_dir : dossier du fichier txt créé"""

    files = sorted(os.listdir(xml_path))

    firstpage = int(files[0][0:-4])
    lastpage = int(files[-1][0:-4])

    for f in files:

        if f.endswith(".xml"):

            xml_file = os.path.join(xml_path,f)
            ftxt = f.replace(".xml",".txt")
            txt_file = os.path.join(txt_path,ftxt)

            if not os.path.isfile(txt_file):


                fd = open(xml_file, 'r')
                doc = fd.read()
                fd.close()

                soup = BeautifulSoup(doc, 'lxml')
                strin = soup.find_all('string')
                content = ""

                for data in strin:
                
                    content+=data['content']+" "

                with open(txt_file,'w') as txtfile:
                    txtfile.write(content)

        print_path = txt_path.replace('/home/lf/Bureau/Mémoire/Corpus/','')
        percent = round((int(f[0:-4]) / lastpage)*100)
        sys.stdout.write("\r"+ print_path + " : " + str(percent) + "% " + str(f[0:-4])+"/"+str(files[-1].replace(".xml",'.txt')))
        sys.stdout.flush()
    sys.stdout.write("\r\n\n")

In [42]:
# CONSTANTS

CWD = '/home/lf/Bureau/Memoire/'
cwd = '/home/lf/Bureau/Memoire/Corpus'

DATA = 'ark_gallica.txt'
	
MAIN_DIR = 'Corpus'

cwd = os.path.join(CWD, MAIN_DIR)

IMG_DIR = 'IMG'
XML_DIR = 'XML'
TXT_DIR = 'TXT_GALLICA'
GROUNDTRUE_DIR = 'TXT_GROUNDTRUE'
OUTPUT_TXT__DIR = 'TXT_TESSERACT'

In [48]:
# construit un corpus des textes en format jpg, xml alto et plein text 
# depuis un fichier de données avec les identifiant gallica(data)

print("Main ->", cwd)
data_reader(DATA)
#corpus_path = make_dir(CWD, MAIN_DIR)

for ouvrage in OEUVRES:
    print(ouvrage.title)

    path_author_dir = make_dir(cwd, ouvrage.author)
    path_author_dir = os.path.join(cwd, ouvrage.author)
    path_ouvrage_dir = make_dir(path_author_dir, ouvrage.title)


    path_img_dir = make_dir(path_ouvrage_dir, IMG_DIR)
    scrapper(ouvrage, path_img_dir, '.jpg')
        
    ocr_flag = make_metadonnees(path_ouvrage_dir, ouvrage.identifier)

    if isocr(ocr_flag) == True:

        path_xml_dir = make_dir(path_ouvrage_dir, XML_DIR)
        path_txt_dir = make_dir(path_ouvrage_dir, TXT_DIR)
        scrapper(ouvrage, path_xml_dir, '.xml')
        xml_alto_to_txt(path_xml_dir, path_txt_dir)

Main -> /home/lf/Bureau/Memoire/Corpus
le_chemin_de_la_croix_des_ames
/home/lf/Bureau/Memoire/Corpus/Georges_Bernanos/le_chemin_de_la_croix_des_ames/IMG : 100% 513/513.jpg
la_france_contre_les_robots
/home/lf/Bureau/Memoire/Corpus/Georges_Bernanos/la_france_contre_les_robots/IMG : 100% 230/230.jpg
scandale_de_la_verite
/home/lf/Bureau/Memoire/Corpus/Georges_Bernanos/scandale_de_la_verite/IMG : 100% 92/92.jpg
scenes_et_doctrines_du_nationalisme
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/scenes_et_doctrines_du_nationalisme/IMG : 100% 532/532.jpg
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/scenes_et_doctrines_du_nationalisme/XML : 100% 532/532.xml
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/scenes_et_doctrines_du_nationalisme/TXT_GALLICA : 100% 532/532.txt

lame_francaise_et_la_guerre_1
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_1/IMG : 100% 406/406.jpg
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_1/XML : 47% 189/406.xml

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_3/IMG : 100% 467/467.jpg
lame_francaise_et_la_guerre_4
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_4/IMG : 100% 326/326.jpg
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_4/XML : 100% 326/326.xml
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_4/TXT_GALLICA : 100% 326/326.txt

lame_francaise_et_la_guerre_5
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_5/IMG : 100% 425/425.jpg
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_5/XML : 100% 425/425.xml
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_5/TXT_GALLICA : 100% 425/425.txt

lame_francaise_et_la_guerre_6
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_6/IMG : 100% 347/347.jpg
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_6/XML : 100% 347/347.xml
/

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_8/XML : 100% 336/336.xml
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_8/TXT_GALLICA : 100% 336/336.txt

lame_francaise_et_la_guerre_9
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_9/IMG : 100% 436/436.jpg
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_9/XML : 100% 436/436.xml
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_9/TXT_GALLICA : 100% 436/436.txt

lame_francaise_et_la_guerre_10
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_10/IMG : 100% 390/390.jpg
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_10/XML : 100% 390/390.xml
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_10/TXT_GALLICA : 100% 390/390.txt

lame_francaise_et_la_guerre_12
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_12/IMG : 18%

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/souvenirs_dun_officier_de_la_grande_armee/TXT_GALLICA : 100% 360/360.txt

les_amities_francaises
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/les_amities_francaises/IMG : 100% 290/290.jpg
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/les_amities_francaises/XML : 100% 290/290.xml
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/les_amities_francaises/TXT_GALLICA : 100% 290/290.txt

Le_salut_par_les_juifs
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/Le_salut_par_les_juifs/IMG : 100% 163/163.jpg
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/Le_salut_par_les_juifs/XML : 100% 163/163.xml
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/Le_salut_par_les_juifs/TXT_GALLICA : 100% 163/163.txt

Entrepreneur_de_démolitions
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/Entrepreneur_de_démolitions/IMG : 100% 294/294.jpg
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/Entrepreneur_de_démolitions/XML : 100% 294/294.xml
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/Entrepreneur_de_démolition

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/ L_Invendable/XML : 100% 320/320.xml
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/ L_Invendable/TXT_GALLICA : 100% 320/320.txt

Le_stupide_XIXe_siecle
/home/lf/Bureau/Memoire/Corpus/Léon_Daudet/Le_stupide_XIXe_siecle/IMG : 100% 312/312.jpg
le_chemin_de_la_croix_des_ames
/home/lf/Bureau/Memoire/Corpus/Georges_Bernanos/le_chemin_de_la_croix_des_ames/IMG : 100% 513/513.jpg
la_france_contre_les_robots
/home/lf/Bureau/Memoire/Corpus/Georges_Bernanos/la_france_contre_les_robots/IMG : 100% 230/230.jpg
scandale_de_la_verite
/home/lf/Bureau/Memoire/Corpus/Georges_Bernanos/scandale_de_la_verite/IMG : 100% 92/92.jpg
scenes_et_doctrines_du_nationalisme
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/scenes_et_doctrines_du_nationalisme/IMG : 100% 532/532.jpg
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/scenes_et_doctrines_du_nationalisme/XML : 100% 532/532.xml
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/scenes_et_doctrines_du_nationalisme/TXT_GALLICA : 100% 532

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_4/IMG : 100% 326/326.jpg
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_4/XML : 100% 326/326.xml
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_4/TXT_GALLICA : 100% 326/326.txt

lame_francaise_et_la_guerre_5
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_5/IMG : 100% 425/425.jpg
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_5/XML : 100% 425/425.xml
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_5/TXT_GALLICA : 100% 425/425.txt

lame_francaise_et_la_guerre_6
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_6/IMG : 100% 347/347.jpg
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_6/XML : 100% 347/347.xml
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_6/TXT_GALLICA : 100% 347/347.txt

lame_francaise_et_la_g

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_9/XML : 100% 436/436.xml
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_9/TXT_GALLICA : 100% 436/436.txt

lame_francaise_et_la_guerre_10
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_10/IMG : 100% 390/390.jpg
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_10/XML : 100% 390/390.xml
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_10/TXT_GALLICA : 100% 390/390.txt

lame_francaise_et_la_guerre_12
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_12/IMG : 100% 292/292.jpg
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_12/XML : 100% 292/292.xml
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_12/TXT_GALLICA : 100% 292/292.txt

les_diverses_familles_spirituelles_de_la_france
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/les_diverses_famill

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/les_amities_francaises/TXT_GALLICA : 100% 290/290.txt

Le_salut_par_les_juifs
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/Le_salut_par_les_juifs/IMG : 100% 163/163.jpg
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/Le_salut_par_les_juifs/XML : 100% 163/163.xml
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/Le_salut_par_les_juifs/TXT_GALLICA : 100% 163/163.txt

Entrepreneur_de_démolitions
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/Entrepreneur_de_démolitions/IMG : 100% 294/294.jpg
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/Entrepreneur_de_démolitions/XML : 100% 294/294.xml
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/Entrepreneur_de_démolitions/TXT_GALLICA : 100% 294/294.txt

Exégèse_des_lieux_communs
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/Exégèse_des_lieux_communs/IMG : 100% 298/298.jpg
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/Exégèse_des_lieux_communs/XML : 100% 298/298.xml
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/Exégèse_des_lieux_communs/TXT_GALLICA : 100% 2

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



/home/lf/Bureau/Memoire/Corpus/Georges_Bernanos/le_chemin_de_la_croix_des_ames/IMG : 100% 513/513.jpg
la_france_contre_les_robots
/home/lf/Bureau/Memoire/Corpus/Georges_Bernanos/la_france_contre_les_robots/IMG : 100% 230/230.jpg
scandale_de_la_verite
/home/lf/Bureau/Memoire/Corpus/Georges_Bernanos/scandale_de_la_verite/IMG : 100% 92/92.jpg
scenes_et_doctrines_du_nationalisme
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/scenes_et_doctrines_du_nationalisme/IMG : 100% 532/532.jpg
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/scenes_et_doctrines_du_nationalisme/XML : 100% 532/532.xml
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/scenes_et_doctrines_du_nationalisme/TXT_GALLICA : 100% 532/532.txt

lame_francaise_et_la_guerre_1
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_1/IMG : 100% 406/406.jpg
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_1/XML : 100% 406/406.xml
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_gue

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_4/TXT_GALLICA : 100% 326/326.txt

lame_francaise_et_la_guerre_5
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_5/IMG : 100% 425/425.jpg
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_5/XML : 100% 425/425.xml
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_5/TXT_GALLICA : 100% 425/425.txt

lame_francaise_et_la_guerre_6
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_6/IMG : 100% 347/347.jpg
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_6/XML : 100% 347/347.xml
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_6/TXT_GALLICA : 100% 347/347.txt

lame_francaise_et_la_guerre_7
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_7/IMG : 100% 440/440.jpg
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_7/XML : 41% 181/4

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/souvenirs_dun_officier_de_la_grande_armee/XML : 100% 360/360.xml
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/les_amities_francaises/XML : 100% 290/290.xml_GALLICA : 99% 357/360.txt
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/les_amities_francaises/TXT_GALLICA : 100% 290/290.txt

Le_salut_par_les_juifs
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/Le_salut_par_les_juifs/IMG : 100% 163/163.jpg
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/Le_salut_par_les_juifs/XML : 100% 163/163.xml
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/Le_salut_par_les_juifs/TXT_GALLICA : 100% 163/163.txt

Entrepreneur_de_démolitions
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/Entrepreneur_de_démolitions/TXT_GALLICA : 100% 294/294.txt

Exégèse_des_lieux_communs
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/Exégèse_des_lieux_communs/IMG : 100% 298/298.jpg
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/Exégèse_des_lieux_communs/XML : 100% 298/298.xml
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/E

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_1/IMG : 100% 406/406.jpg
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_1/XML : 100% 406/406.xml
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_2/IMG : 100% 389/389.jpg406.txt
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_2/TXT_GALLICA : 100% 389/389.txt

lame_francaise_et_la_guerre_3
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_3/IMG : 100% 467/467.jpg
lame_francaise_et_la_guerre_4
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_4/IMG : 100% 326/326.jpg
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_4/XML : 100% 326/326.xml
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_4/TXT_GALLICA : 76% 248/326.txt

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_7/XML : 100% 440/440.xml
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_10/TXT_GALLICA : 100% 390/390.txt

lame_francaise_et_la_guerre_12
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_12/IMG : 100% 292/292.jpg
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_12/XML : 100% 292/292.xml
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_12/TXT_GALLICA : 100% 292/292.txt

les_diverses_familles_spirituelles_de_la_france
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/les_diverses_familles_spirituelles_de_la_france/IMG : 100% 322/322.jpg
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/les_diverses_familles_spirituelles_de_la_france/XML : 100% 322/322.xml
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/les_diverses_familles_spirituelles_de_la_france/TXT_GALLICA : 100% 322/322.txt

une_visite_a_larme_anglaise
/home/lf/Bureau/

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



/home/lf/Bureau/Memoire/Corpus/Léon_Daudet/Le_stupide_XIXe_siecle/IMG : 100% 312/312.jpg
le_chemin_de_la_croix_des_ames
/home/lf/Bureau/Memoire/Corpus/Georges_Bernanos/la_france_contre_les_robots/IMG : 100% 230/230.jpgpg
scandale_de_la_verite
/home/lf/Bureau/Memoire/Corpus/Georges_Bernanos/scandale_de_la_verite/IMG : 100% 92/92.jpg
scenes_et_doctrines_du_nationalisme
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/scenes_et_doctrines_du_nationalisme/TXT_GALLICA : 100% 532/532.txt

lame_francaise_et_la_guerre_1
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_1/XML : 100% 406/406.xml
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_1/TXT_GALLICA : 100% 406/406.txt

lame_francaise_et_la_guerre_2
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_2/IMG : 51% 200/389.jpg

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_7/XML : 100% 440/440.xml
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_8/IMG : 100% 336/336.jpg440.txt
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_8/TXT_GALLICA : 100% 336/336.txt

lame_francaise_et_la_guerre_9
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_9/IMG : 100% 436/436.jpg
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_9/XML : 100% 436/436.xml
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_9/TXT_GALLICA : 100% 436/436.txt

lame_francaise_et_la_guerre_10
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_10/IMG : 100% 390/390.jpg
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_10/XML : 100% 390/390.xml
/home/lf/Bureau/Memoire/Corpus/Maurice_Barrès/lame_francaise_et_la_guerre_10/TXT_GALLICA : 2% 006/390.txt

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/Christophe_Colomb_devant_les_taureaux/TXT_GALLICA : 100% 240/240.txt

Je_m_accuse
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/Je_m_accuse/IMG : 100% 185/185.jpg
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/Je_m_accuse/XML : 100% 185/185.xml
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/Je_m_accuse/TXT_GALLICA : 100% 185/185.txt

Les_funérailles_du_naturalisme
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/Les_funérailles_du_naturalisme/IMG : 100% 44/44.jpg
lettres_à_Pierre_Termier
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/lettres_à_Pierre_Termier/IMG : 100% 312/312.jpg
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/lettres_à_Pierre_Termier/XML : 100% 312/312.xml
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/lettres_à_Pierre_Termier/TXT_GALLICA : 100% 312/312.txt

 L_Invendable
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/ L_Invendable/IMG : 100% 320/320.jpg
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/ L_Invendable/XML : 100% 320/320.xml
/home/lf/Bureau/Memoire/Corpus/Léon_Bloy/ L_I