In [6]:
import os
from mammoth import convert_to_html
import xml.etree.ElementTree as ET
import json
import shutil

In [None]:
# Folder holding the manuscript sources and the unpacked EPUB tree to fill.
root_path = './Manuscrito'
output_path = './epub/raw/'

# Navigation index filled in while the pages are generated:
#   'pre'   -> pages saved before the first part is processed
#   'parts' -> one list of chapter entries per part
#   'post'  -> pages saved after a part has been processed
routes:dict = {
    'pre': [],
    'parts': {},
    'post': [],
}

# Start from an empty Text folder. The folder may not exist yet on a fresh
# checkout, so only remove it when present and create missing parents too.
text_dir = f'{output_path}/OEBPS/Text'
if os.path.isdir(text_dir):
    shutil.rmtree(text_dir)
os.makedirs(text_dir, exist_ok=True)

# Set to True by the main loop once a part folder has been processed.
has_readed_chapters = False




def add_title_and_index(raw, title, index, is_chapter):
    """Wrap converted HTML in a ``<body>`` element and prepend its headings.

    Args:
        raw: UTF-8 encoded HTML fragment (bytes) for the page content.
        title: Page title as text.
        index: Chapter number; only used when ``is_chapter`` is true.
        is_chapter: When true, emit ``<h3>index</h3><h2>title</h2>``.
            Otherwise emit ``<h1>title</h1>``, except for the titles
            'Sinópsis' and 'Dedicatoria', which get no heading at all.

    Returns:
        The complete ``<body>...</body>`` markup as bytes.
    """
    # Local import so this block does not depend on the notebook's import cell.
    from html import escape

    # The result is parsed as XML afterwards, so '&', '<' and '>' in the title
    # must be escaped or the parse fails.
    safe_title = escape(title, quote=False).encode('utf-8')

    if is_chapter:
        heading = b'<h3>' + str(index).encode('utf-8') + b'</h3>' + \
            b'<h2>' + safe_title + b'</h2>'
    elif title != 'Sinópsis' and title != 'Dedicatoria':
        heading = b'<h1>' + safe_title + b'</h1>'
    else:
        heading = b''

    return b'<body>' + heading + raw + b'</body>'

def add_parafernalia_nav(parafernalia):
    """Register a non-chapter page in the navigation index.

    The page goes into ``routes['pre']`` while no part has been processed
    yet (``has_readed_chapters`` is false) and into ``routes['post']``
    afterwards.

    Args:
        parafernalia: Saved file name of the page; its last six characters
            are dropped to build the navigation text.
    """
    global has_readed_chapters

    entry = {
        'path' : parafernalia,
        'nav_text' : parafernalia[:-6],
    }
    section = 'post' if has_readed_chapters else 'pre'
    routes[section].append(entry)


def prepare_and_save_page(origin, is_chapter=True) -> str:
    """Convert one .docx file into an XHTML page under ``OEBPS/Text``.

    The file name supplies the metadata: for chapters the first two
    characters are parsed as the chapter number, and the title is the name
    with its first four and last five characters removed.

    Args:
        origin: Path of the .docx file to convert.
        is_chapter: True for numbered chapters, False for other pages.

    Returns:
        The name of the file written inside ``OEBPS/Text``.
    """
    ET.register_namespace('', 'http://www.w3.org/1999/xhtml')

    file_name = os.path.basename(origin)
    page_tree:ET.ElementTree = ET.parse('./templates/chapter.xhtml')
    page_root = page_tree.getroot()

    if is_chapter:
        number = int(file_name[:2])
    else:
        number = None
    title:str = file_name[4:-5]

    # Convert the document, wrap it with its headings and turn the scene
    # break marker paragraphs into horizontal rules before parsing.
    with open(origin, 'rb') as docx_file:
        markup = convert_to_html(docx_file).value.encode('utf-8')
        markup = add_title_and_index(markup, title, number, is_chapter)
        markup = markup.replace(b'<p>#</p>', b'<hr />')

        body:ET.Element = ET.fromstring(bytes(markup))

    head = page_root.find('{http://www.w3.org/1999/xhtml}head')
    head.find('{http://www.w3.org/1999/xhtml}title').text = title

    page_root.append(body)

    if is_chapter:
        save_name = f'Cap{number}.xhtml'
    else:
        save_name = f'{title}.xhtml'
    save_name = save_name.replace(' ', '_')

    ET.indent(page_tree, '    ')
    with open(f'{output_path}/OEBPS/Text/{save_name}', 'wb') as out:
        # The XML declaration and DOCTYPE are written by hand ahead of the tree.
        out.write(b'<?xml version="1.0" encoding="UTF-8"?>\n')
        out.write(b'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" \n\t'
                b'"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n\n')
        page_tree.write(out, 'utf-8')

    # Progress log: chapters show their number, other pages only the title.
    if is_chapter:
        print(number, title)
    else:
        print(title)

    return save_name


def prepare_part_page(number, title):
    """Create the divider page of a part from ``./templates/part.xhtml``.

    Args:
        number: Label of the part, used in the heading, the page title and
            the output file name.
        title: Title of the part, written into the ``<h1>`` element.

    Returns:
        The name of the file written inside ``OEBPS/Text``.
    """
    ET.register_namespace('', 'http://www.w3.org/1999/xhtml')
    part_tree:ET.ElementTree = ET.parse('./templates/part.xhtml')
    part_root = part_tree.getroot()

    part_label = f'{number} Parte'

    # Fill the two headings in the body, then the document title in the head.
    body = part_root.find('{http://www.w3.org/1999/xhtml}body')
    body.find('{http://www.w3.org/1999/xhtml}h2').text = part_label
    body.find('{http://www.w3.org/1999/xhtml}h1').text = title

    head = part_root.find('{http://www.w3.org/1999/xhtml}head')
    head.find('{http://www.w3.org/1999/xhtml}title').text = part_label

    save_name = f'Parte{number}.xhtml'

    ET.indent(part_tree, '    ')
    with open(f'{output_path}/OEBPS/Text/{save_name}', 'wb') as out:
        # The XML declaration and DOCTYPE are written by hand ahead of the tree.
        out.write(b'<?xml version="1.0" encoding="UTF-8"?>\n')
        out.write(b'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" \n\t'
                b'"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n\n')
        part_tree.write(out, 'utf-8')

    print(part_label)

    return save_name




# The cover page is copied verbatim from its template and always comes first.
shutil.copy('./templates/cubierta.xhtml', f'{output_path}OEBPS/Text/Portada.xhtml')
routes['pre'].append({
    'path' : 'Portada.xhtml',
    'nav_text' : 'Portada'
})
print('Portada')


# os.listdir() returns names in arbitrary order, but both the reading order
# and the pre/post split below depend on the order of traversal, so the
# entries are sorted by name.
# NOTE(review): assumes the manuscript names sort into reading order - confirm.
for part in sorted(os.listdir(root_path)):

    if os.path.isdir(f"{root_path}/{part}") and part != 'Parafernalia':

        # The folder name is split on '.': the second word of the middle
        # piece is the part label, the last piece is the part title.
        part_number = part.split('.')[1].split(' ')[1]
        part_title = part.split('.')[2].strip()

        routes['parts'][f'{part_number}: {part_title}'] = []
        part_nav:list = routes['parts'][f'{part_number}: {part_title}']

        prepare_part_page(number=part_number, title=part_title)
        # From here on, loose pages are filed under 'post' instead of 'pre'.
        has_readed_chapters = True

        for chapter in sorted(os.listdir(f"{root_path}/{part}")):

            # Only Word documents are chapters; '~$' files are the lock
            # files Word leaves next to a document that is currently open.
            if not chapter.endswith('.docx') or chapter.startswith('~$'):
                continue

            path = f"{root_path}/{part}/{chapter}"
            result_name = prepare_and_save_page(origin=path)

            part_nav.append({
                'path' : result_name,
                # Chapter number without leading zero, then the rest of the
                # file name minus the '.docx' extension.
                'nav_text' : str(int(chapter[:2])) + chapter[2:-5]
            })

    elif part.endswith('.docx') and not part.startswith('~$'):

        path = f"{root_path}/{part}"
        result_name = prepare_and_save_page(origin=path, is_chapter=False)

        add_parafernalia_nav(result_name)


# Keep a copy of the navigation index on disk.
with open('./reference.json', 'w') as js:
    json.dump(routes, js, indent='    ')
    

Portada
Sinópsis
Dedicatoria
Agradecimientos
Prólogo
Primera Parte
1 Un corazón estremecido
2 El sangrado
3 Trasnochados
4 Cabras montesas
5 En el techo del carruaje
6 Marcando territorio
7 Desastre
Sobre el autor


In [None]:
# Running navPoint counter; createNavElement() increments it for each entry.
count = 0

# Container that receives every navPoint before it is attached to the NCX.
base:ET.Element = ET.Element('navMap')

# Load the NCX template; the NCX namespace is registered with an empty
# prefix before the tree is written back out.
ET.register_namespace('', 'http://www.daisy.org/z3986/2005/ncx/')
ncx_tree:ET.ElementTree = ET.parse('./templates/toc.ncx')
ncx = ncx_tree.getroot()

def createNavElement(element_data:dict, parte=False):
    """Build one NCX ``navPoint`` element and advance the global counter.

    Args:
        element_data: Mapping with a ``'nav_text'`` key and, for regular
            pages, a ``'path'`` key holding the file name under ``Text/``.
            For parts, ``'nav_text'`` has the form ``'<label>:<title>'``.
        parte: True when the entry is a part divider; the label and the
            target file are then derived from ``'nav_text'``.

    Returns:
        The ``navPoint`` element with its ``navLabel`` and ``content``
        children.
    """
    global count
    count += 1
    navItem = ET.Element('navPoint', {
            'id' : f'navPoint-{count}',
            'playOrder' : str(count)
        })

    nav_text = element_data['nav_text']
    if parte:
        # Split on the first ':' only, so a part title that itself contains
        # a colon is kept whole instead of being cut at the second one.
        part_number, _, part_title = nav_text.partition(':')
        label_text = str(part_number) + ' Parte:' + str(part_title)
        src = f'Text/Parte{part_number}.xhtml'
    else:
        # File names use '_' where the title had spaces; undo that here.
        label_text = str(nav_text).replace('_', ' ')
        # Double outer quotes: reusing the same quote inside an f-string is
        # a syntax error before Python 3.12.
        src = f"Text/{element_data['path']}"

    label = ET.Element('navLabel')
    text_label = ET.Element('text')
    text_label.text = label_text
    label.append(text_label)
    navItem.append(label)

    navItem.append(ET.Element('content', {'src': src}))

    return navItem

# Walk the navigation index in its stored order. Loose pages become
# top-level navPoints; every part becomes a navPoint with its chapters nested.
for section, entries in routes.items():
    if section == 'parts':
        for part_key, chapter_entries in entries.items():
            part_point = createNavElement({'nav_text': part_key}, parte=True)
            for chapter_entry in chapter_entries:
                part_point.append(createNavElement(chapter_entry))
            base.append(part_point)
    else:
        for page_entry in entries:
            base.append(createNavElement(page_entry))


with open(f'{output_path}/OEBPS/toc.ncx', 'wb') as out:
    ncx.append(base)
    ET.indent(ncx_tree, '    ')
    # The XML declaration and DOCTYPE are written by hand ahead of the tree.
    ncx_header = (
        b'<?xml version="1.0" encoding="UTF-8"?>\n'
        b'<!DOCTYPE ncx PUBLIC "-//NISO//DTD ncx 2005-1//EN" \n\t'
        b'"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">\n\n'
    )
    out.write(ncx_header)
    ncx_tree.write(out, 'utf-8')



In [None]:
# Load the package document template and keep handles on the two sections
# that are filled in below.
ET.register_namespace('', 'http://www.idpf.org/2007/opf')

content_opf = ET.parse('./templates/content.opf')
opf = content_opf.getroot()

opf_prefixes = {'opf': 'http://www.idpf.org/2007/opf'}
manifest = opf.find('opf:manifest', opf_prefixes)
spine = opf.find('opf:spine', opf_prefixes)

def add_mani (x):
    """Declare the page ``Text/<x>.xhtml`` in the manifest and the spine.

    Args:
        x: File name of the page without extension; also used as item id.
    """
    item_attributes = {
        'id' : x,
        'href' : f'Text/{x}.xhtml',
        'media-type' : "application/xhtml+xml"
    }
    manifest.append(ET.Element('item', item_attributes))

    # The spine entry points back at the manifest item through its id.
    itemref_attributes = {'idref' : x}
    spine.append(ET.Element('itemref', itemref_attributes))

def autogenerate_manifest():
    """Fill the OPF manifest and spine, then write ``OEBPS/content.opf``.

    Pages are added in the order stored in ``routes``: each part divider is
    followed by its chapters. After that, every file found in the Styles,
    Fonts and Images folders of the output tree is added to the manifest.
    """
    for section in routes:
        if section == 'parts':
            for part in routes[section]:

                # Double outer quotes: reusing the same quote inside an
                # f-string is a syntax error before Python 3.12.
                add_mani(f"Parte{part.split(':')[0]}")

                for chapter in routes[section][part]:
                    # Drop the '.xhtml' suffix; add_mani() puts it back.
                    add_mani(chapter['path'][:-6])
        else:
            for parafernalia in routes[section]:
                add_mani(parafernalia['path'][:-6])

    # (folder, media type used by default, overrides keyed by extension).
    # The defaults are the values previously applied to every file; the
    # overrides keep other file types from being declared with a wrong type.
    asset_folders = (
        ('Styles', 'text/css', {}),
        ('Fonts', 'font/ttf', {
            '.otf': 'font/otf',
            '.woff': 'font/woff',
            '.woff2': 'font/woff2',
        }),
        ('Images', 'image/jpeg', {
            '.png': 'image/png',
            '.gif': 'image/gif',
            '.svg': 'image/svg+xml',
        }),
    )

    for folder, default_type, type_by_extension in asset_folders:
        # Sorted so the manifest comes out the same on every run.
        for file in sorted(os.listdir(f'{output_path}/OEBPS/{folder}')):
            extension = os.path.splitext(file)[1].lower()
            manifest.append(ET.Element('item', {
                'id' : file,
                'href' : f'{folder}/{file}',
                'media-type' : type_by_extension.get(extension, default_type)
            }))

    with open(f'{output_path}/OEBPS/content.opf', 'wb') as out:
        ET.indent(content_opf, '    ')
        content_opf.write(out, 'utf-8')


In [None]:
# Final location of the book: output_path without its trailing slash,
# plus '.epub'.
epub_path = f'{output_path[:-1]}.epub'

# Drop the previous build, if there is one (there is none on the first run).
if os.path.exists(epub_path):
    os.remove(epub_path)

# make_archive() returns the path of the archive it actually wrote. Moving
# that path avoids looking for a zip under a different name than the one
# that was just created.
archive_path = shutil.make_archive(f'./Soul_Noise', 'zip', output_path)
shutil.move(archive_path, epub_path)