In [1]:
from dataclasses import dataclass
import os
from pathlib import Path
import re
import subprocess

In [5]:
# Directory holding the DokuWiki material that this notebook converts
# (presumably a local copy of docs.dopus.com -- confirm).
DOCS = Path('private/docs.dopus.com')
# DokuWiki page sources; walked recursively to build the page index.
PAGES = DOCS / 'pages'

# Root of the output repository, relative to the notebook; SUMMARY.md is read
# from and written back to this directory.
REPO = Path('..')
# Destination directory of the generated Markdown (*.md) pages.
MANUAL = REPO / 'Manual'

In [3]:
# Fix syntax errors
def file_replace(path: Path, old, new):
    """Replace every occurrence of ``old`` with ``new`` in a UTF-8 text file, in place.

    The index of the first occurrence of ``old`` is printed before replacing
    (``-1`` when ``old`` is absent, in which case the text is written back
    unchanged), so each call leaves a trace in the cell output.

    Args:
        path: File to patch.
        old: Text to search for.
        new: Replacement text.
    """
    text = path.read_text(encoding='utf-8')
    print(text.find(old))
    # Write with the same encoding that was used for reading. Without an
    # explicit encoding, write_text() falls back to the platform default
    # (e.g. cp1252 on Windows) and would corrupt non-ASCII characters.
    path.write_text(text.replace(old, new), encoding='utf-8')

# One-off fixes to the DokuWiki sources, applied before conversion.
# Paths are written with forward slashes so that they resolve on every
# platform (pathlib accepts them on Windows as well). Each file_replace()
# call prints the index of the match; -1 means the text was not found,
# e.g. because the fix had already been applied by an earlier run.

# Syntax errors: table rows that are missing their closing '|'.
file_replace(
    PAGES / 'file_operations/copying_moving_and_deleting_files/copying_using_the_toolbar_buttons.txt',
    r'|{{:media:move_menu.png?nolink|}} | |The drop-down menu attached to the **Move** button contains the following commands:\\ <WRAP><wrap topspace></wrap>' + '\n',
    r'|{{:media:move_menu.png?nolink|}} | |The drop-down menu attached to the **Move** button contains the following commands:\\ <WRAP><wrap topspace></wrap>|' + '\n'
)

file_replace(
    PAGES / 'reference/command_reference/command_modifier_reference.txt',
    r'|**@nolocalizefiles**|//do not automatically localize (download) remote files//' + '\n',
    r'|**@nolocalizefiles**|//do not automatically localize (download) remote files//|' + '\n'
)

# Link errors
file_replace(
    PAGES / 'toc.txt',
    r'[[:basic_concepts:the_lister:navigation:paired folders]]',
    r'[[:basic_concepts:the_lister:navigation:paired_folders]]'
)
file_replace(
    PAGES / 'file_operations/creating_archives/adding_to_archives.txt',
    r'[[:pr[[erences:',
    r'[[:preferences:'
)
file_replace(
    PAGES / 'file_operations/creating_archives/adding_to_archives.txt',
    r'[[:fi[[_operations:',
    r'[[:file_operations:'
)

# Alternative that is not used: remove the two dangling entries from the toc
# instead of creating placeholder pages for them.
# file_replace(
#     PAGES / 'toc.txt',
#     r'      * [[:basic_concepts:selecting_files:selecting_cells|Selecting Cells]]' + '\n',
#     r''
# )
# file_replace(
#     PAGES / 'toc.txt',
#     r'    * [[:scripting:script_log|Script Log]]' + '\n',
#     r''
# )

# Create placeholder pages for topics that are linked to but do not exist.
# The files are written without a '.txt' suffix; the page index only strips
# the suffix when it is present, so they are still indexed under these paths.
for p in ('scripting/script_log', 'basic_concepts/selecting_files/selecting_cells'):
    (PAGES / p).write_text(
        '# This topic does not exist yet\n'
        "You've followed a link to a topic that doesn't exist yet.",
        encoding='utf-8',
    )

# test **first** page [second](/blah/second)
# Remove start.txt from the sources (the line above looks like its content,
# i.e. a test page -- confirm); a missing file is not an error.
(PAGES / 'start.txt').unlink(missing_ok=True)

-1
-1
-1
-1
-1


In [11]:
@dataclass
class Page:
    """One DokuWiki source page plus the data needed to emit it as Markdown."""
    # Display title: taken from the '~~Title: ...~~' marker on the first line
    # of the source file, or the file name when there is no marker.
    title: str
    # Source file name without the '.txt' suffix.
    old_name: str
    # Output name that replaces old_name ('SUMMARY' for the toc, a
    # directory-index name for a page that shares its name with a sibling
    # directory); None means the page keeps old_name.
    new_name: str
    # Location of the source file on disk.
    file_path: Path
    # Converted Markdown; None until the conversion cell has filled it in.
    content: str = None

# Index of every source page, keyed by page path: the POSIX-style path of the
# file relative to PAGES with the '.txt' suffix removed.
pages = {}

# Directory name / page name -> set of page paths carrying that name. Used to
# resolve '~name' links, which do not spell out the full path.
dirname_paths = {}
pagename_paths = {}

for root, dirs, files in os.walk(PAGES):
    # 'dir_name' rather than 'dir', so the builtin dir() is not shadowed
    # for the rest of the notebook session.
    for dir_name in dirs:
        file_path = Path(root, dir_name)
        page_path = file_path.relative_to(PAGES).as_posix().removesuffix('.txt')
        dirname_paths.setdefault(dir_name, set()).add(page_path)

    for file in files:
        file_path = Path(root, file)
        page_path = file_path.relative_to(PAGES).as_posix().removesuffix('.txt')

        name = file.removesuffix('.txt')
        pagename_paths.setdefault(name, set()).add(page_path)

        # The title marker, when present, is on the first line of the page.
        with open(file_path, mode='r', encoding='utf-8') as f:
            line = f.readline()
        m = re.search(r'~~Title:\s*(.*?)\s*~~', line)
        if m:
            title = m.group(1)
        else:
            # No marker: fall back to the file name and report the page so
            # that it can be checked by hand.
            title = name
            print(page_path, line)

        new_name = None
        if name == 'toc':
            # The table of contents becomes the SUMMARY file.
            new_name = 'SUMMARY'
        elif name in dirs:
            # A page that shares its name with a sibling directory is that
            # directory's index page, so it moves inside it as README.
            # (Was misspelled 'RAEDME', which produced RAEDME.md files.)
            new_name = f'{name}/README'

        pages[page_path] = Page(
            title=title,
            old_name=name,
            new_name=new_name,
            file_path=file_path,
        )

deleted_pages Put deleted / moved pages here, then at the end we need to check for broken links.

sidebar {{navi>toc}}

toc   * [[:copyrights|Copyrights]]

basic_concepts/selecting_files/selecting_cells # This topic does not exist yet

evaluator/applicable_contexts/functions/standard_variables A number of variables are available to the evaluator in this context. Note that some are only applicable to toolbars in the [[:additional_functionality:viewing_images|standalone image viewer]], and some only apply to Lister toolbars.

scripting/script_log # This topic does not exist yet



In [14]:
def dokuwiki_to_gfm(file_path) -> str:
    """Convert one DokuWiki file to GitHub-Flavored Markdown by running pandoc.

    Args:
        file_path: Path of the DokuWiki source file (anything ``str()`` turns
            into a usable path).

    Returns:
        pandoc's standard output decoded as UTF-8, with ``\\r\\n`` line
        endings normalised to ``\\n``.

    Raises:
        subprocess.CalledProcessError: pandoc exited with a non-zero status.
    """
    command = [
        'pandoc',
        '--from=dokuwiki',
        '--to=gfm',
        '--wrap=none',
        str(file_path),
    ]
    raw_output = subprocess.check_output(command)
    markdown = raw_output.decode('utf-8')
    return markdown.replace('\r\n', '\n')

def multiline_to_singleline(s: str) -> str:
    """Collapse ``s`` onto one line by turning every newline into ``<br />``.

    Lets multi-line text sit inside a single Markdown table cell.
    """
    lines = s.split('\n')
    return '<br />'.join(lines)

def gfm_postprocess(s: str, page_path: str) -> str:
    """Clean up pandoc's GFM output for one page and rewrite its links.

    Handles the DokuWiki constructs that pandoc passes through as escaped
    text (title marker, ``<pageheader/>``, ``<kbd>``, ``<commandtable>``),
    converts ``?nolink`` images to Markdown images under ``/Manual/images``
    and resolves wiki links to ``.md`` paths under ``/Manual``.

    Args:
        s: Markdown produced by ``dokuwiki_to_gfm`` for the page.
        page_path: Key of the page in the module-level ``pages`` index. Used
            for the heading title and to resolve relative and ``~`` links.

    Returns:
        The post-processed Markdown.

    Raises:
        KeyError: ``page_path`` or the target of an absolute or ``~`` link is
            not in the page index.
        LookupError: an ambiguous ``~`` link has no candidate below the
            current page.
        AssertionError: a relative link cannot be resolved to a known page.
    """
    page = pages[page_path]

    # The title marker and the <pageheader/> tag arrive as escaped text:
    #   \~~Title: Introduction \~~ \<pageheader/\>
    #   \~~Title: Licenses\~~ \<pageheader/\>
    #   additional_functionality\viewing_images\viewer_keys_and_toolbar.txt
    # Drop the marker and turn the tag into a level-1 heading.
    s = re.sub(r'\\~~Title:\s*.*?\s*\\~~\s*', '', s)
    # A callable replacement inserts the title literally; as a template
    # string, a backslash in the title would be read as a regex escape.
    s = re.sub(r'\\<pageheader/\\>', lambda _: f'# {page.title}', s)

    # \<kbd\>F1\</kbd\>
    s = re.sub(r'\\<kbd\\>(.*?)\\</kbd\\>', r'<kbd>\1</kbd>', s)

    # TODO: https://github.com/jgm/pandoc/issues/9154

    # TODO: What does '(#)' mean?
    s = s.replace('(#)', '')

    # <commandtable> blocks arrive as escaped text with cells separated by
    # '\$\$', for example:
    #   \<commandtable columns="4" id="cmdtable_1"\> \$\$ Argument \$\$ Type \$\$ Possible values \$\$ Description \$\$ (#)*(no argument)* \$\$ - \$\$ - \$\$ Calculate the total size of all selected folders. If there are no folders currently selected, all folders in the current file display will have their sizes calculated.
    #
    #   \`\`GetSizes\`\` \</commandtable\>
    # They are rebuilt as GFM tables with 'columns' cells per row.
    def process_cell(cell: str) -> str:
        cell = cell.strip()

        # \`\`GetSizes\`\`
        cell = re.sub(r'\\`\\`(.*?)\\`\\`', r'*Example:* `\1`', cell)

        # A table cell has to stay on one line.
        return multiline_to_singleline(cell)

    def repl(m: re.Match[str]) -> str:
        columns = int(m.group(1))
        cells = m.group(2).split(r'\$\$')
        table = '\n\n'
        for row, start in enumerate(range(0, len(cells), columns)):
            row_cells = [process_cell(cell) for cell in cells[start:start + columns]]
            table += '| ' + ' | '.join(row_cells) + ' |\n'
            if row == 0:
                # The first row is the header; GFM needs a delimiter row after it.
                table += '| ' + ' | '.join(['---'] * len(row_cells)) + ' |\n'
        return table
    s = re.sub(r'\\<commandtable columns="([^"]+)" id="[^"]+"\\> \\\$\\\$([\S\s]*?) \\</commandtable\\>', repl, s)

    # {{:media:zip_extensions.png?nolink|}}
    # <img src="/media/zip_extensions.png" data-query="?nolink" />
    # {{:release_history:meta_decgps.png?nolink|}}
    # <img src="/release_history/viewer_selorig.gif" data-query="?nolink" />
    s = re.sub(r'<img src="([^"]+)" data-query="\?nolink" />', r'![](/Manual/images\1)', s)

    # <img src="/media/13/prefs_images.png" data-query="?nolink" alt="prefs_images.png" />
    s = re.sub(r'<img src="([^"]+)" data-query="\?nolink" alt="([^"]+)" />', r'![\2](/Manual/images\1)', s)

    def process_link(m: re.Match[str]) -> str:
        title = m.group(1)
        url = m.group(2)
        anchor = m.group(3)

        # [[mailto:info@mediaarea.net|MediaArea.net SARL]]
        # [MediaArea.net SARL](/mailto/info@mediaarea.net)
        if url.startswith('/mailto/'):
            url = url.replace('/mailto/', 'mailto:')
        # {{anchor:convertpng:}}
        # ![](/anchor/convertpng/)
        elif url.startswith('/anchor/'):
            return f'<a id="{re.fullmatch(r"/anchor/([^/]+)/", url).group(1)}"></a>'
        elif url.find(r'//%%') != -1:
            # TODO: https://github.com/jgm/pandoc/issues/9153
            title = title.replace(r'%%', '')
            url = url.replace(r'%%', '')
        elif url.find('://') == -1 and not url.startswith('/Manual/'):
            # Internal wiki link: resolve it to a page and build the .md URL.

            # [[::scripting:script_editor]]
            # [script_editor](//scripting/script_editor)
            url = url.replace('//', '/')

            # [[:basic_concepts:the_lister:navigation:up,_forwards,_back|Up, Forwards, Back]]
            # [Up, Forwards, Back](/Manual/basic_concepts/the_lister/navigation/up,_forwards,_back.md)
            url = url.replace(',', '')

            def resolve_path(candidates: set[str]) -> str:
                # A unique name needs no disambiguation; otherwise pick the
                # candidate located below the current page.
                if len(candidates) == 1:
                    return next(iter(candidates))
                for path in candidates:
                    if path.startswith(page_path):
                        return path
                # Previously this printed the details and returned None,
                # which then failed with an opaque TypeError in the caller.
                raise LookupError(
                    f'ambiguous link {m.group(0)!r} on page {page_path!r}: '
                    f'candidates {sorted(candidates)}'
                )

            # [[~copying_moving_and_deleting_files|Copying, Moving and Deleting Files]]
            # [Copying, Moving and Deleting Files](~copying_moving_and_deleting_files)
            if url.startswith('~'):
                url = '/' + resolve_path(pagename_paths[url[1:].lower()])

            # [Actions](/~filetype_editor/actions)
            if url.startswith('/~'):
                # Separate name, so the link match 'm' stays available for
                # the diagnostics below.
                dir_match = re.match(r'/~([^/]+)/(.*)', url)
                url = '/' + resolve_path(dirname_paths[dir_match.group(1).lower()]) + '/' + dir_match.group(2)

            if url.startswith('/'):
                # /FTP
                link_page = pages[url[1:].lower()]
            else:
                def resolve_relative_url(url):
                    # absolute() will not resolve '..'
                    absolute_page_path = ((PAGES / page_path).parent / Path(url)).resolve()
                    link_page_path = absolute_page_path.relative_to(PAGES.absolute()).as_posix()
                    return pages.get(link_page_path.lower())
                link_page = resolve_relative_url(url)
                if not link_page:
                    # TODO: Typos. Some pages link to a top-level page without
                    # the leading ':' that makes the link absolute, so retry
                    # the URL as an absolute page path. The sources could be
                    # fixed instead:
                    #   additional_functionality\cli.txt
                    #     [[scripting|scripts]] -> [[:scripting|scripts]]
                    #   file_operations\renaming_files\advanced_rename\rename_scripts.txt
                    #     [[scripting|Scripting]] -> [[:scripting|Scripting]]
                    #   [[customize|Customize]] -> [[:customize|Customize]] in
                    #     additional_functionality\making_links_and_junctions.txt
                    #     additional_functionality\system-wide_hotkeys.txt
                    #     additional_functionality\floating_toolbars\controlling_floating_toolbars.txt
                    #     file_operations\creating_archives\zip_files\read-only_mode.txt
                    #     release_history\opus13_detailed\default_toolbars.txt
                    #     additional_functionality\playing_sounds.txt
                    link_page = pages.get(url.lower())
                if not link_page:
                    # Some relative links are one '..' short, so retry one
                    # level further up. Source fix that would make this
                    # unnecessary:
                    #   preferences\preferences_categories\file_display_modes\power_mode\appearance.txt
                    #     [[..:display:display_options|Display / Options]]
                    #     -> [[..:..:display:display_options|Display / Options]]
                    link_page = resolve_relative_url('../' + url)
                assert link_page, (m.group(0), page_path)

            # [[~end_user]]
            # [~end_user](~end_user)
            if title.startswith('~'):
                title = link_page.title

            # [[:preferences:preferences_categories:folders:folder_sizes]]
            # [folder_sizes](/preferences/preferences_categories/folders/folder_sizes)
            # [[columns|columns]]
            # [columns](columns)
            # TODO: 'Whether the title is specified' is lost.
            if '_' in title:
                title = link_page.title

            if link_page.new_name:
                # Rename only the final path component. A plain str.replace
                # would also rewrite an earlier component that merely
                # contains the same text.
                head, sep, tail = url.rpartition('/')
                if tail == link_page.old_name:
                    url = head + sep + link_page.new_name
                else:
                    url = url.replace(link_page.old_name, link_page.new_name)

            url += '.md'

            # [[:additional_functionality:exporting_to_usb|run from a USB drive]]
            # [run from a USB drive](/additional_functionality/exporting_to_usb)
            if url.startswith('/'):
                url = '/Manual' + url

        return f'[{title}]({url}{anchor if anchor else ""})'
    # Inline links only: '[text](url#anchor)' not preceded by '!' (an image)
    # or by a backtick.
    s = re.sub(r'(?<![!`])\[([^\]]+)\]\(([^)#]+)(#[^\)]+)?\)', process_link, s)

    s = s.replace('<!-- -->', '')

    # Collapse the runs of blank lines left behind by the removals above.
    s = re.sub(r'\n{3,}', '\n\n', s)

    return s

# Smoke test: convert a single page and print the result for inspection.
# The source path is derived from PAGES and the page key, so the two cannot
# drift apart and the call does not depend on Windows path separators.
sample_page_path = 'reference/command_reference/internal_commands/getsizes'
print(gfm_postprocess(
    dokuwiki_to_gfm(PAGES / f'{sample_page_path}.txt'),
    sample_page_path
))

# GetSizes

The **GetSizes** internal command can be used to:

- Calculate and display the total sizes of selected folders
- Calculate the total sizes of all folders in the current file display
- Calculate the MD5 checksum for all selected files

**Command Arguments:** 

| Argument | Type | Possible values | Description |
| --- | --- | --- | --- |
| *(no argument)* | - | - | Calculate the total size of all selected folders. If there are no folders currently selected, all folders in the current file display will have their sizes calculated.<br /><br />*Example:* `GetSizes` |
| EVERYTHING | /O | *(no value)* | Calculates the sizes of selected folders using [Everything](/Manual/additional_functionality/everything_integration.md) if possible. If Everything is not installed, or the current folder is not indexed, the sizes will be calculated manually (by recursively scanning the folder contents).<br /><br />*Example:* `GetSizes EVERYTHING` |
|  |  | **no** | If the option is set on the [Fold

In [15]:
# Discard any previously converted Markdown. The conversion loop skips pages
# whose content is already set, so this forces every page to be regenerated.
for page in pages.values():
    page.content = None

In [16]:
# Convert every page that has no cached result yet, so the cell can be
# re-run after an interruption without repeating finished pages.
for page_path, page in pages.items():
    # None is the "not converted" sentinel. A truthiness test would treat a
    # page that legitimately converts to an empty string as unconverted and
    # run pandoc on it again every time.
    if page.content is not None:
        continue

    page.content = gfm_postprocess(dokuwiki_to_gfm(page.file_path), page_path)

In [17]:
# Remove every Markdown file left over from a previous run.
for path in MANUAL.glob('**/*.md'):
    path.unlink()

# Write each converted page to its place under MANUAL. The page keeps its
# source directory; the file name is new_name when set, otherwise old_name.
for page_path, page in pages.items():
    page_name = page.new_name or page.old_name
    target_path = (MANUAL / page_path).parent.joinpath(page_name).with_suffix('.md')

    # new_name may contain a directory part, so create the parents as needed.
    target_path.parent.mkdir(parents=True, exist_ok=True)
    target_path.write_text(page.content, encoding='utf-8')

In [18]:
# Splice the converted table of contents into the repository's SUMMARY.md,
# replacing everything from the '## Manual' heading to the end of the file.
toc = pages['toc'].content
SUMMARY = (REPO / 'SUMMARY.md')
summary_text = SUMMARY.read_text('utf-8')
# The pattern is a raw string, so '\S' and '\s' are not invalid string
# escapes. The replacement is a callable, so the toc is inserted literally;
# as a template string, backslashes in the toc would be read as regex
# escapes or group references.
summary_text = re.sub(r'## Manual\n[\S\s]*', lambda _: '## Manual\n' + toc, summary_text)
SUMMARY.write_text(summary_text, encoding='utf-8')

77028