# Translation

In [3]:
from pathlib import Path
import re

In [4]:
# Repository root, expressed relative to the notebook's working directory.
REPO = Path('..') / '..'
# Directory containing the manual pages that the cells below read and write.
MANUAL = REPO.joinpath('Manual')

## Gemini

In [3]:
import google.generativeai as genai

In [26]:
# Load the Gemini API key from a local file instead of embedding it in the notebook.
# strip() removes surrounding whitespace (e.g. a trailing newline in the file) so
# it is not passed along as part of the key.
genai.configure(api_key=Path('google_api_key.private.txt').read_text().strip())
# SECURITY: a literal API key used to sit here in a commented-out call. It has been
# removed; any key that was ever saved in the notebook should be revoked.

1430 4336
..\..\Manual\reference\evaluator\README.md


In [15]:
# Print the name of every model that lists 'generateContent' among its
# supported generation methods.
content_model_names = (
    candidate.name
    for candidate in genai.list_models()
    if 'generateContent' in candidate.supported_generation_methods
)
for model_name in content_model_names:
    print(model_name)

models/gemini-1.0-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-latest
models/gemini-1.0-pro-vision-latest
models/gemini-pro
models/gemini-pro-vision


In [16]:
# Upper bound on the token count of a single slice (enforced in split_by_max_tokens).
MAX_TOKEN = 2048
# Model handle shared by the Gemini cells of this notebook.
model = genai.GenerativeModel('gemini-1.0-pro-latest')

In [17]:
# Page used for the single-page run. The path is pasted including its leading
# 'Manual\' segment, which is stripped before joining it onto MANUAL.
relative_page = r'Manual\basic_concepts\the_lister\tabs\README.md'.removeprefix('Manual\\')
page = MANUAL / relative_page
content = page.read_text()

async def split_by_max_tokens(s: str) -> list[str]:
    """Split ``s`` into consecutive slices, cutting only at line boundaries.

    Lines are accumulated until the buffer has reached ``MAX_TOKEN * 3.5``
    characters; the length check happens *before* a line is added, so a slice
    can exceed that length by one line. Every finished slice is token-counted
    with ``model.count_tokens_async`` and its token and character counts are
    printed.

    Args:
        s: Text to split.

    Returns:
        The slices in order; concatenated they reproduce ``s``. Empty input
        yields an empty list (no request is sent to the model).

    Raises:
        AssertionError: If a slice does not count fewer than ``MAX_TOKEN`` tokens.
    """
    # Character budget per slice. 3.5 is presumably a rough characters-per-token
    # estimate -- TODO confirm.
    max_chars = MAX_TOKEN * 3.5
    slices = []

    async def append_slice(chunk: str):
        # Measure the chunk that was actually passed in, not the enclosing `buf`.
        tokens = (await model.count_tokens_async(chunk)).total_tokens
        print(tokens, len(chunk))
        # Raised explicitly rather than via `assert` so the guard is not removed
        # under `python -O`; callers catch AssertionError.
        if not tokens < MAX_TOKEN:
            raise AssertionError(f'slice has {tokens} tokens, limit is {MAX_TOKEN}')

        slices.append(chunk)

    buf = ''
    for line in s.splitlines(keepends=True):
        if len(buf) < max_chars:
            buf += line
        else:
            await append_slice(buf)
            buf = line
    # Flush the remainder; skipped for empty input so no empty slice is produced.
    if buf:
        await append_slice(buf)

    return slices

# Split the page loaded above; the token and character counts of each slice
# are printed while it is checked.
slices = await split_by_max_tokens(content)

1790 7331
1409 6021


In [None]:
# Translate the page slice by slice. The prompt is chosen by the first marker
# found in the slice; a slice containing none of them gets the plain prompt.
prompt_by_marker = (
    ('<table>', 'Translate the following English text into Chinese, except keyword wrapped by "**", preserving HTML tags: '),
    ('evaluator', 'Translate the following English text into Chinese, except code: '),
    ('label', 'Translate the following English text into Chinese, where "label" is "标记": '),
)
plain_prompt = 'Translate the following English text into Chinese: '

outputs = []
for chunk in slices:
    prompt = next(
        (candidate for marker, candidate in prompt_by_marker if marker in chunk),
        plain_prompt,
    )
    response = model.generate_content(prompt + chunk)
    output = response.text
    # Report the size of the translated slice (tokens, characters).
    print(model.count_tokens(output).total_tokens, len(output))
    outputs.append(output)

In [31]:
# Optional single leading space that is swallowed together with the matched
# term, unless that space directly follows a Markdown marker character
# ('#', '*' or '-'). The hyphen is placed last in the class so it is a literal:
# written as `[#-*]` it formed the range '#'..'*', which does not contain '-',
# and the space after a "-" list marker was eaten.
lspace = r'(?:(?<![#*-]) )?'
# Regex -> replacement pairs; process_translation applies them in this order.
terminology_fixes = {
    # Lister
    rf'{lspace}Lister ?(?:浏览器)?|列表器|目录查看器': '文件窗口',
    # Default Lister
    r'默认的?(?:文件列表器|列表器?|目录查看器)': '默认文件窗口',
    # file display
    r'文件[显展]示': '文件列表',
    # dual display
    r'双显示|双文件列表': '双栏',
    # location bar
    r'位置栏': '地址栏',
    # (folder) tab
    r'选项卡': '标签页',
    # Preferences
    r'“?首选项”?': '配置',
    # find-as-you-type
    rf'按您输入查找|按键即[查搜]|键入时查找|即时搜[素索]|[随即][打输][随即][找查]|逐字查找|{lspace}FAYT ?|{lspace}Find-As-You-Type ?': '即时查找',
    # view mode
    r'查看模式': '视图',
    # Power mode
    rf'“?强力”?模式|{lspace}\*?Power\*? 模式': '增强模式',
    # Tiles mode
    r'磁贴': '平铺',
    # SmartFavorites
    rf'{lspace}SmartFavorites ?': '智能收藏夹',
    # filter
    r'筛选': '过滤',
    # Evaluator
    rf'{lspace}Evaluator ?|评估器|计算器': '求值器',
    # evaluate
    r'评估': '求值',
    # Explorer
    rf'{lspace}Explorer ?': '资源管理器',
    r'外壳': ' Shell ',
    # archive
    r'(?<!已)(?:存档|归档)': '压缩包',
    # (USB) thumb drive
    r'拇指盘': 'U盘',
    # option
    r'选件': '选项',
    # ad-hoc
    r'即席': '临时',
    # single-click
    r'单机': '单击',
    # rename
    r'重新命名': '重命名',

    # Rewrite a heading at the very start of the text (optionally wrapped in
    # <hN> tags or '*', with ASCII or full-width '#') as a level-1 '# title'.
    r'\A(?:<h\d>)?\**[#＃]+ ?([^*\r\n<]*)\**(?:</h\d>)?': r'# \1',
    # Full-width '！' directly before a '[...](/Manual/' link becomes ASCII '!'.
    r'！(?=\[[^\]]*\]\(/Manual/)': '!',
    # Point '.md' references at the '.zh.md' files.
    r'\.md': '.zh.md',
}
# TODO: Manual\preferences\preferences_categories\miscellaneous\advanced_options.zh.md

# label
# preferences\preferences_categories\labels
# (?<!文件夹)标签(?!页)

In [None]:
def process_translation(text: str):
    """Return ``text`` after applying every entry of ``terminology_fixes``.

    The entries are applied one after another in the dict's iteration order,
    each as ``re.sub(pattern, replacement, ...)`` on the result of the
    previous substitution.
    """
    fixed = text
    for pattern in terminology_fixes:
        replacement = terminology_fixes[pattern]
        fixed = re.sub(pattern, replacement, fixed)
    return fixed

# Join the translated slices with newlines, apply the terminology fixes and
# save the text next to the source page with a '.zh.md' suffix.
result = process_translation('\n'.join(outputs))
zh_page = page.with_suffix('.zh.md')
zh_page.write_text(result)

In [35]:
import asyncio

async def process_page(page: Path):
    """Translate one manual page and write the result next to it as ``*.zh.md``.

    The page is split with ``split_by_max_tokens``, each slice is sent to
    ``model`` with a prompt chosen from the slice's content, and the joined
    output is post-processed with ``terminology_fixes`` before being written.

    Pages that sit directly in a folder named ``internal_commands`` (other
    than ``README.md``) are treated as command pages: the text of their
    first-line ``# `` heading is put back as the heading of the translation.

    Args:
        page: Path of the source ``.md`` file.

    Returns:
        None. If the page cannot be split (``AssertionError`` from
        ``split_by_max_tokens``) a message is printed and nothing is written.
    """
    content = page.read_text()
    command = None
    if page.parent.name == 'internal_commands' and page.name != 'README.md':
        # A command page without a leading '# ' heading used to crash here on
        # `None.group(1)`; it is now handled like an ordinary page.
        heading = re.match(r'\A# (.+)', content)
        if heading is not None:
            command = heading.group(1)
        else:
            print('No command heading', page)

    try:
        slices = await split_by_max_tokens(content)
    except AssertionError:
        print('Failed to split content')
        return

    outputs = []
    for chunk in slices:
        # The first matching rule decides the prompt.
        if command is not None or '<table>' in chunk:
            prompt = f'Translate the following English text into Chinese, except keyword wrapped by "**" and{" command" if command is not None else ""} code, preserving HTML tags: '
        elif 'evaluator' in chunk:
            prompt = 'Translate the following English text into Chinese, except code: '
        elif 'label' in chunk:
            prompt = 'Translate the following English text into Chinese, where "label" is "标记": '
        else:
            prompt = 'Translate the following English text into Chinese: '
        response = await model.generate_content_async(prompt + chunk)
        output = response.text
        # Report the size of the translated slice (tokens, characters).
        print((await model.count_tokens_async(output)).total_tokens, len(output))
        outputs.append(output)

    def process_translation(text: str):
        """Apply the terminology fixes, then restore the command heading if any."""
        for regex, term in terminology_fixes.items():
            text = re.sub(regex, term, text)
        if command is not None:
            # A callable replacement inserts the title verbatim; as a plain
            # string, backslashes in the title would be read as escapes or
            # group references by re.sub.
            text = re.sub(r'\A(?:# .+\n)?', lambda _: f'# {command}\n', text)
        return text

    result = '\n'.join(outputs)
    result = process_translation(result)
    print(page)
    page.with_suffix('.zh.md').write_text(result)

async def process():
    """Translate every manual page that has no ``.zh.md`` counterpart yet.

    Walks ``MANUAL`` recursively for ``*.md`` files, skips files that are
    themselves ``.zh.md`` or already have a ``.zh.md`` sibling, and awaits the
    ``process_page`` coroutines in batches of five.
    """
    # To restrict a run to the command reference, glob this directory instead:
    # (MANUAL / r'Manual\reference\command_reference\internal_commands'.removeprefix('Manual\\')).glob('**/*.md')
    batch = []
    for page in MANUAL.glob('**/*.md'):
        already_translated = page.name.endswith('.zh.md') or page.with_suffix('.zh.md').exists()
        if not already_translated:
            batch.append(process_page(page))
            if len(batch) >= 5:
                # Wait for the whole batch before scheduling more pages.
                await asyncio.gather(*batch)
                batch = []

    # Remaining pages that did not fill a complete batch.
    await asyncio.gather(*batch)

while True:
    try:
        await process()
        break
    except Exception as e:
        print(e)

87 268
229 861
420 3401
1545 7249
2255 7267
Failed to split content
1538 7175
1586 7230
1635 7190
1484 7331
1465 7188
89 196
..\..\Manual\reference\evaluator\swap.md
1624 7182
1663 7337
604 2625
274 610
..\..\Manual\reference\evaluator\truncate.md
400 2894
..\..\Manual\reference\evaluator\sysinfo.md
1531 4747
1534 5678
1609 4760
509 1096
1450 5241
1412 4252
2048 35559
1617 4136
572 1253
..\..\Manual\reference\command_reference\command_modifier_reference.md
149 504
92 295
218 868
113 421
1673 5026
95 229
..\..\Manual\reference\evaluator\ucase.md
126 272
..\..\Manual\reference\evaluator\typeof.md
108 234
..\..\Manual\reference\evaluator\val.md
217 675
..\..\Manual\reference\evaluator\urlencode.md
1651 3783
..\..\Manual\reference\icon_sets\dpi_aware_icon_sets.md
612 2462
208 834
404 1819
514 2057
554 2146
211 401
..\..\Manual\reference\icon_sets\icon_display_names.md
353 614
..\..\Manual\reference\icon_sets\icon_names.md
486 973
..\..\Manual\reference\icon_sets\icon_images.md
515 1173
..\

In [None]:
# Prompt experiment: terminology table sent through a chat session.
chat = model.start_chat(history=[])
chat_prompt = f'''According to the terminology table, translate the English text into Chinese:
Terminology:
English | Chinese
--- | ---
Lister | 文件窗口
File display | 文件列表
Find-as-you-type | 即时查找

English text: {q_text}
Chinese:'''
response = chat.send_message(chat_prompt)
print(response.text)

In [None]:
# Prompt experiment: terminology table first, instruction and text after it.
table_first_prompt = f'''English-to-Chinese terminology:
English | Chinese
--- | ---
Lister | 文件窗口
File display | 文件列表
Find-as-you-type | 即时查找

According to the above terminology table, translate the following English text into Chinese: {q_text}'''
table_first_reply = model.generate_content(table_first_prompt)
print(table_first_reply.text)

In [None]:
# Prompt experiment: instruction first, then the terminology table and the text.
instruction_first_prompt = f'''According to the terminology table, translate the English text into Chinese:
Terminology:
English | Chinese
--- | ---
Lister | 文件窗口
File display | 文件列表
Find-as-you-type | 即时查找

English text: {q_text}
Chinese:'''
instruction_first_reply = model.generate_content(instruction_first_prompt)
print(instruction_first_reply.text)

In [None]:
# Prompt experiment: one worked example that uses the preferred terms, followed
# by the text to translate.
one_shot_prompt = f'''This is an English to Chinese translation, please provide
the Chinese translation for these sentences:
English: Directory Opus has Lister, file display and find-as-you-type features. Chinese: Directory Opus 拥有文件窗口、文件列表和即时查找功能。
Please provide the translation for the following sentence.
Do not provide any explanations or text apart from the
translation.
English: {q_text}
Chinese: '''
one_shot_reply = model.generate_content(one_shot_prompt)
print(one_shot_reply.text)

### SUMMARY

In [5]:
# Table of contents of the manual; its links are rewritten by repl_title below.
SUMMARY = MANUAL.joinpath('SUMMARY.md')

def repl_title(m: re.Match[str]) -> str:
    """Rewrite one Markdown link so it points at the translated page.

    Args:
        m: Match whose group 1 is the link text and group 2 the link target.

    Returns:
        ``[title](target)`` with every ``.md`` in the target replaced by
        ``.zh.md``. The title is the first-line ``# `` heading of the
        translated page, or the original link text when that page has no such
        heading. If the translated page does not exist, the matched text is
        returned unchanged. Both fallbacks print a short notice.
    """
    link_text, target = m.group(1), m.group(2)
    page = MANUAL / target.removeprefix('/Manual/')
    zh_page = page.with_suffix('.zh.md')

    try:
        zh_text = zh_page.read_text()
    except FileNotFoundError:
        print('FileNotFoundError', zh_page)
        return m.group(0)

    heading = re.match(r'\A# (.+)', zh_text)
    if heading is None:
        # Fall back to the original link text when there is no heading.
        print('No title', page)
        title = link_text
    else:
        title = heading.group(1)

    zh_target = target.replace(".md", ".zh.md")
    return f'[{title}]({zh_target})'

# Build the Chinese table of contents by rewriting every Markdown link of the
# manual's SUMMARY.md with repl_title, and save it as SUMMARY.zh.md.
# NOTE(review): these two calls use the platform default encoding, whereas the
# repository-level file below is read and written as UTF-8 -- confirm.
zh_toc = re.sub(r'\[([^\]]+)\]\(([^\)]+)\)', repl_title, SUMMARY.read_text())
SUMMARY.with_suffix('.zh.md').write_text(zh_toc)


# Splice the Chinese TOC into the repository-level SUMMARY.md, replacing
# everything from the '## 官方手册' heading up to (not including) '## Manual'.
SUMMARY = (REPO / 'SUMMARY.md')
repo_summary = SUMMARY.read_text('utf-8')
repo_summary = re.sub(
    # Raw string: '\S' and '\s' are invalid escapes in a normal string literal.
    r'## 官方手册\n[\S\s]*?(?=## Manual)',
    # Callable replacement: inserts zh_toc verbatim, so backslashes in the TOC
    # are not interpreted as escapes or group references.
    lambda _: '## 官方手册\n' + zh_toc + '\n',
    repo_summary,
)
SUMMARY.write_text(repo_summary, encoding='utf-8')

FileNotFoundError ..\..\Manual\file_operations\copying_moving_and_deleting_files\README\deleting_files\README.zh.md
No title ..\..\Manual\ftp\ftp_address_book\display_page.md
No title ..\..\Manual\preferences\preferences_categories\viewer\standalone_viewer\options.md
No title ..\..\Manual\customize\the_customize_dialog\user_commands.md
No title ..\..\Manual\customize\creating_your_own_buttons\editing_the_toolbar\README.md
No title ..\..\Manual\customize\creating_your_own_buttons\editing_the_toolbar\field_buttons\README.md
No title ..\..\Manual\file_types\filetype_editor\actions.md
No title ..\..\Manual\file_types\filetype_editor\info_tip.md
No title ..\..\Manual\scripting\rename_scripts\custom_fields_in_the_rename_dialog.md
No title ..\..\Manual\scripting\script_management\README.md
No title ..\..\Manual\scripting\script_editor\editors\dialog_editor\adding_dialog_controls.md
No title ..\..\Manual\evaluator\applicable_contexts\README.md
No title ..\..\Manual\evaluator\applicable_context

151387

## Translators

In [None]:
# translators ~= 5.8
import translators as ts

# Run the library's pre-acceleration / speed-test step once; the return value
# is bound to `_` and not used afterwards.
# NOTE(review): presumably this warms up the translation backends -- confirm
# against the translators documentation.
_ = ts.preaccelerate_and_speedtest()

In [None]:
# Bing Chat: good
# Baidu: good, but bad at Markdown
# qqTranSmart: not good, and not good at Markdown
# Google: bad

# Sample Markdown page (heading, paragraph, table and a /Manual/ link) used as
# the input text by the prompt experiments in this notebook. Those cells
# reference `q_text`, so this cell has to be run before them.
q_text = '''# Argument Types

The following qualifiers are used in the internal command templates to indicate the type of each argument. Remember that you **never** type the qualifiers when using arguments - they are merely a clue as to the argument type.

| Qualifier | Type | Description |
| --- | --- | --- |
| /S | Switch | Indicates a switch argument (a Boolean option that can either be on or off). |
| /K | Keyword | Indicates a value argument (a value must be provided following the argument keyword). |
| /O | Optional | Indicates an optional argument (can be used either by itself as a switch, or with a following value). |
| /N | Numeric | The value of the argument must be a number. |
| /M | Multiple | The argument can accept multiple values (e.g. a list of files). |
| /R | Raw | The argument accepts a "raw" value. For these arguments, the rest of the command line following the argument name is taken as the value.  <br />Arguments of this type are the only ones that do not require quotes around values which contain spaces. |

See the [Internal Command Arguments](/Manual/customize/creating_your_own_buttons/internal_command_arguments.md) page for a full description of the various argument types.
'''
# print(ts.translate_text(q_text, translator='google', from_language='en', to_language='zh'))

## g4f

In [None]:
import g4f
import nest_asyncio
# NOTE(review): nest_asyncio.apply() presumably allows nested event-loop use
# inside the notebook's already running loop -- confirm.
nest_asyncio.apply()

# Turn on g4f's debug logging.
g4f.debug.logging = True

# Prompt experiment: Chinese-language instruction prepended to the sample text.
style_prompt = '''以简洁易懂的风格将以下英文翻译为中文，保留 Markdown 格式：''' + q_text
response = g4f.ChatCompletion.create(
    model=g4f.models.gpt_4,
    provider=g4f.Provider.Bing,
    messages=[dict(role="user", content=style_prompt)],
)

print(response)

In [None]:
# Prompt experiment: terminology-table prompt sent through g4f.
g4f_terminology_prompt = f'''According to the terminology table, translate the English text into Chinese:
Terminology:
English | Chinese
--- | ---
Lister | 文件窗口
File display | 文件列表
Find-as-you-type | 即时查找

English text: {q_text}
Chinese:'''
response = g4f.ChatCompletion.create(
    model=g4f.models.gpt_4,
    provider=g4f.Provider.Bing,
    messages=[dict(role="user", content=g4f_terminology_prompt)],
)

print(response)