# 将数据从SQLite数据库导出为XLIFF翻译文件

修改路径后依次运行下列代码块即可。适用于Linux环境，Windows环境下未经测试。

In [1]:
from lxml import etree
from sqlalchemy import create_engine, text

# Root of the translation working tree; export_xliff writes its files under
# WORKDIR/project/lang/en and WORKDIR/project/lang/zh_Hans.
WORKDIR = "/mnt/f/OneDrive/cs/mtgdatabase/weblate_database/mtgzh"
# Directory that contains zhs.sqlite, the database opened by read_table.
SQLDIR = "/mnt/f/OneDrive/cs/mtgdatabase/"

# Query exported with the "Oracle" file suffix: one row per atomicCards entry
# matched to a printing in `cards` by name (face name preferred), set code and
# collector number, joined with its identifiers and its (parent) set.
# read_table consumes the result purely by column position, so the column
# order must not change.
# NOTE(review): `atomicCards.*` expands to however many columns that table
# has; read_table expects parent_code at index 16 and releaseDate at index 17,
# which presumably means atomicCards has 15 columns - confirm against the
# schema. The two trailing NULLs land where read_table reads flavorText and
# zhsFlavorText.
ATOMIC_SQL_TEXT = """SELECT 
    scryfallId, 
    atomicCards.*,
    COALESCE(sets.parentCode, cards.setCode) AS parent_code,
    COALESCE(parentSets.releaseDate, sets.releaseDate) AS releaseDate,
    NULL,
    NULL
FROM 
    atomicCards 
INNER JOIN 
    cards 
ON 
    atomicCards.name = COALESCE(cards.faceName, cards.name) 
    AND atomicCards.setCode = cards.setCode 
    AND atomicCards.number = cards.number 
INNER JOIN 
    cardIdentifiers 
ON 
    cardIdentifiers.uuid = cards.uuid
INNER JOIN
    sets
ON 
    sets.code = cards.setCode
LEFT JOIN 
    sets parentSets 
ON 
    parentSets.code = sets.parentCode"""

# Query exported with the "Printed" file suffix: one row per entry in `cards`,
# optionally joined with its `zhs` row (LEFT JOIN, so the zhs columns may be
# NULL), its identifiers and its (parent) set.
# read_table consumes the result purely by column position:
#   0 scryfallId, 1 name, 3 setCode, 4 number, 5 text (originalText preferred),
#   7 zhsName, 8 zhsText, 15 extra, 16 parent_code, 17 releaseDate,
#   18 flavorText, 19 zhsFlavorText.
# The NULL columns (2, 6, 9-14) are placeholders that keep these positions
# aligned with what read_table reads.
ALL_SQL_TEXT = """SELECT 
    scryfallId, 
    COALESCE(c.faceName, c.name) AS name,
    NULL,
    c.setCode,
    c.number,
    COALESCE(c.originalText, c.text) AS text,
    NULL,
    COALESCE(z.faceName, z.name) AS zhsName,
    z.text AS zhsText,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    z.extra,
    COALESCE(sets.parentCode, c.setCode) AS parent_code,
    COALESCE(parentSets.releaseDate, sets.releaseDate) AS releaseDate,
    c.flavorText,
    z.flavorText AS zhsFlavorText
FROM 
    cards c 
LEFT JOIN
    zhs z ON z.uuid = c.uuid
INNER JOIN 
    cardIdentifiers ON cardIdentifiers.uuid = c.uuid
INNER JOIN
    sets ON sets.code = c.setCode
LEFT JOIN 
    sets parentSets ON parentSets.code = sets.parentCode"""

def escape_xml(s, without_quote=False):
    '''
    Escape XML special characters in ``s`` and return the new string.

    ``&``, ``<`` and ``>`` are always replaced by their entities. Unless
    ``without_quote`` is true, double quotes additionally become ``&quot;``
    and every newline becomes the two-character sequence backslash + ``n``.

    NOTE(review): callers in this file assign the result to lxml ``.text``;
    lxml escapes text again when serializing, so the entities produced here
    would end up double-escaped in the written file - confirm this is
    intended.
    '''
    escaped = s
    # "&" has to be handled first so the entities added below stay intact.
    for raw, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")):
        escaped = escaped.replace(raw, entity)
    if without_quote:
        return escaped
    return escaped.replace("\"", "&quot;").replace("\n", "\\n")

def _mask_card_name(body, card_name):
    '''
    Strip ``body`` and replace every occurrence of ``card_name`` in it with
    the literal placeholder ``CARDNAME``. Returns "" for an empty/None body.
    '''
    if not body:
        return ""
    body = body.strip()
    # Skip the replacement when there is no name: str.replace(None, ...)
    # raises TypeError, and str.replace("", ...) would insert the placeholder
    # between every character of the text.
    if card_name:
        body = body.replace(card_name, "CARDNAME")
    return body


def read_table(sql_text):
    '''
    Run ``sql_text`` against SQLDIR/zhs.sqlite and return the rows as a list
    of dicts.

    Columns are read by position (indices 0-19), so the query must produce
    them in the order this function expects. Name and text fields are passed
    through escape_xml; in text fields the card's own name is replaced by
    ``CARDNAME``. ``sortCode`` is built as "<column 17>-<column 16>".
    '''
    engine = create_engine(f'sqlite:////{SQLDIR}/zhs.sqlite')
    data = []
    try:
        with engine.connect() as connection:
            for row in connection.execute(text(sql_text)):
                data.append({
                    "scryfallId": row[0],
                    "name": escape_xml(row[1]),
                    "setCode": row[3],
                    "number": row[4],
                    "text": escape_xml(_mask_card_name(row[5], row[1])),
                    "zhsName": escape_xml(row[7] if row[7] else ""),
                    "zhsText": escape_xml(_mask_card_name(row[8], row[7])),
                    "zhsUpdatedAt": row[9],
                    "zhsUpdatedFrom": row[10],
                    "translatedName": escape_xml(row[11] if row[11] else ""),
                    "translatedText": escape_xml(_mask_card_name(row[12], row[11])),
                    "translatedAt": row[13],
                    "translatedFrom": row[14],
                    "extra": row[15],
                    "sortCode": f"{row[17]}-{row[16]}",
                    "flavorText": escape_xml(row[18] if row[18] else ""),
                    "zhsFlavorText": escape_xml(row[19] if row[19] else ""),
                })
    finally:
        # A fresh engine is created on every call; release its connection
        # pool so the SQLite file handle is not kept open afterwards.
        engine.dispose()
    return data

def sort_data_by_set(data):
    '''
    Group the row dicts in ``data`` by their ``sortCode`` value.

    Returns a dict mapping each sortCode to the list of rows carrying it.
    Keys appear in first-seen order and rows keep their input order.
    '''
    groups = {}
    for entry in data:
        groups.setdefault(entry['sortCode'], []).append(entry)
    return groups


def create_xliff_tree():
    '''
    Build an empty XLIFF skeleton: <xliff> containing <file> containing <body>.

    Returns a ``(root, body)`` tuple: the <xliff> element and the <body>
    element that trans-units are appended to.
    '''
    xliff_root = etree.Element("xliff", xmlns="urn:oasis:names:tc:xliff:document:1.1")
    body_elem = etree.SubElement(etree.SubElement(xliff_root, "file"), "body")
    return xliff_root, body_elem

def _append_trans_unit(body, data, kind, source, target, state, note=""):
    '''
    Append one <trans-unit> (with <source>, <target> and, when ``note`` is
    non-empty, a <note from="developer">) to ``body`` and return it.

    ``kind`` is the last segment of the unit's ``resname``.
    '''
    unit = etree.SubElement(
        body,
        "trans-unit",
        id=data['scryfallId'],
        resname=f"{data['setCode']}·{data['number']} | {data['name']} | {kind}",
    )
    etree.SubElement(unit, "source").text = source
    etree.SubElement(unit, "target", state=state).text = target
    if note:
        # "from" is a Python keyword, so it cannot be a keyword argument; it
        # must go through attrib. (nsmap would instead declare a namespace
        # prefix, i.e. emit xmlns:from="developer".)
        etree.SubElement(unit, "note", attrib={'from': "developer"}).text = note
    return unit


def add_trans_unit(
    en_tree,
    zh_tree,
    data
):
    '''
    Add the trans-units for one card to the English and Chinese XLIFF bodies.

    ``en_tree`` / ``zh_tree`` are the <body> elements to append to; ``data``
    is one row dict as produced by read_table. The card name always yields an
    English unit; Chinese name, card text and flavor text are optional.

    Official Chinese (zhs*) content is marked "translated"; unofficial
    (translated*) content is used only as a fallback and is marked
    "needs-adaptation". English units use the English text as both source
    and target.

    NOTE(review): every unit of a card reuses the same ``id`` (scryfallId);
    only ``resname`` differs between them.
    '''
    note = ""
    if data['zhsUpdatedAt'] and data['zhsUpdatedFrom'] and data['setCode'] != data['zhsUpdatedFrom']:
        note += f"中文数据来自{data['zhsUpdatedFrom']}（{data['zhsUpdatedAt']}）；"
    if data['translatedAt'] and data['translatedFrom']:
        note += f"非官方中文翻译来自{data['translatedFrom']}（{data['translatedAt']}）；"
    if data['extra']:
        note += data['extra']

    # --- Card name ---
    en_name = data['name']
    _append_trans_unit(en_tree, data, "Card Name", en_name, en_name, "translated")
    if data['zhsName']:
        _append_trans_unit(zh_tree, data, "Card Name", en_name, data['zhsName'], "translated", note)
    elif data['translatedName']:
        _append_trans_unit(zh_tree, data, "Card Name", en_name, data['translatedName'], "needs-adaptation", note)

    # --- Card text ---
    en_text = data['text']
    if en_text:
        if data['zhsText']:
            zh_text, zh_state = data['zhsText'], "translated"
        elif data['translatedText']:
            zh_text, zh_state = data['translatedText'], "needs-adaptation"
        else:
            zh_text, zh_state = "", None

        # Newlines were turned into the literal two characters "\n" upstream,
        # so paragraphs are separated by that sequence.
        en_paras = en_text.split("\\n")
        zh_paras = zh_text.split("\\n") if zh_text else []

        if zh_text and len(zh_paras) != len(en_paras):
            # Chinese exists but its paragraphs do not line up with the
            # English ones: translate the whole text as a single unit.
            kind = "Card Text | Paragraphs"
            _append_trans_unit(en_tree, data, kind, en_text, en_text, "translated")
            _append_trans_unit(zh_tree, data, kind, en_text, zh_text, zh_state, note)
        else:
            # Either the paragraphs align, or there is no Chinese at all:
            # emit one unit per paragraph.
            for index, en_para in enumerate(en_paras):
                kind = f"Card Text | Paragraph {index + 1}"
                _append_trans_unit(en_tree, data, kind, en_para, en_para, "translated")
                if zh_text:
                    _append_trans_unit(zh_tree, data, kind, en_para, zh_paras[index], zh_state, note)

    # --- Flavor text (always translated as a single unit) ---
    flavor = data.get('flavorText')
    if flavor:
        _append_trans_unit(en_tree, data, "Flavor Text", flavor, flavor, "translated")
        zh_flavor = data.get('zhsFlavorText')
        if zh_flavor:
            _append_trans_unit(zh_tree, data, "Flavor Text", flavor, zh_flavor, "translated", note)


In [2]:
def export_xliff(sql_text, file_suffix):
    '''
    Export the rows selected by ``sql_text`` as paired English / Simplified
    Chinese XLIFF files, one pair per sortCode group.

    Files are written to WORKDIR/project/lang/en and
    WORKDIR/project/lang/zh_Hans as "<sortCode>-<file_suffix>.xliff"; a
    progress line is printed after each pair.
    '''
    rows = read_table(sql_text)

    en_dir = f"{WORKDIR}/project/lang/en"
    zh_dir = f"{WORKDIR}/project/lang/zh_Hans"
    write_options = dict(pretty_print=True, xml_declaration=True, encoding="utf-8")

    for sort_code, units in sort_data_by_set(rows).items():
        en_root, en_body = create_xliff_tree()
        zh_root, zh_body = create_xliff_tree()

        for unit in units:
            add_trans_unit(en_body, zh_body, unit)

        # The original author left a disabled workaround here that renamed
        # "CON" to "CON_" for Windows; it remains disabled.
        file_name = f"{sort_code}-{file_suffix}.xliff"
        etree.ElementTree(en_root).write(f"{en_dir}/{file_name}", **write_options)
        etree.ElementTree(zh_root).write(f"{zh_dir}/{file_name}", **write_options)

        print(f"Set {sort_code} done.")

# Run both exports; the second argument becomes the suffix of every output
# file name ("<sortCode>-Oracle.xliff" / "<sortCode>-Printed.xliff").
export_xliff(ATOMIC_SQL_TEXT, "Oracle")
export_xliff(ALL_SQL_TEXT, "Printed")


SyntaxError: invalid syntax (3352775565.py, line 9)