In [1]:
import xml.dom.minidom as dom
import xmltodict
import json
import os
from collections import OrderedDict
import re

In [2]:
def split_xml(src_file, target_folder):
    """Split the sections of the first <Database> element into separate XML files.

    Every element child of <Database> that has child nodes of its own is moved
    under a fresh <Root> document and written to
    ``<target_folder>/<tagName>.xml``. The tag name of each written section is
    printed.

    Args:
        src_file: Path (or file object) of the XML document to split.
        target_folder: Folder receiving the per-section files; created if missing.

    Raises:
        IndexError: If the document contains no <Database> element.
    """
    document_root = dom.parse(src_file).documentElement
    db = document_root.getElementsByTagName('Database')[0]
    implementation = dom.getDOMImplementation()
    if target_folder:
        os.makedirs(target_folder, exist_ok=True)
    # Iterate over a snapshot: appendChild() below detaches the node from
    # db.childNodes, and mutating that live list while looping over it would
    # silently skip the sibling that follows each moved element.
    for n in list(db.childNodes):
        if n.nodeType != n.ELEMENT_NODE or not n.hasChildNodes():
            continue
        print(n.tagName)
        new_root = implementation.createDocument(None, 'Root', None)
        new_root.documentElement.appendChild(n)
        with open(os.path.join(target_folder, n.tagName + '.xml'), 'w', encoding='utf-8') as f:
            new_root.writexml(f, addindent='\t', newl='')

In [3]:
def deep_get(o:OrderedDict, keys:list[str]):
    """Collect every value reachable from ``o`` by following ``keys``.

    A key of ``'_'`` is a wildcard that fans out over all elements of a list;
    any other key is used to index the current object directly.

    Returns:
        A list of ``(value, concrete_path)`` tuples, where ``concrete_path`` is
        the key list with each ``'_'`` replaced by the matching list index.
        The list is empty when the path cannot be followed (missing key,
        object that cannot be indexed with the key, or ``'_'`` applied to a
        non-list).
    """
    if not keys:
        return [(o, [])]

    head, rest = keys[0], keys[1:]

    if head == '_':
        if not isinstance(o, list):
            return []
        return [
            (value, [index] + path)
            for index, element in enumerate(o)
            for value, path in deep_get(element, rest)
        ]

    try:
        child = o[head]
    except (KeyError, TypeError):
        return []
    return [(value, [head] + path) for value, path in deep_get(child, rest)]
def deep_set(o:OrderedDict, keys:list[str], value):
    """Assign ``value`` at the nested location ``keys`` inside ``o``.

    Intermediate levels that are missing (``KeyError``) or currently ``None``
    are replaced by a new ``OrderedDict``. Existing containers are reused, so
    integer keys can be used to step into lists that already exist.

    Raises:
        IndexError: If ``keys`` is empty, or an integer key is out of range
            for a list along the way.
    """
    node = o
    for key in keys[:-1]:
        try:
            child = node[key]
        except KeyError:
            child = None
        # Identity test on purpose: "== None" would call a custom __eq__ and
        # could discard an existing container that merely compares equal.
        if child is None:
            child = node[key] = OrderedDict()
        node = child
    node[keys[-1]] = value
def deep_delete(o:OrderedDict, keys:list[str]):
    """Delete the entry at nested location ``keys`` and prune emptied parents.

    The last key is removed with ``del``. On the way back up, every
    intermediate mapping or list that became empty is removed from its own
    parent as well. If an intermediate key is missing, or the current object
    cannot be indexed with it, nothing happens.

    Raises:
        KeyError / IndexError: If the final key does not exist in its container.
        IndexError: If ``keys`` is empty.
    """
    k = keys[0]
    if len(keys) == 1:
        del o[k]
        return

    try:
        child = o[k]
    except (KeyError, TypeError):
        return
    deep_delete(child, keys[1:])

    # Any mapping counts here (OrderedDict is a dict subclass), so plain dict
    # parents are pruned too instead of being left behind as {}.
    if isinstance(child, (dict, list)) and len(child) == 0:
        del o[k]

def deep_values(o:OrderedDict, specific_type:type=Exception):
    """Yield ``(path, leaf)`` for every leaf of a nested dict/list structure.

    The structure is walked depth-first in insertion order without recursion.
    Mappings and lists are descended into; everything else is a leaf. Empty
    mappings and empty lists produce nothing.

    Args:
        o: The structure to walk. A non-container ``o`` is yielded once with
            the empty path ``()``.
        specific_type: Values that are instances of this type are treated as
            leaves and yielded as-is instead of being descended into (for
            example ``list`` to receive whole lists). The default matches
            nothing found in ordinary parsed data.

    Yields:
        Tuples ``(keys, value)`` where ``keys`` is a tuple of dict keys and
        list indices leading from ``o`` to ``value``.
    """
    def open_container(node):
        # Returns a (key, value) iterator for containers to descend into,
        # or None when the node has to be reported as a leaf.
        if isinstance(node, specific_type):
            return None
        if isinstance(node, dict):
            return iter(node.items())
        if isinstance(node, list):
            return enumerate(node)
        return None

    root_iter = open_container(o)
    if root_iter is None:
        yield (), o
        return

    iters = [root_iter]
    keys = []  # key taken at each level above the innermost open iterator
    while iters:
        try:
            key, val = next(iters[-1])
        except StopIteration:
            # Container exhausted (or empty): go back up one level.
            iters.pop()
            if keys:
                keys.pop()
            continue
        child_iter = open_container(val)
        if child_iter is None:
            yield tuple(keys) + (key,), val
        else:
            keys.append(key)
            iters.append(child_iter)

def try_parse(s:str):
    """Convert a textual value into a bool, number or list where it looks like one.

    Rules, applied in order:
      * non-string input is returned unchanged;
      * ``'T'`` / ``'F'`` become ``True`` / ``False``;
      * space-separated runs of ``T``/``F`` or of unsigned decimal numbers
        become a list of parsed elements;
      * text accepted by ``int()`` becomes an int, otherwise text accepted by
        ``float()`` becomes a float, as long as the result is finite;
      * anything else is returned as the original string.

    Non-finite results ("nan", "inf", "Infinity", overflowing exponents) are
    kept as text: they are far more likely to be names than numbers, and
    ``json.dump`` would serialise them as ``NaN``/``Infinity``, which is not
    valid JSON.
    """
    import math  # local import: only needed for the finiteness check below

    if not isinstance(s, str):
        return s
    if s == 'T':
        return True
    if s == 'F':
        return False
    is_bool_list = re.fullmatch(r'((T|F) )+(T|F)', s)
    is_number_list = re.fullmatch(r'(\d+(\.\d+)? )+\d+(\.\d+)?', s)
    if is_bool_list or is_number_list:
        return [try_parse(v) for v in s.split(' ')]
    try:
        return int(s)
    except ValueError:
        pass
    try:
        number = float(s)
    except ValueError:
        return s
    return number if math.isfinite(number) else s

In [4]:
def all_list(a):
    """Make list-valued fields consistent across all records in ``a`` (in place).

    First every path at which at least one record holds a list is collected
    (dotted key path, printed for inspection). Then, for each such path, any
    record holding a single non-``None``, non-list value there gets that value
    wrapped in a one-element list: if it is an array elsewhere, it has to be
    an array everywhere.

    Args:
        a: Iterable of nested records (re-iterated several times, so it must
            not be a one-shot iterator).
    """
    list_paths = set()
    for item in a:
        for keys, v in deep_values(item, list):
            if isinstance(v, list):
                list_paths.add('.'.join('_' if isinstance(k, int) else k for k in keys))
    list_keys = sorted(list_paths)
    print(list_keys)

    handled_parent = None
    for path in list_keys:
        # Skip paths nested below one that was just normalised. The check is
        # done on the '.' boundary so that a sibling such as 'foo_bar' is not
        # mistaken for a child of 'foo'.
        if handled_parent is not None and path.startswith(handled_parent + '.'):
            continue
        handled_parent = path
        keys = path.split('.')
        for item in a:
            got = deep_get(item, keys)
            if got:
                v = got[0][0]
                if v is not None and not isinstance(v, list):
                    deep_set(item, keys, [v])

def _count_leaf_values(items):
    """Count, per dotted path, how often each leaf value occurs across ``items``.

    List indices are replaced by ``'_'`` in the path. Paths ending in ``'@id'``
    and everything below a top-level ``'name'`` key are ignored.
    """
    counts = {}
    for item in items:
        for keys, v in deep_values(item):
            if keys[-1] == '@id' or keys[0] == 'name':
                continue
            path = '.'.join('_' if isinstance(k, int) else k for k in keys)
            per_value = counts.setdefault(path, {})
            per_value[v] = per_value.get(v, 0) + 1
    return counts


def _choose_defaults(counts, total, min_share):
    """Pick one default value per path from the counts, where one qualifies.

    ``None`` wins whenever it occurs at all; otherwise the most frequent value
    is used if it occurs more than ``total * min_share`` times; otherwise
    ``'0'`` is used if it occurs. Paths matching none of these get no default.
    """
    defaults = {}
    for path, per_value in counts.items():
        values = list(per_value.keys())
        occurrences = list(per_value.values())
        if None in values:
            defaults[path] = None
        elif max(occurrences) > total * min_share:
            defaults[path] = values[occurrences.index(max(occurrences))]
        elif '0' in values:
            defaults[path] = '0'
    return defaults


def simplified(a, min_share=0.25):
    """Compact a parsed database section by factoring out common default values.

    ``a`` is expected to be an ``OrderedDict`` whose last entry holds the list
    of records; anything else is returned as it is (or, if that entry is not a
    list, the entry itself is returned untouched). For a record list:

      1. records without a usable ``name`` (missing, ``None``, or parsing as
         an integer) are dropped;
      2. list-valued fields are unified with ``all_list``;
      3. a default value is chosen per field and removed from every record
         that carries exactly that value;
      4. a record ``{'@id': 0, ...defaults...}`` is inserted at position 0;
      5. all leaf strings are converted with ``try_parse`` and list-valued
         fields are unified once more.

    Args:
        a: Parsed section; its last entry is popped from it.
        min_share: Fraction of the record count that the most frequent value
            of a field must exceed to become that field's default.

    Returns:
        The processed record list, or the unprocessed input/entry as described.
    """
    if isinstance(a, OrderedDict):
        a = a.popitem()[1]
        if isinstance(a, list):
            # NOTE(review): the threshold is based on the record count taken
            # before nameless records are filtered out - confirm that is intended.
            total = len(a)
            # Drop records that have no name.
            a = [o for o in a if ('name' in o) and (o['name'] is not None) and (type(try_parse(o['name'])) is not int)]
            # Unify arrays.
            all_list(a)
            # Extract default values.
            defaults = _choose_defaults(_count_leaf_values(a), total, min_share)
            default_item = OrderedDict([['@id', 0]])
            for path, default in defaults.items():
                keys = path.split('.')
                deep_set(default_item, [k for k in keys if k != '_'], default)
                for item in a:
                    try:
                        # Reverse order keeps earlier list indices valid while
                        # entries are being deleted.
                        for found, found_keys in deep_get(item, keys)[::-1]:
                            if found == default:
                                deep_delete(item, found_keys)
                    except Exception:
                        print(item)
                        print(keys)
                        print(default)
                        raise
            a.insert(0, default_item)
            # Type conversion.
            for item in a:
                for k, v in deep_values(item):
                    deep_set(item, k, try_parse(v))

            # Unify arrays again, now that strings may have turned into lists.
            all_list(a)
    return a

In [5]:
def xml_to_json(xml_dir, json_dir):
    """Convert every XML file in ``xml_dir`` into ``<basename>.json`` in ``json_dir``.

    For each regular file the first element below the document root is parsed
    with xmltodict and its content is dumped as JSON (UTF-8, non-ASCII kept
    as-is). Sections named actors, attributes, enemies, items, skills, states
    and troops are additionally passed through ``simplified``. The base name
    of every processed file is printed.

    Args:
        xml_dir: Folder containing the XML files; sub-folders are skipped.
        json_dir: Folder receiving the JSON files; created if missing.

    Raises:
        ValueError: If a file's root element has no element child.
    """
    simplified_sections = {'actors', 'attributes', 'enemies', 'items', 'skills', 'states', 'troops'}
    if json_dir:
        os.makedirs(json_dir, exist_ok=True)
    for filename in sorted(os.listdir(xml_dir)):
        path = os.path.join(xml_dir, filename)
        if not os.path.isfile(path):
            continue
        basename = os.path.splitext(filename)[0]
        print(basename)
        root = dom.parse(path).documentElement
        # Take the first element child rather than a fixed index, so the
        # result does not depend on whitespace text nodes around it.
        section = next((n for n in root.childNodes if n.nodeType == n.ELEMENT_NODE), None)
        if section is None:
            raise ValueError('no element found below the root of ' + path)
        # simplified() and its helpers test for OrderedDict; request it
        # explicitly because xmltodict may return plain dicts depending on its
        # version, which would silently bypass the simplification.
        a = xmltodict.parse(section.toxml(), dict_constructor=OrderedDict).popitem()[1]
        if basename in simplified_sections:
            a = simplified(a)
        new_path = os.path.join(json_dir, basename + '.json')
        with open(new_path, "w", encoding='utf-8') as f:
            json.dump(a, f, ensure_ascii=False)

In [6]:
# Preparation step, left commented out: writes one file per <Database> section
# of 'RPG_RT.xml' into the 'xml' folder that the call below reads from.
# split_xml('RPG_RT.xml', 'xml')
# Convert every file found in 'xml' into '<basename>.json' inside 'json'.
xml_to_json('xml', 'json')

actors
['skills.Learning']
['attribute_ranks', 'parameters.Parameters.agility', 'parameters.Parameters.attack', 'parameters.Parameters.defense', 'parameters.Parameters.maxhp', 'parameters.Parameters.maxsp', 'parameters.Parameters.spirit', 'skills.Learning', 'state_ranks']
animations
attributes
[]
[]
battlecommands
chipsets
commonevents
enemies
['actions.EnemyAction']
['actions.EnemyAction', 'attribute_ranks', 'state_ranks']
items
[]
['actor_set', 'attribute_set', 'state_set']
skills
[]
['attribute_effects', 'state_effects']
states
[]
[]
switches
system
terms
terrains
troops
['members.TroopMember', 'pages.TroopPage', 'pages.TroopPage.event_commands.EventCommand']
['members.TroopMember', 'pages.TroopPage', 'terrain_set']
variables
version
