In [1]:
from struct import unpack, pack, calcsize
import csv
import os

In [2]:
# Record layouts for known DBC tables: one format character per field.
#
# Keys are either a file name or a '<field_count>_<record_size>' signature
# (the loader builds that signature from the file header). A file-name entry
# may hold such a signature instead of a layout, in which case the layout is
# taken from the signature entry.
#
# Field codes are struct-style characters plus two custom ones that the
# loader turns into 'I' before calling struct:
#   'S' - the stored integer is an offset into the string block; on load the
#         value is replaced by the decoded string.
#   'N' - handled exactly like 'I' by the code in this notebook
#         (presumably marks the record-id column -- TODO confirm).
DBC_FMT = {
    'DungeonMap.dbc': 'NIIffffI',
    'DungeonMapChunk.dbc': 'NIIIf',
    'Spell.dbc': '234_936',
    '234_936': "NIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIfIIIIiiiiiiiiIIIIIIIIiiiIIIiiifffiiiIIIIIIIIIIIIIIIIIIfffIIIIIIiiiiiiIIIfffIIIIIIIIIIIIIISSSSSSSSSSSSSSSSISSSSSSSSSSSSSSSSISSSSSSSSSSSSSSSSISSSSSSSSSSSSSSSSIIIIIIIIIIIIIfffIIIIIiIIIIfffII",
    'SkillLine.dbc': '56_224',
    '56_224': "NIISSSSSSSSSSSSSSSSISSSSSSSSSSSSSSSSIISSSSSSSSSSSSSSSSII",
    'SkillLineAbility.dbc': 'NIIIIIIIIIIIII',
    'SkillRaceClassInfo.dbc': 'NIIIIIII',
    'MapDifficulty.dbc': 'NIISSSSSSSSSSSSSSSSIIIS',
    'VideoHardware.dbc': 'NIIIIIIIIIIIIIIIIISSIII',
    'DungeonEncounter.dbc': 'NIIIISSSSSSSSSSSSSSSSII',
    'Exhaustion.dbc': 'NIfffSSSSSSSSSSSSSSSSII',
    # Generic layout used when only the header signature (23 fields, 92 bytes) matches.
    '23_92': "NIIIIIIIIIIIIIIIIIIIIII",
    'WorldMapArea.dbc': "NIISffffiiI",
    'WorldMapTransforms.dbc': "NIffffIffI",
}


In [3]:
class DBCHeader:
    """The fixed-size header found at the start of a DBC file.

    The header holds a 4-byte magic followed by four unsigned 32-bit
    integers: record count, field count, record size (bytes) and string
    block size (bytes).
    """

    # '<' forces little-endian, standard sizes and no padding, so the header is
    # read and written identically regardless of the host's native byte order.
    HEADER_FORMAT = '<4sIIII'
    HEADER_SIZE = calcsize(HEADER_FORMAT)  # 20 bytes

    def __init__(self, iterable=(), **kwargs):
        """Create a header with defaults, optionally overriding attributes.

        Args:
            iterable: mapping or iterable of ``(name, value)`` pairs applied
                to the instance attributes.
            **kwargs: attribute overrides, e.g. ``record_count=3``.
        """
        self.magic = b'WDBC'
        self.record_count = 0
        self.field_count = 0
        self.record_size = 0
        # Default of 1 matches the default one-byte (NUL) string block of DBC.
        self.string_block_size = 1

        self.__dict__.update(iterable, **kwargs)

    def __repr__(self):
        """Return the header fields plus the file size they imply."""
        msg = f'magic:{self.magic}, record_count:{self.record_count}, field_count:{self.field_count}, ' \
              f'record_size:{self.record_size}, string_block_size:{self.string_block_size} ' \
              f'file_size:{DBCHeader.HEADER_SIZE + self.record_count * self.record_size + self.string_block_size}'
        return msg

    def unpack_binary(self, data):
        """Fill the header fields from the first ``HEADER_SIZE`` bytes of ``data``.

        Raises:
            struct.error: if ``data`` is shorter than ``HEADER_SIZE`` bytes.
        """
        (self.magic, self.record_count, self.field_count, self.record_size,
         self.string_block_size) = unpack(self.HEADER_FORMAT, data[:self.HEADER_SIZE])

    def pack_header(self):
        """Return the header serialized as ``HEADER_SIZE`` little-endian bytes."""
        return pack(self.HEADER_FORMAT, self.magic, self.record_count, self.field_count, self.record_size,
                    self.string_block_size)


class DBC:
    """A DBC table held in memory: header, records and string block.

    ``fmt`` describes one record with one character per field. The characters
    are struct codes plus two custom ones that are stored as ``'I'``:
    ``'S'`` (offset into the string block, exposed as ``str``) and ``'N'``.

    With a known format, ``records`` maps ``record[field_index]`` to a list of
    field values. Without one (raw mode), ``records`` maps the row number to
    the record's untouched bytes so the file can still be written back.
    """

    # Format codes whose CSV cells are parsed as int / float by import_csv.
    _CSV_INT_CODES = frozenset('ibhlqn')
    _CSV_FLOAT_CODES = frozenset('fed')

    def __init__(self, iterable=(), **kwargs):
        """Create an empty table, optionally overriding attributes.

        Args:
            iterable: mapping or iterable of ``(name, value)`` pairs applied
                to the instance attributes.
            **kwargs: attribute overrides, e.g. ``fmt='NI'``.
        """
        self.path = ''
        self.fmt = ''
        self.field_index = 0  # field whose value is used as the records key
        self.header = DBCHeader()
        self.records = {}
        self.string_block = b'\0'

        self.__dict__.update(iterable, **kwargs)

    @staticmethod
    def _struct_format(fmt):
        """Translate a DBC format into a little-endian, unpadded struct format."""
        return '<' + fmt.replace('S', 'I').replace('N', 'I')

    @staticmethod
    def _lookup_format(file_name, header):
        """Pick the DBC_FMT layout for a file; return '' when none is known."""
        signature = f'{header.field_count}_{header.record_size}'
        named = DBC_FMT.get(file_name)
        if named is not None and named != signature:
            # A file-name entry is either a layout or an alias to a signature
            # entry; resolve the alias instead of using it as a struct format.
            return DBC_FMT.get(named, named)
        return DBC_FMT.get(signature, '')

    def load(self, dbc_path, fmt='', raw_float=False):
        """Read a DBC file into this object.

        Args:
            dbc_path: path of the file to read. An empty file leaves the
                object unchanged.
            fmt: record format; when empty it is looked up in ``DBC_FMT`` by
                file name or by header signature. If no format is found the
                records are kept as raw bytes keyed by row number.
            raw_float: keep ``'f'``/``'d'`` fields as their integer bit
                patterns (``'I'``/``'Q'``) instead of decoding them.

        Raises:
            AssertionError: if the format disagrees with the header, or the
                string block size disagrees with the header.
            IndexError: if a string field points outside the string block.
        """
        with open(dbc_path, 'rb') as f:
            data = f.read()

        if len(data) == 0:
            return

        self.path = dbc_path

        # Read Headers
        self.header = DBCHeader()
        self.header.unpack_binary(data)
        record_count = self.header.record_count
        record_size = self.header.record_size

        if not fmt:
            fmt = self._lookup_format(os.path.basename(dbc_path), self.header)

        struct_fmt = ''
        if fmt:
            if raw_float:
                fmt = fmt.replace('f', 'I').replace('d', 'Q')
            struct_fmt = self._struct_format(fmt)
            assert calcsize(struct_fmt) == record_size, \
                f'format describes {calcsize(struct_fmt)} bytes, header says {record_size}'
            assert len(fmt) == self.header.field_count, \
                f'format has {len(fmt)} fields, header says {self.header.field_count}'
        # Always reset, so a format left over from an earlier load is never
        # applied to raw records by store()/export_csv().
        self.fmt = fmt

        # Read Records
        self.records = {}
        begin = DBCHeader.HEADER_SIZE
        for row in range(record_count):
            chunk = data[begin:begin + record_size]
            begin += record_size
            if struct_fmt:
                rec = list(unpack(struct_fmt, chunk))
                self.records[rec[self.field_index]] = rec
            else:
                # Raw mode: key by row number (unique) and keep the bytes.
                self.records[row] = chunk

        # Read String Block
        self.string_block = data[DBCHeader.HEADER_SIZE + record_count * record_size:]
        assert len(self.string_block) == self.header.string_block_size, \
            'string block size does not match the header'

        if (not fmt) or (not self.string_block):
            return

        # Resolve string offsets. An offset may point into the middle of
        # another string (shared suffix), so read up to the next NUL instead
        # of relying on a table of string start positions.
        block = self.string_block
        cache = {}

        def string_at(offset):
            text = cache.get(offset)
            if text is None:
                if offset >= len(block):
                    raise IndexError(f'string offset {offset} is outside the string block')
                end = block.find(b'\0', offset)
                if end < 0:
                    end = len(block)
                text = cache[offset] = block[offset:end].decode()
            return text

        string_fields = [i for i, code in enumerate(self.fmt) if code == 'S']
        for rec in self.records.values():
            for i in string_fields:
                rec[i] = string_at(rec[i]) if rec[i] else ''

    def store(self, dbc_path):
        """Write the table to ``dbc_path``.

        With a format, the string block and header are rebuilt from the
        records and the records are written in sorted order. In raw mode the
        loaded record bytes and string block are written back unchanged.
        """
        if self.fmt:
            string_fields = [i for i, code in enumerate(self.fmt) if code == 'S']

            # Generate String Block
            unique_strings = set()
            for rec in self.records.values():
                for i in string_fields:
                    if rec[i]:
                        unique_strings.add(rec[i])

            offsets = {}
            chunks = [b'\0']  # offset 0 is the empty string
            position = 1
            # Sorted so the same records always produce the same file.
            for text in sorted(unique_strings):
                encoded = text.encode()
                offsets[text] = position
                chunks.append(encoded + b'\0')
                position += len(encoded) + 1
            self.string_block = b''.join(chunks)

            rows = []
            for rec in self.records.values():
                row = list(rec)
                for i in string_fields:
                    row[i] = offsets[row[i]] if row[i] else 0
                rows.append(row)
            rows.sort()

            # pack records
            struct_fmt = self._struct_format(self.fmt)
            raw_record_list = [pack(struct_fmt, *row) for row in rows]

            # Generate header
            self.header = DBCHeader(
                record_count=len(rows),
                field_count=len(self.fmt),
                record_size=calcsize(struct_fmt),
                string_block_size=len(self.string_block)
            )
        else:
            # bytes() accepts both raw bytes records and lists of byte values.
            raw_record_list = [bytes(rec) for rec in self.records.values()]
        raw_records = b''.join(raw_record_list)
        self.header.record_count = len(self.records)

        with open(dbc_path, 'wb') as f:
            f.write(self.header.pack_header())
            f.write(raw_records)
            f.write(self.string_block)

    def import_csv(self, csv_path, fmt='', raw_float=False, encoding=None):
        """Replace the records with the rows of a CSV file.

        The first CSV row is treated as a header and skipped; blank rows are
        ignored.

        Args:
            csv_path: path of the CSV file.
            fmt: record format; defaults to the current ``self.fmt``.
            raw_float: treat ``'f'``/``'d'`` fields as integers.
            encoding: text encoding of the file; ``None`` uses the platform
                default. Pass ``'utf-8'`` for files that must be portable.

        Raises:
            AssertionError: if the header row's column count differs from the
                number of fields in the format.
        """
        if not fmt:
            fmt = self.fmt

        if not fmt:
            print('unknow format')
            return

        if raw_float:
            fmt = fmt.replace('f', 'I')
            fmt = fmt.replace('d', 'Q')

        with open(csv_path, newline='', encoding=encoding) as csvfile:
            rows = list(csv.reader(csvfile))
        field_count = len(fmt)
        if rows:
            assert len(rows[0]) == field_count

        self.records = {}
        for row in rows[1:]:
            if not row:
                continue  # csv.reader yields [] for blank lines
            for i in range(field_count):
                if fmt[i].lower() in self._CSV_INT_CODES:
                    row[i] = int(row[i])
                elif fmt[i] in self._CSV_FLOAT_CODES:
                    row[i] = float(row[i])
            self.records[row[self.field_index]] = row
        self.fmt = fmt
        self.path = csv_path

    def export_csv(self, csv_path, encoding=None):
        """Write the records, sorted, to a fully quoted CSV file.

        The header row holds the field indices. Nothing is written when the
        table has no format.

        Args:
            csv_path: path of the CSV file to create.
            encoding: text encoding of the file; ``None`` uses the platform
                default. Pass ``'utf-8'`` for files that must be portable.
        """
        if not self.fmt:
            return
        with open(csv_path, 'w', newline='', encoding=encoding) as csvfile:
            writer = csv.writer(csvfile, quoting=csv.QUOTE_ALL)
            writer.writerow([f'{f}' for f in range(len(self.fmt))])
            writer.writerows(sorted(self.records.values()))

    def import_string(self, dbc_path, merge=False):
        """Copy string fields from another DBC file with the same format.

        Only records whose key exists in both tables are touched.

        Args:
            dbc_path: path of the DBC file to take strings from.
            merge: when true, only non-empty source strings are copied; when
                false, every string field is overwritten.
        """
        if not self.fmt:
            return
        src_dbc = DBC()
        src_dbc.load(dbc_path, fmt=self.fmt)
        # Derive the string columns from the format rather than the header,
        # which is not refreshed by import_csv.
        string_fields = [i for i, code in enumerate(self.fmt) if code == 'S']
        for key, rec in self.records.items():
            src_rec = src_dbc.records.get(key)
            if src_rec is None:
                continue
            for f in string_fields:
                if (not merge) or src_rec[f]:
                    rec[f] = src_rec[f]

def diff():
    """Placeholder for showing the changes between two dbc files.

    todo: not implemented yet. The intended workflow on exported CSV files is:
        diff -u file1.csv file2.csv > patchfile.patch
    """
    return None

def apply():
    """Placeholder for applying a patch to a dbc file.

    todo: not implemented yet. The intended workflow on exported CSV files is:
        patch file1.csv patchfile.patch
    """
    return None

In [4]:
# Time a full round trip on one Spell table:
# binary -> memory -> CSV -> memory -> binary.
# NOTE: %timeit executes each statement several times (the recorded output
# reports 7 runs), so the files below are read/written repeatedly and the
# steps must stay in this order: each one works on the state left by the previous.
dbc = DBC()
%timeit dbc.load('dbc/Spell_cn.dbc')
%timeit dbc.export_csv('dbc/test.csv')
%timeit dbc.import_csv('dbc/test.csv')
%timeit dbc.store('dbc/test.dbc')
# Header as regenerated by store().
print(dbc.header)

283 ms ± 16.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
478 ms ± 16.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
1.44 s ± 84.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
806 ms ± 51.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
magic:b'WDBC', record_count:49839, field_count:234, record_size:936, string_block_size:2307035 file_size:48956359


In [5]:
# Time replacing the string fields of the 'en' Spell table with those of the
# 'cn' table, then writing the result.
# NOTE: %timeit executes each statement several times (the recorded output
# reports 7 runs).
dbc = DBC()
%timeit dbc.load('dbc/en/Spell.dbc')
# Header as read from the 'en' file.
print(dbc.header)
# merge=False: every string field is overwritten, including with empty strings.
%timeit dbc.import_string('dbc/cn/Spell.dbc', merge=False)
%timeit dbc.store('dbc/test.dbc')
# Header as regenerated by store() (string block rebuilt from the records).
print(dbc.header)

297 ms ± 7.04 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
magic:b'WDBC', record_count:49839, field_count:234, record_size:936, string_block_size:2317797 file_size:48967121
1.25 s ± 37.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
795 ms ± 5.44 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
magic:b'WDBC', record_count:49839, field_count:234, record_size:936, string_block_size:2307035 file_size:48956359


In [None]:
# Export every .dbc file found in the sub-directories of dbc/ to a CSV file
# next to it (export_csv writes nothing for tables without a known format).
for d in sorted(os.listdir('dbc')):
    sub_dir = os.path.join('dbc', d)
    if not os.path.isdir(sub_dir):
        # Other cells write plain files directly into dbc/ (e.g. test.dbc);
        # calling os.listdir on them would raise NotADirectoryError.
        continue
    for f in sorted(os.listdir(sub_dir)):
        if not f.endswith('.dbc'):
            continue
        dbc = DBC()
        dbc.load(os.path.join(sub_dir, f))
        dbc.export_csv(os.path.join(sub_dir, f'{f}.csv'))
        print(d, f, dbc.header)

In [42]:
# Build merged tables: start from the 'en' file, then overlay the non-empty
# strings of the 'cn' file and finally those of the 'tw' file.
# dbc_list = ['MapDifficulty.dbc', 'SkillLine.dbc', 'Spell.dbc']
dbc_list = ['Spell.dbc',]

# Make sure the output directory exists before anything is written into it.
out_dir = os.path.join('dbc', 'cn_alvin')
os.makedirs(out_dir, exist_ok=True)

for d in dbc_list:
    dbc = DBC()
    dbc.load(os.path.join('dbc', 'en', d))
    dbc.import_string(os.path.join('dbc', 'cn', d), merge=True)
    # dbc.import_string(os.path.join('dbc', 'en', d), merge=True)
    dbc.import_string(os.path.join('dbc', 'tw', d), merge=True)
    dbc.export_csv(os.path.join(out_dir, f'{d}.csv'))
    dbc.store(os.path.join(out_dir, d))
    print(d, dbc.header)

Spell.dbc magic:b'WDBC', record_count:49839, field_count:234, record_size:936, string_block_size:6473160 file_size:53122484


In [None]:
# Apply the edited CSV to MapDifficulty.dbc in place, then regenerate the CSV
# from the file that was just written.
dbc_file = os.path.join('dbc', 'mip_m', 'MapDifficulty.dbc')
csv_file = os.path.join('dbc', 'mip_m', 'MapDifficulty.dbc.csv')

dbc = DBC()
dbc.load(dbc_file)        # picks up the record format for this table
dbc.import_csv(csv_file)  # replace the records with the CSV rows
dbc.store(dbc_file)
dbc.load(dbc_file)
dbc.export_csv(csv_file)

In [None]:
from zhconv import convert

# Fill string fields of the 'cn' Spell table that are still pure ASCII with the
# text of the 'tw' table, converted to zh-cn.
dbc1 = DBC()
dbc1.load(os.path.join('dbc', 'cn', 'Spell.dbc'))

dbc2 = DBC()
dbc2.load(os.path.join('dbc', 'tw', 'Spell.dbc'))

string_fields = [f for f, code in enumerate(dbc1.fmt) if code == 'S']
for i, rec_cn in dbc1.records.items():
    rec_tw = dbc2.records.get(i)
    if rec_tw is None:
        # Record missing from the 'tw' table: nothing to take over.
        continue
    for f in string_fields:
        s = rec_cn[f]
        # UTF-8 length equals character count only for pure-ASCII text.
        if not (s and len(s.encode()) == len(s)):
            continue
        # The 'tw' text is read from the field right after f
        # (presumably the next locale slot -- TODO confirm).
        if f + 1 >= len(rec_tw):
            continue
        s2 = rec_tw[f + 1]
        # f + 1 can land on a non-string column (e.g. the integer after a
        # block of strings); only non-ASCII strings are taken over.
        if isinstance(s2, str) and s2 and len(s2.encode()) != len(s2):
            s2 = convert(s2, 'zh-cn')
            rec_cn[f] = s2
            print(s2)
dbc1.export_csv(os.path.join('dbc', 'Spell.dbc.csv'))
dbc1.store(os.path.join('dbc', 'Spell.dbc'))