In [36]:
import re
import dateutil

def _read_lines(path):
    """Return the full text of the file at `path`, split on newline characters.

    A file that ends with a newline therefore yields a trailing empty string.
    """
    # Decode explicitly as UTF-8 rather than with the locale default: parse_date
    # splits on a non-ASCII en dash, which a different default encoding would
    # mis-decode (assumes the scraped text files are UTF-8 -- TODO confirm).
    with open(path, 'r', encoding='utf-8') as file:
        return file.read().split('\n')


character_lines = _read_lines('character.txt')
weapon_lines = _read_lines('weapon.txt')
handbook_lines = _read_lines('handbook_excerpt.txt')

# A handbook line of interest is "<digits> : <name>", optionally preceded by
# whitespace; lines that do not fit this shape contribute nothing.
handbook_regex = re.compile(r'^\s*(\d+) : (.+)$')

# Maps each name to its ID, kept as the digit string captured from the line.
# If a name occurs on several lines, the last occurrence wins.
ids = {
    found.group(2): found.group(1)
    for found in map(handbook_regex.match, handbook_lines)
    if found is not None
}

# Lines that are exactly one of these names are never recorded as banner entries.
elements = set((
    'Anemo',
    'Cryo',
    'Dendro',
    'Electro',
    'Geo',
    'Hydro',
    'Pyro',
))
# The parser always fills in the last dict of this list, so it is seeded with
# one empty dict for the first banner.
character_banners = [dict()]

def parse_date(line):
    """Return the start date found in a banner date-range line.

    Only the text before the first en dash ('–') is parsed, i.e. the start of
    a "start – end" range. If the line contains no en dash, the whole line is
    parsed.

    Args:
        line: Text containing a date, optionally followed by an en dash and
            further text (which is ignored).

    Returns:
        The datetime.datetime produced by dateutil's fuzzy parser for the
        start-date text.

    Raises:
        ValueError: If dateutil cannot find a date in the text (newer dateutil
            versions raise its ParserError subclass).
    """
    # `import dateutil` alone does not load the `parser` submodule on
    # python-dateutil < 2.9, so import it explicitly instead of relying on
    # some other package in the kernel having imported it already.
    from dateutil import parser as date_parser

    start_date = line.partition('–')[0].strip()
    return date_parser.parse(start_date, fuzzy=True)

# Build one dict per character banner from the lines of character.txt:
#   * a tab-indented line holds the date range and closes the current banner;
#   * blank lines and lines ending in '.png' are ignored;
#   * the first remaining line of a banner is its title;
#   * every further line that is not an element name is an entry.
for line in character_lines:
    banner = character_banners[-1]
    if line.startswith('\t'):
        banner['date'] = parse_date(line)
        character_banners.append({})
        continue
    # Strip like the weapon parser does, so stray whitespace or a trailing '\r'
    # cannot turn a name into one that is missing from the `ids` lookup.
    line = line.strip()
    if not line or line.endswith('.png'):
        # A blank line must not become a title or an entry: an empty entry
        # would later fail the ID lookup.
        continue
    if 'title' not in banner:
        banner['title'] = line
        continue
    if line not in elements:
        banner.setdefault('entries', []).append(line)
# Every date line appends a fresh dict, so the last dict is an unfinished extra.
character_banners.pop()

# Same accumulation scheme as for character banners: the last dict in the list
# is the banner currently being filled in.
weapon_banners = [{}]
# Entries are taken from lines containing "Weapon <name>.png".
weapon_regex = re.compile(r'Weapon (.+)\.png')

for raw_line in weapon_lines:
    banner = weapon_banners[-1]

    # A tab-indented line holds the date range and closes the current banner.
    if raw_line.startswith('\t'):
        banner['date'] = parse_date(raw_line)
        weapon_banners.append({})
        continue

    text = raw_line.strip()

    # Until a title is known, the first line that does not end in '.png' is the title.
    if 'title' not in banner:
        if not text.endswith('.png'):
            banner['title'] = text
        continue

    # Ignore empty/one-character lines and the card-corner image line.
    if len(text) < 2 or text == 'Card Corner.png':
        continue

    # Only create/extend 'entries' when the line actually names a weapon.
    found = weapon_regex.findall(text)
    if found:
        banner.setdefault('entries', []).extend(found)

# Every date line appends a fresh dict, so the last dict is an unfinished extra.
weapon_banners.pop()


# Resolve every banner entry name to its numeric handbook ID.
# The first `feat_count` IDs of a banner go to 'ids5' and the rest to 'ids4'
# (these are later written under the "IDs 5*" / "IDs 4*" columns); the counts
# are 1 for character banners and 2 for weapon banners.
for banners, feat_count in ((character_banners, 1), (weapon_banners, 2)):
    for banner in banners:
        # Fail with the offending names instead of the opaque
        # "int() argument must be ... not 'NoneType'" that int(ids.get(name)) gives.
        unknown = [name for name in banner['entries'] if name not in ids]
        if unknown:
            raise KeyError(
                f"No handbook ID for {unknown} in banner {banner.get('title')!r}"
            )
        banner['ids'] = [int(ids[name]) for name in banner['entries']]
        banner['ids5'] = banner['ids'][:feat_count]
        banner['ids4'] = banner['ids'][feat_count:]



In [38]:
# All banners of both kinds in one list, ordered by their parsed start date.
# The sort is stable, so banners sharing a date keep character-before-weapon order.
interspersed_banners = sorted(
    character_banners + weapon_banners,
    key=lambda banner: banner['date'],
)


# Write one tab-separated row per banner, in date order, below a header row.
# The ID and name columns are written as Python list literals.
# Explicit UTF-8 keeps the output independent of the platform's locale encoding
# (titles/names containing non-ASCII characters could otherwise fail to encode).
with open('banners.tsj', 'w', encoding='utf-8') as file:
    file.write('Date\tTitle\tIDs 5*\tIDs 4*\tNames\n')
    for c in interspersed_banners:
        file.write(f"{c['date']:%Y-%m-%d}\t{c['title']}\t{c['ids5']}\t{c['ids4']}\t{c['entries']}\n")