In [185]:
with open('19.txt', 'r') as f:
    data = f.read()

In [28]:
def parse(data):
    rules, msgs = data.strip().split('\n\n')
    msgs = msgs.split('\n')
    rules = [r.split(": ") for r in rules.split('\n')]
    rules = {a:b for a,b in rules}
    return rules, msgs

In [29]:
data = """0: 4 1 5
1: 2 3 | 3 2
2: 4 4 | 5 5
3: 4 5 | 5 4
4: "a"
5: "b"

ababbb
bababa
abbbab
aaabbb
aaaabbb"""

In [30]:
parse(data)

({'0': '4 1 5',
  '1': '2 3 | 3 2',
  '2': '4 4 | 5 5',
  '3': '4 5 | 5 4',
  '4': '"a"',
  '5': '"b"'},
 ['ababbb', 'bababa', 'abbbab', 'aaabbb', 'aaaabbb'])

In [31]:
import re

In [40]:
def match_rules(rules, msgs):
    regex = {}
    def expand_rule(i):
        if i not in regex:
            r = rules[i]
            if '"' in r:
                r = r.strip('"')
            else:
                options = r.split(' | ')
                options = ["".join(expand_rule(j) for j in op.split(' ')) for op in options]
                if len(options) == 2:
                    r = f"({options[0]}|{options[1]})"
                else:
                    r = options[0]
            regex[i] = r
        return regex[i]
    pattern = re.compile(f"^{expand_rule('0')}$")
    print(pattern)
    return sum(bool(pattern.match(m)) for m in msgs)

In [43]:
match_rules(*parse(data))

re.compile('^(b((a(a(b(a(ba|bb)|b(ba|ab))|a((aa|bb)a|aab))|b(a(aa|ba)(b|a)|b(a(bb|a(b|a))|baa)))|b(a(a(b(ba|bb)|a(aa|ba))|b(aab|aba))|b(b(b(aa|ba)|a(ba|a(b|a)))|a(ab|bb)a)))b|((b((abb|(ba|ab)a)a|((ba|(b|a)b)b|(b)


126

In [175]:
data = """42: 9 14 | 10 1
9: 14 27 | 1 26
10: 23 14 | 28 1
1: "a"
11: 42 31
5: 1 14 | 15 1
19: 14 1 | 14 14
12: 24 14 | 19 1
16: 15 1 | 14 14
31: 14 17 | 1 13
6: 14 14 | 1 14
2: 1 24 | 14 4
0: 8 11
13: 14 3 | 1 12
15: 1 | 14
17: 14 2 | 1 7
23: 25 1 | 22 14
28: 16 1
4: 1 1
20: 14 14 | 1 15
3: 5 14 | 16 1
27: 1 6 | 14 18
14: "b"
21: 14 1 | 1 14
25: 1 1 | 1 14
22: 14 14
8: 42
26: 14 22 | 1 20
18: 15 15
7: 14 5 | 1 21
24: 14 1

abbbbbabbbaaaababbaabbbbabababbbabbbbbbabaaaa
bbabbbbaabaabba
babbbbaabbbbbabbbbbbaabaaabaaa
aaabbbbbbaaaabaababaabababbabaaabbababababaaa
bbbbbbbaaaabbbbaaabbabaaa
bbbababbbbaaaaaaaabbababaaababaabab
ababaaaaaabaaab
ababaaaaabbbaba
baabbaaaabbaaaababbaababb
abbbbabbbbaaaababbbbbbaaaababb
aaaaabbaabaaaaababaa
aaaabbaaaabbaaa
aaaabbaabbaaaaaaabbbabbbaaabbaabaaa
babaaabbbaaabaababbaabababaaab
aabbbbbaabbbaaaaaabbbbbababaaaaabbaaabba"""

In [186]:
def match_rules(rules, msgs, max_depth):
    regex = {}
    def expand_rule(i):
        if i not in regex:
            r = rules[i]
            if '"' in r:
                r = r.strip('"')
            else:
                options = r.split(' | ')
                options = ["".join(expand_rule(j) for j in op.split(' ')) for op in options]
                if len(options) == 2:
                    r = f"({options[0]}|{options[1]})"
                else:
                    r = options[0]
            regex[i] = r
        return regex[i]
    expand_rule('0')
    regex['0'] = f"({regex['42']})+({regex['31']})+"        
    pattern = re.compile(f"^{regex['0']}$")
    candidates = [m for m in msgs if pattern.match(m)]
    total = 0
    for d in range(1, max_depth+1):
        if len(candidates) == 0:
            break

        regex['0'] = f"({regex['42']})+" # 8 part
        regex['0'] += f"({regex['42']}){{{d}}}({regex['31']}){{{d}}}" # 11 part
        pattern = re.compile(f"^{regex['0']}$")
        unmatched = []
        for c in candidates:
            if pattern.match(c):
                total += 1
            else:
                unmatched.append(c)
        candidates = unmatched
        
    return total
#     print(pattern)
#     return sum(bool(pattern.match(m)) for m in msgs)

In [187]:
match_rules(*parse(data), 10)

282

# CFG solution using pyformlang

In [123]:
from pyformlang.cfg import Production, Variable, Terminal, CFG
def build_cfg(rules):
    variables = {v: Variable(v) for v in rules}
    terminals = {t: Terminal(t) for t in [x.strip('"') for x in rules.values() if '"' in x]}
    productions = []
    for v, rule in rules.items():
        if '"' in rule:
            p = Production(variables[v], [terminals[rule.strip('"')]])
            productions.append(p)
        else:
            for part in rule.split(' | '):
                body = [variables[v] for v in part.split(' ')] 
                p = Production(variables[v], body)
                productions.append(p)
    cfg = CFG(
        list(variables.values()),
        list(terminals.values()),
        variables['0'],
        productions
    )
    return cfg, terminals
    

In [109]:
def match_rules(rules, msgs):
    cfg, terminals = build_cfg(rules)
    return sum(cfg.contains([terminals[c] for c in msg]) for msg in msgs)

In [110]:
[a if x == 'a' else b for x in "bbabbbbaabaabba"]

[Terminal(b),
 Terminal(b),
 Terminal(a),
 Terminal(b),
 Terminal(b),
 Terminal(b),
 Terminal(b),
 Terminal(a),
 Terminal(a),
 Terminal(b),
 Terminal(a),
 Terminal(a),
 Terminal(b),
 Terminal(b),
 Terminal(a)]

In [111]:
cfg.contains([a if x == 'a' else b for x in "ababaaaaaabaaab"])

False

In [113]:
match_rules(*parse(data))

3

In [114]:
pda = cfg.to_pda()

In [69]:
pda.state

{State(q)}

In [70]:
cfg.contains('a')

False

In [119]:
def parse2(data):
    data = data.replace('8: 42', '8: 42 | 42 8').replace('11: 42 31', '11: 42 31 | 42 11 31')
    return parse(data)

In [122]:
match_rules(*parse2(data))

282