In [3]:
from pathlib import Path

In [4]:
data = Path('input.txt').read_text().split('\n')
data[:2]

['76xkqjzqtwonfour', 'sixthree8sixjxjqsjgjgp']

In [62]:
import pandas as pd
df = pd.DataFrame({'txt': data})

In [63]:
df['length'] = df.txt.str.len()

In [64]:
df.loc[df.length == 2].txt.tolist()

['33', 'c5', 'x8', '7n', 'l4']

In [65]:
mapping = {'one': '1', 'two': '2', 'three': '3', 'four': '4', 'five': '5', 'six': '6', 'seven': '7', 'eight': '8', 'nine': '9'}

In [66]:
def replace_with_mapping(x: str, mapping: dict) -> str:
    out = ''
    for i, _ in enumerate(x):
        # print(x[i+offset:], offset, out)
        matched = False
        for num_in, num_out in mapping.items():
            if x[i:].startswith(num_in):
                out += num_out
                matched = True
                break
        if i >= len(x):
            return out
        if not matched:
            out += x[i]
    return out

In [67]:
df['mapped_txt'] = df.txt.apply(replace_with_mapping, mapping=mapping)

In [68]:
replace_with_mapping('zoneight234', mapping)

'z1n8ight234'

In [69]:
[replace_with_mapping(x, mapping) for x in ['33', 'c5', 'x8', '7n', 'l4']]

['33', 'c5', 'x8', '7n', 'l4']

In [70]:
from typing import Any

def preprocess_data(data) -> list[Any]:
    updated_data = [replace_with_mapping(i, mapping) for i in data]
    return updated_data

updated_data = preprocess_data(data)
list(zip(data[:10], updated_data[:10]))

[('76xkqjzqtwonfour', '76xkqjzq2won4our'),
 ('sixthree8sixjxjqsjgjgp', '6ix3hree86ixjxjqsjgjgp'),
 ('38bgcczgtninefivefive', '38bgcczgt9ine5ive5ive'),
 ('sixthree4eight', '6ix3hree48ight'),
 ('nhp3zdc', 'nhp3zdc'),
 ('279four', '2794our'),
 ('vzxf4tqrljgxmthreejcr', 'vzxf4tqrljgxm3hreejcr'),
 ('bbm4twoeight8oneone3one', 'bbm42wo8ight81ne1ne31ne'),
 ('nineninesix6nine', '9ine9ine6ix69ine'),
 ('fourseven5seveneightsvtkcjdrfour', '4our7even57even8ightsvtkcjdr4our')]

In [71]:
print(replace_with_mapping('zoneight234', mapping) == 'z1ight234')
print(replace_with_mapping('zeightwone234', mapping) == 'z8w1234')
print(replace_with_mapping('one', mapping) == '1')
print(replace_with_mapping('oneoneoneoneone', mapping) == '11111')
print(replace_with_mapping('11111111', mapping) == '11111111')
print(replace_with_mapping('twonetwonetwone', mapping) == '2ne2ne2ne')
print(replace_with_mapping('eigh', mapping) == 'eigh')
print(replace_with_mapping('1one', mapping) == '11')


False
False
False
False
True
False
True
False


## Oneliner solution

In [72]:
def first_last_digit_single(txt: str) -> int:
    digits = [j for j in txt if j.isdigit()]
    result = int(digits[0] + digits[-1])
    return result

def first_last_digit_txt(data) -> list[int]:
    res = []
    for i in data:
        result = first_last_digit_single(i)
        res.append(result)
    # answer = [int(x[0] + x[-1]) for x in [[j for j in i if j.isnumeric()] for i in data]]
    return res

answer = first_last_digit_txt(updated_data)
sum(answer)

53348

In [78]:
df['digits'] = df.mapped_txt.apply(first_last_digit_single)
df.digits.sum()

53348

In [75]:
df.digits.sum()

53348

In [139]:
test_data = """two1nine
eightwothree
abcone2threexyz
xtwone3four
4nineeightseven2
zoneight234
7pqrstsixteen""".split('\n')
expected_output = [29, 83, 13, 24, 42, 14, 76]
expected_result = 281

clean_test_data = preprocess_data(test_data)
print(list(zip(test_data, clean_test_data)))
test_answer = first_last_digit_txt(clean_test_data)
print(test_answer)
print(sum(test_answer) == expected_result)
print(expected_output == test_answer)
sum(test_answer)

[('two1nine', '219'), ('eightwothree', '8wo3'), ('abcone2threexyz', 'abc123xyz'), ('xtwone3four', 'x2ne34'), ('4nineeightseven2', '49872'), ('zoneight234', 'z1ight234'), ('7pqrstsixteen', '7pqrst6teen')]
[29, 83, 13, 24, 42, 14, 76]
True
True


281