In [1]:
import pandas as pd 
import numpy as np
import re

In [2]:
# Location of the puzzle input file (one text value per line).
INPUT_PATH = '/mnt/Puzzle_1_Input.txt'
header = ['Values']

# Read every line verbatim as text:
#   * dtype=str stops pandas from converting an all-numeric column to integers
#     (which would drop leading zeros);
#   * keep_default_na=False stops tokens such as "NA" or "null" from being
#     turned into NaN.
part1 = pd.read_csv(
    INPUT_PATH,
    header=None,
    names=header,
    dtype=str,
    keep_default_na=False,
)
# Independent copy so that Puzzle 2 can start again from the unmodified input.
part2 = part1.copy()

part1

Unnamed: 0,Values
0,kjrqmzv9mmtxhgvsevenhvq7
1,four2tszbgmxpbvninebxns6nineqbqzgjpmpqr
2,rkzlnmzgnk91zckqprrptnthreefourtwo
3,fouronevzkbnzm6seven47
4,zphgdcznqsm2
...,...
995,plxfoursc41five
996,sixfour9fivernqcknsbgpfrzmgz3
997,onesevenf78threedzvlm1
998,xlkdlhlk23four


In [3]:
# Summarise the loaded frame: row count, non-null count and column dtype.
part1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Values  1000 non-null   object
dtypes: object(1)
memory usage: 7.9+ KB


### Puzzle 1

In [4]:
# Cast the column to str so the string operations applied afterwards always see text values.
part1['Values'] = part1['Values'].astype(str)

In [5]:
# Strip every non-digit character (regex \D), leaving only the digits of each line.
# The pattern is written as a raw string so the backslash reaches the regex engine
# unchanged instead of being parsed as an (invalid) Python string escape.
part1['Values'] = part1['Values'].str.replace(r'\D', '', regex=True)

In [6]:
# Display the frame now that the non-digit characters have been stripped from Values.
part1

Unnamed: 0,Values
0,97
1,26
2,91
3,647
4,2
...,...
995,41
996,93
997,781
998,23


In [7]:
def digits_only(value):
    """Return the first and last characters of ``value`` as a 2-tuple.

    ``value`` is converted with ``str()`` first. Despite the name, nothing is
    filtered here: in this notebook the function is applied to strings that
    have already been reduced to digits. A one-character input yields that
    character twice.

    Raises:
        IndexError: if ``str(value)`` is empty.
    """
    text = str(value)
    return text[0], text[-1]

In [8]:
# Compute the (first, last) character pair for each row once, then unpack the
# pair into two separate columns.
part1_pairs = part1['Values'].apply(digits_only)
part1['first_digit'] = part1_pairs.apply(lambda pair: pair[0])
part1['last_digit'] = part1_pairs.apply(lambda pair: pair[1])

part1

Unnamed: 0,Values,first_digit,last_digit
0,97,9,7
1,26,2,6
2,91,9,1
3,647,6,7
4,2,2,2
...,...,...,...
995,41,4,1
996,93,9,3
997,781,7,1
998,23,2,3


In [9]:
# Check the dtypes of the newly added first_digit / last_digit columns.
part1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Values       1000 non-null   object
 1   first_digit  1000 non-null   object
 2   last_digit   1000 non-null   object
dtypes: object(3)
memory usage: 23.6+ KB


In [10]:
# Make sure both digit columns hold strings, so that "+" below concatenates
# them instead of adding them numerically.
for digit_column in ('first_digit', 'last_digit'):
    part1[digit_column] = part1[digit_column].astype(str)

# Two-character string: first digit followed by last digit.
part1["Calibration_Value"] = part1["first_digit"] + part1["last_digit"]

part1

Unnamed: 0,Values,first_digit,last_digit,Calibration_Value
0,97,9,7,97
1,26,2,6,26
2,91,9,1,91
3,647,6,7,67
4,2,2,2,22
...,...,...,...,...
995,41,4,1,41
996,93,9,3,93
997,781,7,1,71
998,23,2,3,23


In [11]:
# Convert the concatenated digit strings to integers so they can be summed.
part1['Calibration_Value'] = part1['Calibration_Value'].astype(int)

In [12]:
# Puzzle 1 result: the total of all per-row calibration values.
calibration_value_sum = part1['Calibration_Value'].sum()

print(calibration_value_sum)

55447


55,447 is the correct answer for Puzzle 1

### Puzzle 2

We need to adjust the approach so that digits spelled out as words (e.g. `seven`) are counted as well.

In [13]:
# part2 is the copy of the raw input taken right after loading, before any Puzzle 1 changes.
part2

Unnamed: 0,Values
0,kjrqmzv9mmtxhgvsevenhvq7
1,four2tszbgmxpbvninebxns6nineqbqzgjpmpqr
2,rkzlnmzgnk91zckqprrptnthreefourtwo
3,fouronevzkbnzm6seven47
4,zphgdcznqsm2
...,...
995,plxfoursc41five
996,sixfour9fivernqcknsbgpfrzmgz3
997,onesevenf78threedzvlm1
998,xlkdlhlk23four


In [14]:
# Summarise the untouched copy: row count, non-null count and column dtype.
part2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Values  1000 non-null   object
dtypes: object(1)
memory usage: 7.9+ KB


In [15]:
# A plain word -> digit swap (e.g. 'one' -> '1') consumes letters that two
# overlapping words share, as in 'eightwo', so one of the two digits is lost and
# the calibration value comes out wrong. The substitutions below avoid that.

def replace_numbers(text):
    """Embed a digit inside every spelled-out number found in ``text``.

    Each word is replaced by its first letter, its digit and its last letter
    ('eight' -> 'e8t'). Keeping the outer letters means a neighbouring word
    that overlaps by one character can still be matched by a later
    substitution: 'eightwo' becomes 'e8t2o'.

    Substitutions are applied one word at a time, in the order listed.
    Returns the rewritten string; letters are not removed here.
    """
    # NOTE(review): 'zero' is handled as well -- confirm the puzzle statement
    # actually counts it as a spelled-out digit.
    substitutions = (
        ('zero', 'z0o'),
        ('one', 'o1e'),
        ('two', 't2o'),
        ('three', 't3e'),
        ('four', 'f4r'),
        ('five', 'f5e'),
        ('six', 's6x'),
        ('seven', 's7n'),
        ('eight', 'e8t'),
        ('nine', 'n9e'),
    )

    for spelled, padded_digit in substitutions:
        text = text.replace(spelled, padded_digit)

    return text

In [16]:
# replace_numbers returns one plain string per row, so the resulting Series is
# assigned to the new column directly. Expanding each string through pd.Series
# would only build a throwaway one-column DataFrame.
part2['Values_Digits'] = part2['Values'].apply(replace_numbers)

part2

Unnamed: 0,Values,Values_Digits
0,kjrqmzv9mmtxhgvsevenhvq7,kjrqmzv9mmtxhgvs7nhvq7
1,four2tszbgmxpbvninebxns6nineqbqzgjpmpqr,f4r2tszbgmxpbvn9ebxns6n9eqbqzgjpmpqr
2,rkzlnmzgnk91zckqprrptnthreefourtwo,rkzlnmzgnk91zckqprrptnt3ef4rt2o
3,fouronevzkbnzm6seven47,f4ro1evzkbnzm6s7n47
4,zphgdcznqsm2,zphgdcznqsm2
...,...,...
995,plxfoursc41five,plxf4rsc41f5e
996,sixfour9fivernqcknsbgpfrzmgz3,s6xf4r9f5ernqcknsbgpfrzmgz3
997,onesevenf78threedzvlm1,o1es7nf78t3edzvlm1
998,xlkdlhlk23four,xlkdlhlk23f4r


In [17]:
# Check that Values_Digits was added and is fully populated.
part2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 2 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Values         1000 non-null   object
 1   Values_Digits  1000 non-null   object
dtypes: object(2)
memory usage: 15.8+ KB


In [18]:
# Cast to str so the string operations applied afterwards always see text values.
part2['Values_Digits'] = part2['Values_Digits'].astype(str)

In [19]:
# Strip every non-digit character (regex \D), leaving only the digits of each line.
# The pattern is written as a raw string so the backslash reaches the regex engine
# unchanged instead of being parsed as an (invalid) Python string escape.
part2['Values_Digits'] = part2['Values_Digits'].str.replace(r'\D', '', regex=True)

In [20]:
# Compute the (first, last) character pair for each row once, then unpack the
# pair into two separate columns.
part2_pairs = part2['Values_Digits'].apply(digits_only)
part2['first_digit'] = part2_pairs.apply(lambda pair: pair[0])
part2['last_digit'] = part2_pairs.apply(lambda pair: pair[1])

part2

Unnamed: 0,Values,Values_Digits,first_digit,last_digit
0,kjrqmzv9mmtxhgvsevenhvq7,977,9,7
1,four2tszbgmxpbvninebxns6nineqbqzgjpmpqr,42969,4,9
2,rkzlnmzgnk91zckqprrptnthreefourtwo,91342,9,2
3,fouronevzkbnzm6seven47,416747,4,7
4,zphgdcznqsm2,2,2,2
...,...,...,...,...
995,plxfoursc41five,4415,4,5
996,sixfour9fivernqcknsbgpfrzmgz3,64953,6,3
997,onesevenf78threedzvlm1,177831,1,1
998,xlkdlhlk23four,234,2,4


In [21]:
# Make sure both digit columns hold strings, so that "+" below concatenates
# them instead of adding them numerically.
for digit_column in ('first_digit', 'last_digit'):
    part2[digit_column] = part2[digit_column].astype(str)

# Two-character string: first digit followed by last digit.
part2["Calibration_Value"] = part2["first_digit"] + part2["last_digit"]

part2

Unnamed: 0,Values,Values_Digits,first_digit,last_digit,Calibration_Value
0,kjrqmzv9mmtxhgvsevenhvq7,977,9,7,97
1,four2tszbgmxpbvninebxns6nineqbqzgjpmpqr,42969,4,9,49
2,rkzlnmzgnk91zckqprrptnthreefourtwo,91342,9,2,92
3,fouronevzkbnzm6seven47,416747,4,7,47
4,zphgdcznqsm2,2,2,2,22
...,...,...,...,...,...
995,plxfoursc41five,4415,4,5,45
996,sixfour9fivernqcknsbgpfrzmgz3,64953,6,3,63
997,onesevenf78threedzvlm1,177831,1,1,11
998,xlkdlhlk23four,234,2,4,24


In [22]:
# Convert the concatenated digit strings to integers so they can be summed.
part2['Calibration_Value'] = part2['Calibration_Value'].astype(int)

In [23]:
# Puzzle 2 result: the total of all per-row calibration values.
calibration_value_sum_2 = part2['Calibration_Value'].sum()

print(calibration_value_sum_2)

54706


54,706 is the correct answer for Puzzle 2