## Soundex

Let's create a function that computes the Soundex code of any given string.

In [1]:
# Letter -> digit table, built once at import time instead of on every call.
_SOUNDEX_CODES = {
    letter: code
    for code, letters in (
        ("1", "BFPV"),
        ("2", "CGJKQSXZ"),
        ("3", "DT"),
        ("4", "L"),
        ("5", "MN"),
        ("6", "R"),
    )
    for letter in letters
}
# Vowels (Y included) are never encoded, but they separate two equal codes.
_SOUNDEX_SEPARATORS = frozenset("AEIOUY")
# Total length of a code: one letter followed by three digits.
_SOUNDEX_LENGTH = 4


def get_soundex(token):
    """Return the four-character American Soundex code for ``token``.

    The first character is kept (upper-cased) and the remaining letters are
    mapped to digits, following the standard rules:

    * adjacent letters with the same digit are encoded once, and this also
      applies to the letter right after the retained first letter
      ("Pfister" -> "P236");
    * a vowel (A, E, I, O, U, Y) between two letters with the same digit
      makes the second one count again ("Tymczak" -> "T522");
    * H and W are skipped and do not break a run ("Ashcraft" -> "A261");
    * characters with no Soundex digit (digits, punctuation, non-ASCII
      letters) are ignored after the first position.

    Args:
        token: The string to encode.

    Returns:
        The code, truncated or zero-padded to four characters, or an empty
        string when ``token`` is empty.
    """
    token = token.upper()
    if not token:
        # Nothing to encode; avoid the IndexError on token[0].
        return ""

    first = token[0]
    encoded = [first]
    # Seed with the first letter's own digit so an immediate repeat of the
    # same sound is suppressed ('' when the first character has no digit).
    previous = _SOUNDEX_CODES.get(first, "")

    for char in token[1:]:
        code = _SOUNDEX_CODES.get(char)
        if code is None:
            if char in _SOUNDEX_SEPARATORS:
                # A vowel ends the current run of identical digits.
                previous = ""
            # H, W and unmapped characters leave the run untouched.
            continue
        if code != previous:
            encoded.append(code)
            if len(encoded) == _SOUNDEX_LENGTH:
                break
        previous = code

    # Pad short codes with zeros.
    return "".join(encoded).ljust(_SOUNDEX_LENGTH, "0")

Let's see what the Soundex codes of 'Bombay' and 'Bambai' are.

In [2]:
# Bare expression: the notebook echoes the returned string's repr as the cell output.
get_soundex("UpGrad")

'U126'

In [5]:
# Print the code of each spelling on its own line so they can be compared.
for city in ("Bombay", "Bambai"):
    print(get_soundex(city))

B510
B510


Let's see the Soundex codes of 'Aggrawal', 'Agrawal', 'Aggarwal' and 'Agarwal'.

In [6]:
# Print the code of each spelling variant of the surname, one per line.
for surname in ("Aggrawal", "Agrawal", "Aggarwal", "Agarwal"):
    print(get_soundex(surname))

A264
A264
A264
A264
