# Soundex Algorithm

In [1]:
def soundex(name: str) -> str:
    """
    Compute a simplified Soundex code for a given name.

    The Soundex algorithm indexes words (mainly names) by their sound when
    pronounced in English. This implementation follows these steps:

    1. Upper-case the name and keep its first character unchanged.
    2. Drop every later character that has no Soundex digit
       ('a', 'e', 'h', 'i', 'o', 'u', 'w', 'y', and anything that is not a
       letter in the table below).
    3. Replace the remaining letters with digits:
        b, f, p, v             -> 1
        c, g, j, k, q, s, x, z -> 2
        d, t                   -> 3
        l                      -> 4
        m, n                   -> 5
        r                      -> 6
    4. Collapse runs of identical digits into a single digit.
    5. Pad with zeros or truncate so the code is exactly four characters.

    Note:
        Because unmapped characters are dropped *before* duplicates are
        collapsed, equal codes separated only by vowels are merged, and the
        code of the first character itself never takes part in the
        duplicate check.

    Args:
        name (str): The input name to encode.

    Returns:
        str: The four-character Soundex code, or an empty string when
        ``name`` is empty.
    """
    soundex_mapping = {
        "b": "1", "f": "1", "p": "1", "v": "1",
        "c": "2", "g": "2", "j": "2", "k": "2",
        "q": "2", "s": "2", "x": "2", "z": "2",
        "d": "3", "t": "3",
        "l": "4",
        "m": "5", "n": "5",
        "r": "6",
    }

    # Normalise case first so the retained first character is upper-case.
    name = name.upper()

    # An empty name has no first character to keep; return an empty code
    # instead of failing with IndexError.
    if not name:
        return ""

    first_letter = name[0]

    # Encode the tail and collapse adjacent duplicates in a single pass.
    # Only digits are compared with each other, so a first character that
    # happens to look like a digit can never swallow the first real code.
    digits = []
    for char in name[1:]:
        digit = soundex_mapping.get(char.lower())
        if digit is None:
            # Vowels, h/w/y and non-letters carry no code and are skipped.
            continue
        if not digits or digits[-1] != digit:
            digits.append(digit)

    # Pad with zeros, then cut to the fixed length of four characters.
    return (first_letter + "".join(digits) + "000")[:4]


if __name__ == "__main__":
    # Demo run: similar-sounding spellings should share one code.
    names = ["Jurafsky", "Jarofsky", "Jarovsky", "Einstein", "Alex"]

    print("Soundex codes:")
    # map() is lazy, so each name is encoded right before its line is printed.
    for name, code in zip(names, map(soundex, names)):
        print(f"{name} → {code}")


Soundex codes:
Jurafsky → J612
Jarofsky → J612
Jarovsky → J612
Einstein → E523
Alex → A420
