In [4]:
import re


def soundex(name: str) -> str:
    # Step 1: Keep the first letter and convert the rest to lowercase
    first_letter = name[0].upper()
    name = name.lower()

    # Step 2: Remove non-initial vowels and certain consonants
    name = re.sub(r'[aeiouwhy]', '', name[1:])

    # Step 3: Replace letters with corresponding numbers
    mappings = {
        'b': '1', 'f': '1', 'p': '1', 'v': '1',
        'c': '2', 'g': '2', 'j': '2', 'k': '2', 'q': '2', 's': '2', 'x': '2', 'z': '2',
        'd': '3', 't': '3',
        'l': '4',
        'm': '5', 'n': '5',
        'r': '6'
    }

    encoded = ''.join(mappings.get(char, '') for char in name)

    # Step 4: Remove consecutive duplicate numbers
    encoded = re.sub(r'(\d)\1+', r'\1', encoded)

    # Step 5: Format to Letter Digit Digit Digit
    soundex_code = (first_letter + encoded + "000")[:4]

    return soundex_code

In [6]:
if __name__ == "__main__":
    names = ["Abubakar", "Ibrahim", "Waziri", "Saudi", "Arabia", "Nigeria", "Britsh", "Columbia", "United", "States", "America", "Canada", "Germany", "France", "Italy", "Spain", "Portugal", "Brazil", "Argentina", "Russia", "China", "Japan", "Australia", "India", "Pakistan", "Bangladesh", "Indonesia", "South", "Africa", "Egypt", "Morocco", "Algeria", "Tunisia", "Libya", "Sudan", "Ethiopia", "Kenya", "Ghana", "Cameroon", "Senegal", "Mali", "Niger", "Chad", "Togo", "Benin", "Burkina", "Faso", "Guinea", "Sierra", "Leone", "Liberia", "Ivory", "Coast", "Gambia", "Guinea-Bissau", "Cape", "Verde", "Mauritania", "Mauritius", "Seychelles", "Comoros", "Madagascar", "Mozambique", "Zimbabwe", "Zambia", "Angola", "Namibia", "Botswana", "Lesotho", "Swaziland", "South", "Sudan", "Uganda", "Rwanda", "Burundi", "Tanzania", "Malawi", "Zambia", "Zimbabwe", "Mozambique", "Madagascar", "Comoros", "Seychelles", "Mauritius", "Mauritania", "Cape", "Verde", "Guinea-Bissau", "Gambia", "Senegal", "Guinea", "Sierra", "Leone", "Liberia", "Ivory", "Coast", "Burkina", "Faso", "Benin", "Togo", "Chad", "Niger", "Mali", "Senegal", "Cameroon", "Ghana", "Kenya", "Ethiopia", "Sudan", "Libya", "Tunisia", "Algeria", "Morocco", "Egypt", "South", "Africa", "Indonesia", "Bangladesh", "Pakistan", "India", "Australia", "Japan", "China", "Russia", "Argentina", "Brazil", "Portugal", "Spain", "Italy", "France", "Germany", "Canada", "America", "States", "]United", "Columbia", "Britsh", "Nigeria", "Arabia", "Saudi", "Waziri", "Ibrahim", "Abubakar"]
    for name in names:
        print(f"{name}: {soundex(name)}")

Abubakar: A126
Ibrahim: I165
Waziri: W260
Saudi: S300
Arabia: A610
Nigeria: N260
Britsh: B632
Columbia: C451
United: U530
States: S320
America: A562
Canada: C530
Germany: G650
France: F652
Italy: I340
Spain: S150
Portugal: P632
Brazil: B624
Argentina: A625
Russia: R200
China: C500
Japan: J150
Australia: A236
India: I530
Pakistan: P235
Bangladesh: B524
Indonesia: I535
South: S300
Africa: A162
Egypt: E213
Morocco: M620
Algeria: A426
Tunisia: T520
Libya: L100
Sudan: S350
Ethiopia: E310
Kenya: K500
Ghana: G500
Cameroon: C565
Senegal: S524
Mali: M400
Niger: N260
Chad: C300
Togo: T200
Benin: B500
Burkina: B625
Faso: F200
Guinea: G500
Sierra: S600
Leone: L500
Liberia: L160
Ivory: I160
Coast: C230
Gambia: G510
Guinea-Bissau: G512
Cape: C100
Verde: V630
Mauritania: M635
Mauritius: M632
Seychelles: S242
Comoros: C562
Madagascar: M326
Mozambique: M251
Zimbabwe: Z510
Zambia: Z510
Angola: A524
Namibia: N510
Botswana: B325
Lesotho: L230
Swaziland: S245
South: S300
Sudan: S350
Uganda: U253
Rwanda: R5