In [1]:
import re

In [2]:
# Honorifics that may appear in front of a person's name.
prefixes = [
    'Mr.',
    'Mrs.',
    'Ms.',
    'Miss',
    'Dr.',
    'Prof.',
    'Sir',
]

# Generational and professional markers that may trail a person's name.
suffixes = [
    'Jr.',
    'Sr.',
    'II',
    'III',
    'IV',
    'MD',
    'PhD',
]

In [3]:
#Function for Cleaning Names - Logic
def clean_name(name, name_prefixes=None, name_suffixes=None):
    """Split a full name into ``(first_name, last_name)``, dropping titles.

    Commas are treated as separators, leading honorifics and trailing
    suffixes are removed, and the remaining words are split into the first
    word (first name) and everything after it (last name).

    Titles are matched case-sensitively against whole whitespace-separated
    words, so "Missy" is not mistaken for "Miss" and "III" is not partially
    eaten by "II".

    Args:
        name: The raw name string.
        name_prefixes: Optional iterable of honorifics to strip from the
            front. Defaults to the module-level ``prefixes`` list.
        name_suffixes: Optional iterable of suffixes to strip from the end.
            Defaults to the module-level ``suffixes`` list.

    Returns:
        A ``(first_name, last_name)`` tuple of strings. ``last_name`` is
        ``''`` when only one word remains; both are ``''`` when nothing
        remains (empty input or a name made up solely of titles).
    """
    # Resolve the defaults at call time so later edits to the module-level
    # lists are picked up.
    leading_titles = set(prefixes if name_prefixes is None else name_prefixes)
    trailing_titles = set(suffixes if name_suffixes is None else name_suffixes)

    # Turn commas into spaces so "Smith,Jr." still yields a separate "Jr." word.
    words = name.replace(',', ' ').split()

    # Narrow the [start, end) window past any titles on either side.
    start, end = 0, len(words)
    while start < end and words[start] in leading_titles:
        start += 1
    while end > start and words[end - 1] in trailing_titles:
        end -= 1
    words = words[start:end]

    if not words:
        # Nothing left to split; avoid indexing into an empty list.
        return '', ''

    first_name, *remaining = words
    return first_name, ' '.join(remaining)

In [4]:
#Function for Cleaning Phone Number
def clean_phone_number_with_extension(phone):
    """Normalise a phone string into country code, number and extension.

    Args:
        phone: Raw phone string, e.g. ``"+1-234-567-8901x123"`` or
            ``"(234) 567-8901 ext. 42"``.

    Returns:
        A ``(country_code, phone_number, extension)`` tuple of strings.
        ``country_code`` defaults to ``'1'`` unless the number starts with
        ``'+'``. ``phone_number`` is the last ten digits formatted as
        ``(xxx)-xxx-xxxx``; shorter inputs are formatted as-is and will not
        fill every group. ``extension`` is ``'NA'`` when none is present.
    """
    # Step 1: Split off the extension. Matching is case-insensitive so 'x',
    # 'X' and 'ext'/'ext.' all work; 'ext' is tried first so its 'x' is not
    # taken as the marker on its own.
    main_part, *extension_part = re.split(
        r'\s*(?:ext\.?|x)\s*', phone, maxsplit=1, flags=re.IGNORECASE
    )
    extension = (extension_part[0].strip() if extension_part else '') or "NA"

    # Step 2: Note whether the number is written in '+<country code>' form,
    # then keep only the digits.
    has_plus = re.match(r'[\s(]*\+', main_part) is not None
    digits = re.sub(r'[^0-9]', '', main_part)

    # Step 3: Work out the country code.
    if has_plus and len(digits) > 10:
        # Everything in front of the 10-digit national number is the code,
        # so multi-digit codes such as '44' survive intact.
        country_code = digits[:-10]
    elif has_plus and digits:
        # Too short to tell where the code ends; treat the first digit as
        # the country code.
        country_code = digits[0]
        digits = digits[1:]
    else:
        # No explicit country code: assume '1' (US/Canada).
        country_code = '1'

    # Step 4: Keep at most the last 10 digits. Without a '+', this drops a
    # leading '1' or '001' only when 10 digits follow it, so a bare 10-digit
    # number is never shortened.
    national_number = digits[-10:]

    # Step 5: Format the phone number to (xxx)-xxx-xxxx
    phone_number = (
        f"({national_number[:3]})-{national_number[3:6]}-{national_number[6:]}"
    )

    return country_code, phone_number, extension