In [19]:
import re

def clean_contour_label(label):
    """
    Normalize a contour label into underscore-separated, capitalized words.

    Steps:
    1. Convert all characters to lowercase.
    2. Replace every special character (anything that is neither a word
       character nor whitespace) with a space.
    3. Split on runs of whitespace, which also discards any leading or
       trailing separators.
    4. Capitalize the first character of each word.
    5. Join the words with single underscores.

    Underscores already present in the input count as word characters, so
    they are kept and do not start a new word
    (e.g. "Contour_Label" -> "Contour_label").

    Args:
    label (str): The input contour label.

    Returns:
    str: The cleaned contour label; an empty string if the input contains
    no word characters.
    """

    # Convert all characters to lowercase
    label = label.lower()

    # Replace special characters with spaces
    label = re.sub(r'[^\w\s]', ' ', label)

    # Split on whitespace runs. With no argument, split() never yields empty
    # strings, so a label that starts or ends with special characters
    # (e.g. "(GTV) primary") does not gain a stray leading/trailing underscore.
    words = label.split()

    # Capitalize each word and join the words with underscores
    return '_'.join(word.capitalize() for word in words)


In [20]:
# Sample labels exercising the cleaner: an existing underscore with trailing
# punctuation, surrounding/repeated spaces, all-uppercase, all-lowercase,
# special characters only, and a mix of repeated spaces and punctuation.
test_cases = [
    "Contour_Label with Special Characters!",
    "  Extra    Spaces    ",
    "UPPER CASE",
    "lowercase",
    "!@#$%^&*()",
    "Multiple       Spaces     and         Special    Characters!!!",
]

# Show each input next to its cleaned form, with a blank line between pairs.
for label in test_cases:
    cleaned_label = clean_contour_label(label)
    print(
        f"Original Label: {label}",
        f"Cleaned Label:  {cleaned_label}\n",
        sep="\n",
    )


Original Label: Contour_Label with Special Characters!
Cleaned Label:  Contour_label_With_Special_Characters

Original Label:   Extra    Spaces    
Cleaned Label:  Extra_Spaces

Original Label: UPPER CASE
Cleaned Label:  Upper_Case

Original Label: lowercase
Cleaned Label:  Lowercase

Original Label: !@#$%^&*()
Cleaned Label:  

Original Label: Multiple       Spaces     and         Special    Characters!!!
Cleaned Label:  Multiple_Spaces_And_Special_Characters



In [32]:
# The class [\D\S] accepts any character that is a non-digit OR a
# non-whitespace character. No character is both a digit and whitespace,
# so this class matches every single character of the text, including the
# spaces and the digit '1'.
pattern = r'[\D\S]'
text = "this is the1"

# One list entry per matched character.
output = re.compile(pattern).findall(text)
output

['t', 'h', 'i', 's', ' ', 'i', 's', ' ', 't', 'h', 'e', '1']