# In [None]:
import pandas as pd
import re
from datetime import datetime

# 1. Cleanse Claim Amounts
def cleanse_claim_amount(amount_series):
    """
    Convert raw claim-amount values to floats.

    Dollar signs and commas are removed from string values before the
    conversion. Missing values (None/NaN) and cells that are empty or
    whitespace-only once those symbols are removed become NaN.

    Args:
        amount_series: pandas Series of amounts, e.g. '$1,000.00' or '500'.

    Returns:
        A float Series with the same index as the input.

    Raises:
        ValueError: if a non-blank value still cannot be parsed as a number.
    """
    # Raw string: '\$' inside a plain literal is an invalid escape sequence.
    without_symbols = amount_series.replace(r'[\$,]', '', regex=True)
    # float('') raises, so blank cells are treated as missing rather than
    # failing the conversion of the whole column.
    is_blank = without_symbols.astype(str).str.strip() == ''
    return without_symbols.mask(is_blank).astype(float)

# 2. Standardize ICD-10 Codes
def standardize_icd10(code_series):
    """
    Normalize ICD-10 codes: uppercase, with a period after the third character.

    Any existing period is removed first, so codes that already contain one
    are re-punctuated consistently. Codes of three characters or fewer get
    no period. Missing values (None/NaN) are passed through unchanged.

    Examples: 'a123' -> 'A12.3', ' b45.2 ' -> 'B45.2', 'j45' -> 'J45'

    Args:
        code_series: pandas Series of raw ICD-10 codes.

    Returns:
        A Series of normalized code strings with the same index.
    """
    def format_code(code):
        if pd.isna(code):
            return code
        # str() tolerates non-string cells; strip() prevents surrounding
        # whitespace from shifting where the period is inserted.
        normalized = str(code).strip().upper().replace('.', '')
        if len(normalized) > 3:
            return f"{normalized[:3]}.{normalized[3:]}"
        return normalized
    return code_series.apply(format_code)

# 3. Validate and Convert Dates
def standardize_date(date_series):
    """
    Parse date values written in several known formats into pandas Timestamps.

    Each value is tried against these formats, in order, and the first one
    that parses wins: YYYY-MM-DD, MM/DD/YYYY, DD-MM-YYYY, DD/MM/YYYY.
    Because MM/DD/YYYY is tried before DD/MM/YYYY, an ambiguous slash date
    such as '03/04/2022' is read month-first (March 4th).

    Args:
        date_series: pandas Series of raw date values (typically strings).

    Returns:
        A Series of Timestamps with the same index; missing values and
        values matching none of the formats become NaT. Note that the
        result holds datetime values, not formatted strings.
    """
    # Built once per call instead of once per element.
    known_formats = ('%Y-%m-%d', '%m/%d/%Y', '%d-%m-%Y', '%d/%m/%Y')

    def parse_date(date):
        if pd.isna(date):
            return pd.NaT
        if isinstance(date, str):
            # Surrounding whitespace would otherwise defeat the format match.
            date = date.strip()
        for fmt in known_formats:
            try:
                return pd.to_datetime(date, format=fmt)
            except (ValueError, TypeError, OverflowError):
                # Format mismatch or unparsable value: try the next format.
                # A bare except here would also swallow KeyboardInterrupt.
                continue
        return pd.NaT  # no known format matched
    return date_series.apply(parse_date)

# ============================
# ✅ Example Usage
# ============================

# Sample raw data
# Build a small frame of raw, inconsistently formatted claim records.
df = pd.DataFrame(
    {
        'ClaimAmount': ['$1,000.00', '500', '$2,345', None],
        'ICD10Code': ['a123', 'B45.2', 'c789', None],
        'ServiceDate': ['01/15/2022', '2022-02-01', '15-03-2022', None],
    }
)

# Run each raw column through its cleansing helper and append the results
# as new columns (in this order) next to the untouched originals.
df = df.assign(
    CleanedAmount=cleanse_claim_amount(df['ClaimAmount']),
    StandardICD10=standardize_icd10(df['ICD10Code']),
    FormattedDate=standardize_date(df['ServiceDate']),
)

print(df)
