# In [None]:
import pandas as pd
import numpy as np

def load_data(filepath):
    """Read the CSV at ``filepath`` and return its contents as a DataFrame.

    ``filepath`` is handed to ``pandas.read_csv`` unchanged, with all parser
    options left at their defaults.
    """
    frame = pd.read_csv(filepath)
    return frame

def convert_nonstandard_nulls(df):
    """Replace textual null placeholders with np.nan, modifying ``df`` in place.

    Any cell exactly equal to 'NA', 'null', 'N/A', '' or 'NaN' is overwritten.
    The same (mutated) DataFrame object is returned for convenience.
    """
    placeholders = ['NA', 'null', 'N/A', '', 'NaN']
    df.replace(to_replace=placeholders, value=np.nan, inplace=True)
    return df

def clean_customer_lifetime_value(df):
    """Strip '%' characters from 'customer_lifetime_value' and cast it to float.

    The column is rewritten in place on ``df``; null entries become np.nan.
    When the column is absent, ``df`` is returned untouched.
    """
    column = 'customer_lifetime_value'
    if column not in df.columns:
        return df

    def _to_float(raw):
        # Nulls are kept as NaN; everything else is parsed via its string form.
        if pd.notnull(raw):
            return float(str(raw).replace('%', ''))
        return np.nan

    df[column] = df[column].apply(_to_float)
    return df

def clean_number_of_open_complaints(df):
    """Extract the middle number from complaint strings such as '1/5/00'.

    The 'number_of_open_complaints' column is rewritten in place on ``df``:

    - strings containing '/' are replaced by the integer between the first
      and second slash ('1/5/00' -> 5);
    - values that are already numeric are kept as they are, so calling this
      function again on an already-cleaned frame does not wipe the data;
    - anything else (nulls, strings without '/', a non-numeric middle field)
      becomes np.nan.

    Note that if any np.nan is present pandas stores the column as float,
    not int. When the column is absent, ``df`` is returned untouched.
    """
    column = 'number_of_open_complaints'
    if column not in df.columns:
        return df

    def _parse(value):
        # Already-clean numeric counts pass through. bool is excluded because
        # it is a subclass of int but is not a meaningful complaint count.
        is_number = isinstance(value, (int, float, np.integer, np.floating))
        if is_number and not isinstance(value, bool):
            return value
        if isinstance(value, str) and '/' in value:
            try:
                return int(value.split('/')[1])
            except ValueError:
                # Malformed middle field: treat like any other unparseable value.
                return np.nan
        return np.nan

    df[column] = df[column].apply(_parse)
    return df

def convert_data_types(df, *, max_categories=10):
    """Convert the date column to datetime and low-cardinality text to category.

    ``df`` is modified in place and also returned.

    - 'effective_to_date', when present, is parsed with ``pd.to_datetime``;
      values that cannot be parsed become NaT (``errors='coerce'``).
    - Every object-dtype column with fewer than ``max_categories`` distinct
      non-null values is cast to the ``category`` dtype.

    No numeric conversion is performed here.

    Args:
        df: DataFrame to convert.
        max_categories: Exclusive upper bound on the number of distinct
            values for an object column to be treated as categorical.
            Defaults to 10.

    Returns:
        The same DataFrame object, after conversion.
    """
    if 'effective_to_date' in df.columns:
        df['effective_to_date'] = pd.to_datetime(df['effective_to_date'], errors='coerce')

    for col in df.select_dtypes(include='object').columns:
        # Cardinality heuristic: few distinct values suggests a categorical.
        if df[col].nunique() < max_categories:
            df[col] = df[col].astype('category')

    return df

def drop_and_fill_nulls(df, strategy='drop'):
    """Handle missing values according to ``strategy``.

    Strategies:
    - 'drop': return a new DataFrame without any row that contains a null.
    - 'mean': fill numeric columns with their mean and ``category``-dtype
      columns with their most frequent value. ``df`` is modified in place
      and returned. Columns of other dtypes (e.g. plain object columns) are
      left as they are, and a categorical column with no non-null value is
      left unfilled because it has no mode.
    - 'ffill': return a new DataFrame with nulls forward-filled.

    Args:
        df: DataFrame to clean.
        strategy: One of 'drop', 'mean' or 'ffill'.

    Returns:
        The cleaned DataFrame.

    Raises:
        ValueError: If ``strategy`` is not one of the supported names.
    """
    if strategy == 'drop':
        return df.dropna()

    if strategy == 'mean':
        for col in df.select_dtypes(include='number'):
            df[col] = df[col].fillna(df[col].mean())
        for col in df.select_dtypes(include='category'):
            modes = df[col].mode()
            # mode() is empty for an all-null column; indexing it would raise.
            if not modes.empty:
                df[col] = df[col].fillna(modes.iloc[0])
        return df

    if strategy == 'ffill':
        # DataFrame.ffill() replaces the deprecated fillna(method='ffill').
        return df.ffill()

    raise ValueError("Unsupported strategy: choose from 'drop', 'mean', or 'ffill'")

def remove_duplicates(df):
    """Return a copy of ``df`` without repeated rows.

    Rows are compared across every column and the first occurrence of each
    is kept; the input DataFrame is not modified.
    """
    deduplicated = df.drop_duplicates(subset=None, keep='first', inplace=False)
    return deduplicated

def save_clean_data(df, output_path):
    """Write ``df`` to ``output_path`` as CSV, omitting the row index."""
    include_index = False
    df.to_csv(path_or_buf=output_path, index=include_index)