In [None]:
import pandas as pd

def load_data(filepath):
    """
    Read a CSV file into a DataFrame and report its row count.

    Parameters:
    - filepath: location of the CSV; passed unchanged to pandas.read_csv.

    Returns the loaded DataFrame. If anything fails while loading, the
    error is printed (not raised) and None is returned instead.
    """
    frame = None
    try:
        frame = pd.read_csv(filepath)
        row_total = len(frame)
        print("Data loaded successfully.")
        print(f"Dataset with {row_total} rows.")
    except Exception as err:
        # Best-effort loader: callers check for None rather than catching.
        print(f"Error loading data: {err}")
        frame = None
    return frame

def remove_duplicates(df, subset=None, keep="first"):
    """
    Drop duplicate rows and print how many were removed.

    Parameters:
    - df: DataFrame to de-duplicate; it is not modified.
    - subset: list, str, or column label, optional
        Columns to compare when looking for duplicates.
        None means every column is compared.
    - keep: {'first', 'last', False}, default 'first'
        Which of the duplicated rows survive:
        - 'first': the first occurrence is kept.
        - 'last': the last occurrence is kept.
        - False: every duplicated row is dropped.

    Returns the de-duplicated DataFrame produced by drop_duplicates.
    """
    deduplicated = df.drop_duplicates(subset=subset, keep=keep)
    dropped = len(df) - len(deduplicated)
    message = (
        f"Removed {dropped} duplicate rows."
        if dropped > 0
        else "No duplicate rows found."
    )
    print(message)
    return deduplicated

def handle_missing_values(df, strategy="drop", fill_value=9999):
    """
    Handle missing values in the dataset.

    Parameters:
    - df: DataFrame to process; it is not modified.
    - strategy: 'drop' to remove every row that contains a missing value,
                'fill' to replace missing values with fill_value.
                Any other value prints a warning and leaves the data as is.
    - fill_value: replacement used by the 'fill' strategy, default 9999.
                It is handed straight to DataFrame.fillna.

    Returns a new DataFrame for 'drop' and 'fill'; for an unrecognised
    strategy the original df object is returned unchanged.
    """
    if strategy == "drop":
        initial_count = len(df)
        df = df.dropna()
        print(f"Dropped {initial_count - len(df)} rows with missing values.")
    elif strategy == "fill":
        df = df.fillna(fill_value)
        # With the default fill_value this prints the same text as before.
        print(f"Replaced rows with missing values to '{fill_value}'.")
    else:
        print("Invalid strategy provided. No changes made to missing values.")
    return df

def standardize_column_names(df):
    """
    Standardize column names to lowercase and replace spaces with underscores.

    Parameters:
    - df: DataFrame whose columns are renamed IN PLACE.

    String labels are lowercased and their spaces become underscores.
    Non-string labels (for example integer column positions) have no
    lower()/replace() methods, so they are kept exactly as they are
    instead of raising AttributeError.

    Returns the same DataFrame object that was passed in.
    """
    df.columns = [
        col.lower().replace(" ", "_") if isinstance(col, str) else col
        for col in df.columns
    ]
    print(f"Standardized column names to {df.columns.tolist()}.")
    return df

def clean_percentage_columns(df):
    """
    Turn percentage strings such as '12.5%' into decimal fractions (0.125).

    Parameters:
    - df: DataFrame to clean; matching columns are overwritten IN PLACE.

    Only object-dtype columns are inspected. If at least one value in such
    a column contains '%', the WHOLE column is converted: every '%' is
    stripped, the result is parsed as a number and divided by 100. Values
    that cannot be parsed become NaN.

    Returns the same DataFrame object that was passed in.
    """
    for name in df.columns:
        # Skip anything that is not an object (string-like) column.
        if not (df[name].dtype == 'object'):
            continue
        has_percent = df[name].str.contains('%', na=False)
        if not has_percent.any():
            continue
        print(f"Cleaning percentage values in column: {name}")
        without_sign = df[name].str.replace('%', '', regex=False)
        # errors='coerce' maps unparseable entries to NaN rather than raising.
        df[name] = pd.to_numeric(without_sign, errors='coerce') / 100
    return df

def format_numeric_columns(df, decimals=2):
    """
    Round every numeric column to a fixed number of decimal places.

    Parameters:
    - df: DataFrame to format; numeric columns are overwritten IN PLACE.
    - decimals: number of decimal places to round to, default 2.

    Columns are chosen with select_dtypes(include=['number']); all other
    columns are left untouched.

    Returns the same DataFrame object that was passed in.
    """
    numeric_columns = df.select_dtypes(include=['number']).columns
    for column in numeric_columns:
        # With the default decimals this prints the same text as before.
        print(f"Formatting column '{column}' to {decimals} decimal places.")
        df[column] = df[column].round(decimals)
    return df

def convert_data_types(df, conversions):
    """
    Cast selected columns to the requested data types.

    Parameters:
    - df: DataFrame to convert; listed columns are overwritten IN PLACE.
    - conversions: dictionary mapping a column name to the dtype it
        should be cast to with astype.

    A column named in conversions but absent from df is reported and
    skipped. Errors raised by astype are not caught here.

    Returns the same DataFrame object that was passed in.
    """
    for name, target in conversions.items():
        if name not in df.columns:
            print(f"Column '{name}' not found in DataFrame.")
            continue
        converted = df[name].astype(target)
        df[name] = converted
        print(f"Converted column '{name}' to {target}.")
    return df

def main(filepath):
    """
    Run the full cleaning and formatting pipeline on one CSV file.

    Parameters:
    - filepath: location of the CSV, forwarded to load_data.

    Steps, in order: load, remove duplicates, drop rows with missing
    values, standardize column names, convert percentage strings,
    round numeric columns, then cast 'id' to int and 'price' to float.

    Returns the cleaned DataFrame, or None when load_data returned None.
    """
    frame = load_data(filepath)
    if frame is None:
        # Nothing to clean if loading failed.
        return None

    frame = remove_duplicates(frame)
    frame = handle_missing_values(frame, strategy="drop")
    frame = standardize_column_names(frame)
    frame = clean_percentage_columns(frame)
    frame = format_numeric_columns(frame)

    target_types = {"id": "int", "price": "float"}
    frame = convert_data_types(frame, conversions=target_types)

    print("Data cleaning and formatting complete.")
    print(f"Dataset with {len(frame)} rows.")
    return frame
