In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

def fill_missing_median(df, columns=None):
    """
    Fill missing values with the column median for the specified columns.

    The input dataframe is not modified; a filled copy is returned. Columns
    that contain no missing values are left untouched, and columns that
    contain only missing values are skipped because their median is undefined.

    Parameters:
    df (DataFrame): Input dataframe
    columns (list or str): Columns to process. A single column name may be
        passed as a plain string. If None, processes all numeric columns.

    Returns:
    DataFrame: Copy of the dataframe with missing values filled

    Raises:
    KeyError: If a requested column is not present in the dataframe
    """
    if columns is None:
        columns = df.select_dtypes(include=[np.number]).columns
    elif isinstance(columns, str):
        # A bare string would otherwise be iterated character by character.
        columns = [columns]

    df_filled = df.copy()
    for col in columns:
        missing = df_filled[col].isnull()
        if not missing.any():
            continue

        if missing.all():
            # With no observed values the median is NaN, so filling would be
            # a no-op; report the skip instead of claiming the column was filled.
            print(f"Skipped {col}: no non-missing values to compute a median from")
            continue

        median_val = df_filled[col].median()
        df_filled[col] = df_filled[col].fillna(median_val)
        print(f"Filled missing values in {col} with median: {median_val:.2f}")

    return df_filled


def drop_missing(df, threshold=0.3):
    """
    Remove sparse columns, then remove every row that still has a gap.

    Works on a copy, so the dataframe passed in is left unchanged.

    Parameters:
    df (DataFrame): Input dataframe
    threshold (float): Columns whose fraction of missing values is strictly
        greater than this are dropped (default 0.3 = 30%)

    Returns:
    DataFrame: Dataframe without the sparse columns and without incomplete rows
    """
    working = df.copy()

    # Step 1: columns whose share of missing entries exceeds the threshold.
    missing_fraction = working.isnull().mean()
    too_sparse = missing_fraction > threshold
    cols_to_drop = missing_fraction.index[too_sparse].tolist()

    if cols_to_drop:
        print(f"Dropping columns with >{threshold*100}% missing values: {cols_to_drop}")
        working = working.drop(columns=cols_to_drop)

    # Step 2: keep only rows that are complete across the remaining columns.
    complete_rows = working.dropna()
    rows_dropped = len(working) - len(complete_rows)

    if rows_dropped > 0:
        print(f"Dropped {rows_dropped} rows with missing values")

    return complete_rows


def normalize_data(df, columns=None):
    """
    Scale the chosen columns with a freshly fitted StandardScaler.

    The scaler is fitted on the selected columns of a copy of the input, and
    the transformed values are written back into that copy; the dataframe
    passed in is left unchanged.

    Parameters:
    df (DataFrame): Input dataframe
    columns (list): Columns to normalize. If None, every numeric column is used.

    Returns:
    tuple: (Normalized DataFrame, Fitted scaler object)
    """
    # Fall back to all numeric columns when the caller gives no selection.
    columns = df.select_dtypes(include=[np.number]).columns if columns is None else columns

    scaler = StandardScaler()
    df_normalized = df.copy()

    # Fit and transform in one pass, then overwrite the selected columns.
    scaled_values = scaler.fit_transform(df_normalized[columns])
    df_normalized[columns] = scaled_values
    print(f"Normalized columns: {list(columns)}")

    return df_normalized, scaler