# In [2]:
# Question: Advanced Data Profiling and Outlier Detection
# Description: Perform detailed data profiling including outlier detection for numeric columns.

import pandas as pd
import numpy as np
from scipy import stats

def profile_numeric_column(df, column_name, iqr_multiplier=1.5,
                           z_threshold=3.0):
    """Profile one numeric column of *df* and count its outliers.

    Missing values are dropped before any statistic is computed; the
    missing/zero percentages are nevertheless expressed relative to the
    total number of rows in *df* (missing rows included).

    Args:
        df: DataFrame that contains ``column_name``.
        column_name: Label of the numeric column to profile.
        iqr_multiplier: Width of the IQR fences; a value is an IQR outlier
            when it lies below ``Q1 - iqr_multiplier * IQR`` or above
            ``Q3 + iqr_multiplier * IQR``. Defaults to 1.5.
        z_threshold: A value is a z-score outlier when its absolute
            z-score is strictly greater than this. Defaults to 3.0.

    Returns:
        A dict with the keys ``count``, ``mean``, ``median``, ``mode``,
        ``std_dev``, ``min``, ``max``, ``skewness``, ``kurtosis``,
        ``missing_count``, ``missing_percentage``, ``zero_count``,
        ``zero_percentage``, ``iqr_outlier_count`` and ``z_outlier_count``.
        Statistics that are undefined for the data (for example the mean
        of an all-missing column) are NaN.

    Raises:
        KeyError: If ``column_name`` is not a column of *df*.
    """
    column = df[column_name]
    data = column.dropna()
    total_rows = len(df)
    profile = {}

    # Basic stats
    profile['count'] = data.count()
    profile['mean'] = data.mean()
    profile['median'] = data.median()
    # Series.mode() returns every mode in sorted order, so with ties the
    # smallest one is reported. Compute it once instead of twice.
    modes = data.mode()
    profile['mode'] = modes.iloc[0] if not modes.empty else np.nan
    # NOTE: pandas' std() defaults to the sample estimate (ddof=1), whereas
    # the z-scores below use scipy's default population estimate (ddof=0).
    profile['std_dev'] = data.std()
    profile['min'] = data.min()
    profile['max'] = data.max()

    # Distribution (pandas reports Fisher's excess kurtosis: normal == 0.0)
    profile['skewness'] = data.skew()
    profile['kurtosis'] = data.kurtosis()

    # Missing and zeros
    missing_count = column.isnull().sum()
    zero_count = (data == 0).sum()
    profile['missing_count'] = missing_count
    # With no rows at all the percentages are undefined; report NaN
    # explicitly rather than relying on a 0/0 division.
    profile['missing_percentage'] = (
        (missing_count / total_rows) * 100 if total_rows else np.nan
    )
    profile['zero_count'] = zero_count
    profile['zero_percentage'] = (
        (zero_count / total_rows) * 100 if total_rows else np.nan
    )

    # IQR outliers: values outside the [Q1 - k*IQR, Q3 + k*IQR] fences.
    # On empty data the quartiles are NaN, every comparison is False and
    # the count is 0.
    q1 = data.quantile(0.25)
    q3 = data.quantile(0.75)
    iqr = q3 - q1
    lower_bound = q1 - iqr_multiplier * iqr
    upper_bound = q3 + iqr_multiplier * iqr
    outside_fences = (data < lower_bound) | (data > upper_bound)
    profile['iqr_outlier_count'] = int(outside_fences.sum())

    # Z-score outliers. Z-scores are undefined (0/0) for empty or constant
    # data, so such columns have no z-score outliers by definition and
    # scipy is not called for them.
    if data.nunique() > 1:
        z_scores = np.abs(stats.zscore(data))
        profile['z_outlier_count'] = int((z_scores > z_threshold).sum())
    else:
        profile['z_outlier_count'] = 0

    return profile




