# Outlier Handling Example

In [None]:

import pandas as pd
import numpy as np
from scipy import stats
from sklearn.preprocessing import RobustScaler
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:

# Build a reproducible synthetic dataset: 100 samples per column drawn from
# normal distributions (bp ~ N(120, 10), glucose ~ N(90, 15)).
np.random.seed(42)
n_samples = 100
bp_values = np.random.normal(loc=120, scale=10, size=n_samples)
glucose_values = np.random.normal(loc=90, scale=15, size=n_samples)
df = pd.DataFrame({'bp': bp_values, 'glucose': glucose_values})

# Inject one extreme value into each column so the later cells have
# something to detect.
df.at[5, 'bp'] = 200
df.at[10, 'glucose'] = 300


In [None]:

# 1. Outlier detection via z-score: store the absolute z-score of every
#    measurement in a companion "<column>_z" column.
for col in ('bp', 'glucose'):
    df[f'{col}_z'] = np.abs(stats.zscore(df[col]))

# 2. IQR 기준 이상값 탐지
def iqr_outlier_flags(series: pd.Series, factor: float = 1.5) -> pd.Series:
    """Flag values that fall outside the IQR fences of ``series``.

    The fences are ``Q1 - factor * IQR`` and ``Q3 + factor * IQR``, where
    ``Q1``/``Q3`` are the 25th/75th percentiles and ``IQR = Q3 - Q1``.
    Values exactly on a fence are not flagged.

    Args:
        series: Numeric pandas Series to inspect.
        factor: Multiplier applied to the IQR when building the fences.
            Defaults to 1.5, the value this function previously hard-coded.

    Returns:
        An integer Series with the same index as ``series``: 1 where the
        value lies strictly outside the fences, 0 otherwise. Missing values
        compare false against both fences and are therefore reported as 0.

    Raises:
        ValueError: If ``factor`` is negative, which would invert the fences.
    """
    if factor < 0:
        raise ValueError("factor must be non-negative")

    q1 = series.quantile(0.25)
    q3 = series.quantile(0.75)
    margin = factor * (q3 - q1)

    is_outlier = (series < q1 - margin) | (series > q3 + margin)
    return is_outlier.astype(int)

# Store the 0/1 IQR outlier indicator for each measurement in a companion
# "<column>_outlier_flag" column.
for col in ('bp', 'glucose'):
    df[f'{col}_outlier_flag'] = iqr_outlier_flags(df[col])


In [None]:

# 3. Log transform: store log(1 + x) of each measurement in a companion
#    "<column>_log" column.
for col in ('bp', 'glucose'):
    df[f'{col}_log'] = np.log1p(df[col])


In [None]:

# 4. Robust scaling: fit a RobustScaler (default settings) on the raw
#    measurements and store the scaled values in "<column>_robust" columns.
scaler = RobustScaler()
raw_columns = ['bp', 'glucose']
scaled_columns = [f'{name}_robust' for name in raw_columns]
df[scaled_columns] = scaler.fit_transform(df[raw_columns])


In [None]:

# Result summary: show the first five rows with each raw measurement next to
# its derived columns (z-score, IQR flag, log transform, robust scaling).
summary_columns = [
    'bp', 'bp_z', 'bp_outlier_flag', 'bp_log', 'bp_robust',
    'glucose', 'glucose_z', 'glucose_outlier_flag', 'glucose_log', 'glucose_robust',
]
df[summary_columns].head(5)
