# In [12]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math
import os

def is_categorical(series, threshold=10):
    """Return True when *series* should be plotted as a categorical feature.

    A series counts as categorical when its dtype is label-like (object,
    pandas string, or ``category``) or when it holds at most *threshold*
    distinct non-null values.

    Args:
        series: pandas Series holding the feature values.
        threshold: Maximum number of distinct values for a non-label dtype
            to still be treated as categorical.

    Returns:
        bool: True for categorical handling, False for continuous binning.
    """
    dtype = series.dtype
    # Label-like dtypes cannot be quantile-binned, so they are categorical
    # regardless of cardinality. Comparing against 'object' alone misses
    # the ``category`` and dedicated string dtypes.
    if isinstance(dtype, pd.CategoricalDtype):
        return True
    if pd.api.types.is_object_dtype(dtype) or pd.api.types.is_string_dtype(dtype):
        return True
    return bool(series.nunique() <= threshold)

def plot_all_features(
    df,
    features,
    target_col,
    n_bins=3,
    max_cols=2,
    figsize=(14, 5),
    normalize_target=False,
    bar_color='skyblue',
    line_color='red',
    marker='o',
    save_path=None  # e.g., 'output_graphs.pdf' or 'graphs.png'
):
    """Draw one subplot per feature: group share as bars, mean target as a line.

    Categorical features (as decided by ``is_categorical``) are grouped by
    their own values; all other features are split into quantile bins with
    ``pd.qcut``. Each subplot shows the share of rows per group on the left
    axis and the mean of ``target_col`` per group on a twin right axis.

    Args:
        df: DataFrame containing every column in *features* and *target_col*.
        features: Iterable of column names to plot.
        target_col: Name of the target column whose mean is plotted.
        n_bins: Number of quantile bins requested for continuous features
            (duplicate bin edges are dropped, so fewer bins may result).
        max_cols: Number of subplot columns in the grid.
        figsize: ``(width, height_per_row)``; the height is multiplied by
            the number of subplot rows.
        normalize_target: If True, min-max scale the target to [0, 1] using
            the rows that are non-null for the feature being plotted.
        bar_color: Colour of the share bars.
        line_color: Colour of the target line and its axis label.
        marker: Marker style of the target line.
        save_path: Optional output file path; missing parent directories
            are created. Nothing is saved when falsy.

    Raises:
        ValueError: If *features* is empty or *max_cols* is smaller than 1.
        KeyError: If a feature or *target_col* is missing from *df*.
    """
    features = list(features)
    if not features:
        raise ValueError("features must contain at least one column name")
    if max_cols < 1:
        raise ValueError("max_cols must be a positive integer")

    num_features = len(features)
    num_rows = math.ceil(num_features / max_cols)

    # squeeze=False always returns a 2-D array of Axes, so flattening works
    # for every grid shape, including a 1x1 grid.
    fig, axes = plt.subplots(
        num_rows,
        max_cols,
        figsize=(figsize[0], figsize[1] * num_rows),
        squeeze=False,
    )
    axes = axes.flatten()

    for ax1, feature in zip(axes, features):
        ax2 = ax1.twinx()

        # Explicit copy: columns are assigned below, and writing into a
        # slice of df would trigger SettingWithCopyWarning.
        df_valid = df[[feature, target_col]].dropna().copy()

        # Best effort per feature: a failure is reported on the panel and on
        # stdout, and the remaining features are still plotted.
        try:
            # Optional min-max scaling of the target
            if normalize_target:
                min_val = df_valid[target_col].min()
                max_val = df_valid[target_col].max()
                # A constant target is left unscaled (avoids dividing by zero)
                if max_val > min_val:
                    df_valid[target_col] = (df_valid[target_col] - min_val) / (max_val - min_val)

            # Categorical features group by their own values; continuous
            # features group by quantile bin.
            if is_categorical(df_valid[feature]):
                group_col = feature
            else:
                df_valid['bin'] = pd.qcut(df_valid[feature], q=n_bins, duplicates='drop')
                group_col = 'bin'

            # observed=True keeps only groups that actually occur and
            # silences the pandas FutureWarning about the changing default.
            grouped = df_valid.groupby(group_col, observed=True).agg(
                user_count=(feature, 'count'),
                target_rate=(target_col, 'mean')
            ).reset_index()
            grouped['user_ratio'] = grouped['user_count'] / grouped['user_count'].sum()
            x_vals = grouped[group_col].astype(str)

            # Bars: share of rows per group
            ax1.bar(x_vals, grouped['user_ratio'], color=bar_color)
            ax1.set_ylabel('user ratio')

            # Line: mean target per group
            ax2.plot(x_vals, grouped['target_rate'], color=line_color, marker=marker)
            ax2.set_ylabel('target_rate', color=line_color)

            ax1.set_title(f"target_rate by {feature}")
            ax1.tick_params(axis='x', rotation=45)

        except Exception as e:
            ax1.set_title(f"{feature} - 오류 발생")
            print(f"[ERROR] '{feature}' 처리 중 오류: {e}")

    # Remove the grid cells that have no feature assigned
    for unused_ax in axes[num_features:]:
        fig.delaxes(unused_ax)

    plt.tight_layout()

    # Save to disk when requested
    if save_path:
        dirname = os.path.dirname(save_path)
        if dirname:
            # exist_ok avoids the check-then-create race
            os.makedirs(dirname, exist_ok=True)
        fig.savefig(save_path, dpi=300)
        print(f"✅ 그래프 저장 완료: {save_path}")

    plt.show()


# Sample data: 500 random rows mixing continuous and categorical features
df = pd.DataFrame({
    'price': np.random.uniform(low=0, high=300, size=500),
    'period': np.random.randint(low=0, high=1000, size=500),
    'product_type': np.random.choice(['A', 'B', 'C'], size=500),
    'channel': np.random.choice(['online', 'offline'], size=500),
    'age': np.random.randint(low=20, high=70, size=500),
    'target': np.random.choice([0, 1], size=500, p=[0.8, 0.2]),
    # Alternative: a probability-like target,
    # e.g. np.random.normal(0.1, 0.05, 500)
})

# Visualise every feature and save the figure to disk
plot_all_features(
    df,
    ['price', 'period', 'product_type', 'channel', 'age'],
    'target',
    n_bins=3,
    normalize_target=True,
    bar_color='skyblue',
    line_color='darkorange',
    marker='s',
    save_path='output/feature_visuals.png',
)


# In [13]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math

def is_categorical(series, threshold=10):
    """Decide whether *series* should be treated as a categorical feature.

    A series is categorical when its dtype is label-like (object, pandas
    string, or ``category``) or when it has at most *threshold* distinct
    non-null values.

    Args:
        series: pandas Series holding the feature values.
        threshold: Maximum number of distinct values for a non-label dtype
            to still be treated as categorical.

    Returns:
        bool: True for categorical handling, False for continuous binning.
    """
    dtype = series.dtype
    # Label-like dtypes cannot be quantile-binned, so they are categorical
    # regardless of cardinality. Comparing against 'object' alone misses
    # the ``category`` and dedicated string dtypes.
    if isinstance(dtype, pd.CategoricalDtype):
        return True
    if pd.api.types.is_object_dtype(dtype) or pd.api.types.is_string_dtype(dtype):
        return True
    return bool(series.nunique() <= threshold)

def plot_all_features(df, features, target_col, n_bins=3, max_cols=2, figsize=(14, 5)):
    """Draw one subplot per feature: group share as bars, mean target as a line.

    Categorical features (as decided by ``is_categorical``) are grouped by
    their own values; all other features are split into quantile bins with
    ``pd.qcut``. Each subplot shows the share of rows per group on the left
    axis and the mean of ``target_col`` per group on a twin right axis.

    Args:
        df: DataFrame containing every column in *features* and *target_col*.
        features: Iterable of column names to plot.
        target_col: Name of the target column whose mean is plotted.
        n_bins: Number of quantile bins requested for continuous features
            (duplicate bin edges are dropped, so fewer bins may result).
        max_cols: Number of subplot columns in the grid.
        figsize: ``(width, height_per_row)``; the height is multiplied by
            the number of subplot rows.

    Raises:
        ValueError: If *features* is empty or *max_cols* is smaller than 1.
        KeyError: If a feature or *target_col* is missing from *df*.
    """
    features = list(features)
    if not features:
        raise ValueError("features must contain at least one column name")
    if max_cols < 1:
        raise ValueError("max_cols must be a positive integer")

    num_features = len(features)
    num_rows = math.ceil(num_features / max_cols)

    # squeeze=False always returns a 2-D array of Axes, so flattening works
    # for every grid shape, including a 1x1 grid.
    fig, axes = plt.subplots(
        num_rows,
        max_cols,
        figsize=(figsize[0], figsize[1] * num_rows),
        squeeze=False,
    )
    axes = axes.flatten()

    for ax1, feature in zip(axes, features):
        ax2 = ax1.twinx()

        # Explicit copy: a 'bin' column may be assigned below, and writing
        # into a slice of df would trigger SettingWithCopyWarning.
        df_valid = df[[feature, target_col]].dropna().copy()

        # Best effort per feature: a failure is reported on the panel and on
        # stdout, and the remaining features are still plotted.
        try:
            # Categorical features group by their own values; continuous
            # features group by quantile bin.
            if is_categorical(df_valid[feature]):
                group_col = feature
            else:
                df_valid['bin'] = pd.qcut(df_valid[feature], q=n_bins, duplicates='drop')
                group_col = 'bin'

            # observed=True keeps only groups that actually occur and
            # silences the pandas FutureWarning about the changing default.
            grouped = df_valid.groupby(group_col, observed=True).agg(
                user_count=(feature, 'count'),
                target_rate=(target_col, 'mean')
            ).reset_index()
            grouped['user_ratio'] = grouped['user_count'] / grouped['user_count'].sum()
            x_vals = grouped[group_col].astype(str)

            # Bars: share of rows per group
            ax1.bar(x_vals, grouped['user_ratio'], color='skyblue')
            ax1.set_ylabel('user ratio')

            # Line: mean target per group
            ax2.plot(x_vals, grouped['target_rate'], color='black', marker='o')
            ax2.set_ylabel('target_rate', color='black')

            ax1.set_title(f"target_rate by {feature}")
            ax1.tick_params(axis='x', rotation=0)

        except Exception as e:
            ax1.set_title(f"{feature} - 오류 발생")
            print(f"[ERROR] {feature}: {e}")

    # Remove the grid cells that have no feature assigned
    for unused_ax in axes[num_features:]:
        fig.delaxes(unused_ax)

    plt.tight_layout()
    plt.show()

# Example data: 500 random rows mixing continuous and categorical features
df = pd.DataFrame({
    'price': np.random.uniform(low=0, high=300, size=500),
    'period': np.random.randint(low=0, high=1000, size=500),
    'product_type': np.random.choice(['A', 'B', 'C'], size=500),
    'channel': np.random.choice(['online', 'offline'], size=500),
    'age': np.random.randint(low=20, high=70, size=500),
    'target': np.random.choice([0, 1], size=500, p=[0.8, 0.2]),
})

# Feature list (continuous and categorical columns mixed)
features = ['price', 'period', 'product_type', 'channel', 'age']

# Run the visualisation
plot_all_features(
    df=df,
    features=features,
    target_col='target',
    n_bins=3,
    max_cols=2,
)
