In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so that the
# project files stored on Drive can be read and written by later cells.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import gc
import os

import numpy as np
import pandas as pd

# NOTE(review): Drive is already mounted by the first cell of this notebook; when the
# cells run in order this second call only reports "Drive already mounted".
from google.colab import drive
drive.mount('/content/drive')

import sklearn
from sklearn.utils.class_weight import compute_class_weight
import imblearn
from imblearn.over_sampling import SMOTE
import xgboost as xgb
from xgboost import XGBClassifier

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### 파일패스 설정하기

In [None]:
# Dataset split and data category this notebook works on.
data_type = "train"
category = "채널정보"

# Base data directory. Exactly one `root_path` assignment must be active:
# later cells build both the input parquet path and the results path from it,
# so leaving every option commented out makes them fail with NameError.

# local
# root_path = '../data/open'

# colab (Google Drive is mounted under /content/drive by the first cell)
root_path = '/content/drive/MyDrive/12조 파이널프로젝트/data'

#drive_folder = f'{root_path}/{data_type}/6.채널정보/'

In [None]:
# Load the cleaned channel table for the configured split.
train_df = pd.read_parquet(f'{root_path}/{data_type}_channel_cleaned.parquet')

# Every column except the row identifier and the target is used as a feature.
feature_cols = [col for col in train_df.columns if col not in ["ID", "Segment"]]
X = train_df[feature_cols].copy()

# Encode the segment letters as integer class labels 0..4.
segment_to_label = {'A':0, 'B':1,'C':2,'D':3,'E':4}

# Fail loudly on labels the mapping does not cover: Series.map would otherwise turn
# them into NaN, which would silently leak into the class weights and the fit below.
unexpected_segments = train_df.loc[
    ~train_df["Segment"].isin(list(segment_to_label)), "Segment"
].unique()
if len(unexpected_segments) > 0:
    raise ValueError(
        f"Unmapped Segment values found: {list(unexpected_segments)}; "
        f"expected only {list(segment_to_label)}"
    )

y = train_df["Segment"].copy()
y = y.map(segment_to_label)

# The raw frame is no longer needed; release it before training.
del train_df
gc.collect()

# Compute 'balanced' class weights from the label distribution.
classes = np.unique(y)
weights = compute_class_weight(class_weight='balanced', classes=classes, y=y)
class_weights = dict(zip(classes, weights))

# Map each sample to the weight of its class (index stays aligned with y).
w_train = pd.Series(y).map(class_weights)

# Fit XGBoost on all features; this model is only used to extract feature importances.
temp_model = xgb.XGBClassifier(
    objective='multi:softprob',
    num_class=5,
    eval_metric='mlogloss',
    n_estimators=300,
    tree_method='hist',
    device='cuda',  # NOTE(review): needs a CUDA-capable runtime — confirm a GPU is attached
    random_state=42
    )

temp_model.fit(X, y, sample_weight = w_train, verbose=False)

In [None]:
# Rank all features by the importance reported by the fitted XGBoost model
# (highest first) and keep the top 30.
importance_df = pd.DataFrame({
    'feature': X.columns,
    'importance': temp_model.feature_importances_
}).sort_values(by='importance', ascending=False)

top30_features = importance_df.head(30)['feature'].tolist()

print(top30_features)

# to_csv does not create missing parent directories, so make sure the
# results folder exists before writing.
results_dir = f"{root_path}/results"
os.makedirs(results_dir, exist_ok=True)

# Save the selected feature names. 'utf-8-sig' prepends a BOM — presumably so that
# spreadsheet tools detect the encoding of the Korean feature names.
top30_df = pd.DataFrame({'feature': top30_features})
top30_df.to_csv(
    f"{results_dir}/top30_features_XGB_balanced.csv",
    index=False,
    encoding="utf-8-sig"
)

['불만제기후경과월_R12M', '방문월수_PC_R6M', '상담건수_R6M', '홈페이지_금융건수_R6M', 'IB문의건수_분실도난_R6M', '방문후경과월_앱_R6M', '방문일수_앱_B0M', '당사멤버쉽_방문월수_R6M', '홈페이지_선결제건수_R6M', '홈페이지_금융건수_R3M', '방문후경과월_PC_R6M', '인입횟수_ARS_R6M', '방문일수_모바일웹_R6M', '방문일수_PC_B0M', '방문월수_모바일웹_R6M', '인입월수_IB_R6M', 'IB문의건수_사용승인내역_R6M', '이용메뉴건수_ARS_B0M', '방문횟수_PC_B0M', '인입일수_ARS_R6M', '방문횟수_앱_R6M', '방문일수_앱_R6M', '이용메뉴건수_IB_R6M', '방문횟수_PC_R6M', 'IB문의건수_CL_RV_R6M', '당사멤버쉽_방문횟수_R6M', '인입횟수_IB_R6M', '방문월수_앱_R6M', '인입후경과월_IB_R6M', 'IB문의건수_결제_R6M']
