In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
import numpy as np
import pickle

In [None]:
# Load the training sheet and discard the listed columns in a single chained step.
train_data = pd.read_excel('train_data.xlsx').drop(
    columns=[
        '상품', 'RRC_CD', 'HAC_CD', '지역코드', 'AD_NO',
        '주거지', '주소지', 'ADD_YN', 'SP등급', '결과값(연체회차)',
    ],
)

In [None]:
# Separate the label from the features.
#
# The label is removed from train_data *before* the numeric feature columns are
# selected. Otherwise, if the label column is numeric (it is fed to an MSE loss
# further down), select_dtypes would put it among the inputs and the model would
# be trained on its own target.
# Dropping it from train_data itself (rather than only from X) keeps the later
# column-name lookups on train_data aligned with the columns of X.
TARGET_COLUMN = 'CB등급'

# Guarded so that re-running this cell after the label has already been split
# off does not raise; y then keeps the value from the first run.
if TARGET_COLUMN in train_data.columns:
    y = train_data[TARGET_COLUMN]
    train_data = train_data.drop(columns=[TARGET_COLUMN])

# Features: every remaining numeric column.
X = train_data.select_dtypes(include=[np.number])

In [None]:
# Standardize the features: fit the scaler on X, then replace X with the
# transformed array.
# NOTE(review): the scaler is fitted on all rows, i.e. before the
# train/validation split that follows, so validation rows influence the
# scaling statistics. Re-running this cell on an already-scaled X would also
# refit the scaler on scaled data.
scaler = StandardScaler().fit(X)
X = scaler.transform(X)

In [None]:
# Hold out 20% of the rows for validation; the fixed random_state makes the
# split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Persist the fitted scaler to disk with pickle.
with open('train_data_scaler.pkl', mode='wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

In [None]:
# Assemble the network layer by layer: two ReLU hidden layers (128 and 64
# units), each followed by 20% dropout, then one output unit with no activation.
# The input width is taken from the number of columns in X_train.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(128, activation='relu', input_shape=(X_train.shape[1],)))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(1))

In [None]:
# Configure training: mean-squared-error loss, Adam optimizer.
model.compile(
    loss='mean_squared_error',
    optimizer='adam',
)

In [None]:
# Train for 10 epochs, evaluating on the validation split after each epoch.
model.fit(
    x=X_train,
    y=y_train,
    epochs=10,
    validation_data=(X_val, y_val),
)

In [17]:
# Write the trained model to an HDF5 file.
model.save(filepath='CB_model.h5')

### feature importance 추출

In [None]:
from sklearn.inspection import permutation_importance
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor

def create_model():
    """Build and compile a fresh, untrained copy of the network.

    The layer stack mirrors the model built earlier in this notebook: two ReLU
    hidden layers (128 and 64 units), each followed by 20% dropout, and one
    output unit with no activation. The input width is read from the global
    ``X_train``, so that variable must exist when this function is called.

    Returns:
        The Sequential model, compiled with the Adam optimizer and
        mean-squared-error loss.
    """
    layers = tf.keras.layers
    network = tf.keras.models.Sequential()
    network.add(layers.Dense(128, activation='relu', input_shape=(X_train.shape[1],)))
    network.add(layers.Dropout(0.2))
    network.add(layers.Dense(64, activation='relu'))
    network.add(layers.Dropout(0.2))
    network.add(layers.Dense(1))
    network.compile(loss='mean_squared_error', optimizer='adam')
    return network

# Wrap the network so it can be passed to scikit-learn utilities.
keras_reg = KerasRegressor(build_fn=create_model, epochs=10, batch_size=10, verbose=0)

# Attach the already-trained network to the wrapper instead of fitting the
# wrapper again.
keras_reg.model = model

# Permutation feature importance on the validation split
# (10 shuffles per feature, fixed seed).
result = permutation_importance(keras_reg, X_val, y_val, n_repeats=10, random_state=42)

# Collect mean / std importance per feature, labelled with the numeric column
# names of train_data.
feature_importances = pd.DataFrame(
    {'importance_mean': result.importances_mean, 'importance_std': result.importances_std},
    index=train_data.select_dtypes(include=[np.number]).columns) # Make sure to use the correct feature names

# Most important features first.
feature_importances = feature_importances.sort_values(by='importance_mean', ascending=False)

# Save under a PFI-specific file name. The SHAP cell later in this notebook
# writes 'feature_importances.xlsx'; using that same path here meant the PFI
# table was overwritten whenever both cells ran.
feature_importances.to_excel('feature_importances_pfi.xlsx')


  keras_reg = KerasRegressor(build_fn=create_model, epochs=10, batch_size=10, verbose=0)


In [None]:
import shap

# Create a SHAP DeepExplainer for the trained network, with the training
# features as the background data.
explainer = shap.DeepExplainer(model, X_train)

# SHAP values for every validation row.
shap_values = explainer.shap_values(X_val)

# Reduce to a 2-D (rows x features) matrix for the model's single output.
# Depending on the installed shap release, shap_values is either a list with
# one array per model output or a single array with a trailing output axis;
# both layouts are accepted here.
shap_matrix = np.asarray(shap_values[0] if isinstance(shap_values, list) else shap_values)
if shap_matrix.ndim == 3:
    shap_matrix = shap_matrix[..., 0]

# Label the columns with the numeric columns of train_data, the same selection
# the feature matrix was built from. Using train_data.columns (all columns)
# mislabels or fails as soon as a non-numeric column is present.
feature_names = train_data.select_dtypes(include=[np.number]).columns
feature_importances = pd.DataFrame(shap_matrix, columns=feature_names)

# Mean absolute SHAP value per feature, largest first.
feature_importances = feature_importances.abs().mean().sort_values(ascending=False)

# Save the ranking to an Excel file.
feature_importances.to_excel('feature_importances.xlsx')
