In [None]:
# !pip install catboost
# !pip install shap

In [None]:
import shap
import pandas as pd
import matplotlib.pyplot as plt
from catboost import CatBoostClassifier, Pool
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Read the practice dataset, parse the Timestamp column, and order the rows
# chronologically with a fresh 0..n-1 index.
df = (
    pd.read_csv('실습데이터.csv')
    .assign(Timestamp=lambda frame: pd.to_datetime(frame['Timestamp']))
    .sort_values('Timestamp')
    .reset_index(drop=True)
)
# Cast the target column to str so every label is handled as a string value.
df = df.assign(**{'STATUS.xlsx': df['STATUS.xlsx'].astype(str)})

# 학습 후 STATUS 각각으로 SHAP

In [None]:
# Target: integer-encode the STATUS labels. `le` is kept so the integer codes
# can be mapped back to the original label strings via `le.classes_`.
le = LabelEncoder()
encoded_status = le.fit_transform(df['STATUS.xlsx'])
y = pd.Series(encoded_status, index=df.index)

# Features: everything except the target and the raw timestamp.
X = df.drop(columns=['STATUS.xlsx', 'Timestamp'])

# Integer-encode each object-dtype feature column with its own encoder.
object_columns = X.select_dtypes(include='object').columns
for column_name in object_columns:
    X[column_name] = LabelEncoder().fit_transform(X[column_name])

# 80/20 split, stratified on the encoded target, with a fixed seed.
# NOTE(review): rows were sorted by Timestamp earlier but this split is random,
# not chronological — confirm that is intended for this data.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Hyperparameters for the multi-class CatBoost model, gathered in one place so
# they are easy to read and tweak.
catboost_params = {
    'iterations': 100,
    'learning_rate': 0.1,
    'depth': 6,
    'loss_function': 'MultiClass',
    'verbose': 0,  # suppress per-iteration training output
}

# Train on the training split only; the validation split is not passed to fit.
model = CatBoostClassifier(**catboost_params)
model.fit(X_train, y_train)

# Per-class SHAP values on the training split.
# For a MultiClass model, CatBoost's "ShapValues" importance is a 3-D array of
# shape (n_samples, n_classes, n_features + 1). The LAST entry along the final
# axis is the expected value (bias), not a feature contribution.
train_pool = Pool(X_train, label=y_train)
shap_values = model.get_feature_importance(train_pool, type="ShapValues")
# Reorder to (n_samples, n_features + 1, n_classes) so a class is one slice of the last axis.
shap_values = shap_values.transpose(0, 2, 1)
# Drop the trailing bias entry. Slicing with `1:` would instead discard the
# first real feature and keep the bias as a fake last feature, shifting every
# attribution onto the wrong column name.
shap_values = shap_values[:, :-1, :]

feature_names = X_train.columns
n_classes = shap_values.shape[2]

# Sanity check: exactly one SHAP column per feature column.
assert shap_values.shape[1] == len(feature_names), (
    f"SHAP feature axis ({shap_values.shape[1]}) does not match "
    f"number of features ({len(feature_names)})"
)

for i in range(n_classes):
    print(f"\n📊 SHAP summary plot for class {i}")
    class_shap = shap_values[:, :, i]  # (n_samples, n_features) for this class
    # Mean-|SHAP| bar chart followed by the default summary plot for this class.
    shap.summary_plot(class_shap, X_train, feature_names=feature_names, plot_type="bar")
    shap.summary_plot(class_shap, X_train, feature_names=feature_names)


# STATUS 전체에 대한 SHAP

In [None]:
# Overall (all classes together) SHAP summary on the validation split.
explainer = shap.TreeExplainer(model)

shap_values = explainer.shap_values(X_val)

# Depending on the installed shap version, a multi-class model yields either a
# list of per-class (n_samples, n_features) arrays or one 3-D array of shape
# (n_samples, n_features, n_classes). summary_plot only treats a *list* as
# multi-class input (a bare 3-D array is interpreted differently), so normalize
# to the list form here.
if not isinstance(shap_values, list) and getattr(shap_values, "ndim", 0) == 3:
    shap_values = [shap_values[:, :, k] for k in range(shap_values.shape[2])]

# Stacked mean-|SHAP| bar chart across classes. `le.classes_` supplies the
# original STATUS labels for the legend instead of generic "Class i" names.
shap.summary_plot(
    shap_values,
    X_val,
    plot_type="bar",
    class_names=list(le.classes_),
)

# A second summary_plot call without plot_type is intentionally omitted: for
# multi-class (list) input only the bar chart is supported and it is the
# default, so that call would just redraw the same figure.