In [None]:
# AutoGluon 설치 - colab에서 실행했음
!pip install autogluon --quiet

In [None]:
import time
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, f1_score
from sklearn.model_selection import train_test_split
from autogluon.tabular import TabularPredictor

# Random seed for the train/test split.
seed = 2025

# Load the raw data, parse the timestamp column and order rows chronologically.
df = pd.read_csv('실습데이터.csv')
df['Timestamp'] = pd.to_datetime(df['Timestamp'])
df = df.sort_values('Timestamp')

# Target column name; always refer to it through `label_col` instead of repeating the literal.
label_col = 'STATUS.xlsx'

# Drop rows without a label *before* the string cast: `astype(str)` would otherwise turn a
# missing label into the literal string 'nan', which would then be treated as a class.
# The cast itself presumably forces the labels to be treated as categories - TODO confirm.
df = df.dropna(subset=[label_col]).astype({label_col: str})

# Stratified 80/20 train/test split.
# NOTE(review): the split is random even though the rows are time-ordered; if neighbouring
# timestamps are correlated this can leak information into the test set - confirm that a
# chronological split is not required.
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    random_state=seed,
    stratify=df[label_col],
)

# Create the predictor: optimise macro-averaged F1 and store all artifacts under `path`.
predictor = TabularPredictor(
    label=label_col,  # same column that was cast and stratified on above
    eval_metric='f1_macro',
    path='autogluon_model_f1macro',
)

# Fit with the 'best_quality' preset, capped at `time_limit` seconds.
# `perf_counter` is monotonic, so the measured duration is not affected by
# system clock adjustments during the (long) training run.
start = time.perf_counter()
predictor.fit(
    train_data=train_df,
    presets='best_quality',
    time_limit=3600,
    verbosity=2,
)
end = time.perf_counter()
print(f"run time: {(end - start):.2f}초")

# Score every trained model on the held-out test set, show the table and keep a CSV copy.
leaderboard_df = predictor.leaderboard(data=test_df, silent=True)
print(leaderboard_df)
leaderboard_df.to_csv('model_leaderboard_f1_macro.csv', index=False)

# Separate the ground-truth labels from the model inputs.
test_labels = test_df[label_col]
test_features = test_df.drop(label_col, axis=1)

# Hard predictions and per-class probabilities from the predictor's default (best) model.
preds = predictor.predict(test_features)
probas = predictor.predict_proba(test_features)

# Preview the first five rows of each result.
for heading, result in (
    ("예측 결과 (상위 5개):", preds),
    ("\n예측 확률 (상위 5개):", probas),
):
    print(heading)
    print(result.head())

# Confusion matrix on the test set.
# Use every class that appears in either the ground truth or the predictions: with
# `labels=` restricted to the test labels only, a prediction of any other class would be
# silently dropped from the matrix while still being counted by `f1_score` below.
classes = sorted(set(test_labels) | set(preds))
cm = confusion_matrix(test_labels, preds, labels=classes)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=classes)

# Explicit figure/axes so the title, layout and saved file all refer to the same figure.
fig, ax = plt.subplots()
disp.plot(cmap='Blues', ax=ax, xticks_rotation=45)
ax.set_title('Confusion Matrix (Test Set)')
fig.tight_layout()
fig.savefig('confusion_matrix_f1_macro.png')
plt.show()

# Macro-averaged F1 on the test set (same metric the predictor was tuned for).
f1_macro = f1_score(test_labels, preds, average='macro')
print(f"f1_macro Score: {f1_macro:.4f}")

In [None]:
# Feature importance and t-SNE plots have already been generated and uploaded to Notion
# (the code that produced them is not in this cell).

In [None]:
# SHAP explanation of a single AutoGluon model.
#
# The model with the best F1 score (WeightedEnsemble_L3) could not be explained with SHAP,
# so the third-ranked model, a random forest, is explained instead.
#
# How this cell gets SHAP to run against that model:
#   * The rows to explain come from `test_features` (the name `features` was never defined).
#   * Predictions go through the public `predictor.predict_proba(..., model=...)` API.
#     A level-2 stacker loaded via `predictor._trainer.load_model` expects already
#     transformed inputs plus the lower-layer predictions, so it cannot take raw features.
#   * `shap.Explainer` on a plain callable selects a model-agnostic algorithm, not
#     TreeExplainer.
#   * The explanation has one output per class, and `beeswarm` only accepts a single
#     output, so one plot is drawn per class.
import shap  # third-party; run `%pip install shap` first if it is not installed

shap_model_name = 'RandomForestEntr_BAG_L2'

# SHAP's tabular masker operates on a plain array, so datetime columns are handed to SHAP
# as int64 nanoseconds since the epoch and converted back before predicting.
# NOTE(review): assumes the datetime columns are timezone-naive with no missing values and
# that every other feature column is numeric - confirm against the real schema.
shap_source = test_features
datetime_cols = [
    col for col in shap_source.columns
    if pd.api.types.is_datetime64_any_dtype(shap_source[col])
]
other_dtypes = shap_source.dtypes.drop(labels=datetime_cols).to_dict()


def model_predict(X):
    """Return class probabilities of `shap_model_name` for rows supplied by SHAP.

    Parameters
    ----------
    X : pandas.DataFrame or array-like of shape (n_rows, n_features)
        Rows in the column order of `shap_source`, with datetime columns encoded as
        int64 nanoseconds.

    Returns
    -------
    numpy.ndarray
        Array of shape (n_rows, n_classes); columns follow `predictor.class_labels`.
    """
    if isinstance(X, pd.DataFrame):
        frame = X.copy()  # never mutate the frame owned by SHAP
    else:
        frame = pd.DataFrame(X, columns=shap_source.columns)
    # Undo the numeric encoding so the predictor sees the dtypes it was trained on.
    for col in datetime_cols:
        frame[col] = pd.to_datetime(frame[col].astype('int64'))
    frame = frame.astype(other_dtypes)
    return predictor.predict_proba(frame, model=shap_model_name).to_numpy()


# Small sample to keep the SHAP run time manageable; it is used both as the background
# data and as the rows being explained. Capped so tiny test sets do not raise.
# NOTE: each evaluation runs the models that `shap_model_name` stacks on, so this is slow.
sample_data = shap_source.sample(min(100, len(shap_source)), random_state=42)
sample_data = (
    sample_data
    .astype({col: 'datetime64[ns]' for col in datetime_cols})
    .astype({col: 'int64' for col in datetime_cols})
)

# Model-agnostic explainer built from the prediction function and the background sample.
explainer = shap.Explainer(model_predict, sample_data)
shap_values = explainer(sample_data)

# Bee swarm plot per class (shap_values has shape rows x features x classes).
for class_idx, class_name in enumerate(predictor.class_labels):
    shap.plots.beeswarm(shap_values[:, :, class_idx], show=False)
    plt.title(f"SHAP beeswarm - {shap_model_name} (class: {class_name})")
    plt.tight_layout()
    plt.show()