In [29]:
import json

import joblib
import pandas as pd
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split

from modules.one_hot_module import build_df_onehot, fetch_df, rows_to_df_onehot
from modules.predict_noshow_proba_df import predict_noshow_proba_df
from NoShowMLP_KDY import NoShowMLP_KDY


In [6]:
# --- Score every appointment with the trained no-show model -----------------

# Raw appointment table from the project helper.
rows = fetch_df("appointment")

# Feature-column list saved with the model; its length is the network's input width.
with open("artifacts/feature_columns.json", "r", encoding="utf-8") as f:
    feature_cols = json.load(f)
input_dim = len(feature_cols)

# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code. Only load scaler artifacts that were produced by this project.
scaler = joblib.load("artifacts/scaler.joblib")

# Prefer the GPU when one is available; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

model = NoShowMLP_KDY(input_dim=input_dim)
# weights_only=True restricts unpickling to tensors and plain containers, which
# is all a state dict needs, so a tampered checkpoint cannot run arbitrary code.
state = torch.load("artifacts/mlp_model.pt", map_location=device, weights_only=True)
model.load_state_dict(state)
model.to(device)
model.eval()  # put the module in evaluation mode before predicting

# One-hot encode the raw rows into the frame passed to the prediction helper.
df = rows_to_df_onehot(rows)

# Attach the predicted probability to the raw table. Column assignment aligns
# on the index, so this assumes the prediction frame keeps `rows`' index.
# TODO(review): confirm against predict_noshow_proba_df / rows_to_df_onehot.
rows["no_show_prob"] = predict_noshow_proba_df(model, scaler, df)["no_show_prob"]

In [19]:
# Inspect the columns of `rows`, including the newly attached `no_show_prob`.
rows.columns

Index(['appointment_id', 'appointment_datetime', 'appointment_date', 'name',
       'specialty', 'gender', 'age', 'under_12_years_old', 'over_60_years_old',
       'patient_needs_companion', 'disability', 'no_show', 'icd',
       'entry_service_date', 'no_show_prob'],
      dtype='object')

In [28]:
# Sanity check: the first `appointment_datetime` value is a pandas Timestamp.
# Uses the public `pd.Timestamp` (imported in the top import cell) instead of a
# private `pandas._libs` path, and positional `.iloc[0]` so the check does not
# depend on the index containing the label 0.
isinstance(rows["appointment_datetime"].iloc[0], pd.Timestamp)

True

In [36]:
# Hold-out split settings, kept together so they are easy to find and tune.
TEST_SIZE = 0.2
SPLIT_SEED = 42
EVAL_TEST_PATH = "artifacts/eval_test.csv"

# 1) Load the raw data (adjust the source as needed).
df = fetch_df("appointment")

# 2) Train / test split.
train_df, test_df = train_test_split(
    df,
    test_size=TEST_SIZE,
    random_state=SPLIT_SEED,
    stratify=df["no_show"],  # strongly recommended for classification problems
)

# 3) Save the test split (the file to upload in the Streamlit app).
test_df.to_csv(EVAL_TEST_PATH, index=False)

print("Saved eval_test.csv")
print(test_df.shape)


Saved eval_test.csv
(9918, 14)
