In [None]:
#----------------          ------------------------------------------------#
#                     Import Libraries และตั้งค่าไฟล์พื้นฐาน
#----------------          ------------------------------------------------#
from pathlib import Path
import json
import pandas as pd
import numpy as np
from IPython.display import display  # ✅ สำหรับ Jupyter/VS Code notebooks

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

import matplotlib.pyplot as plt

#----------------          ------------------------------------------------#
#                Define paths and prepare the output folders
#----------------          ------------------------------------------------#
# Every location is relative to the current working directory, so the
# notebook has to be started from the folder that contains AI_project_master.
BASE = Path("AI_project_master")
DATA_PATH = BASE / "data" / "earthquakes.csv"
METRICS_DIR = BASE / "metrics"
MODELS_DIR  = BASE / "models"
FIG_DIR     = METRICS_DIR / "figs"

# Check for the dataset BEFORE creating anything. Otherwise a run from the
# wrong working directory first creates a stray AI_project_master/ tree there
# and only afterwards fails when the CSV is read.
if not DATA_PATH.is_file():
    raise FileNotFoundError(
        f"Dataset not found at '{DATA_PATH}' "
        f"(current working directory: '{Path.cwd()}'). "
        f"Start the notebook from the folder that contains '{BASE}'."
    )

# Output folders for metrics, saved models and figures (created if missing).
METRICS_DIR.mkdir(parents=True, exist_ok=True)
MODELS_DIR.mkdir(parents=True, exist_ok=True)
FIG_DIR.mkdir(parents=True, exist_ok=True)

#----------------          ------------------------------------------------#
#               Load and explore the data
#----------------          ------------------------------------------------#
df = pd.read_csv(DATA_PATH)

# Show the first rows together with summary statistics; include='all' makes
# describe() report on non-numeric columns as well.
preview = df.head()
summary = df.describe(include='all')
display(preview, summary)

# Count of missing values in each column.
missing_per_column = df.isna().sum()
print("Missing per column:\n", missing_per_column)

#----------------          ------------------------------------------------#
#               Clean the numeric columns (cdi is left out)
#----------------          ------------------------------------------------#
# Only magnitude, depth, mmi and sig are used; keep those that exist in df.
numeric_wanted = ["magnitude", "depth", "mmi", "sig"]
num_cols = [name for name in numeric_wanted if name in df.columns]

# Coerce to numbers (unparseable values become NaN), then fill every gap with
# the median of its own column.
coerced = df[num_cols].apply(pd.to_numeric, errors="coerce")
df[num_cols] = coerced.fillna(coerced.median())

#----------------          ------------------------------------------------#
#               Prepare the target
#----------------          ------------------------------------------------#
# Validate with an explicit raise: an assert would be stripped when Python
# runs with -O, letting a missing column surface later as an opaque KeyError.
if "alert" not in df.columns:
    raise ValueError("ต้องมีคอลัมน์ 'alert'")

# Encode the alert labels as integers; `le` keeps the mapping back to the
# original label names (le.classes_).
# NOTE(review): astype(str) also stringifies missing alert values, if any, so
# they would be encoded as a class of their own - confirm this is intended.
le = LabelEncoder()
y = le.fit_transform(df["alert"].astype(str))

#----------------          ------------------------------------------------#
#            Build the Decision Tree model (without cdi)
#----------------          ------------------------------------------------#
# Keep only the wanted features that actually exist in df.
features = [name for name in numeric_wanted if name in df.columns]
# Explicit raise instead of assert: asserts are removed under `python -O`,
# which would let an empty feature list reach the estimator.
if not features:
    raise ValueError("ไม่พบคอลัมน์ฟีเจอร์ที่ต้องการในข้อมูล")

X = df[features]

# 80/20 train/test split that preserves the class proportions (stratify=y);
# the fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Hyper-parameters are hard-coded here; the same dict is reused for
# cross-validation further down.
best_params = {"max_depth": 20, "min_samples_split": 2}
clf = DecisionTreeClassifier(random_state=42, **best_params).fit(X_train, y_train)

#----------------          ------------------------------------------------#
#               Evaluate the model
#----------------          ------------------------------------------------#
# Pin the label set to every class the encoder knows. Without it, sklearn
# infers the labels from y_test/y_pred, and a class that happens to be absent
# from the test split makes the number of labels disagree with le.classes_
# (the report raises or the matrix is mislabelled).
all_labels = list(range(len(le.classes_)))

y_pred = clf.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print(f"Accuracy (Test): {acc:.4f}  -> {acc*100:.2f}%")
print("\nClassification report:\n",
      classification_report(y_test, y_pred, labels=all_labels,
                            target_names=le.classes_, zero_division=0))

# (Optional) Confusion matrix: overview of correct/incorrect predictions per
# class, shown and also saved under FIG_DIR.
cm = confusion_matrix(y_test, y_pred, labels=all_labels)
disp = ConfusionMatrixDisplay(cm, display_labels=le.classes_)
disp.plot(cmap="Blues")
plt.title("Confusion Matrix (Decision Tree, no CDI)")
plt.tight_layout()
# Save before plt.show() so the file is written from the still-open figure.
plt.savefig(FIG_DIR / "confusion_matrix_no_cdi.png", dpi=200)
plt.show()

# (Optional) 5-fold cross-validation on the full data set to gauge stability;
# a fresh, unfitted tree with the same hyper-parameters is used for each fold.
cv_scores = cross_val_score(
    DecisionTreeClassifier(random_state=42, **best_params),
    X, y, cv=5, scoring="accuracy"
)
print("5-fold CV scores:", cv_scores.round(4),
      " | mean =", round(cv_scores.mean(), 4),
      " | std =", round(cv_scores.std(), 4))


FileNotFoundError: [Errno 2] No such file or directory: 'AI_project_master\\data\\earthquakes.csv'

In [12]:
from pathlib import Path
import pandas as pd
import joblib
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# ----------------------------
# Load the data and the saved model
# ----------------------------
# All paths are relative to the current working directory.
BASE = Path("AI_project_master")
DATA_PATH = BASE.joinpath("data", "earthquakes.csv")
MODEL_PATH = BASE.joinpath("models", "earthquake_model.pkl")
ENC_PATH = BASE.joinpath("models", "label_encoder.pkl")

# Read the dataset, then the persisted estimator and its label encoder.
# joblib.load unpickles the files, so only load artifacts from a trusted source.
df = pd.read_csv(DATA_PATH)
model = joblib.load(MODEL_PATH)
le = joblib.load(ENC_PATH)

# ----------------------------
# Prepare the data
# ----------------------------
features = ["magnitude", "depth", "cdi", "mmi", "sig"]

# Coerce to numbers first and take the fill medians from the COERCED frame.
# Computing the medians on the raw columns (before coercion) fails, or skips
# the column, whenever a feature is still stored as text.
numeric = df[features].apply(pd.to_numeric, errors="coerce")
df[features] = numeric.fillna(numeric.median())

# Encode the target with the already-fitted encoder; transform() raises on a
# label the encoder has not seen.
y = le.transform(df["alert"].astype(str))
X = df[features]

# ----------------------------
# Train/test split
# ----------------------------
# Stratified 80/20 split with a fixed seed, so the held-out rows are the same
# on every run.
# NOTE(review): the score below is only a true hold-out score if the saved
# model was trained on this very split - confirm against the training code.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.2, random_state=42
)

# ----------------------------
# Measure the accuracy
# ----------------------------
# Predict on the held-out part and compare with the true labels.
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f" ความแม่นยำของโมเดล (Accuracy): {acc*100:.2f}%")


 ความแม่นยำของโมเดล (Accuracy): 88.85%
