# Practice Exam 2 - Classification (Decision Tree)
โจทย์: ใช้ชุดข้อมูลสังเคราะห์ทำนายว่าผู้ป่วยมีโรค (1) หรือไม่ (0).
- แบ่ง train/test 80/20
- Scale ด้วย StandardScaler
- หา best max_depth ระหว่าง 1-20
- แสดง classification report ของโมเดลที่ดีที่สุด

In [6]:
# Import libraries
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Build a reproducible synthetic patient table (seeded so every run matches).
np.random.seed(0)
N = 200

# Each feature is drawn as N integers from the half-open range [low, high).
# The draws happen in this exact order (age, cholesterol, blood pressure) so
# the seeded random stream yields the same values on every run.
age, chol, bp = (
    np.random.randint(low, high, size=N)
    for low, high in ((30, 80), (150, 300), (100, 180))
)

# Positive class: older than 50 with cholesterol above 220, or resting blood
# pressure above 160 regardless of the other two features.
label = np.logical_or(np.logical_and(age > 50, chol > 220), bp > 160)

df = pd.DataFrame({'Age': age, 'Cholesterol': chol, 'RestingBP': bp})
df['HeartDisease'] = label.astype(int)  # store the boolean label as 0/1
print(df.head())

   Age  Cholesterol  RestingBP  HeartDisease
0   74          229        142             1
1   77          191        165             1
2   30          168        120             0
3   33          190        136             0
4   33          161        168             1


In [10]:
# Cell-local import: the depth search below needs cross-validation, which the
# notebook's import cell does not bring into scope.
from sklearn.model_selection import cross_val_score

# Separate the feature columns from the binary target.
X = df.drop('HeartDisease', axis=1)
y = df['HeartDisease']

# Hold out 20% of the rows for the final evaluation; the fixed seed keeps the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only, then apply the same statistics to
# the test rows so no test information influences the transformation.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

best_max_depth = None
best_accuracy = 0

model = DecisionTreeClassifier(random_state=42)
for depth in range(1, 21):
    model.set_params(max_depth=depth)
    # Score each candidate depth with 5-fold cross-validation on the training
    # split only. Choosing the depth by its test-set accuracy would leak the
    # test data into model selection and make the final report optimistic.
    accuracy = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy').mean()
    # Strict '>' keeps the shallowest tree when several depths tie.
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_max_depth = depth
# The accuracy printed here is the mean cross-validated accuracy of the winner.
print(f"Best max_depth: {best_max_depth} with accuracy: {best_accuracy:.4f}")

# Refit on the full training split with the selected depth
# (cross_val_score only fits clones, so `model` itself is still unfitted),
# then evaluate once on the untouched test split.
model.set_params(max_depth=best_max_depth)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

Best max_depth: 3 with accuracy: 1.0000
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       1.00      1.00      1.00        17

    accuracy                           1.00        40
   macro avg       1.00      1.00      1.00        40
weighted avg       1.00      1.00      1.00        40

[[23  0]
 [ 0 17]]
