# ML Exam Template: Best Accuracy (from provided list)
เลือกวิธีที่ให้ accuracy สูงสุดจากรายการที่ให้มา
---

## REGRESSION
### Polynomial Regression (best choice in the provided list for non-linear tabular data)

In [None]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import pandas as pd
# Template placeholders: load your own dataset and define the feature matrix X
# and the target y before running the rest of this cell.
# df = pd.read_csv('your_data.csv')
# X = df.drop('target', axis=1)
# y = df['target']
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Expand the features with degree-2 terms (no constant column). The transformer
# is fitted on the training split only and then reused on the test split.
poly = PolynomialFeatures(degree=2, include_bias=False)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)
# Linear regression on the expanded features.
model = LinearRegression()
model.fit(X_train_poly, y_train)
y_pred = model.predict(X_test_poly)
print('R2:', r2_score(y_test, y_pred))
# RMSE is computed as sqrt(MSE) instead of passing `squared=False`: that keyword
# was deprecated in scikit-learn 1.4 and removed in 1.6, where it raises TypeError.
# NOTE(review): for a multi-column y the old keyword averaged per-output RMSEs,
# which is a different number - confirm y is a single target column.
print('RMSE:', mean_squared_error(y_test, y_pred) ** 0.5)
print('MAE:', mean_absolute_error(y_test, y_pred))

---
## CLASSIFICATION
### Random Forest (best ensemble method in the provided list for tabular data)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import pandas as pd
# Template placeholders: load your own dataset and define the feature matrix X
# and the class labels y before running the rest of this cell.
# df = pd.read_csv('your_data.csv')
# X = df.drop('target', axis=1)
# y = df['target']
# 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# Learn the scaling statistics from the training rows only, then apply the same
# fitted scaler to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Hyper-parameter candidates: 2 x 2 = 4 combinations, each scored with 5-fold CV.
params = dict(n_estimators=[100, 200], max_depth=[3, 6])
rf = RandomForestClassifier(random_state=42)
grid = GridSearchCV(
    estimator=rf,
    param_grid=params,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
)
grid.fit(X_train_scaled, y_train)
# Keep the best estimator found by the search and evaluate it on the held-out split.
best_model = grid.best_estimator_
y_pred = best_model.predict(X_test_scaled)
print('Accuracy:', accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
print('Confusion matrix:')
print(confusion_matrix(y_test, y_pred))

---
## (Optional) KFold Cross-Validation Example
ใช้สำหรับประเมินโมเดลแบบ cross-validation (ถ้าอยากโชว์ best practice)

In [None]:
# Polynomial Regression with KFold
from sklearn.model_selection import KFold, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LinearRegression
import numpy as np
# Template placeholders: define the feature matrix X and the target y first.
# df = pd.read_csv('your_data.csv')
# X = df.drop('target', axis=1)
# y = df['target']
# Scaling -> degree-2 feature expansion -> linear regression, chained into one
# estimator so cross_val_score handles all three steps together.
model = make_pipeline(
    StandardScaler(),
    PolynomialFeatures(degree=2, include_bias=False),
    LinearRegression(),
)
# Five shuffled folds; the seed fixes the fold assignment.
cv = KFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_val_score(estimator=model, X=X, y=y, scoring='r2', cv=cv)
print('KFold R2 scores:', scores)
print('Mean R2:', scores.mean())

In [None]:
# Random Forest Classification with KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import numpy as np
# Template placeholders: define the feature matrix X and the class labels y first.
# df = pd.read_csv('your_data.csv')
# X = df.drop('target', axis=1)
# y = df['target']
# Scaling followed by a seeded random forest, chained into one estimator so
# cross_val_score handles both steps together.
model = make_pipeline(
    StandardScaler(),
    RandomForestClassifier(random_state=42),
)
# Five shuffled folds; the seed fixes the fold assignment.
cv = KFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_val_score(estimator=model, X=X, y=y, scoring='accuracy', cv=cv)
print('KFold accuracy scores:', scores)
print('Mean accuracy:', scores.mean())

---
## ตัวอย่างการนำโมเดล Regression ไปใช้กับข้อมูลใหม่
Predict เงินเดือนพนักงานใหม่จากโมเดลที่เทรนไว้

In [None]:
# Example: use the regression model to predict for one new record.
# NOTE(review): the column names are illustrative; they presumably have to match
# the features the model was trained on - confirm against your dataset.
new = pd.DataFrame([
    {'YearsExperience': 6.5, 'EducationLevel': 2, 'PerformanceScore': 82},
])
# NOTE: `model` below must be the fitted regressor from the regression cell; the
# KFold cells rebind `model` to pipelines that are never fitted in this file.
# If PolynomialFeatures was used, transform the new row first:
# new_poly = poly.transform(new)
# pred_salary = model.predict(new_poly)
# print(f"Predicted Salary: {pred_salary[0]:.2f} ฿")
# If a plain LinearRegression was used:
# pred_salary = model.predict(new)
# print(f"Predicted Salary: {pred_salary[0]:.2f} ฿")

---
## ตัวอย่างการนำโมเดล Classification ไปใช้กับข้อมูลใหม่
Predict โรคหัวใจจากข้อมูลผู้ป่วยใหม่

In [None]:
# Example: use the classification model to predict for one new patient record.
# NOTE(review): the column names are illustrative; they presumably have to match
# the features the classifier was trained on - confirm against your dataset.
new_patient = pd.DataFrame({
    'Age': [54],
    'Sex': [1],
    'ChestPain': [2],
    'RestingBP': [145],
    'Cholesterol': [265],
    'FastingBS': [1],
    'MaxHR': [138]
})
# If StandardScaler was used (as in the Random Forest cell), transform the new
# row with that fitted scaler and predict with the tuned estimator `best_model`.
# `model` is not the classifier there: it is bound to the regression model or to
# a KFold pipeline that is never fitted in this file.
# new_scaled = scaler.transform(new_patient)
# pred = best_model.predict(new_scaled)
# print(f"Predicted: {pred[0]}")
# If you trained your own Random Forest directly on unscaled features, predict
# with that estimator instead (here called `model`):
# pred = model.predict(new_patient)
# print(f"Predicted: {pred[0]}")