# Practice Exam 4 - Classification (Logistic Regression + SVM)
โจทย์: สร้างโมเดลเพื่อทำนายการซื้อสินค้าจาก Features: Age, Income, BrowsingTime.
- สร้าง LogisticRegression และ SVM
- เทียบ accuracy และ F1
- แสดง confusion matrix ของโมเดลที่ให้ F1 สูงสุด

In [1]:
# Import libraries
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

np.random.seed(2)
N = 300
age = np.random.randint(18, 70, N)
income = np.random.randint(20000, 150000, N)
time = np.round(np.random.exponential(5, N), 1)
buy = ((income > 60000) & (time > 3)) | (age < 30)
df = pd.DataFrame({'Age': age, 'Income': income, 'BrowsingTime': time, 'Buy': buy.astype(int)})
print(df.head())

   Age  Income  BrowsingTime  Buy
0   58  111829           2.1    0
1   33  141426           5.2    1
2   63   98714           1.1    0
3   26   80424          11.3    1
4   40  129579           8.4    1


In [8]:
x = df.drop('Buy', axis=1)
y = df['Buy']

x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.2, random_state=42)

scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

models = {
    'Logistic Regression': LogisticRegression(),
    'SVM': SVC(kernel='rbf', probability=True, random_state=42)
}

for name, model in models.items():
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    print(f"\n{name} Performance:")
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
    print(f"F1 Score: {f1_score(y_test, y_pred):.4f}")

best_model = max(models, key=lambda x: f1_score(y_test, models[x].predict(x_test)))
print(f"\nBest Model: {best_model}")
print(classification_report(y_test, models[best_model].predict(x_test)))
print(confusion_matrix(y_test, models[best_model].predict(x_test)))


Logistic Regression Performance:
Accuracy: 0.7500
F1 Score: 0.7000

SVM Performance:
Accuracy: 0.8333
F1 Score: 0.7980

Best Model: SVM
              precision    recall  f1-score   support

           0       0.77      0.97      0.86       125
           1       0.95      0.69      0.80       115

    accuracy                           0.83       240
   macro avg       0.86      0.83      0.83       240
weighted avg       0.86      0.83      0.83       240

[[121   4]
 [ 36  79]]
