In [None]:
# ==========================================
# STEP 0: Import Libraries
# ==========================================
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.regularizers import l1, l2
from tensorflow.keras.callbacks import EarlyStopping


# ==========================================
# STEP 1: Load & Preprocess Data
# ==========================================
# The URL is pinned to a specific commit so the dataset cannot change
# underneath the notebook.
url = "https://raw.githubusercontent.com/pakornlee/ml_example/e4fe04e97b387f17aaabb53709391f4c364a06c0/customer_data_100.csv"
df = pd.read_csv(url)

# Ordinal encoding: education levels have a natural order, so they are
# mapped to increasing integers instead of being one-hot encoded.
education_map = {
    "HighSchool": 1,
    "Bachelor": 2,
    "Master": 3,
    "PhD": 4
}

# Series.map() silently turns labels that are missing from the mapping into
# NaN, which would then flow into scaling and training. Fail loudly instead.
# Values that are already missing in the raw data are left untouched.
unmapped_levels = sorted(
    set(df["education_level"].dropna().unique()) - set(education_map),
    key=str,
)
if unmapped_levels:
    raise ValueError(
        f"Unmapped education_level values: {unmapped_levels}"
    )
df["education_level"] = df["education_level"].map(education_map)

# One-hot encoding for the nominal (unordered) columns. drop_first=True
# drops one indicator per column to avoid redundant, collinear features.
df = pd.get_dummies(
    df,
    columns=["job_type", "city"],
    drop_first=True
)

# Feature matrix / target vector
X = df.drop("buy_product", axis=1)
y = df["buy_product"]

print("Data shape:", X.shape)


# ==========================================
# STEP 2: Split CV (90%) & Evaluation (10%)
# ==========================================
# Stratified 90/10 split: the 10% evaluation part is kept aside for the
# final check, the 90% "CV" part is used for training and validation.
X_cv, X_eval, y_cv, y_eval = train_test_split(
    X, y, test_size=0.10, random_state=42, stratify=y
)

# Neural networks need scaled inputs. The scaler is fitted on the CV part
# only and then applied unchanged to both parts, so no statistics from the
# evaluation set are used.
# NOTE(review): the scaler still sees the rows that Keras later holds out
# through validation_split, so the validation loss is slightly optimistic.
scaler = StandardScaler().fit(X_cv)
X_cv = scaler.transform(X_cv)
X_eval = scaler.transform(X_eval)

print("CV set size:", X_cv.shape)
print("Evaluation set size:", X_eval.shape)


# ==========================================
# STEP 3: Build MLP with Regularization
# ==========================================
# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation,
# dropout masks and batch shuffling are repeatable between runs (the data
# split above already uses random_state=42).
tf.keras.utils.set_random_seed(42)

model = Sequential([
    # Declare the input with an explicit Input object. Passing
    # `input_shape=` to the first Dense layer is discouraged in current
    # Keras and emits a UserWarning.
    tf.keras.Input(shape=(X_cv.shape[1],)),

    Dense(
        32,
        activation="relu",
        kernel_regularizer=l1(0.001)    # L1 penalty on the layer weights
    ),
    Dropout(0.3),                      # drop 30% of activations in training

    Dense(
        16,
        activation="relu",
        kernel_regularizer=l2(0.001)    # L2 penalty on the layer weights
    ),
    Dropout(0.3),                      # drop 30% of activations in training

    # Single sigmoid unit: outputs a probability for the binary target.
    Dense(1, activation="sigmoid")
])

model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"]
)


# ==========================================
# STEP 3.1: Early Stopping
# ==========================================
# Stop training once the validation loss has not improved for 10
# consecutive epochs, then roll the model back to the weights of the best
# epoch seen.
early_stop = EarlyStopping(
    restore_best_weights=True,
    patience=10,
    monitor="val_loss",
)


# ==========================================
# STEP 3.2: Train Model (Validation Split)
# ==========================================
# Train for at most 200 epochs; the early-stopping callback normally ends
# the run sooner. validation_split=0.2 holds out 20% of the CV set as the
# validation set that the callback monitors.
history = model.fit(
    x=X_cv,
    y=y_cv,
    batch_size=16,
    epochs=200,
    verbose=1,
    callbacks=[early_stop],
    validation_split=0.2,
)

# One loss value is recorded per completed epoch, so the length of the
# loss history is the number of epochs that actually ran.
epochs_run = len(history.history["loss"])
print("\nTraining stopped at epoch:", epochs_run)


# ==========================================
# STEP 4: Final Evaluation on 10% Hold-out Set
# ==========================================
# The sigmoid output is a probability per row, returned as a column vector.
y_eval_pred_prob = model.predict(X_eval)

# Threshold at 0.5 to get hard class labels, then flatten to 1-D so the
# predictions have the same shape as y_eval instead of relying on
# scikit-learn's implicit column-vector coercion.
y_eval_pred = (y_eval_pred_prob > 0.5).astype(int).ravel()

print("\nEvaluation Set Accuracy:")
print(accuracy_score(y_eval, y_eval_pred))

print("\nClassification Report (Evaluation Set):")
print(classification_report(y_eval, y_eval_pred))


Data shape: (100, 6)
CV set size: (90, 6)
Evaluation set size: (10, 6)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/200
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 70ms/step - accuracy: 0.5327 - loss: 0.7006 - val_accuracy: 0.3333 - val_loss: 0.7552
Epoch 2/200
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.4554 - loss: 0.7473 - val_accuracy: 0.4444 - val_loss: 0.7139
Epoch 3/200
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 0.5616 - loss: 0.6989 - val_accuracy: 0.5000 - val_loss: 0.6786
Epoch 4/200
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.6021 - loss: 0.6700 - val_accuracy: 0.6111 - val_loss: 0.6451
Epoch 5/200
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.7630 - loss: 0.5683 - val_accuracy: 0.7222 - val_loss: 0.6147
Epoch 6/200
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.6794 - loss: 0.5927 - val_accuracy: 0.9444 - val_loss: 0.5868
Epoch 7/200
[1m5/5[0m [32m━━━━━━━━━━━