## Demo Q-learning cho khoá luận tốt nghiệp

In [33]:
import random
import numpy as np
from collections import defaultdict

# -----------------------------
# Pool of learner states that training episodes start from (can be extended)
# -----------------------------
# Each entry is one learner snapshot. A quiz score of None is treated as
# "not passed yet" by determine_target_level and is encoded as -1 by
# vectorize_state. The meaning of the "cluster" id is not visible in this
# file -- presumably a learner-group label; verify against the data source.
initial_state_pool = [
    {
        "quiz_avg_scores": {
            "easy": 7.2,
            "medium": 2,
            "hard": None,
        },
        "pdf_views": {
            "PDF_1_2_1": 100,
            "PDF_1_2_2": 0,
        },
        "video_views": {
            "VID_1_2_1": 1,
            "VID_1_2_2": 0,
            "VID_1_2_3": 0,
        },
        "forum_post_count": 1,
        "assignment_status": 0,
        "cluster": 2,
    },
    {
        "quiz_avg_scores": {
            "easy": 5.5,
            "medium": None,
            "hard": None,
        },
        "pdf_views": {
            "PDF_1_2_1": 2,
            "PDF_1_2_2": 1,
        },
        "video_views": {
            "VID_1_2_1": 0,
            "VID_1_2_2": 2,
            "VID_1_2_3": 1,
        },
        "forum_post_count": 0,
        "assignment_status": 0,
        "cluster": 1,
    },
]

# -----------------------------
# Actions the agent can choose from
# -----------------------------
# Each action is either "<type>:<target>" or a bare "<type>"; simulate_action
# splits on ":" to separate the two parts.
ACTIONS = [
    # Read a PDF document
    "xem_pdf:PDF_1_2_1",
    "xem_pdf:PDF_1_2_2",
    # Watch a video
    "xem_video:VID_1_2_1",
    "xem_video:VID_1_2_2",
    "xem_video:VID_1_2_3",
    # Take a quiz
    "lam_quiz:QUIZ_1_2_EASY",
    "lam_quiz:QUIZ_1_2_MEDIUM",
    "lam_quiz:QUIZ_1_2_HARD",
    # Actions without a target
    "tham_gia_thao_luan",
    "nop_assignment",
]

# -----------------------------
# Mock score-prediction models (stand-ins for Linear Regression)
# -----------------------------
def predict_easy_score(state):
    """Return a mock predicted score for the easy quiz.

    ``state`` is accepted but ignored: the value is 5.5 plus random noise
    drawn from ``random.random()`` scaled by 0.2.
    """
    noise = 0.2 * random.random()
    return 5.5 + noise
def predict_medium_score(state):
    """Return a mock predicted score for the medium quiz.

    ``state`` is accepted but ignored: the value is 5.0 plus random noise
    drawn from ``random.random()`` scaled by 0.5.
    """
    noise = 0.5 * random.random()
    return 5.0 + noise
def predict_hard_score(state):
    """Return a mock predicted score for the hard quiz.

    ``state`` is accepted but ignored: the value is 4.5 plus random noise
    drawn from ``random.random()`` scaled by 0.8.
    """
    noise = 0.8 * random.random()
    return 4.5 + noise

# -----------------------------
# Determine the next quiz level to aim for (easy -> medium -> hard)
# -----------------------------
def determine_target_level(state):
    """Return the quiz level the learner should target next.

    Levels are checked in the order easy, medium. The first one whose
    average score is missing (None) or below 6 is returned; if both are at
    least 6 the result is "hard".

    Args:
        state: dict with a "quiz_avg_scores" mapping that has "easy" and
            "medium" keys.

    Returns:
        One of "easy", "medium" or "hard".
    """
    quiz_scores = state["quiz_avg_scores"]
    for level in ("easy", "medium"):
        average = quiz_scores[level]
        if average is None or average < 6:
            return level
    return "hard"

# -----------------------------
# Simulate an action and return the updated state
# -----------------------------
def simulate_action(state, action):
    """Return a copy of ``state`` with the effect of ``action`` applied.

    The input state is not modified: the three nested mappings are copied
    before any counter is changed.

    Args:
        state: learner state dict with the keys "quiz_avg_scores",
            "pdf_views", "video_views", "forum_post_count",
            "assignment_status" and "cluster".
        action: "<type>:<target>" or a bare "<type>" string.

    Returns:
        A new state dict. "xem_pdf" / "xem_video" add 1 to the view counter
        of the target, "tham_gia_thao_luan" adds 1 to the forum post count,
        "nop_assignment" sets the assignment status to 1. Any other action
        type (including "lam_quiz") leaves the copied state unchanged.
    """
    successor = {
        "quiz_avg_scores": dict(state["quiz_avg_scores"]),
        "pdf_views": dict(state["pdf_views"]),
        "video_views": dict(state["video_views"]),
        "forum_post_count": state["forum_post_count"],
        "assignment_status": state["assignment_status"],
        "cluster": state["cluster"]
    }

    # Only the first two ":"-separated pieces are used; a bare action type
    # has no target.
    kind, *remainder = action.split(":")
    target = remainder[0] if remainder else None

    # Action types that bump a per-resource view counter.
    view_counters = {"xem_pdf": "pdf_views", "xem_video": "video_views"}

    if kind in view_counters:
        counter = successor[view_counters[kind]]
        counter[target] = counter.get(target, 0) + 1
    elif kind == "tham_gia_thao_luan":
        successor["forum_post_count"] += 1
    elif kind == "nop_assignment":
        successor["assignment_status"] = 1

    return successor

# -----------------------------
# Vectorize a state so it can be used as a Q-table key
# -----------------------------
def vectorize_state(state):
    """Flatten ``state`` into a hashable tuple.

    The tuple holds, in order: the easy / medium / hard quiz averages
    (rounded to one decimal, with None encoded as -1), the view counts of
    PDF_1_2_1 and PDF_1_2_2, the view counts of VID_1_2_1, VID_1_2_2 and
    VID_1_2_3 (missing counters count as 0), the forum post count, the
    assignment status and the cluster id.
    """
    quiz_scores = state["quiz_avg_scores"]
    pdf_views = state["pdf_views"]
    video_views = state["video_views"]

    def encoded_score(level):
        # None means "no score"; -1 stands in for it inside the tuple.
        raw = quiz_scores[level]
        return round(-1 if raw is None else raw, 1)

    quiz_part = tuple(encoded_score(level) for level in ("easy", "medium", "hard"))
    pdf_part = tuple(pdf_views.get(doc, 0) for doc in ("PDF_1_2_1", "PDF_1_2_2"))
    video_part = tuple(
        video_views.get(clip, 0) for clip in ("VID_1_2_1", "VID_1_2_2", "VID_1_2_3")
    )
    tail = (state["forum_post_count"], state["assignment_status"], state["cluster"])

    return quiz_part + pdf_part + video_part + tail

# -----------------------------
# Compute the reward from the change in predicted score
# -----------------------------
def compute_reward(state, action):
    """Return the predicted score gain of taking ``action`` in ``state``.

    The target quiz level comes from ``determine_target_level``. The reward
    is the predictor's score for that level, evaluated on the state produced
    by ``simulate_action``, minus the learner's current average at that
    level. A current average of None (or any falsy value) counts as 0.
    """
    level = determine_target_level(state)
    baseline = state["quiz_avg_scores"][level] or 0
    successor = simulate_action(state, action)

    # Any level other than "easy" / "medium" falls through to the hard predictor.
    predictors = {
        "easy": predict_easy_score,
        "medium": predict_medium_score,
    }
    predictor = predictors.get(level, predict_hard_score)

    return predictor(successor) - baseline

# -----------------------------
# Train the Q-table
# -----------------------------
def train_q_table(episodes=5000, alpha=0.1, gamma=0.9, epsilon=1.0, *,
                  steps_per_episode=10, epsilon_decay=0.995, epsilon_min=0.05):
    """Train a tabular Q-function with epsilon-greedy exploration.

    Every episode starts from a state picked at random from
    ``initial_state_pool`` and runs ``steps_per_episode`` transitions. Each
    transition applies the update
    ``Q[s, a] += alpha * (reward + gamma * max_a' Q[s', a'] - Q[s, a])``,
    where the reward comes from ``compute_reward`` and the next state from
    ``simulate_action``.

    Args:
        episodes: number of training episodes.
        alpha: learning rate of the Q-value update.
        gamma: discount factor applied to the best next-state Q-value.
        epsilon: initial probability of picking a random action.
        steps_per_episode: transitions simulated per episode (keyword-only).
        epsilon_decay: factor ``epsilon`` is multiplied by after every
            episode (keyword-only).
        epsilon_min: lower bound that ``epsilon`` never decays below
            (keyword-only).

    Returns:
        A ``defaultdict(float)`` mapping ``(state_key, action)`` to the
        learned Q-value, where ``state_key`` is built by ``vectorize_state``.
        Because the table is a defaultdict, every pair that was looked up
        during training is stored, including pairs still at 0.0.
    """
    Q = defaultdict(float)

    for _ in range(episodes):
        state = random.choice(initial_state_pool)

        for _ in range(steps_per_episode):
            state_key = vectorize_state(state)

            # Epsilon-greedy action selection
            if random.random() < epsilon:
                action = random.choice(ACTIONS)
            else:
                # max() keeps the first maximum, so ties resolve to the
                # earliest action in ACTIONS.
                action = max(ACTIONS, key=lambda a: Q[(state_key, a)])

            # Compute the reward and update the Q-value
            reward = compute_reward(state, action)
            next_state = simulate_action(state, action)
            next_state_key = vectorize_state(next_state)
            future_q = max(Q[(next_state_key, a)] for a in ACTIONS)

            Q[(state_key, action)] += alpha * (reward + gamma * future_q - Q[(state_key, action)])

            # Move on to the next state
            state = next_state

        # Explore less as training progresses, but never below the floor.
        epsilon = max(epsilon_min, epsilon * epsilon_decay)

    print(f"✅ Đã train xong {episodes} lượt, total Q-state-action: {len(Q)}")
    return Q

# -----------------------------
# Suggest the next action from the Q-table
# -----------------------------
def suggest_next_action(state, Q):
    """Print the Q-value of every action for ``state`` and return the best.

    Args:
        state: learner state dict accepted by ``vectorize_state``.
        Q: mapping from ``(state_key, action)`` to a Q-value; pairs that are
            missing count as 0.

    Returns:
        The action in ``ACTIONS`` with the highest Q-value. On ties the
        earliest such action wins.
    """
    key = vectorize_state(state)
    print(f"\n🧠 State key: {key}")

    q_by_action = {}
    for candidate in ACTIONS:
        q_by_action[candidate] = Q.get((key, candidate), 0)

    for candidate, value in q_by_action.items():
        print(f"🔹 {candidate}: Q = {value:.4f}")

    return max(q_by_action, key=q_by_action.get)

In [34]:

def suggest_next_action(state, Q):
    """Print the Q-value of every action for ``state`` and return the best.

    This re-declares the function of the same name from the previous cell
    with the same logic; once both cells have run, this definition is the
    one in effect.

    Args:
        state: learner state dict accepted by ``vectorize_state``.
        Q: mapping from ``(state_key, action)`` to a Q-value; pairs that are
            missing count as 0.

    Returns:
        The action in ``ACTIONS`` with the highest Q-value. On ties the
        earliest such action wins.
    """
    key = vectorize_state(state)
    print(f"\n🧠 State key: {key}")

    estimates = dict((name, Q.get((key, name), 0)) for name in ACTIONS)
    for name in estimates:
        print(f"🔹 {name}: Q = {estimates[name]:.4f}")

    # max() returns the first maximal entry, matching insertion order.
    winner, _ = max(estimates.items(), key=lambda pair: pair[1])
    return winner

In [35]:
# Train a Q-table for 1000 episodes; the other hyperparameters keep their defaults.
# No random seed is set in the visible code, so the learned values differ between runs.
Q = train_q_table(episodes=1000)


# Learner state to query; it has the same values as the first entry of initial_state_pool.
current_state = {
    "quiz_avg_scores": {"easy": 7.2, "medium": 2, "hard": None},
    "pdf_views": {"PDF_1_2_1": 100, "PDF_1_2_2": 0},
    "video_views": {"VID_1_2_1": 1, "VID_1_2_2": 0, "VID_1_2_3": 0},
    "forum_post_count": 1,
    "assignment_status": 0,
    "cluster": 2
}

# Print the Q-value of every action for this state and report the highest-valued one.
action = suggest_next_action(current_state, Q)
print("👉 Gợi ý học tiếp theo:", action)

✅ Đã train xong 1000 lượt, total Q-state-action: 7500

🧠 State key: (7.2, 2, -1, 100, 0, 1, 0, 0, 1, 0, 2)
🔹 xem_pdf:PDF_1_2_1: Q = 9.6472
🔹 xem_pdf:PDF_1_2_2: Q = 15.7057
🔹 xem_video:VID_1_2_1: Q = 16.9541
🔹 xem_video:VID_1_2_2: Q = 13.2341
🔹 xem_video:VID_1_2_3: Q = 6.6554
🔹 lam_quiz:QUIZ_1_2_EASY: Q = 30.8700
🔹 lam_quiz:QUIZ_1_2_MEDIUM: Q = 31.3039
🔹 lam_quiz:QUIZ_1_2_HARD: Q = 32.5837
🔹 tham_gia_thao_luan: Q = 17.8575
🔹 nop_assignment: Q = 13.4267
👉 Gợi ý học tiếp theo: lam_quiz:QUIZ_1_2_HARD
