## demo Qlearning

In [35]:
import random
import numpy as np
from collections import defaultdict
import pandas as pd

# -----------------------------
# Danh sách hành động cho mục 1_2
# -----------------------------
ACTIONS = [
    "xem_pdf:PDF_1_2_1",
    "xem_pdf:PDF_1_2_2",
    "xem_video:VID_1_2_1",
    "xem_video:VID_1_2_2",
    "xem_video:VID_1_2_3",
    "lam_quiz:QUIZ_1_2_EASY",
    "lam_quiz:QUIZ_1_2_MEDIUM",
    "lam_quiz:QUIZ_1_2_HARD",
    "tham_gia_thao_luan:",
    "nop_assignment:"
]

initial_state_pool = [
    {
        f"quiz_avg_easy_1_2_1": 6.0,
        f"quiz_avg_medium_1_2_1": 4.2,
        f"quiz_avg_hard_1_2_1": -1,
        f"pdf_views_1_2_1": 2,
        f"video_views_1_2_1": 3,
        "forum_post_count": 1,
        "assignment_status": 0,
        "cluster": 0
    },
    {
        f"quiz_avg_easy_1_2_1": 7.5,
        f"quiz_avg_medium_1_2_1": 6.0,
        f"quiz_avg_hard_1_2_1": 3.2,
        f"pdf_views_1_2_1": 5,
        f"video_views_1_2_1": 8,
        "forum_post_count": 2,
        "assignment_status": 1,
        "cluster": 0
    },
    {
        f"quiz_avg_easy_1_2_1": 3.0,
        f"quiz_avg_medium_1_2_1": 2.5,
        f"quiz_avg_hard_1_2_1": -1,
        f"pdf_views_1_2_1": 1,
        f"video_views_1_2_1": 1,
        "forum_post_count": 0,
        "assignment_status": 0,
        "cluster": 1
    },
    {
        f"quiz_avg_easy_1_2_1": 5.5,
        f"quiz_avg_medium_1_2_1": 5.0,
        f"quiz_avg_hard_1_2_1": -1,
        f"pdf_views_1_2_1": 3,
        f"video_views_1_2_1": 2,
        "forum_post_count": 1,
        "assignment_status": 0,
        "cluster": 1
    }
]
# -----------------------------
# Dự đoán điểm với model thật
# -----------------------------
def predict_score(state, level, section, models_dict):
    X = []

    X.append(state[f"video_views_{section}_1"])
    X.append(state[f"pdf_views_{section}_1"])

    if level == "medium":
        X.append(state[f"quiz_avg_easy_{section}_1"])
    elif level == "hard":
        X.append(state[f"quiz_avg_easy_{section}_1"])
        X.append(state[f"quiz_avg_medium_{section}_1"])

    X.append(state["forum_post_count"])
    X.append(state["assignment_status"])
    X.append(state["cluster"])
   
    model_key = f"{level}_{section}"
    model = models_dict[model_key]

    X_df = pd.DataFrame([X], columns=model.feature_names_in_)
    return model.predict(X_df)[0]

# -----------------------------
# Xác định level mục tiêu tiếp theo
# -----------------------------
def determine_target_level(state, section):
    easy = state.get(f"quiz_avg_easy_{section}_1", None)
    medium = state.get(f"quiz_avg_medium_{section}_1", None)
    hard = state.get(f"quiz_avg_hard_{section}_1", None)

    if easy is None or easy < 6: return "easy"
    if medium is None or medium < 6: return "medium"
    return "hard"

# -----------------------------
# Giả lập action → sinh ra state mới
# -----------------------------
def simulate_action(state, action, section):
    # state = dict(state)  # clone
    state = dict(random.choice(initial_state_pool)) 
    
    parts = action.split(":")
    act_type = parts[0]
    target = parts[1] if len(parts) > 1 else None

    if act_type == "xem_pdf":
        state[f"pdf_views_{section}_1"] += 1
    elif act_type == "xem_video":
        state[f"video_views_{section}_1"] += 1
    elif act_type == "tham_gia_thao_luan":
        state["forum_post_count"] += 1
    elif act_type == "nop_assignment":
        state["assignment_status"] = 1

    return state

# -----------------------------
# Tính reward bằng Linear Regression thật
# -----------------------------
def compute_reward(state, action, section, models_c0, models_c1):
    cluster = state["cluster"]
    models = models_c0 if cluster == 0 else models_c1

    target_level = determine_target_level(state, section)
    current_score = state.get(f"quiz_avg_{target_level}_{section}_1", 0)

    next_state = simulate_action(state, action, section)
    predicted_score = predict_score(next_state, target_level, section, models)

    return predicted_score - current_score

# -----------------------------
# Vector hóa state
# -----------------------------
def vectorize_state(state, section):
    return (
        round(state[f"quiz_avg_easy_{section}_1"], 1),
        round(state[f"quiz_avg_medium_{section}_1"], 1),
        round(state[f"quiz_avg_hard_{section}_1"], 1),
        state[f"pdf_views_{section}_1"],
        state[f"video_views_{section}_1"],
        state["forum_post_count"],
        state["assignment_status"],
        state["cluster"]
    )

# -----------------------------
# Q-learning
# -----------------------------
def train_q_table(section, models_c0, models_c1, episodes=1000):
    Q = defaultdict(float)

    for ep in range(episodes):
        state = {
            f"quiz_avg_easy_{section}_1": round(random.uniform(3, 7), 1),
            f"quiz_avg_medium_{section}_1": round(random.uniform(1, 6), 1),
            f"quiz_avg_hard_{section}_1": -1.0,
            f"pdf_views_{section}_1": random.randint(0, 5),
            f"video_views_{section}_1": random.randint(0, 5),
            "forum_post_count": random.randint(0, 3),
            "assignment_status": random.randint(0, 1),
            "cluster": random.randint(0, 1)
        }

        for step in range(10):
            s_key = vectorize_state(state, section)

            # Chọn hành động
            action = random.choice(ACTIONS)

            # Reward
            reward = compute_reward(state, action, section, models_c0, models_c1)

            # Q-learning update
            next_state = simulate_action(state, action, section)
            next_s_key = vectorize_state(next_state, section)
            future_q = max([Q[(next_s_key, a)] for a in ACTIONS])
            Q[(s_key, action)] += 0.1 * (reward + 0.9 * future_q - Q[(s_key, action)])

            state = next_state

    print(f"✅ Đã train xong {episodes} lượt Q-learning với mục {section}")
    return Q

### Cách dùng:

In [36]:
import joblib
section = "1_2"

models_c0 = joblib.load("models/models_cluster_0.pkl")
models_c1 = joblib.load("models/models_cluster_1.pkl")
Q = train_q_table(section, models_c0, models_c1)

✅ Đã train xong 1000 lượt Q-learning với mục 1_2


###  Gợi ý hành động cho học viên cụ thể:

In [37]:
def suggest_next_action(state, Q, section):
    s_key = vectorize_state(state, section)
    actions_q = {a: Q.get((s_key, a), 0) for a in ACTIONS}

    print("🧠 Q-values:")
    for a, q in actions_q.items():
        print(f"  {a:25} → Q = {q:.3f}")

    best = max(actions_q, key=actions_q.get)
    return best

### Ví dụ test:

In [38]:
test_states = [
    {
        f"quiz_avg_easy_1_2_1": 6.0,
        f"quiz_avg_medium_1_2_1": 4.2,
        f"quiz_avg_hard_1_2_1": -1,
        f"pdf_views_1_2_1": 2,
        f"video_views_1_2_1": 3,
        "forum_post_count": 1,
        "assignment_status": 0,
        "cluster": 0
    },
    {
        f"quiz_avg_easy_1_2_1": 7.5,
        f"quiz_avg_medium_1_2_1": 6.0,
        f"quiz_avg_hard_1_2_1": 3.2,
        f"pdf_views_1_2_1": 5,
        f"video_views_1_2_1": 4,
        "forum_post_count": 2,
        "assignment_status": 1,
        "cluster": 0
    },
    {
        f"quiz_avg_easy_1_2_1": 7.5,
        f"quiz_avg_medium_1_2_1": 6.0,
        f"quiz_avg_hard_1_2_1": 3.2,
        f"pdf_views_1_2_1": 5,
        f"video_views_1_2_1": 8,
        "forum_post_count": 2,
        "assignment_status": 1,
        "cluster": 0
    }
]
for i, test_state in enumerate(test_states, start=1):
    print(f"\n=== 🧑‍🎓 Học viên {i} ===")
    action = suggest_next_action(test_state, Q, section="1_2")
    print(f"👉 Gợi ý học tiếp theo: {action}")


=== 🧑‍🎓 Học viên 1 ===
🧠 Q-values:
  xem_pdf:PDF_1_2_1         → Q = 6.754
  xem_pdf:PDF_1_2_2         → Q = 6.855
  xem_video:VID_1_2_1       → Q = 6.720
  xem_video:VID_1_2_2       → Q = 8.682
  xem_video:VID_1_2_3       → Q = 8.324
  lam_quiz:QUIZ_1_2_EASY    → Q = 6.883
  lam_quiz:QUIZ_1_2_MEDIUM  → Q = 6.789
  lam_quiz:QUIZ_1_2_HARD    → Q = 7.413
  tham_gia_thao_luan:       → Q = 4.725
  nop_assignment:           → Q = 5.767
👉 Gợi ý học tiếp theo: xem_video:VID_1_2_2

=== 🧑‍🎓 Học viên 2 ===
🧠 Q-values:
  xem_pdf:PDF_1_2_1         → Q = 0.000
  xem_pdf:PDF_1_2_2         → Q = 0.000
  xem_video:VID_1_2_1       → Q = 0.000
  xem_video:VID_1_2_2       → Q = 0.000
  xem_video:VID_1_2_3       → Q = 0.000
  lam_quiz:QUIZ_1_2_EASY    → Q = 0.000
  lam_quiz:QUIZ_1_2_MEDIUM  → Q = 0.000
  lam_quiz:QUIZ_1_2_HARD    → Q = 0.000
  tham_gia_thao_luan:       → Q = 0.000
  nop_assignment:           → Q = 0.000
👉 Gợi ý học tiếp theo: xem_pdf:PDF_1_2_1

=== 🧑‍🎓 Học viên 3 ===
🧠 Q-values:
  xem_pd