## Demo Linear regression - V1 - State đơn giản là LO


In [None]:

import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
import joblib, os
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
import joblib, pathlib

In [None]:
import random
import numpy as np
from collections import defaultdict
import pandas as pd

# -----------------------------
# Danh sách hành động và trạng thái (LOs) cho mục 1_2
# -----------------------------
ACTIONS = [
    "xem_pdf:PDF_1_2_1",
    "xem_pdf:PDF_1_2_2",
    "xem_video:VID_1_2_1",
    "xem_video:VID_1_2_2",
    "xem_video:VID_1_2_3",
    "lam_quiz:QUIZ_1_2_EASY",
    "lam_quiz:QUIZ_1_2_MEDIUM",
    "lam_quiz:QUIZ_1_2_HARD",
    "tham_gia_thao_luan:",
    "nop_assignment:"
]

# Trạng thái là các LO
STATES = [action.split(":")[1] if ":" in action else action for action in ACTIONS]

# Pool dữ liệu vector trạng thái để tính reward
initial_state_pool = [
    {
        "quiz_avg_easy_1_2_1": 6.0,
        "quiz_avg_medium_1_2_1": 4.2,
        "quiz_avg_hard_1_2_1": -1,
        "pdf_views_1_2_1": 2,
        "video_views_1_2_1": 3,
        "forum_post_count": 1,
        "assignment_status": 0,
        "cluster": 0
    },
    {
        "quiz_avg_easy_1_2_1": 7.5,
        "quiz_avg_medium_1_2_1": 6.0,
        "quiz_avg_hard_1_2_1": 3.2,
        "pdf_views_1_2_1": 5,
        "video_views_1_2_1": 8,
        "forum_post_count": 2,
        "assignment_status": 1,
        "cluster": 0
    },
    {
        "quiz_avg_easy_1_2_1": 3.0,
        "quiz_avg_medium_1_2_1": 2.5,
        "quiz_avg_hard_1_2_1": -1,
        "pdf_views_1_2_1": 1,
        "video_views_1_2_1": 1,
        "forum_post_count": 0,
        "assignment_status": 0,
        "cluster": 1
    },
    {
        "quiz_avg_easy_1_2_1": 5.5,
        "quiz_avg_medium_1_2_1": 5.0,
        "quiz_avg_hard_1_2_1": -1,
        "pdf_views_1_2_1": 3,
        "video_views_1_2_1": 2,
        "forum_post_count": 1,
        "assignment_status": 0,
        "cluster": 1
    }
]

# -----------------------------
# Dự đoán điểm với model thật
# -----------------------------
def predict_score(state_vector, level, section, models_dict):
    X = []
    X.append(state_vector[f"video_views_{section}_1"])
    X.append(state_vector[f"pdf_views_{section}_1"])

    if level == "medium":
        X.append(state_vector[f"quiz_avg_easy_{section}_1"])
    elif level == "hard":
        X.append(state_vector[f"quiz_avg_easy_{section}_1"])
        X.append(state_vector[f"quiz_avg_medium_{section}_1"])

    X.append(state_vector["forum_post_count"])
    X.append(state_vector["assignment_status"])
    X.append(state_vector["cluster"])
   
    model_key = f"{level}_{section}"
    model = models_dict[model_key]

    X_df = pd.DataFrame([X], columns=model.feature_names_in_)
    return model.predict(X_df)[0]

# -----------------------------
# Xác định level mục tiêu tiếp theo
# -----------------------------
def determine_target_level(state_vector, section):
    easy = state_vector.get(f"quiz_avg_easy_{section}_1", None)
    medium = state_vector.get(f"quiz_avg_medium_{section}_1", None)
    hard = state_vector.get(f"quiz_avg_hard_{section}_1", None)

    if easy is None or easy < 6: return "easy"
    if medium is None or medium < 6: return "medium"
    return "hard"

# -----------------------------
# Giả lập action → sinh ra vector trạng thái mới
# -----------------------------
def simulate_action(state_vector, action, section):
    state_vector = dict(random.choice(initial_state_pool))  # Mô phỏng vector trạng thái mới
    
    parts = action.split(":")
    act_type = parts[0]
    target = parts[1] if len(parts) > 1 else None

    if act_type == "xem_pdf":
        state_vector[f"pdf_views_{section}_1"] += 1
    elif act_type == "xem_video":
        state_vector[f"video_views_{section}_1"] += 1
    elif act_type == "tham_gia_thao_luan":
        state_vector["forum_post_count"] += 1
    elif act_type == "nop_assignment":
        state_vector["assignment_status"] = 1

    return state_vector

# -----------------------------
# Tính reward bằng Linear Regression
# -----------------------------
def compute_reward(state_vector, action, section, models_c0, models_c1):
    cluster = state_vector["cluster"]
    models = models_c0 if cluster == 0 else models_c1

    target_level = determine_target_level(state_vector, section)
    current_score = state_vector.get(f"quiz_avg_{target_level}_{section}_1", 0)

    next_state_vector = simulate_action(state_vector, action, section)
    predicted_score = predict_score(next_state_vector, target_level, section, models)

    return predicted_score - current_score

# -----------------------------
# Q-learning với trạng thái là LO
# -----------------------------
def train_q_table(section, models_c0, models_c1, episodes=1000):
    Q = np.zeros((len(STATES), len(ACTIONS)))  # Q-table: 10 states x 10 actions
    alpha = 0.1  # Learning rate
    gamma = 0.9  # Discount factor

    for ep in range(episodes):
        # Khởi tạo vector trạng thái để tính reward
        state_vector = dict(random.choice(initial_state_pool))
        # Khởi tạo trạng thái LO
        current_state = random.choice(STATES)

        for step in range(10):
            state_idx = STATES.index(current_state)
            
            # Chọn hành động
            action = random.choice(ACTIONS)
            action_idx = ACTIONS.index(action)

            # Tính reward dựa trên vector trạng thái
            reward = compute_reward(state_vector, action, section, models_c0, models_c1)

            # Trạng thái tiếp theo là LO của hành động
            next_state = action.split(":")[1] if ":" in action else action
            next_state_idx = STATES.index(next_state)

            # Cập nhật Q-table
            Q[state_idx, action_idx] += alpha * (
                reward + gamma * np.max(Q[next_state_idx, :]) - Q[state_idx, action_idx]
            )

            # Cập nhật state_vector và current_state
            state_vector = simulate_action(state_vector, action, section)
            current_state = next_state

    print(f"✅ Đã train xong {episodes} lượt Q-learning với mục {section}")
    return Q

# -----------------------------
# Gợi ý hành động tiếp theo
# -----------------------------
def suggest_next_action(current_state, state_vector, Q, section):
    state_idx = STATES.index(current_state)
    actions_q = {ACTIONS[i]: Q[state_idx, i] for i in range(len(ACTIONS))}

    print("🧠 Q-values:")
    for a, q in actions_q.items():
        print(f"  {a:25} → Q = {q:.3f}")

    best_action = max(actions_q, key=actions_q.get)
    return best_action

# -----------------------------
# Dữ liệu kiểm tra
# -----------------------------
test_states = [
    {
        "vector": {
            "quiz_avg_easy_1_2_1": 6.0,
            "quiz_avg_medium_1_2_1": 4.2,
            "quiz_avg_hard_1_2_1": -1,
            "pdf_views_1_2_1": 2,
            "video_views_1_2_1": 3,
            "forum_post_count": 1,
            "assignment_status": 0,
            "cluster": 0
        },
        "lo_state": "PDF_1_2_1"
    },
    {
        "vector": {
            "quiz_avg_easy_1_2_1": 7.5,
            "quiz_avg_medium_1_2_1": 6.0,
            "quiz_avg_hard_1_2_1": 3.2,
            "pdf_views_1_2_1": 5,
            "video_views_1_2_1": 4,
            "forum_post_count": 2,
            "assignment_status": 1,
            "cluster": 0
        },
        "lo_state": "VID_1_2_1"
    },
    {
        "vector": {
            "quiz_avg_easy_1_2_1": 7.5,
            "quiz_avg_medium_1_2_1": 6.0,
            "quiz_avg_hard_1_2_1": 3.2,
            "pdf_views_1_2_1": 5,
            "video_views_1_2_1": 8,
            "forum_post_count": 2,
            "assignment_status": 1,
            "cluster": 0
        },
        "lo_state": "QUIZ_1_2_EASY"
    }
]

# -----------------------------
# Hàm chạy thử nghiệm
# -----------------------------
def run_tests(Q, section, models_c0, models_c1):
    for i, test_state in enumerate(test_states, start=1):
        print(f"\n=== 🧑‍🎓 Học viên {i} ===")
        action = suggest_next_action(test_state["lo_state"], test_state["vector"], Q, section)
        print(f"👉 Gợi ý học tiếp theo: {action}")


In [5]:
import joblib
section = "1_2"

models_c0 = joblib.load("models/models_cluster_0.pkl")
models_c1 = joblib.load("models/models_cluster_1.pkl")
Q = train_q_table(section, models_c0, models_c1)

✅ Đã train xong 1000 lượt Q-learning với mục 1_2


In [6]:

# Ví dụ gọi hàm (giả sử có models_c0, models_c1 đã được huấn luyện)
Q = train_q_table(section="1_2", models_c0=models_c0, models_c1=models_c1)
run_tests(Q, section="1_2", models_c0=models_c0, models_c1=models_c1)

✅ Đã train xong 1000 lượt Q-learning với mục 1_2

=== 🧑‍🎓 Học viên 1 ===
🧠 Q-values:
  xem_pdf:PDF_1_2_1         → Q = 9.229
  xem_pdf:PDF_1_2_2         → Q = 8.883
  xem_video:VID_1_2_1       → Q = 9.333
  xem_video:VID_1_2_2       → Q = 10.211
  xem_video:VID_1_2_3       → Q = 11.003
  lam_quiz:QUIZ_1_2_EASY    → Q = 9.276
  lam_quiz:QUIZ_1_2_MEDIUM  → Q = 10.306
  lam_quiz:QUIZ_1_2_HARD    → Q = 8.510
  tham_gia_thao_luan:       → Q = 9.997
  nop_assignment:           → Q = 9.505
👉 Gợi ý học tiếp theo: xem_video:VID_1_2_3

=== 🧑‍🎓 Học viên 2 ===
🧠 Q-values:
  xem_pdf:PDF_1_2_1         → Q = 10.136
  xem_pdf:PDF_1_2_2         → Q = 9.571
  xem_video:VID_1_2_1       → Q = 10.385
  xem_video:VID_1_2_2       → Q = 9.711
  xem_video:VID_1_2_3       → Q = 9.755
  lam_quiz:QUIZ_1_2_EASY    → Q = 10.168
  lam_quiz:QUIZ_1_2_MEDIUM  → Q = 9.105
  lam_quiz:QUIZ_1_2_HARD    → Q = 7.730
  tham_gia_thao_luan:       → Q = 9.155
  nop_assignment:           → Q = 9.688
👉 Gợi ý học tiếp theo: xem_vid