## Question Recommendation based on quiz 1 performance

In [13]:
import pickle
import pandas as pd
import json
from sklearn.preprocessing import LabelEncoder

In [14]:
def recommend_questions(user_id, quiz_1_features, question_bank, trained_model, num_questions=40,
                        *, weak_threshold=0.5):
    """Recommend questions from the categories a user performed poorly in.

    The user's quiz rows are grouped by ``category``; every category whose
    mean ``is_correct`` is strictly below ``weak_threshold`` counts as weak.
    Every question in the bank is scored with ``trained_model.predict`` and
    the questions belonging to weak categories are returned, lowest predicted
    performance first (ties broken by higher ``importance_weight``).

    Args:
        user_id: Value matched against the ``user_id`` column of the quiz data.
        quiz_1_features: DataFrame (or anything ``pd.DataFrame`` accepts) with
            ``user_id``, ``category``, ``is_correct`` and ``overall_accuracy``
            columns.
        question_bank: DataFrame (or anything ``pd.DataFrame`` accepts) with
            ``question_id``, ``category``, ``difficulty`` and
            ``importance_weight`` columns; a ``question`` column is optional.
            The caller's object is never modified.
        trained_model: Object exposing ``predict(features)``. It receives the
            columns ``difficulty``, ``importance_weight``, ``overall_accuracy``
            and ``category_performance`` in that order.
            NOTE(review): presumably this matches the feature order used at
            training time -- confirm against the training notebook.
        num_questions: Maximum number of questions to return.
        weak_threshold: Categories with mean correctness strictly below this
            value are treated as weak.

    Returns:
        A list of dicts with ``question_id``, ``category`` and, when the bank
        has it, ``question``. The list is empty when the user has no quiz rows
        or no weak category, and may be shorter than ``num_questions`` when
        the weak categories contain fewer questions.
    """
    quiz_1_df = pd.DataFrame(quiz_1_features)
    user_data = quiz_1_df[quiz_1_df["user_id"] == user_id]

    # Without any rows for this user there is nothing to base a recommendation
    # on (and ``.iloc[0]`` below would fail with an opaque IndexError).
    if user_data.empty:
        return []

    # Identify weak areas: categories with low mean correctness.
    category_performance = user_data.groupby("category")["is_correct"].mean()
    weak_categories = category_performance[category_performance < weak_threshold].index.tolist()
    if not weak_categories:
        return []

    # Work on a private copy so the feature columns added below never leak
    # into the caller's question bank.
    question_df = pd.DataFrame(question_bank).copy()

    # overall_accuracy is a per-user value; the first row's value is broadcast
    # to every question.
    question_df["overall_accuracy"] = user_data["overall_accuracy"].iloc[0]
    # Categories the user has not been quizzed on get a performance of 0.
    question_df["category_performance"] = question_df["category"].map(
        lambda cat: category_performance.get(cat, 0)
    )

    numeric_columns = ["difficulty", "importance_weight", "overall_accuracy", "category_performance"]
    for col in numeric_columns:
        # Non-numeric values become NaN instead of raising.
        question_df[col] = pd.to_numeric(question_df[col], errors="coerce")

    # Predict performance on all questions.
    question_df["predicted_performance"] = trained_model.predict(question_df[numeric_columns])

    # Keep only questions from weak categories.
    weak_questions = question_df[question_df["category"].isin(weak_categories)]

    # Hardest-for-this-user first; among equals prefer the more important
    # question, then keep the first ``num_questions``.
    recommended_questions = weak_questions.sort_values(
        by=["predicted_performance", "importance_weight"], ascending=[True, False]
    ).head(num_questions)

    output_columns = ["question_id", "category"]
    if "question" in question_df.columns:
        output_columns.append("question")

    return recommended_questions[output_columns].to_dict(orient="records")


In [15]:
# Load the quiz-1 feature rows and the question bank (JSON is UTF-8 text).
with open("/content/quiz_1_features.json", "r", encoding="utf-8") as file:
    quiz_1_features = json.load(file)

with open("/content/questions.json", "r", encoding="utf-8") as file:
    question_bank = json.load(file)

# NOTE(security): pickle.load executes arbitrary code embedded in the file;
# only load model files that come from a trusted source.
with open("/content/answer_prediction_model.pkl", "rb") as file:
    trained_model = pickle.load(file)

quiz_1_features = pd.DataFrame(quiz_1_features)
question_bank = pd.DataFrame(question_bank)

user_id = 'U002'

recommended_questions = recommend_questions(
    user_id,
    quiz_1_features=quiz_1_features,
    question_bank=question_bank,
    trained_model=trained_model,
)

print(f"Recommended Questions for Quiz 2 for user {user_id}:")

# Number the recommendations from 1 for display.
for i, q in enumerate(recommended_questions, start=1):
    print(f"{i}- QID: {q['question_id']}, Category: {q['category']}, Question: {q.get('question', 'N/A')}")

Recommended Questions for Quiz 2 for user U002:
1- QID: Q013, Category: Road Signs and Markings, Question: What does a no overtaking sign mean?
2- QID: Q015, Category: Road Signs and Markings, Question: What does a lane merging sign signify?
3- QID: Q017, Category: Traffic Rules and Regulations, Question: Who has the right of way at an uncontrolled intersection?
4- QID: Q024, Category: Traffic Rules and Regulations, Question: What should you do at a zebra crossing?
5- QID: Q029, Category: Traffic Rules and Regulations, Question: What is the correct procedure to make a U-turn?
6- QID: Q059, Category: Road Safety and Accident Prevention, Question: What is the primary reason for wearing a seatbelt?
7- QID: Q060, Category: Road Safety and Accident Prevention, Question: What should you do if you are driving in a heavy rainstorm?
8- QID: Q062, Category: Road Safety and Accident Prevention, Question: When should you use your car's horn?
9- QID: Q063, Category: Road Safety and Accident Prevent