In [1]:
import os
import base64
from pydantic import BaseModel
from openai import AzureOpenAI
from api_utils import *
import jiter


In [3]:
# Long-form topic descriptions keyed by the short topic name that is passed
# around the notebook (the long form is what gets sent to the generator).
topic_descriptions = {
    "Household Finance": (
        "Household finance such as income, utility bills, money, interest, "
        "savings, instalment, mortgage, financial planning etc."
    ),
    "Recreation": (
        "Recreation such as sports, games, exercises, music, movie, dancing, "
        "painting, fishing and other recreation activities"
    ),
    "Services": (
        "Services such as installation, maintenance, repairing, cleaning, "
        "laundry, hotel, retail, e-commerce, streaming services, digital services etc."
    ),
}

# Batch key -> JSON file of KC pairs. Every file follows the pattern
# Data/Final_kc_pairs/<stem>_kc_pairs_final.json; only the stem varies.
batch_path_dict = {
    batch_key: f"Data/Final_kc_pairs/{file_stem}_kc_pairs_final.json"
    for batch_key, file_stem in (
        ("P", "primary"),
        ("P3", "P3"),
        ("P4", "P4"),
        ("P5", "P5"),
        ("P6", "P6"),
        ("O", "secondary"),
        ("O1", "O1"),
        ("O2", "O2"),
        ("O3", "O3"),
        ("All", "all"),
    )
}

In [15]:

    
# Load the user-prompt template that claude_question_generator fills in with
# str.format (placeholders: kc1, kc2, topic, grade).
# The path is built with os.path.join so it resolves on every OS; the previous
# hard-coded backslash path only worked on Windows.
user_prompt_path = os.path.join("Prompts", "Claude", "claude_generation_user_prompt_v3.txt")
with open(user_prompt_path, "r", encoding="utf-8") as file:
    user_prompt_template = file.read()


In [16]:
# Client object handed to get_claude_response inside claude_question_generator.
# NOTE(review): get_aws_client is not defined in this notebook — presumably it
# comes from `from api_utils import *`; confirm.
aws_client = get_aws_client()

In [24]:
import json
import re

def metadata(response_dict=None, **kwargs):
    """Bundle arbitrary keyword fields with a model response into one dict.

    Args:
        response_dict: The response payload to attach (stored as given).
        **kwargs: Any descriptive fields (e.g. KC names, topic, grade, QID).

    Returns:
        dict: The keyword fields in call order, followed by a
        ``'response_dict'`` entry holding ``response_dict``.
    """
    return {**kwargs, 'response_dict': response_dict}

def _extract_claude_json(raw):
    """Return the JSON object embedded in a model reply, or raise ValueError.

    Candidates are tried in order; the first one that parses to a dict wins:
      1. the body of a ```json fenced block, or the whole reply when there is
         no such block (the original behaviour);
      2. the body of a fenced block with any or no language tag;
      3. the span from the first ``{`` to the last ``}`` (JSON wrapped in prose).
    """
    tagged = re.search(r"```json\s*(\{.*?\})\s*```", raw, re.DOTALL)
    candidates = [tagged.group(1) if tagged else raw]

    untagged = re.search(r"```\w*\s*(\{.*?\})\s*```", raw, re.DOTALL)
    if untagged:
        candidates.append(untagged.group(1))

    first, last = raw.find("{"), raw.rfind("}")
    if 0 <= first < last:
        candidates.append(raw[first:last + 1])

    error = None
    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except ValueError as exc:  # json.JSONDecodeError subclasses ValueError
            error = exc
            continue
        if isinstance(parsed, dict):
            return parsed
        error = ValueError(f"expected a JSON object, got {type(parsed).__name__}")
    raise error


def claude_question_generator(kc1, kc2, topic, grade, qid, max_attempts=10):
    """Ask Claude for one word problem covering two KCs and return it with metadata.

    The user prompt is built from ``user_prompt_template`` and sent through
    ``get_claude_response``. The reply is expected to contain a JSON object
    with ``word_problem`` and ``solution``; a reply that cannot be parsed, or
    that has no ``word_problem``, counts as a failed attempt and is retried.

    Args:
        kc1: Primary knowledge-component name (template field ``kc1``).
        kc2: Secondary knowledge-component name (template field ``kc2``).
        topic: Topic description (template field ``topic``).
        grade: Grade label (template field ``grade``).
        qid: Question id stored in the returned metadata.
        max_attempts: Maximum number of API calls before giving up.

    Returns:
        dict: ``metadata(...)`` output with keys ``Primary_kc``,
        ``Secondary_kc``, ``Topic``, ``Grade``, ``QID`` and ``response_dict``.
        ``response_dict`` has ``word_problem``, ``solution``, ``status`` and
        ``response_time``; when every attempt fails, ``status`` is ``-1`` and
        the other three are ``None``.
    """
    user_prompt = user_prompt_template.format(kc1=kc1, kc2=kc2, topic=topic, grade=grade)

    for attempt in range(max_attempts):
        api_response = get_claude_response(aws_client, claude4_model_id, user_prompt=user_prompt)

        try:
            parsed = _extract_claude_json(api_response.get("response", ""))

            # A reply without the problem text is useless downstream, so it is
            # retried instead of being recorded as a success with null content.
            if not parsed.get("word_problem"):
                raise ValueError("response JSON has no 'word_problem'")

            response_dict = {
                "word_problem": parsed.get("word_problem"),
                "solution": parsed.get("solution"),
                "status": api_response.get("status"),
                "response_time": api_response.get("response_time")
            }
            break  # success

        except Exception as e:
            # Deliberately broad: any malformed reply (wrong type, bad JSON,
            # missing field) just costs one attempt rather than aborting a batch.
            print(f"Attempt {attempt+1} failed to parse: {e}")

    else:
        # All retries failed (or max_attempts was 0)
        response_dict = {
            "word_problem": None,
            "solution": None,
            "status": -1,
            "response_time": None
        }

    return metadata(
        response_dict=response_dict,
        Primary_kc=kc1,
        Secondary_kc=kc2,
        Topic=topic,
        Grade=grade,
        QID=qid
    )


In [25]:
# Smoke test: generate a single question for one KC pair and display the result
response = claude_question_generator(
    kc1="FRACTIONS | Addition | adding mixed numbers",
    kc2="FRACTIONS | Simplifying | expressing a fraction in its simplest form",
    topic="Recreation such as sports, games, exercises, music, movie, dancing, painting, fishing and other recreation activities",
    grade="Primary 5",
    qid="P5-FrAddMix_P3-FrSmp",
)
response

{'Primary_kc': 'FRACTIONS | Addition | adding mixed numbers',
 'Secondary_kc': 'FRACTIONS | Simplifying | expressing a fraction in its simplest form',
 'Topic': 'Recreation such as sports, games, exercises, music, movie, dancing, painting, fishing and other recreation activities',
 'Grade': 'Primary 5',
 'QID': 'P5-FrAddMix_P3-FrSmp',
 'response_dict': {'word_problem': 'Sarah is training for a swimming competition. On Monday, she swam $2\\frac{3}{8}$ km in the morning and $1\\frac{5}{8}$ km in the afternoon. On Tuesday, she swam $3\\frac{1}{4}$ km in the morning and $2\\frac{3}{4}$ km in the afternoon. What is the total distance Sarah swam over the two days? Express your answer in its simplest form.',
  'solution': "First, I need to find the total distance Sarah swam on Monday.\n\nMonday's total: $2\\frac{3}{8} + 1\\frac{5}{8}$\n\nSince the fractional parts have the same denominator:\n$2\\frac{3}{8} + 1\\frac{5}{8} = (2 + 1) + (\\frac{3}{8} + \\frac{5}{8}) = 3 + \\frac{8}{8} = 3 + 1 = 

In [26]:
def one_topic_claude_question_generator(input_path, output_path, topic):
    """Generate one question per KC pair for a single topic, checkpointing to JSON.

    Args:
        input_path: JSON file mapping a question id to a dict with
            ``primary_kc_name``, ``secondary_kc_name`` and ``primary_kc_grade``.
        output_path: JSON file of accumulated responses. If it already exists
            its contents are loaded and extended; it is rewritten after every
            generated question so an interrupted run keeps its progress.
        topic: Short topic name; must be a key of ``topic_descriptions``.

    Raises:
        KeyError: If ``topic`` is not a key of ``topic_descriptions``.
    """
    with open(input_path, "r", encoding="utf-8") as f:
        input_data = json.load(f)

    if os.path.exists(output_path):
        with open(output_path, "r", encoding="utf-8") as f:
            all_responses = json.load(f)
    else:
        all_responses = {}

    # Resolve the topic once, before any API call is made.
    topic_description = topic_descriptions[topic]

    # Make sure the destination folder exists so the first checkpoint cannot
    # fail after an API call has already been spent.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    checkpoint_path = f"{output_path}.tmp"

    for qid, data_dict in input_data.items():
        kc1 = data_dict.get("primary_kc_name", "")
        kc2 = data_dict.get("secondary_kc_name", "")
        grade = data_dict.get("primary_kc_grade", "")

        # NOTE(review): get_next_question_id is defined outside this notebook;
        # presumably it makes the id unique across runs — confirm.
        updated_qid = get_next_question_id(f"{qid}_sonnet4_{topic}")

        # claude_question_generator always returns a non-empty dict (failed
        # generations carry status -1), so every result is recorded.
        all_responses[updated_qid] = claude_question_generator(
            kc1=kc1, kc2=kc2, topic=topic_description, grade=grade, qid=updated_qid
        )

        # Write to a temp file and swap it in, so an interruption mid-write
        # cannot leave a truncated JSON file that breaks the next run's load.
        with open(checkpoint_path, "w", encoding="utf-8") as f:
            json.dump(all_responses, f, indent=2)
        os.replace(checkpoint_path, output_path)

In [27]:
# Generate "Recreation" questions for every KC pair in the "All" batch file;
# responses accumulate in all_recreation_v3.json (rewritten after each question).
one_topic_claude_question_generator(batch_path_dict['All'], r"Data/Generated_questions/CLAUDE/all_recreation_v3.json", "Recreation")

In [None]:
import json
# Reviewer assignments: maps each reviewer name to the KC-pair ids they own.
# Paths use forward slashes so they resolve on every OS (and match the path the
# generation cell writes to).
with open("Data/Generated_questions/GPT4.1/All/assigned_KCs.json", "r", encoding="utf_8") as f:
    assigned_KCs = json.load(f)

# Split the generated MWPs (176 according to the original note) among the
# reviewers according to their assigned KC pairs.
with open("Data/Generated_questions/CLAUDE/all_recreation_v3.json", "r", encoding="utf-8") as f:
    MWPs = json.load(f)

reviewers = ("TD", "Minh", "Sarah")
recreation_v8_v1_split = {reviewer: {} for reviewer in reviewers}
unassigned_qids = []

for qid, data in MWPs.items():
    # The first two underscore-separated fields of the question id are matched
    # against the assignments (presumably the KC-pair id, e.g.
    # "P5-FrAddMix_P3-FrSmp" — confirm against assigned_KCs.json).
    safe_qid = "_".join(qid.split("_")[:2])
    for reviewer in reviewers:  # first matching reviewer wins
        if safe_qid in assigned_KCs[reviewer]:
            recreation_v8_v1_split[reviewer][qid] = data
            break
    else:
        unassigned_qids.append(qid)

# Questions that match no reviewer are left out of the split; report them
# instead of dropping them silently.
if unassigned_qids:
    print(f"{len(unassigned_qids)} question(s) matched no reviewer and were skipped: {unassigned_qids}")

with open("Data/Generated_questions/CLAUDE/recreation_split_v3.json", "w", encoding="utf-8") as f:
    json.dump(recreation_v8_v1_split, f, indent=2, ensure_ascii=False)

In [None]:
import json
from pathlib import Path
def _load_tex_records(input_path):
    """Return question records: parsed from JSON for a path, else the object itself."""
    if isinstance(input_path, (str, Path)):
        # A path that does not exist raises FileNotFoundError here, instead of
        # being mistaken for a dictionary further down.
        with open(input_path, "r", encoding="utf-8") as f:
            return json.load(f)
    return input_path  # assume it's already a dictionary


def _render_tex_document(records, title, body_heading, body_keys):
    """Build the full LaTeX source: preamble, one section per record, closing tag."""
    parts = [
        "\\documentclass{article}\n",
        "\\usepackage[utf8]{inputenc}\n",
        "\\usepackage{amsmath}\n",
        "\\usepackage{amsfonts}\n",
        "\\usepackage{amssymb}\n",
        "\\usepackage{graphicx}\n",
        "\\usepackage{hyperref}\n",
        # Triple braces: literal "{" + the title value + literal "}".
        # The old "{ {title} }" formatted a Python set, giving \title{'...'}.
        f"\\title{{{title}}}\n",
        "\\author{Tien Dung Doan}\n",
        "\\begin{document}\n",
        "\\maketitle\n",
    ]

    for number, (qid, data) in enumerate(records.items(), start=1):
        response = data.get("response_dict") or {}
        # First non-empty candidate key wins; a missing or None value becomes
        # an empty body rather than the literal text "None".
        body = next((response[key] for key in body_keys if response.get(key)), "")

        kc1 = data.get("Primary_kc", "")
        kc2 = data.get("Secondary_kc", "")
        topic = data.get("Topic", "")
        grade = data.get("Grade", "")
        safe_qid = qid.replace("_", "\\_")  # "_" is special in LaTeX text mode

        parts.append(f"\\section*{{Question {number}}}\n")

        # Metadata
        parts.append("\\textbf{Metadata}\n\n")
        parts.append("\\begin{itemize}\n")
        parts.append(f"  \\item Question ID: {safe_qid}\n")
        parts.append(f"  \\item Primary KC: {kc1}\n")
        parts.append(f"  \\item Secondary KC: {kc2}\n")
        parts.append(f"  \\item Topic: {topic}\n")
        parts.append(f"  \\item Grade: {grade}\n")
        parts.append("\\end{itemize}\n\n")

        # Question / Solution text
        parts.append(f"\\textbf{{{body_heading}}}\n\n")
        parts.append(f"{body}\n\n")

    parts.append("\\end{document}\n")
    return "".join(parts)


def _write_tex_document(input_path, output_path, title, body_heading, body_keys):
    """Load the records, render them, and write the LaTeX file in one go."""
    records = _load_tex_records(input_path)
    # Render before opening the output so bad input cannot leave a partial file.
    document = _render_tex_document(records, title, body_heading, body_keys)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(document)


def convert_json_into_tex(input_path, output_file, title):
    """Write a LaTeX document listing each question with its metadata.

    Args:
        input_path: Path (str or Path) to a JSON file of question records, or
            the records dict itself (question id -> metadata dict).
        output_file: Destination ``.tex`` path.
        title: Document title.

    The question text is taken from ``response_dict["response"]`` when present
    (older record layout) and from ``response_dict["word_problem"]`` otherwise.

    Raises:
        FileNotFoundError: If ``input_path`` is a path that does not exist.
    """
    _write_tex_document(input_path, output_file, title, "Question", ("response", "word_problem"))


def convert_json_into_tex_question(input_path, output_path, title):
    """Write a LaTeX document with each record's metadata and word problem.

    Args:
        input_path: Path (str or Path) to a JSON file of question records, or
            the records dict itself (question id -> metadata dict).
        output_path: Destination ``.tex`` path.
        title: Document title.

    Raises:
        FileNotFoundError: If ``input_path`` is a path that does not exist.
    """
    _write_tex_document(input_path, output_path, title, "Question", ("word_problem",))


def convert_json_into_tex_solution(input_path, output_path, title):
    """Write a LaTeX document with each record's metadata and solution.

    Args:
        input_path: Path (str or Path) to a JSON file of question records, or
            the records dict itself (question id -> metadata dict).
        output_path: Destination ``.tex`` path.
        title: Document title.

    Raises:
        FileNotFoundError: If ``input_path`` is a path that does not exist.
    """
    _write_tex_document(input_path, output_path, title, "Solution", ("solution",))

        
           

In [None]:
# Export one question sheet and one solution sheet per reviewer from the split
# file. Paths use forward slashes so they resolve on every OS.
input_path = "Data/Generated_questions/CLAUDE/recreation_split_v3.json"
with open(input_path, "r", encoding="utf-8") as f:
    input_data = json.load(f)

# input_data maps each reviewer name to that reviewer's question records.
for name, data in input_data.items():
    output_question_path = f"Data/Generated_questions/CLAUDE/{name}_question_recreation_v3.tex"
    output_solution_path = f"Data/Generated_questions/CLAUDE/{name}_solution_recreation_v3.tex"
    title_solution = f"{name} Solutions recreation v3 CLAUDE "
    title_question = f"{name} Questions recreation v3 CLAUDE "

    # The records are passed as an in-memory dict rather than a file path.
    convert_json_into_tex_question(data, output_question_path, title_question)
    convert_json_into_tex_solution(data, output_solution_path, title_solution)