In [1]:
#Import important libraries
import os
import json
import ast
import re

import torch
import pandas as pd
import numpy as np
import sklearn as sk

from dotenv import load_dotenv
from together import Together
from openai import OpenAI

load_dotenv()
TOKEN = os.getenv("AStarPrivate")
if TOKEN is None:
    raise RuntimeError("Token parsing failed")

client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=TOKEN,
)

models = ["openai/gpt-4.1", "meta-llama/llama-4-scout", "deepseek/deepseek-r1", 
          "microsoft/phi-4", "qwen/qwen3-14b", "anthropic/claude-3.7-sonnet"]

dataset = pd.read_excel("./dataset/all-l1-kcs.xlsx", sheet_name = None)

In [2]:
dataset = dataset["All-Level-1-KCs"]
dataset.head(5)

Unnamed: 0,Grade,Grade_ID,Sub_strand,Order,Level_1,Level_1_short
0,Primary 1,P1,WHOLE NUMBERS,1,Numbers up to 100,N100
1,Primary 1,P1,WHOLE NUMBERS,8,Addition and Subtraction,AddSub
2,Primary 1,P1,WHOLE NUMBERS,15,Multiplication and Division,MulDiv
3,Primary 1,P1,MONEY,19,Money,Money
4,Primary 2,P2,WHOLE NUMBERS,1,Numbers up to 1000,N1000


In [3]:
def get_kc_ID(row):
    return (row['Grade_ID'] + row['Level_1_short'])

dataset['kcID'] = dataset.apply(get_kc_ID, axis = 1)
dataset_dct = dataset.set_index('kcID').to_dict(orient = 'index')

In [4]:
print(dataset_dct['P1N100'])

{'Grade': 'Primary 1', 'Grade_ID': 'P1', 'Sub_strand': 'WHOLE NUMBERS', 'Order': 1, 'Level_1': 'Numbers up to 100', 'Level_1_short': 'N100'}


In [5]:
import datetime
model = models[0]
print("Currently working with model: ", model)
kc_index = 0

#Replace / as _ so it wouldn't interfere with path directory
model_path = model.replace("/","_")

all_outputs_dct = {}
all_outputs_text = {}
errors = {}

# Assuming primary_df is defined elsewhere
#for i in range(1, len(dataset.keys())+1):
for kcID in dataset_dct.keys():
    kc_index += 1

    try:
        with open(f"./prompt/question/gpt_system_prompt.txt","r") as file:
            system_prompt = file.read()
    except Exception as e:
        system_prompt = ""
    try:
        with open(f"./prompt/question/gpt_user_prompt.txt","r") as file:
            user_prompt = file.read()
    except Exception as e:
        user_prompt = ""

    prompt = [
        {
            "role": "system",
            "content": system_prompt
        },
        {
            "role" : "user",
            "content" : user_prompt.format(substrand = dataset_dct[kcID]['Sub_strand'], kc = dataset_dct[kcID]['Level_1'], 
                                           grade = dataset_dct[kcID]['Grade_ID'], ID = kcID)
        }
    ]
              
    response = client.chat.completions.create(
            model=model,
            messages=prompt,
            temperature=0.9
        )

    message = response.choices[0].message
    if message is None or message.content is None:
        print(f"No valid message returned for kc: {dataset_dct[kcID]}")
        continue

    response_text = message.content.strip()

    response_text_extracted = None
    match = re.findall(r'\{.*\}', response_text, re.DOTALL)
    if match:
        response_text_extracted = match[0]
    else:
        print("NO JSON FORMAT OUTPUT FOUND! ERROR!")
        print(response_text)

    try:
        response_dct = ast.literal_eval(response_text_extracted)
        all_outputs_dct[kcID] = response_dct
    except Exception as e:
        errors[i] = response_text
        print("Found error at", i, "error: ", e)
        print(response_text)

    all_outputs_text[kcID] = response_text

    print(f"Knowledge component {kcID} ({kc_index}/{len(dataset_dct.keys())}) loaded successfully")

if model is None:
    with open(f'./dataset/default_model_generated_questions.json', 'w') as file:
        json.dump(all_outputs_dct, file, indent=4)
else:
    with open(f'./dataset/{str(datetime.datetime.now())[:-7].replace(":","_").replace(" ","_")}_generated_questions.json', 'w') as file:
        json.dump(all_outputs_dct, file, indent=4)

Currently working with model:  openai/gpt-4.1
Knowledge component P1N100 (1/32) loaded successfully
Knowledge component P1AddSub (2/32) loaded successfully
Knowledge component P1MulDiv (3/32) loaded successfully
Knowledge component P1Money (4/32) loaded successfully
Knowledge component P2N1000 (5/32) loaded successfully
Knowledge component P2FracW (6/32) loaded successfully
Knowledge component P2FracAS (7/32) loaded successfully
Knowledge component P3N10k (8/32) loaded successfully
Knowledge component P3FracEq (9/32) loaded successfully
Knowledge component P4N100k (10/32) loaded successfully
Knowledge component P4FctorM (11/32) loaded successfully
Knowledge component P44Op (12/32) loaded successfully
Knowledge component P4FracMI (13/32) loaded successfully
Knowledge component P4FracSet (14/32) loaded successfully
Knowledge component P4Deci3d (15/32) loaded successfully
Knowledge component P4DeciAS (16/32) loaded successfully
Knowledge component P4DeciMD (17/32) loaded successfully
Know



Knowledge component P5perctg (22/32) loaded successfully
Knowledge component P5rate (23/32) loaded successfully
Knowledge component P6ratio (24/32) loaded successfully
Knowledge component P6algebr (25/32) loaded successfully
Knowledge component O1NumOps (26/32) loaded successfully
Knowledge component O1RioPro (27/32) loaded successfully
Knowledge component O1RatSpd (28/32) loaded successfully
Knowledge component O1AgbrEF (29/32) loaded successfully
Knowledge component O1EqIneq (30/32) loaded successfully
Knowledge component O2Prob (31/32) loaded successfully
Knowledge component O3NumOps (32/32) loaded successfully


OSError: [Errno 22] Invalid argument: './dataset/2025-06-09 14:12:05.032117_generated_questions.json'

In [14]:
with open(f'./dataset/{str(datetime.datetime.now())[:-7].replace(":","_").replace(" ","_")}_generated_questions.json', 'w') as file:
        json.dump(all_outputs_dct, file, indent=4)

In [13]:
str(datetime.datetime.now())[:-7].replace(":","_").replace(" ","_")

'2025-06-09_14_16_03'