In [None]:
import instructor
import llama_cpp

# from llama_cpp.llama_speculative import LlamaPromptLookupDecoding
from pydantic import BaseModel, Field

# Model-loading settings, gathered in one place so they are easy to find and tune.
MODEL_PATH = "../models/Hermes-2-Pro-Llama-3-8B-Q4.gguf"
GPU_LAYERS = -1  # llama-cpp-python documents -1 as "offload all layers to the GPU"
CONTEXT_TOKENS = 2048  # context window (n_ctx) requested when loading the model
# NOTE(review): Hermes 2 Pro models are documented as ChatML-trained; confirm
# that "llama-3" is really the intended chat format for this checkpoint.
CHAT_FORMAT = "llama-3"

# Load the local GGUF checkpoint.
llama = llama_cpp.Llama(
    model_path=MODEL_PATH,
    n_gpu_layers=GPU_LAYERS,
    n_ctx=CONTEXT_TOKENS,
    chat_format=CHAT_FORMAT,
)

# Wrap the OpenAI-style chat-completion entry point with instructor so that
# calls to `create` can take a `response_model` argument.
create = instructor.patch(
    create=llama.create_chat_completion_openai_v1,
    mode=instructor.Mode.JSON_SCHEMA,
)

In [None]:
# Structured result requested from the model for a single exam question.
#
# Documented with comments on purpose: pydantic copies a model's class
# docstring into the JSON schema it generates, which would change what is
# sent to the LLM.
class QuestionExtraction(BaseModel):
    # Free-form reasoning; declared first, presumably so the model writes its
    # reasoning before filling in the code fields (confirm intent).
    chain_of_thought: str = Field(
        description="The chain of thought that led to the prediction.",
    )
    # The part of the question's code that the student is expected to write.
    user_input_code: str = Field(
        description=(
            "Code that the user will implement, such as blank function "
            "implementations for the user to implement. Use the question "
            "prompt to help determine what the user needs to implement."
        ),
    )
    # The part of the question's code that is supplied ready-made.
    predefined_code: str = Field(
        description=(
            "Code that is predefined by the system, such as imports, "
            "function definitions, struct definitions, etc."
        ),
    )


def extract_question_data(data: str) -> QuestionExtraction:
    """Ask the patched chat-completion call to pull the code parts out of a question.

    Args:
        data: Text of one exam question. It is inserted verbatim between
            ``<text>`` tags in the user message.

    Returns:
        Whatever the patched ``create`` call yields for
        ``response_model=QuestionExtraction``.
    """
    # Instructions that frame the task and tell the model not to solve the question.
    system_prompt = (
        "You are an expert at extracting information from exams. "
        "You will be given a question from a Computer Science exam, "
        "and you will need to extract metadata about the question. "
        "Focus on identifying the user_input_code and predefined_code. "
        "Do not solve the question. "
        "For example, if the question provides a function prototype, "
        "extract it as user_input_code. If there are any predefined "
        "function implementations or imports, extract them as predefined_code."
    )
    # Only the second literal interpolates, so only it needs the f-prefix.
    user_prompt = (
        "Extract user_input_code and predefined_code from the following text: "
        f"<text>{data}</text>"
    )

    return create(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        response_model=QuestionExtraction,
    )

In [None]:
from typing import List

from parser.model import Section
from parser.parse import main

# Path of the exam PDF handed to the project parser.
EXAM_PDF = "../fe_files/exams/FE-Aug23.pdf"

sections: List[Section] = main(EXAM_PDF)

# Pick the second question of the second section (both indices are zero-based).
chosen_section = sections[1]
chosen_question = chosen_section.questions[1]
input_question = chosen_question.text

# Show the raw question text before running the extraction.
print(input_question)

extraction = extract_question_data(input_question)

# Pretty-print the structured result as indented JSON.
print(extraction.model_dump_json(indent=2))