### Imports

In [None]:
import os
import time
import json

from openai import OpenAI
from pydantic import BaseModel, RootModel

from concurrent.futures import ThreadPoolExecutor, as_completed
import random
import time

from dotenv import load_dotenv
load_dotenv(override = True)

from typing import Optional

## Main Code

In [104]:
class InferenceSummary(BaseModel):
    """Token usage and cost figures recorded for a single inference call."""

    input_tokens: int  # filled from the API's usage.prompt_tokens
    output_tokens: int  # filled from the API's usage.completion_tokens
    total_tokens: int  # filled from the API's usage.total_tokens
    estimated_cost: float  # filled from usage.estimated_cost; printed with a "$" prefix by process_questions


class Inference:
    """Thin wrapper around an OpenAI-compatible chat-completions endpoint.

    The instance stores default generation settings (model, token limit,
    temperature, streaming mode); each call to :meth:`execute` may override
    any of them.
    """

    def __init__(
        self, model: str, max_tokens: int, base_url: str, temperature: float = 0.4, stream: bool = True
    ):
        """Create the API client and remember the default generation settings.

        Args:
            model: Default model identifier sent with each request.
            max_tokens: Default completion token limit.
            base_url: Base URL of the OpenAI-compatible API.
            temperature: Default sampling temperature.
            stream: Whether responses are streamed by default.
        """
        # The API key is read from the DEEPINFRA_API_KEY environment variable.
        self.openai = OpenAI(api_key=os.getenv("DEEPINFRA_API_KEY"), base_url=base_url)

        self.model = model
        self.max_tokens = max_tokens

        self.temperature = temperature
        self.stream = stream

    def execute(
        self,
        system_prompt: str,
        user_prompt: str,
        temperature: Optional[float] = None,
        stream: Optional[bool] = None,
        max_tokens: Optional[int] = None,
        model: Optional[str] = None,
        verbose: int = 1,
    ) -> tuple[str, InferenceSummary]:
        """Send one system + user prompt pair and return the reply with usage data.

        Args:
            system_prompt: Content of the "system" message.
            user_prompt: Content of the "user" message.
            temperature: Per-call override; ``None`` uses the instance default.
            stream: Per-call override; ``None`` uses the instance default.
            max_tokens: Per-call override; ``None`` uses the instance default.
            model: Per-call override; ``None`` uses the instance default.
            verbose: When greater than 0 and streaming, each received text
                fragment is printed as it arrives.

        Returns:
            A ``(output, summary)`` tuple: the generated text and an
            ``InferenceSummary``. Usage fields the API did not report are 0.

        Raises:
            Whatever the OpenAI client raises for a failed request.
        """
        # `is None` (not `or`) so that explicit falsy overrides such as
        # temperature=0.0 or stream=False are honoured instead of being
        # silently replaced by the instance defaults.
        if temperature is None:
            temperature = self.temperature
        if stream is None:
            stream = self.stream
        # A falsy token limit or model name is never a valid request value,
        # so these two keep falling back to the defaults on any falsy input.
        max_tokens = max_tokens or self.max_tokens
        model = model or self.model

        completion = self.openai.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=temperature,
            stream=stream,
            max_tokens=max_tokens,
        )

        usage = None
        if stream:
            pieces: list[str] = []
            for chunk in completion:
                # Take usage from whichever chunk carries it. Tying it to
                # finish_reason == "stop" loses it when generation ends for
                # another reason (e.g. "length" once max_tokens is reached).
                chunk_usage = getattr(chunk, "usage", None)
                if chunk_usage is not None:
                    usage = chunk_usage

                # Usage-only chunks may arrive with an empty `choices` list.
                if not chunk.choices:
                    continue

                delta = chunk.choices[0].delta
                if delta and delta.content:
                    if verbose > 0:
                        print(delta.content, end="", flush=True)
                    pieces.append(delta.content)
            output = "".join(pieces)
        else:
            # `content` can be None (e.g. an empty completion); keep the
            # declared `str` return type.
            output = completion.choices[0].message.content or ""
            usage = completion.usage

        # getattr with a default also covers `usage is None` and providers
        # that do not send the non-standard `estimated_cost` field.
        summary = InferenceSummary(
            input_tokens=getattr(usage, "prompt_tokens", 0) or 0,
            output_tokens=getattr(usage, "completion_tokens", 0) or 0,
            total_tokens=getattr(usage, "total_tokens", 0) or 0,
            estimated_cost=getattr(usage, "estimated_cost", 0.0) or 0.0,
        )
        return output, summary

In [105]:
class Question(BaseModel):
    """One exam question as stored in the input JSON file (field names are in Portuguese)."""

    id: str  # copied unchanged to Response.id
    edicao: int  # presumably the exam edition (year) — confirm against the dataset
    numero: int  # presumably the question number within that edition — confirm
    enunciado: str  # question statement; copied to Response.statement
    alternativas: list[str]  # answer options; joined one per line into the user prompt
    area_conhecimento: str  # copied to Response.area_knowledge
    area: str  # copied to Response.area
    subarea: str  # copied to Response.subarea
    dificuldade: str  # presumably a difficulty label — confirm the allowed values
    gabarito: str  # answer key; sent to the model in the prompt and copied to Response.answer
    solucao: str  # presumably a reference solution; not read by the code visible here
    dificuldade_experimental: Optional[str] = None  # optional; defaults to None when missing from the input


class Response(BaseModel):
    """One output record: a question's key fields plus the teacher model's explanation."""

    id: str  # Question.id
    statement: str  # Question.enunciado
    alternatives: list[str]  # Question.alternativas
    area_knowledge: str  # Question.area_conhecimento
    area: str  # Question.area
    subarea: str  # Question.subarea
    answer: str  # Question.gabarito (the answer key, not the model's own answer)
    reasoning: str  # model output, or an "[Error after ..." message when every retry failed


def load_questions(input_path: str) -> list[Question]:
    """Load and validate the questions stored as a JSON array in *input_path*.

    Args:
        input_path: Path to a UTF-8 JSON file whose top-level value is a list
            of objects matching the ``Question`` model.

    Returns:
        The validated questions, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        ValueError: If the top-level JSON value is not a list, or if an entry
            fails ``Question`` validation (pydantic's ``ValidationError`` is a
            ``ValueError`` subclass).
    """
    with open(input_path, "r", encoding="utf-8") as f:
        raw_questions = json.load(f)

    # Iterating a dict or string here would only yield a confusing per-item
    # validation error, so reject a wrong top-level shape explicitly.
    if not isinstance(raw_questions, list):
        raise ValueError(
            f"Expected a JSON array of questions in {input_path!r}, "
            f"got {type(raw_questions).__name__}"
        )

    # The data is already parsed, so validate the Python objects directly
    # instead of re-serialising each one to JSON first.
    return [Question.model_validate(raw_question) for raw_question in raw_questions]

In [None]:
def process_questions(
    input_path: str,
    output_path: str,
    model: str,
    base_url: str,
    system_prompt: str,
    max_tokens: int,
    stream: bool,
    verbose: int = 1,
):
    """Generate an explanation for every question and write the results as JSON.

    Questions are loaded from *input_path* and sent to the model concurrently
    (up to 10 worker threads). Each request is retried with exponential
    backoff. The resulting ``Response`` records are written to *output_path*
    as a JSON array, in the same order as the input questions.

    Args:
        input_path: JSON file with the questions (see ``load_questions``).
        output_path: Destination JSON file; missing parent directories are
            created.
        model: Model identifier passed to ``Inference``.
        base_url: Base URL of the OpenAI-compatible API.
        system_prompt: System prompt used for every question.
        max_tokens: Completion token limit for each request.
        stream: Whether responses are streamed.
        verbose: 0 prints nothing except retry notices; any value above 0
            prints the final totals (and streamed tokens when *stream* is
            true); exactly 2 also prints a summary per question.

    Returns:
        None. The results are written to *output_path*.
    """
    questions = load_questions(input_path=input_path)

    # open(..., "w") does not create directories. Create them before any
    # request is made, so a bad path cannot waste a full (paid) run.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    inference_model = Inference(
        model=model,
        max_tokens=max_tokens,
        stream=stream,
        base_url=base_url,
    )

    max_retries = 5
    retry_delay = 1

    def process_single_question(question: Question) -> tuple[Response, InferenceSummary, float]:
        """Run one question through the model, retrying on any failure.

        Returns the response, its usage summary, and the duration in seconds
        of the last attempt. After ``max_retries`` failures the response
        carries an error message as its reasoning and a zeroed summary.
        """
        user_prompt = (
            f"Questão:\n{question.enunciado}\nAlternativas:\n"
            + "\n".join(question.alternativas)
            + f"\nGabarito: {question.gabarito}"
        )

        response = Response(
            id=question.id,
            statement=question.enunciado,
            alternatives=question.alternativas,
            area_knowledge=question.area_conhecimento,
            area=question.area,
            subarea=question.subarea,
            answer=question.gabarito,
            reasoning="",
        )

        for attempt in range(1, max_retries + 1):
            # Restarted on every attempt: the reported time covers only the
            # attempt that produced the returned result.
            start_time = time.time()
            try:
                output, summary = inference_model.execute(
                    system_prompt=system_prompt,
                    user_prompt=user_prompt,
                    # Forward verbosity so verbose=0 also silences the
                    # token-by-token printing done while streaming.
                    verbose=verbose,
                )
            except Exception as e:
                # Deliberately broad: this is the retry boundary for any
                # failure raised by the API call or by response parsing.
                if attempt == max_retries:
                    response.reasoning = f"[Error after  {max_retries} retries] {str(e)}"
                    summary = InferenceSummary(
                        input_tokens=0,
                        output_tokens=0,
                        total_tokens=0,
                        estimated_cost=0.0,
                    )
                    return response, summary, time.time() - start_time

                # Exponential backoff with jitter.
                wait = retry_delay * (2 ** attempt) + random.uniform(0, 1)
                print(f"[Attempt {attempt}/{max_retries}] Error: {e}. Trying again in {wait:.2f} secs...")
                time.sleep(wait)
            else:
                response.reasoning = output
                return response, summary, time.time() - start_time

    # Sum of per-question durations, not wall-clock time (requests overlap).
    total_execution_time = 0
    total_input_tokens = 0
    total_output_tokens = 0
    total_tokens = 0
    total_estimated_cost = 0

    # Pre-sized so each result lands at its question's position, keeping the
    # output order equal to the input order no matter which request finishes
    # first.
    ordered_responses: list[Optional[Response]] = [None] * len(questions)

    with ThreadPoolExecutor(max_workers=10) as executor:
        future_to_index = {
            executor.submit(process_single_question, question): index
            for index, question in enumerate(questions)
        }

        for future in as_completed(future_to_index):
            response, summary, elapsed = future.result()
            ordered_responses[future_to_index[future]] = response

            total_execution_time += elapsed
            total_input_tokens += summary.input_tokens
            total_output_tokens += summary.output_tokens
            total_tokens += summary.total_tokens
            total_estimated_cost += summary.estimated_cost

            if verbose == 2:
                print("\n\n\n === SUMMARY ===", flush=True)
                print(f"Execution time: {elapsed:.2f}s", flush=True)
                print(f"Input tokens: {summary.input_tokens}", flush=True)
                print(f"Output tokens: {summary.output_tokens}", flush=True)
                print(f"Total tokens: {summary.total_tokens}", flush=True)
                print(f"Estimated cost: ${summary.estimated_cost:.6f}", flush=True)

    if verbose > 0:
        print("=" * 30)
        print(f"Total execution time: {total_execution_time:.2f}s")
        print(f"Total input tokens: {total_input_tokens}")
        print(f"Total output tokens: {total_output_tokens}")
        print(f"Total tokens: {total_tokens}")
        print(f"Total estimated cost: ${total_estimated_cost:.6f}")

    Responses = RootModel[list[Response]]
    json_responses = Responses(ordered_responses).model_dump_json(indent=2)

    with open(output_path, "w", encoding="utf-8") as f:
        f.write(json_responses)

# Single Teacher Model

### Constants

In [107]:
# Input questions and output file for the single-teacher run.
INPUT_PATH = "data/sample.json"
OUTPUT_PATH = "data/output.json"

# OpenAI-compatible endpoint passed to the client as base_url.
BASE_URL = "https://api.deepinfra.com/v1/openai"

TEACHER_MODEL = "deepseek-ai/DeepSeek-R1-Turbo"
MAX_TOKENS = 10000  # completion token limit passed with every request

# System prompt (Portuguese) sent with every question. Adjacent string
# literals are concatenated; a "\n\n" marks a paragraph break. The bullet list
# is kept contiguous and the blank line sits after its last item, before the
# closing paragraph.
SYSTEM_PROMPT = (
    "Você é um professor experiente em ensino de Ciência da Computação, com foco em ajudar estudantes a entender as questões do POSCOMP.\n\n"

    "Objetivo:\n"
    "Analisar e resolver cada questão passo a passo, explicando detalhadamente cada decisão tomada no processo.\n\n"

    "Princípios orientadores:\n"
    "- Simule seu “processo de pensamento”, explicando cada etapa e raciocínio.\n"
    "- Explique todos os conceitos e teorias aplicadas.\n"
    "- Em questões de programação, destaque a lógica por trás do algoritmo.\n"
    "- Em questões matemáticas, mostre os cálculos claramente, passo a passo.\n"
    "- Evite repetir o texto da questão ou suas alternativas.\n"
    "- Reforce a explicação com pseudocódigo, fórmulas ou diagramas, se necessário.\n"
    "- Use uma linguagem simples e clara, como em uma tutoria particular para alunos brasileiros.\n"
    "- Finalize com: RESPOSTA FINAL: (letra).\n"
    "- **NUNCA** use outro idioma que não seja o português em suas respostas.\n\n"

    "Objetivo: Garantir que o aluno compreenda o conteúdo, e não apenas memorize respostas."
)

In [None]:
# Single-teacher run: process every question in INPUT_PATH with TEACHER_MODEL
# and write the responses to OUTPUT_PATH. Streaming is disabled; verbose=2
# prints a usage summary per question in addition to the final totals.
process_questions(
    input_path=INPUT_PATH,
    output_path=OUTPUT_PATH,
    model=TEACHER_MODEL,
    base_url=BASE_URL,
    system_prompt=SYSTEM_PROMPT,
    max_tokens=MAX_TOKENS,
    stream=False,
    verbose=2,
)

# Multiple Teacher Models

### Constants

In [None]:
# Configuration for the multi-teacher run. INPUT_PATH, BASE_URL and MAX_TOKENS
# are re-bound here to the same values used by the single-teacher section.
INPUT_PATH = "data/sample.json"
# JSON file read by the loop below and indexed as system_prompts[model]["prompt"].
SYSTEM_PROMPTS_PATH = "data/prompts.json"
# Directory receiving one "output-<model name>.json" file per teacher model.
OUTPUT_DIR = "data/results"

BASE_URL = "https://api.deepinfra.com/v1/openai"

# Model identifiers in "<organisation>/<name>" form; the part after the slash
# is used to build each output file name.
TEACHER_MODELS = [
    "deepseek-ai/DeepSeek-R1",
    "deepseek-ai/DeepSeek-R1-Turbo",
    "microsoft/phi-4",
    "Qwen/QwQ-32B",
    "Qwen/Qwen2.5-72B-Instruct",
    "deepseek-ai/DeepSeek-Prover-V2-671B",
    "deepseek-ai/DeepSeek-V3-0324",
    "Qwen/Qwen3-235B-A22B",
]
MAX_TOKENS = 10000  # completion token limit passed with every request

In [None]:
# Per-model system prompts: a JSON object keyed by model identifier, each
# entry holding the prompt text under "prompt".
with open(SYSTEM_PROMPTS_PATH, "r", encoding="utf-8") as f:
    system_prompts = json.load(f)

# open(..., "w") does not create missing directories. Without this, the first
# model's results would be lost when the write fails after all of its
# inference calls have already completed.
os.makedirs(OUTPUT_DIR, exist_ok=True)

for model in TEACHER_MODELS:
    print(f"\n\n\n{'#'*30}")
    print(f"Model: {model}")

    process_questions(
        input_path=INPUT_PATH,
        # Name the file after the part of the model id that follows the "/".
        output_path=f"{OUTPUT_DIR}/output-{model.split('/')[1]}.json",
        model=model,
        base_url=BASE_URL,
        system_prompt=system_prompts[model]["prompt"],
        max_tokens=MAX_TOKENS,
        stream=False,
    )