In [None]:
import os
import glob
from pathlib import Path
# !pip install pdf2image
from pdf2image import convert_from_path

# Source document: the reference-solution PDF that gets rasterized below.
pdf_path = 'HandwrittenGrading-ReferenceSolution.pdf'

# Turn the PDF into a list of PIL Image objects.
# Windows users: also pass poppler_path=r'C:\path\to\poppler-xx\Library\bin'
images = convert_from_path(pdf_path)

# Write every image to disk as page_<n>.jpg, numbering from 1, and report each file.
for page_number, page_image in enumerate(images, start=1):
    output_name = f'page_{page_number}.jpg'
    page_image.save(output_name, 'JPEG')
    print(f'Saved: {output_name}')

In [8]:
import openai
import base64
from dotenv import load_dotenv
import os
# Load variables from a .env file (if any) into the process environment, then
# read the OpenAI key from it. api_key is None when the variable is not set.
load_dotenv()
api_key = os.environ.get("OPENAI_API_KEY")


def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as base64 text.

    Args:
        image_path: Path of the file to read (opened in binary mode).

    Returns:
        The base64 encoding of the file contents, as a ``str``.
    """
    with open(image_path, "rb") as source:
        raw_bytes = source.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

def stage_1_ocr(image_path):
    """Transcribe one scanned exam page with GPT-4o vision.

    The image is sent inline as a base64 ``data:image/jpeg`` URL together with
    a prompt asking for the question translated into English and the student's
    answer transcribed, with math written in LaTeX.

    Args:
        image_path: Path to the JPEG image of the exam page.

    Returns:
        The model's reply as a raw (unparsed) JSON string. The prompt asks for
        the keys "Translated Question" and "Student Answer".
    """
    client = openai.OpenAI(api_key=api_key)
    base64_image = encode_image(image_path)
    # Plain triple-quoted string: nothing is interpolated, so the JSON braces
    # need no escaping. The template shows the model the two exact keys to use.
    # The word "JSON" must stay in the prompt because JSON mode
    # (response_format below) requires it to appear in the messages.
    prompt = """In this .jpg:

      1) Translate the question into English.
      2) Transcribe the student's answer.
      3) If you see math, use LaTeX.

      Output JSON = {"Translated Question": "", "Student Answer": ""}
      """
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}",
                            "detail": "high"  # Critical for handwriting accuracy
                        }
                    }
                ],
            }
        ],
        response_format={"type": "json_object"}
    )
    return response.choices[0].message.content

def extract_student_work(image_url):
    """OCR a student paper from an image URL using GPT-4o.

    Args:
        image_url: URL of the image, passed to the model unchanged.

    Returns:
        The model's reply as a raw (unparsed) JSON string.
    """
    instruction_part = {
        "type": "text",
        "text": "OCR this student paper. If it is math, use LaTeX for symbols. Output ONLY valid JSON.",
    }
    image_part = {"type": "image_url", "image_url": {"url": image_url}}
    conversation = [{"role": "user", "content": [instruction_part, image_part]}]

    completion = openai.OpenAI(api_key=api_key).chat.completions.create(
        model="gpt-4o",
        messages=conversation,
        response_format={"type": "json_object"},
    )
    first_choice = completion.choices[0]
    return first_choice.message.content


# Run stage-1 OCR on every JPEG in "real exam/" and print each raw JSON reply.
# glob.glob() gives no ordering guarantee (it depends on the file system), so
# sort the matches to keep the printed output in a stable order across runs.
# Note: the sort is lexicographic, so "page_10.jpg" comes before "page_2.jpg".
for img in sorted(glob.glob("real exam/*.jpg")):
    print(stage_1_ocr(img))



{"Translated Question": "1. (25%) In the use of voltage dividers, we encountered a compromise: for certain reasons, we want to build the divider with resistors of the smallest possible resistance, but on the other hand, we want the resistances to be as large as possible. Explain the contradiction and the reasons for it.", "Student Answer": "1. THEVENIN'S RESISTANCE OF THE DIVIDER IS \\( R_1 || R_2 \\), WHERE \\( R_1 \\) AND \\( R_2 \\) ARE THE RESISTORS THAT MAKE IT UP. WE WANT THE THEVENIN RESISTANCE TO BE AS SMALL AS POSSIBLE TO REDUCE THE LOAD VOLTAGE DROPS, => THE SMALLER THE RESISTOR. THE DIVIDER LOADS THE CURRENT SOURCE \\( V_1 \\), WHICH IS EQUAL TO \\( V_1 / (R_1 + R_2) \\). THE SMALLER THE RESISTORS, MORE CURRENT IS USED UP, AND GREATER LOSSES ARE CREATED => THE GREATER THE RESISTOR."}
{
  "Translated Question": "3. (50%) Two batteries with Thevenin voltages Ut1 and Ut2 and Thevenin internal resistances Rt1 and Rt2 are connected in parallel and connected to load Rb.\n   a. Dra

In [None]:
def stage_2_3_grade(transcription, rubric_type="science"):
    """Grade a transcribed student answer against one of the built-in rubrics.

    Args:
        transcription: The student's work as text; it is inserted into the
            grading prompt as-is.
        rubric_type: Which built-in rubric to apply: "science" (default) or
            "math".

    Returns:
        The model's reply as a raw (unparsed) JSON string. The prompt asks for
        the keys internal_monologue, score, strengths, weaknesses and feedback.

    Raises:
        KeyError: If ``rubric_type`` is not one of the built-in rubrics.
    """
    # Define rubrics for your two samples
    rubrics = {
        "science": "1pt: Powerhouse concept. 1pt: ATP mention. Deduct 0.5pt: 'Makes energy' (incorrect phrasing).",
        "math": "1pt: Isolate x term. 1pt: Final answer. Deduct 1pt: Arithmetic error."
    }

    # Validate before building a client so a typo fails immediately with a
    # message that lists the valid choices (still a KeyError, as before).
    if rubric_type not in rubrics:
        raise KeyError(
            f"Unknown rubric_type {rubric_type!r}; expected one of {sorted(rubrics)}"
        )
    rubric = rubrics[rubric_type]

    # Pass the key explicitly, like the other OpenAI helpers in this notebook.
    client = openai.OpenAI(api_key=api_key)

    prompt = f"""
    You are a strict academic grader. 
    Rubric: {rubric}
    Student Work: {transcription}

    Follow this logic:
    1. internal_monologue: Break down the student's logic step-by-step.
    2. Identify specific strengths and weaknesses.
    3. Calculate the final score.
    
    Output JSON: {{ "internal_monologue": "", "score": 0.0, "strengths": [], "weaknesses": [], "feedback": "" }}
    """

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": prompt}],
        response_format={"type": "json_object"}
    )
    return response.choices[0].message.content

# stage_2_3_grade()

In [None]:
from pydantic import BaseModel
from typing import List

class RubricCriteria(BaseModel):
    """One named grading criterion of a rubric."""
    # Short label for the criterion.
    name: str  # e.g., "Scientific Accuracy" or "Algebraic Logic"
    # Numeric weight of the criterion; presumably its share of the total
    # score. The scale is not fixed anywhere in this notebook (TODO confirm).
    weight: float
    # Free-text explanation of what the criterion assesses.
    description: str

class MarkingTask(BaseModel):
    """Inputs for grading one answer: question, reference answer, student answer and rubric criteria."""
    # Text of the exam question.
    question: str
    # Reference ("model") answer the student's work is compared against.
    model_answer: str
    # The student's answer as text.
    student_answer: str
    # Rubric criteria supplied per task, so the same grader can serve any subject.
    criteria: List[RubricCriteria] # This makes it "General"

def generate_dynamic_grade(task: MarkingTask):
    """Grade one student answer against the rubric criteria carried by ``task``.

    The examiner instructions and the criteria (name, weight, description) go
    in the system message. The question, reference answer and student answer
    go in the user message. This works for any subject because the rubric
    comes from the task itself.

    Args:
        task: The question, reference answer, student answer and criteria.

    Returns:
        The model's reply as a raw (unparsed) JSON string. The prompt asks for
        the keys internal_monologue, score, strengths, weaknesses and feedback.
    """
    # One line per criterion keeps name, weight and description readable.
    criteria_text = "\n".join(
        f"- {c.name} (weight {c.weight}): {c.description}" for c in task.criteria
    )

    # JSON mode (response_format below) requires the word "JSON" to appear in
    # the messages, so the output template is part of the system prompt.
    system_prompt = (
        "You are a professional examiner.\n"
        "Grade the student's answer based on the following criteria:\n"
        f"{criteria_text}\n\n"
        "Output JSON: "
        '{ "internal_monologue": "", "score": 0.0, "strengths": [], '
        '"weaknesses": [], "feedback": "" }'
    )
    user_prompt = (
        f"Question: {task.question}\n"
        f"Reference Model Answer: {task.model_answer}\n"
        f"Student Work: {task.student_answer}"
    )

    # openai is a module; the client class is openai.OpenAI.
    client = openai.OpenAI(api_key=api_key)
    response = client.chat.completions.create(
        model='gpt-4o',
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        response_format={"type": "json_object"},
    )
    return response.choices[0].message.content

# Example usage:
# raw_json = grade_answer(extracted_text, science_rubric)
# validated_data = EvaluationSchema.model_validate_json(raw_json)
# print(validated_data.score)