In [1]:
import base64
import json
import mimetypes
import os

import openai
from dotenv import load_dotenv
# Load variables from a local .env file (python-dotenv) before reading the key below.
load_dotenv()
# os.getenv returns None when OPENAI_API_KEY is not set.
api_key = os.getenv("OPENAI_API_KEY")

# Module-level client, constructed without an explicit api_key argument.
# NOTE(review): every function visible in this notebook builds its own client,
# so this instance looks unused — confirm before relying on or removing it.
client = openai.OpenAI()
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 text string.

    The file is opened in binary mode, read in full, base64-encoded and the
    resulting bytes decoded as UTF-8 so the value can be embedded in text.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')

def stage_1_ocr(image_path):
    """Stage 1: transcribe a local image of student work with GPT-4o.

    The image is sent inline as a base64 data URL together with an instruction
    to transcribe it (LaTeX for math) and reply with JSON.

    Args:
        image_path: Path to the image file on disk.

    Returns:
        The message content of the first choice in the response. The request
        sets ``response_format`` to ``json_object`` and the prompt asks for a
        'transcription' key.
    """
    client = openai.OpenAI(api_key=api_key)
    base64_image = encode_image(image_path)

    # Label the data URL with the file's actual image type instead of always
    # claiming JPEG. Unknown or non-image extensions keep the previous
    # "image/jpeg" default, so .jpg/.jpeg inputs behave exactly as before.
    mime_type = mimetypes.guess_type(str(image_path))[0]
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Transcribe the student's work in this .jpg. If you see math, use LaTeX. Output JSON format with key 'transcription'."},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{base64_image}",
                            "detail": "high"  # Critical for handwriting accuracy
                        }
                    }
                ],
            }
        ],
        response_format={"type": "json_object"}
    )
    return response.choices[0].message.content

def extract_student_work(image_url):
    """OCR a student paper referenced by ``image_url`` using GPT-4o.

    Args:
        image_url: Value placed in the request's ``image_url.url`` field.

    Returns:
        The message content of the first choice in the response; the request
        sets ``response_format`` to ``json_object``.
    """
    # Assemble the two-part user message: the instruction, then the image.
    instruction_part = {
        "type": "text",
        "text": "OCR this student paper. If it is math, use LaTeX for symbols. Output ONLY valid JSON.",
    }
    image_part = {"type": "image_url", "image_url": {"url": image_url}}
    user_message = {"role": "user", "content": [instruction_part, image_part]}

    ocr_client = openai.OpenAI(api_key=api_key)
    completion = ocr_client.chat.completions.create(
        model="gpt-4o",
        messages=[user_message],
        response_format={"type": "json_object"},
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

import os
import glob
from pathlib import Path
# Pick one sample image and run the Stage-1 OCR on it.
# os.listdir returns entries in arbitrary order, so both listings are sorted to
# make the "first" sample the same on every run. Hidden entries (e.g. .DS_Store)
# and anything that is not a directory / regular file are skipped.
DATA_ROOT = 'SMHD-forms'
subjects = sorted(
    name for name in os.listdir(DATA_ROOT)
    if not name.startswith('.') and os.path.isdir(os.path.join(DATA_ROOT, name))
)
if not subjects:
    raise FileNotFoundError(f"No subject folders found under {DATA_ROOT}/")
# img_url = 'file-2oS63JiKuqKpPWf5uShL3V'
# extract_student_work(img_url)
sample_files = sorted(
    name for name in os.listdir(os.path.join(DATA_ROOT, subjects[0]))
    if not name.startswith('.')
    and os.path.isfile(os.path.join(DATA_ROOT, subjects[0], name))
)
if not sample_files:
    raise FileNotFoundError(f"No image files found under {DATA_ROOT}/{subjects[0]}/")
# Built directly rather than via glob, so names containing glob
# metacharacters such as '[' still resolve.
img_url = f"{DATA_ROOT}/{subjects[0]}/{sample_files[0]}"
stage_1_ocr(img_url)
# Sample Output for the Mitochondria prompt:
# {
#   "transcription": "The mitochondria is the power store of the cell because it makes energy",
#   "legibility_score": 0.95,
#   "detected_format": "text"
# }

'{"transcription": "viruses is most abondent entities. They are non entities having genitic material encapsuleted in a protein covering (protein coat). they are termed (posion substances) cating word (.venous). that bacteria can not pass through chamberland filters but agent causing Rovies do. True natur of virus TMV compound microscope silver crystals. protein in nature nucleotides. nucleic Acid in samallar amount of DNA/RNA. electro electro microscope => disease causing agent."}'

In [None]:
def stage_2_3_grade(transcription, rubric_type="science"):
    """Stages 2-3: grade a transcription against a built-in rubric with GPT-4o.

    Args:
        transcription: Student work to grade; interpolated into the prompt.
        rubric_type: Key into the built-in rubrics ("science" or "math").
            Any other key raises KeyError.

    Returns:
        The message content of the first choice in the response; the request
        sets ``response_format`` to ``json_object``.
    """
    client = openai.OpenAI()

    # Rubrics for the two sample subjects.
    rubrics = dict(
        science="1pt: Powerhouse concept. 1pt: ATP mention. Deduct 0.5pt: 'Makes energy' (incorrect phrasing).",
        math="1pt: Isolate x term. 1pt: Final answer. Deduct 1pt: Arithmetic error.",
    )
    selected_rubric = rubrics[rubric_type]

    # The prompt is assembled line by line so that its leading indentation and
    # trailing whitespace are explicit; the joined text starts with a newline
    # and ends with four spaces.
    prompt_lines = [
        "",
        "    You are a strict academic grader. ",
        f"    Rubric: {selected_rubric}",
        f"    Student Work: {transcription}",
        "",
        "    Follow this logic:",
        "    1. internal_monologue: Break down the student's logic step-by-step.",
        "    2. Identify specific strengths and weaknesses.",
        "    3. Calculate the final score.",
        "    ",
        '    Output JSON: { "internal_monologue": "", "score": 0.0, "strengths": [], "weaknesses": [], "feedback": "" }',
        "    ",
    ]
    prompt = "\n".join(prompt_lines)

    grading_request = [{"role": "user", "content": prompt}]
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=grading_request,
        response_format={"type": "json_object"},
    )
    top_choice = response.choices[0]
    return top_choice.message.content

# stage_2_3_grade requires the transcription text; calling it with no
# arguments raises TypeError. Run Stage 1 on the sample image selected earlier,
# decode its JSON reply, and grade the 'transcription' field.
sample_ocr_json = stage_1_ocr(img_url)
sample_transcription = json.loads(sample_ocr_json)["transcription"]
stage_2_3_grade(sample_transcription, rubric_type="science")

In [None]:
from pydantic import BaseModel, Field, validator
from typing import List

class EvaluationSchema(BaseModel):
    """Validated shape of a grading result.

    Fields:
        score: Numeric grade; must lie in the closed range 0 to 10.
        strengths: List of strength descriptions.
        weaknesses: List of weakness descriptions.
        feedback_to_student: Free-text feedback.
    """
    score: float = Field(description="Score from 0 to 10")
    strengths: List[str]
    weaknesses: List[str]
    # NOTE(review): the prompt in stage_2_3_grade asks the model for a
    # "feedback" key, not "feedback_to_student" — confirm which producer is
    # meant to feed this schema before validating its output here.
    feedback_to_student: str

    @validator('score')
    def scale_check(cls, v):
        """Reject scores outside the range 0 to 10, including NaN."""
        # Positive range test: every comparison with NaN is False, so the
        # previous `v < 0 or v > 10` check let NaN through unnoticed.
        if not (0 <= v <= 10):
            raise ValueError("Score must be between 0 and 10")
        return v

# Example usage:
# raw_json = grade_answer(extracted_text, science_rubric)
# validated_data = EvaluationSchema.model_validate_json(raw_json)
# print(validated_data.score)