In [1]:
import os
import re
import json
import base64
import dotenv
from google import genai

# Load variables from a local .env file into the process environment so that
# the later os.getenv("GEMINI_API_KEY") lookup can find the API key.
dotenv.load_dotenv()

def filepath_to_base64(filepath):
    """Read the file at ``filepath`` and return its bytes as a base64 string.

    Args:
        filepath: Path of the file to read (opened in binary mode).

    Returns:
        The base64 encoding of the file contents, decoded as UTF-8 text.
    """
    with open(filepath, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")


def get_imagepaths(folder, pattern):
    """Collect paths of files under ``folder`` whose names match ``pattern``.

    The directory tree is walked recursively. ``pattern`` is applied to the
    bare file name with ``re.match``, so it is anchored at the start of the
    name only.

    Args:
        folder: Root directory to search.
        pattern: Regular expression tested against each file name.

    Returns:
        A list of matching paths (directory joined with file name), ordered
        by ``natural_sort_key`` so embedded numbers sort numerically.
    """
    matching_paths = [
        os.path.join(dirpath, filename)
        for dirpath, _subdirs, filenames in os.walk(folder)
        for filename in filenames
        if re.match(pattern, filename)
    ]
    # Order by the integers embedded in the path rather than lexically.
    return sorted(matching_paths, key=natural_sort_key)


def natural_sort_key(s):
    """Build a sort key that orders digit runs in ``s`` numerically.

    The string is split around runs of digits; each digit run becomes an
    ``int`` and every other piece is lower-cased, so "a2" sorts before "a10"
    and comparison of the text pieces is case-insensitive.

    Args:
        s: The string to derive a key from.

    Returns:
        A list alternating lower-cased text pieces and integers.
    """
    key_parts = []
    for chunk in re.split(r"(\d+)", s):
        if chunk.isdigit():
            key_parts.append(int(chunk))
        else:
            key_parts.append(chunk.lower())
    return key_parts

def parse_json(json_output):
    """Extract the payload of the first ```json fenced block from model output.

    The text is scanned line by line for an opening "```json" fence. When one
    is found, everything after that line and before the next "```" is
    returned. When no fence is present the input is returned unchanged.

    The fence line is compared after stripping surrounding whitespace, so an
    indented fence or one followed by trailing spaces is still recognised.

    Args:
        json_output: Raw text that may wrap JSON in a markdown code fence.

    Returns:
        The text between the fences (including its trailing newline, if any),
        or ``json_output`` itself when no "```json" line exists.
    """
    lines = json_output.splitlines()
    for i, line in enumerate(lines):
        # Tolerate leading/trailing whitespace around the opening fence.
        if line.strip() == "```json":
            json_output = "\n".join(lines[i + 1:])  # Drop everything up to and including the fence
            json_output = json_output.split("```")[0]  # Drop the closing fence and anything after it
            break  # Only the first fenced block is extracted
    return json_output

In [22]:

# Gemini client: the API key is read from the GEMINI_API_KEY environment
# variable and requests are sent with the "v1alpha" API version.
client = genai.Client(
    api_key=os.getenv("GEMINI_API_KEY"),
    http_options={"api_version": "v1alpha"},
)
# Alternative model id, kept for quick switching:
# model_id = "gemini-2.0-flash-thinking-exp-1219"
model_id = "gemini-2.0-flash-exp"


In [None]:
# Send a plain-text question to the selected model.
response = client.models.generate_content(
    model=model_id,
    contents='How does RLHF work?',
)
# Print every part of the first candidate, labelling parts flagged as
# thoughts separately from the regular response text.
for part in response.candidates[0].content.parts:
    if not part.thought:
        print(f"\nModel Response:\n{part.text}\n")
        continue
    print(f"Model Thought:\n{part.text}\n")

In [25]:
# Page numbers whose scans should be considered.
pages = [1, 3]
folder = "imgs/q11/"
# Build the page selector as an alternation rather than a character class:
# a class such as "[10]" would match the single characters "1" or "0", so
# multi-digit page numbers could never be selected correctly. The dot before
# the extension is escaped and the pattern is anchored at the end so only
# names ending in ".png" are accepted.
page_alternatives = "|".join(str(p) for p in pages)
pattern = rf"doc-\d+-page-(?:{page_alternatives})-[A-Z0-9]+\.png$"
imagepaths = get_imagepaths(folder, pattern)
# Use the second match in natural-sort order; raises IndexError when fewer
# than two files match.
imagepath = imagepaths[1]

prompt = "Describe the image in as much detail as possible."
from PIL import Image

# Load the chosen scan so it can be passed to the model alongside the prompt.
im = Image.open(imagepath)

from google.genai import types
from pydantic import BaseModel, Field
# Number of digits in a university ID.
UNIVERSITY_ID_LEN = 8
# Regex accepting exactly UNIVERSITY_ID_LEN digits. The doubled braces are
# f-string escapes for literal "{" and "}", so this evaluates to "^[0-9]{8}$".
UNIVERSITY_ID_PATTERN = f"^[0-9]{{{UNIVERSITY_ID_LEN}}}$"
# Alias applied to the university_id field; also reused in its description.
UNIVERSITY_ID_ALIAS = "ufid"
# Regex accepting exactly five digits.
SECTION_NUMBER_PATTERN = r"^\d{5}$"

# Pydantic model describing the fields requested for one quiz submission.
# It is passed as response_schema to generate_content so the reply is
# structured JSON.
# NOTE: documented with comments rather than a class docstring because
# pydantic copies a model's docstring into the generated JSON schema
# description, which would change the schema built from this class.
class QuizSubmissionSummary(BaseModel):
    # Separate first/last name fields are disabled in favour of one field.
    # student_first_name: str
    # student_last_name: str
    student_full_name: str = Field(
        description="Full name of the student in the format First Last"
    )
    # Constrained by UNIVERSITY_ID_PATTERN and aliased to UNIVERSITY_ID_ALIAS.
    # NOTE(review): .capitalize() renders the alias as "Ufid" in the
    # description text; confirm that is intended rather than "UFID".
    university_id: str = Field(
        # try also literal list of UFIDs
        pattern=UNIVERSITY_ID_PATTERN,
        alias=UNIVERSITY_ID_ALIAS,
        description=f"{UNIVERSITY_ID_LEN}-digit {UNIVERSITY_ID_ALIAS.capitalize()} of the student",
    )
    # Constrained by SECTION_NUMBER_PATTERN (five digits).
    section_number: str = Field(
        pattern=SECTION_NUMBER_PATTERN,
        description="5-digit section number of the student",
    )

# Send the image together with the text prompt and request a JSON reply
# constrained to the QuizSubmissionSummary schema.
response = client.models.generate_content(
    model=model_id,
    contents=[im, prompt],
    config=types.GenerateContentConfig(
        response_mime_type="application/json",
        response_schema=QuizSubmissionSummary,
    ),
)


In [None]:
# Print every part of the first candidate; parts flagged as thoughts get
# their own label, everything else is shown as the model's response.
for part in response.candidates[0].content.parts:
    if not part.thought:
        print(f"\nModel Response:\n{part.text}\n")
    else:
        print(f"Model Thought:\n{part.text}\n")