In [1]:
import os
import json
import time
import copy
import asyncio
import base64
import io
import requests

from pathlib import Path
from dotenv import load_dotenv

import re
import numpy as np
import pandas as pd
from PIL import Image
from bs4 import BeautifulSoup

from tqdm import tqdm
from pydantic import BaseModel, Field
from openai import OpenAI, AsyncOpenAI
import instructor

In [None]:
# Read the API key from the process environment.
# NOTE(review): `load_dotenv` is imported in the first cell but never called in the
# visible cells, so a key stored only in a .env file would not be picked up here —
# confirm whether a `load_dotenv()` call is missing.
OPENAI_KEY=os.getenv("OPENAI_KEY")

# Plain synchronous client (not referenced by the other visible cells).
openai_client = OpenAI(api_key=OPENAI_KEY)
# instructor-patched sync and async clients (not referenced by the other visible cells).
instructor_client = instructor.patch(OpenAI(api_key=OPENAI_KEY))
async_instruction_client = instructor.patch(AsyncOpenAI(api_key=OPENAI_KEY))

# Async instructor client; this is the one `gpt_classification` calls.
async_client_w_instructor = instructor.from_openai(
    AsyncOpenAI(api_key=OPENAI_KEY)
)

In [3]:
def encode_image_to_base64(img: Image.Image) -> str:
    """Serialize a PIL image and return the bytes as a base64 string.

    The image is written as PNG (after conversion to RGBA). If that raises
    ``OSError``, the image is written as JPEG instead.

    Args:
        img: The PIL image to encode.

    Returns:
        The base64-encoded (UTF-8 decoded) PNG or JPEG bytes.

    Raises:
        OSError: If the JPEG fallback fails as well.
    """
    try:
        png_buffer = io.BytesIO()
        img.convert("RGBA").save(png_buffer, format="PNG")
        encoded_bytes = png_buffer.getvalue()
    except OSError:
        # Use a fresh buffer so bytes from a partially written PNG cannot end up
        # in front of the JPEG data.
        jpeg_buffer = io.BytesIO()
        # JPEG has no alpha channel; saving an RGBA image as JPEG raises OSError,
        # so the fallback must convert to RGB first.
        img.convert("RGB").save(jpeg_buffer, format="JPEG")
        encoded_bytes = jpeg_buffer.getvalue()
    return base64.b64encode(encoded_bytes).decode("utf-8")


def _guess_image_mime_type(image_b64) -> str:
    """Guess the MIME type of a base64-encoded image from its leading characters.

    Falls back to ``image/jpeg`` when the prefix is not recognised.
    """
    # Base64 encodings of the magic bytes that open each file format.
    known_prefixes = (
        ("iVBORw0KGg", "image/png"),  # \x89PNG\r\n\x1a\n
        ("/9j/", "image/jpeg"),  # \xff\xd8\xff
        ("R0lGOD", "image/gif"),  # GIF8
    )
    if isinstance(image_b64, str):
        for prefix, mime_type in known_prefixes:
            if image_b64.startswith(prefix):
                return mime_type
    return "image/jpeg"


def create_messages(
    prompt_system_formatted: str, prompt_user_formatted: str, images_b64_list: list
) -> list:
    """Build the chat message list: one system message and one user message.

    Each base64 image is appended to the user message as an ``image_url`` part
    holding a data URL. The MIME type in the data URL is derived from the
    payload so it matches the encoded bytes (for example PNG data is labelled
    ``image/png``); unrecognised payloads are labelled ``image/jpeg``.

    Args:
        prompt_system_formatted: Text of the system message.
        prompt_user_formatted: Text of the user message.
        images_b64_list: Base64-encoded images to attach; ``None`` or an empty
            list attaches nothing.

    Returns:
        A two-element list of message dicts (system, then user).
    """
    messages = [
        {
            "role": "system",
            "content": [
                {
                    "type": "text",
                    "text": prompt_system_formatted,
                }
            ],
        },
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": prompt_user_formatted,
                }
            ],
        },
    ]

    user_content = messages[1]["content"]
    for image_b64 in images_b64_list or []:
        mime_type = _guess_image_mime_type(image_b64)
        user_content.append(
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:{mime_type};base64,{image_b64}",
                    "detail": "high",
                },
            }
        )

    return messages


def format_prompt(
    prompt_system: str,
    prompt_user: str,
    school_subject: str,
) -> tuple[str, str]:
    """Return the (system, user) prompt pair ready to be sent.

    The system prompt is passed through untouched; only the user prompt is
    formatted, filling its ``{school_subject}`` placeholder.
    """
    return prompt_system, prompt_user.format(school_subject=school_subject)

In [4]:
# Load the question sample. Later cells read its `text`, `schoolSubjectValue`,
# `reference` and `options` columns.
stem_sample = pd.read_parquet("data/stem_sample.parquet")

In [5]:
# System prompt (Portuguese): asks the model to act as a Math/Physics/Chemistry
# education specialist and describe a question's image so that someone else could
# reconstruct it, starting the sentence with a request that the image be built.
# NOTE(review): the text contains apparent typos ("reconstruíla", "imaginar
# visualizar"); left untouched here because editing the prompt changes model output.
prompt_system = """Atue como um profissional da educação altamente especializado em Matemática, Física e Química. Seu objetivo hoje é descrever a imagem de uma questão, detalhando e descrevendo o que está acontecendo na imagem, de forma que outra pessoa possa reconstruíla.

Descreva a imagem de forma a:
    1- Identificar o que está acontecendo.
    2- Indicar as relações entre os elementos da imagem.
    3- Possibilitar que alguém, sem a imagem, consiga imaginar visualizar o que está acontecendo.

Seja sucinto e objetivo, descrevendo o que está acontecendo na imagem de forma simples e direta. Inicie a sentença solicitando que a imagem seja construída/feita.
"""
# User prompt template; `{school_subject}` is filled in by `format_prompt`.
prompt_user = "A seguir está a imagem de {school_subject} para descrever:"

In [6]:
# Structured-output schema; passed as `response_model` in `gpt_classification`.
# Documented with `#` comments rather than a class docstring so that the schema
# generated from this model is left unchanged.
class Description(BaseModel):
    # Free-text description of the image (required field).
    description: str = Field(..., description="Descrição detalhada da imagem.")
    # Disabled second field (resolution of the question), kept for reference:
    #resolution: str = Field(..., description="Resolução da questão.")
    
async def gpt_classification(messages):
    """Send one chat request and return what ``create_with_completion`` yields.

    The request asks for a ``Description`` structured response and uses fixed
    sampling settings (temperature 0, seed 42, top_p 0.2).
    """
    request_options = {
        "model": "gpt-4o-2024-08-06",
        "response_model": Description,
        "messages": messages,
        "temperature": 0.0,
        "seed": 42,
        "top_p": 0.2,
    }
    completions_api = async_client_w_instructor.chat.completions

    return await completions_api.create_with_completion(**request_options)

async def gpt_classification_async(messages_list):
    """Run ``gpt_classification`` for every entry of ``messages_list`` concurrently.

    Returns the list produced by ``asyncio.gather``, i.e. one result per entry,
    in the same order as the input.
    """
    pending_requests = [gpt_classification(messages) for messages in messages_list]

    return await asyncio.gather(*pending_requests)

In [7]:
# Build one chat request per question, in the row order of `stem_sample`.
# For each row: take the first <img> that has a src, download it, base64-encode
# it, and attach it to the formatted prompts.
IMAGE_REQUEST_TIMEOUT_SECONDS = 30

messagees_list = []
for i, row in tqdm(stem_sample.iterrows(), total=len(stem_sample)):
    raw_html = row["text"]

    soup = BeautifulSoup(raw_html, 'html.parser')

    img_tags = soup.find_all('img')
    image_urls = [img.get('src') for img in img_tags if img.get('src')]
    if not image_urls:
        # Fail with a message that names the row instead of a bare IndexError.
        raise ValueError(f"Row {i!r} has no <img> tag with a src attribute.")

    # A timeout keeps a stalled server from hanging the cell forever, and
    # raise_for_status stops an HTTP error page from being parsed as an image.
    response = requests.get(image_urls[0], timeout=IMAGE_REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    pil_img = Image.open(io.BytesIO(response.content))

    img_base64 = encode_image_to_base64(pil_img)

    school_subject = row["schoolSubjectValue"]

    prompt_system_formatted, prompt_user_formatted = format_prompt(
        prompt_system,
        prompt_user,
        school_subject,
    )

    messages = create_messages(
        prompt_system_formatted, prompt_user_formatted, [img_base64]
    )

    messagees_list.append(messages)

100%|██████████| 131/131 [00:27<00:00,  4.82it/s]


In [9]:
# Top-level await (run inside the notebook's event loop): issues one request per
# entry of `messagees_list` via asyncio.gather, so results keep the input order.
response_results = await gpt_classification_async(messagees_list)

In [19]:
import cv2
import matplotlib.pyplot as plt
from weasyprint import HTML, CSS

In [25]:
def pixels_should_be_conserved(pixels, threshold=0, max_black_ratio=0.999) -> bool:
    """Decide whether a line of pixels should be kept.

    A pixel counts as black when all of its values along axis 1 are
    ``<= threshold``. The line is kept when it is non-empty and its share of
    black pixels does not exceed ``max_black_ratio``.
    """
    is_black = np.all(pixels <= threshold, axis=1)
    total_pixels = len(pixels)
    if total_pixels == 0:
        return False

    return is_black.sum() / total_pixels <= max_black_ratio
    
def edge_removal(image):
    """Return ``image`` without the rows and columns rejected by
    ``pixels_should_be_conserved``.

    Both the row and the column decisions are made on the input image; the
    rows are removed first and the columns afterwards.
    """
    num_rows, num_columns, _ = image.shape

    rows_to_keep = []
    for row_idx in range(num_rows):
        if pixels_should_be_conserved(image[row_idx, :, :]):
            rows_to_keep.append(row_idx)

    columns_to_keep = []
    for col_idx in range(num_columns):
        if pixels_should_be_conserved(image[:, col_idx, :]):
            columns_to_keep.append(col_idx)

    without_rows = image[rows_to_keep, :, :]

    return without_rows[:, columns_to_keep, :]

def cut_white(image):
    """Return the index of the bottom-most row that is not entirely white.

    Only the first channel is inspected: a row counts as white when every
    value in ``image[row, :, 0]`` equals 255.

    The scan covers rows from the bottom up. (Indexing with ``image[-i]``
    starting at ``i == 0`` would look at the *top* row first, so a non-white
    top row would hide everything below it; this implementation avoids that.)

    Args:
        image: Array indexed as ``image[row, column, channel]``.

    Returns:
        The row index as an ``int``, or ``-1`` when every row is white or the
        image has no rows.
    """
    if image.shape[0] == 0:
        return -1

    row_is_not_white = image[:, :, 0].min(axis=1) != 255
    non_white_rows = np.flatnonzero(row_is_not_white)
    if non_white_rows.size == 0:
        return -1

    return int(non_white_rows[-1])


def print_question(reference):
    """Display the rendered image of a question with matplotlib.

    Args:
        reference: Question reference; the image is read from
            ``data/questions_to_image/{reference}.png``.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    image_path = f"data/questions_to_image/{reference}.png"
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError(f"Could not read question image: {image_path}")

    # OpenCV loads channels as BGR while matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.show()

    return None

In [None]:
# Render every question as a PNG in which the described image is replaced by the
# description text the model produced for it.
css = 'body {background: white;} ' #img { width: 500px;}'
css = CSS(string=css)
# Labels prepended to the answer options, in order.
value_ans = ["A)","B)","C)","D)","E)","F)","G)","H)"]

rendered_page_path = "data/output.png"
output_dir = "data/questions_image_with_description"
# cv2.imwrite does not create missing directories; it just reports failure.
os.makedirs(output_dir, exist_ok=True)

# `response_results` was built in row order, so it is indexed by row position
# rather than by the DataFrame index label (the two only agree for a 0..n-1 index).
for position, (i, row) in enumerate(tqdm(stem_sample.iterrows(), total=len(stem_sample))):
    raw_html = row["text"]
    reference = row["reference"]
    schoolSubject = row["schoolSubjectValue"]
    options = row["options"]

    image_description = response_results[position][0].description

    soup = BeautifulSoup(raw_html, 'html.parser')

    # Replace the same tag whose image was described: the first <img> with a src.
    old_tag = next((tag for tag in soup.find_all('img') if tag.get('src')), None)
    if old_tag is None:
        raise ValueError(f"Row {i!r} has no <img> tag with a src attribute.")

    # Create the replacement tag
    new_tag = soup.new_tag('p')
    new_tag.string = image_description

    # Swap the image for its description
    old_tag.replace_with(new_tag)

    text = str(soup)

    if options is not None and len(options) != 0:
        options_string = ""
        for value, option in enumerate(options):
            # Insert the label right after the option's opening tag.
            p_split = option.split(">",1)
            if len(p_split) > 1:
                options_string = options_string + p_split[0] + f"> {value_ans[value]} " + p_split[1]
            else:
                options_string = options_string + option
        html = text + options_string
    else:
        html = text

    # NOTE(review): `write_png` appears to have been removed from newer WeasyPrint
    # releases — confirm the pinned version still provides it.
    html = HTML(string=html)
    html.write_png(rendered_page_path, stylesheets=[css], resolution=128)

    question_image = cv2.imread(rendered_page_path, 1)
    question_image = edge_removal(question_image)
    idx_to_cut = cut_white(question_image)
    # Keep 20 extra rows below the cut position as bottom padding.
    question_image = question_image[:idx_to_cut+20,:,:]

    output_path = f"{output_dir}/{schoolSubject}_{reference}.png"
    if not cv2.imwrite(output_path, question_image):
        raise OSError(f"Could not write question image: {output_path}")

100%|██████████| 131/131 [01:23<00:00,  1.56it/s]
