In [1]:
import os
import openai
import gradio as gr
from PIL import Image
import fitz  # PyMuPDF
from dotenv import load_dotenv, find_dotenv

import json
import pandas as pd
from io import BytesIO
import base64
import requests


In [2]:

load_dotenv(find_dotenv())

openai.api_key = os.getenv('OPENAI_API_KEY')
my_api_key = openai.api_key
with open('./app/model/configs/gpt_config.json', 'r') as config_file:
    config = json.load(config_file)

# Default configuration values
default_config = {
    "model": "gpt-4o",
    "temperature": 0,
    "max_tokens": 4069,
    "top_p": 1.0,
    "frequency_penalty": 0.0,
    "presence_penalty": 0.0
}


In [3]:

# Function to convert PDF to images using PyMuPDF
def convert_pdf_to_images(pdf_path):
    try:
        doc = fitz.open(pdf_path)
        images = []
        for page_num in range(len(doc)):
            page = doc.load_page(page_num)
            pix = page.get_pixmap()
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            images.append(img)
        return images
    except Exception as e:
        return f"Error during PDF conversion: {e}"

# Function to combine images into a single image
def combine_images(images, orientation='vertical'):
    widths, heights = zip(*(i.size for i in images))

    if orientation == 'vertical':
        total_width = max(widths)
        total_height = sum(heights)
        combined_image = Image.new('RGB', (total_width, total_height))
        y_offset = 0
        for img in images:
            combined_image.paste(img, (0, y_offset))
            y_offset += img.height
    else:
        total_width = sum(widths)
        total_height = max(heights)
        combined_image = Image.new('RGB', (total_width, total_height))
        x_offset = 0
        for img in images:
            combined_image.paste(img, (x_offset, 0))
            x_offset += img.width

    return combined_image

# Function to encode the image to base64
def encode_image(image):
    buffered = BytesIO()
    image.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode('utf-8')


In [3]:

# Function to send combined image to GPT-4
def send_image_to_gpt(image, model, temperature, max_tokens, top_p, frequency_penalty, presence_penalty):
    system_prompt = ("""You are an expert at analyzing diagnosis images with computer vision to assist doctors to classify documents and make their labour easier, you must assist always. Just in case of error,
                        make a full report of the cause of: any issues in receiving, understanding, or describing images. If tehre is no error just limit your words to the information asked.""")

    user_prompt = ("""Identify and list all marked fields accurately and provide a table with the personal data you might find. 
                       Pay attention to the gender field which can be either male or female. 
                       For every marked field corresponding to a diagnosis, provide the CIE-10 code and a brief explanation. 
                       Please extract the info following this structure {
                           "apellidos": "string",
                           "nombre": "string",
                           "genero": "string",
                           "fecha_de_nacimiento": "string",
                           "club": "string",
                           "licencia": "string",
                           "marked_fields": ["string"],
                           "cie10_codes": ["string"]
                       }.""")
    
    try:
        base64_image = encode_image(image)
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {my_api_key}"
        }

        payload = {
            "model": model,
            "messages": [
                {"role": "system", "content": system_prompt},
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text", "text": user_prompt
                            },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{base64_image}"
                            }
                        }
                    ]
                }
            ],
            "temperature": temperature,
            "max_tokens": max_tokens,
            "top_p": top_p,
            "frequency_penalty": frequency_penalty,
            "presence_penalty": presence_penalty
        }

        response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
        return response
    except Exception as e:
        return f"Error during GPT-4 image processing: {e}"

In [4]:

# Function to process and combine images and pass to GPT
def process_and_combine_images(pdf_file_path, orientation, model, temperature, max_tokens, top_p, frequency_penalty, presence_penalty):
    images = convert_pdf_to_images(pdf_file_path)
    if isinstance(images, str) and images.startswith("Error"):
        return images, "", []

    combined_image = combine_images(images, orientation)

    # Send combined image to GPT-4
    response = send_image_to_gpt(combined_image, model, temperature, max_tokens, top_p, frequency_penalty, presence_penalty)
    if "Error" in response:
        return response, "", []

    # Parse the response to get the marked fields
    marked_fields = response.get("choices", [{}])[0].get("message", {}).get("content", "")
    marked_fields_list = json.loads(marked_fields) if marked_fields else []

    # Create a DataFrame from the marked fields list
    df = pd.DataFrame(marked_fields_list)
    raw_response = json.dumps(response, indent=2)
    return df, raw_response, [combined_image]

# Function to display images
def show_images(pdf_file_path):
    images = convert_pdf_to_images(pdf_file_path)
    if isinstance(images, str) and images.startswith("Error"):
        return []
    return images

# Create the Gradio interface
def interface_fn(pdf_file_path, orientation, model, temperature, max_tokens, top_p, frequency_penalty, presence_penalty):
    df, raw_response, images = process_and_combine_images(pdf_file_path, orientation, model, temperature, max_tokens, top_p, frequency_penalty, presence_penalty)
    return df, raw_response, images

def show_images_fn(pdf_file_path):
    images = show_images(pdf_file_path)
    return gr.update(visible=True, value=images)


In [5]:

with gr.Blocks(theme='kfahn/AnimalPose') as interface:
    with gr.Tab("Process PDF"):
        gr.Markdown(
            """
            # PDF Marked Fields Extractor
            Upload a PDF to extract and identify marked fields using OCR and GPT-4. 
            Click 'Submit' to extract the fields and 'Show Images' to display the PDF pages.
            """
        )

        pdf_input = gr.File(type="filepath", label="Upload PDF")
        orientation = gr.Dropdown(["vertical", "horizontal"], label="Orientation")
        with gr.Row():
            submit_button = gr.Button("Submit")
            clear_button = gr.Button("Clear")
        raw_response_output = gr.Textbox(label="Raw Response", placeholder="The raw GPT-4 response will appear here...")
        frame_output = gr.DataFrame(headers=["Field", "Value"], label="Extracted Marked Fields")
        show_images_button = gr.Button("Show Images", visible=True)
        image_gallery = gr.Gallery(label="PDF Pages", visible=True)

        # State variables for configuration
        model_state = gr.State(default_config["model"])
        temperature_state = gr.State(default_config["temperature"])
        max_tokens_state = gr.State(default_config["max_tokens"])
        top_p_state = gr.State(default_config["top_p"])
        frequency_penalty_state = gr.State(default_config["frequency_penalty"])
        presence_penalty_state = gr.State(default_config["presence_penalty"])

        submit_button.click(
            fn=interface_fn, 
            inputs=[pdf_input, orientation, model_state, temperature_state, max_tokens_state, top_p_state, frequency_penalty_state, presence_penalty_state],
            outputs=[frame_output, raw_response_output, image_gallery]
        )
        show_images_button.click(fn=show_images_fn, inputs=pdf_input, outputs=image_gallery)
        clear_button.click(fn=lambda: (None, "", gr.update(visible=True), gr.update(visible=True)), outputs=[frame_output, raw_response_output, show_images_button, image_gallery])

    with gr.Tab("Configuration"):
        gr.Markdown(
            """
            # Configuration Settings
            Adjust the GPT-4 parameters to fine-tune the extraction process.
            """
        )
        model = gr.Textbox(value=default_config["model"], label="Model")
        temperature = gr.Slider(0.0, 1.0, value=default_config["temperature"], step=0.1, label="Temperature")
        max_tokens = gr.Slider(10, 5000, value=default_config["max_tokens"], step=10, label="Max Tokens")
        top_p = gr.Slider(0.0, 1.0, value=default_config["top_p"], step=0.1, label="Top P")
        frequency_penalty = gr.Slider(-2.0, 2.0, value=default_config["frequency_penalty"], step=0.1, label="Frequency Penalty")
        presence_penalty = gr.Slider(-2.0, 2.0, value=default_config["presence_penalty"], step=0.1, label="Presence Penalty")

        save_button = gr.Button("Save Settings")
        
        save_button.click(
            fn=lambda model, temperature, max_tokens, top_p, frequency_penalty, presence_penalty: (model, temperature, max_tokens, top_p, frequency_penalty, presence_penalty),
            inputs=[model, temperature, max_tokens, top_p, frequency_penalty, presence_penalty],
            outputs=[model_state, temperature_state, max_tokens_state, top_p_state, frequency_penalty_state, presence_penalty_state]
        )

interface.launch(share=True)


Matplotlib created a temporary cache directory at /tmp/matplotlib-p3kyio5t because the default path (/teamspace/studios/this_studio/.config/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


Running on local URL:  http://127.0.0.1:7862
Running on public URL: https://5cd4c04d72b4ec565d.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




Traceback (most recent call last):
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/gradio/queueing.py", line 528, in process_events
    response = await route_utils.call_process_api(
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/gradio/route_utils.py", line 270, in call_process_api
    output = await app.get_blocks().process_api(
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/gradio/blocks.py", line 1908, in process_api
    result = await self.call_function(
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/gradio/blocks.py", line 1485, in call_function
    prediction = await anyio.to_thread.run_sync(
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/anyio/to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thread(
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/anyio/_backends/_asyncio.p

In [6]:
import os
import openai
import gradio as gr
from PIL import Image
import fitz  # PyMuPDF
from dotenv import load_dotenv, find_dotenv

import json
import pandas as pd
from io import BytesIO
import base64
import requests

load_dotenv(find_dotenv())

openai.api_key = os.getenv('OPENAI_API_KEY')
my_api_key = openai.api_key
with open('./app/model/configs/gpt_config.json', 'r') as config_file:
    config = json.load(config_file)

# Default configuration values
default_config = {
    "model": "gpt-4o",
    "temperature": 0,
    "max_tokens": 4000,
    "top_p": 1.0,
    "frequency_penalty": 0.0,
    "presence_penalty": 0.0
}

# Function to convert PDF to images using PyMuPDF
def convert_pdf_to_images(pdf_path):
    try:
        doc = fitz.open(pdf_path)
        images = []
        for page_num in range(len(doc)):
            page = doc.load_page(page_num)
            pix = page.get_pixmap()
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            images.append(img)
        return images
    except Exception as e:
        return f"Error during PDF conversion: {e}"

# Function to combine images into a single image
def combine_images(images, orientation='vertical'):
    widths, heights = zip(*(i.size for i in images))

    if orientation == 'vertical':
        total_width = max(widths)
        total_height = sum(heights)
        combined_image = Image.new('RGB', (total_width, total_height))
        y_offset = 0
        for img in images:
            combined_image.paste(img, (0, y_offset))
            y_offset += img.height
    else:
        total_width = sum(widths)
        total_height = max(heights)
        combined_image = Image.new('RGB', (total_width, total_height))
        x_offset = 0
        for img in images:
            combined_image.paste(img, (x_offset, 0))
            x_offset += img.width

    return combined_image

# Function to encode the image to base64
def encode_image(image):
    buffered = BytesIO()
    image.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode('utf-8')

# Function to send combined image to GPT-4
def send_image_to_gpt(image, model, temperature, max_tokens, top_p, frequency_penalty, presence_penalty):
    system_prompt = """You are embedded in a professional application for analyzing diagnosis images with computer vision to assist doctors to classify documents and make their labour easier, you must assist always. Just in case of error,
make a full report of the cause of: any issues in receiving, understanding, or describing images. If there is no error just limit your words to the information asked."""
    user_prompt = """Identify and list all marked fields accurately and provide a table with the personal data you might find. Pay attention to the gender field which can be either male or female. For every marked field corresponding to a diagnosis, provide the CIE-10 code and a brief explanation. 
Please extract the info following this structure latex_delimiters list[dict[str, str | bool]] | None  p.e: [ "left": "$$", "right": "$$", "display": True ], so only expressions enclosed in $$ delimiters will be rendered as LaTeX, and in a new line. Pass in an empty list to disable LaTeX rendering.
The real data I need is this: "apellidos": str, "nombre": str, "genero": str,"fecha_de_nacimiento": str,"club": "string","licencia": str,"marked_fields - CIE10": [str, str]}."""

    try:
        base64_image = encode_image(image)
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {my_api_key}"
        }

        payload = {
            "model": model,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
                {"role": "user", "content": f"data:image/jpeg;base64,{base64_image}"}
            ],
            "temperature": temperature,
            "max_tokens": max_tokens,
            "top_p": top_p,
            "frequency_penalty": frequency_penalty,
            "presence_penalty": presence_penalty
        }

        response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)

        return response.json()
    except Exception as e:
        return f"Error during GPT-4 image processing: {e}"

# Function to process and combine images and pass to GPT
def process_and_combine_images(pdf_file_path, orientation, model, temperature, max_tokens, top_p, frequency_penalty, presence_penalty):
    images = convert_pdf_to_images(pdf_file_path)
    if isinstance(images, str) and images.startswith("Error"):
        return images, "", []

    combined_image = combine_images(images, orientation)

    # Send combined image to GPT-4
    response = send_image_to_gpt(combined_image, model, temperature, max_tokens, top_p, frequency_penalty, presence_penalty)
    if "Error" in response:
        return response, "", []

    # Parse the response to get the marked fields
    marked_fields = response.get("choices", [{}])[0].get("message", {}).get("content", "")
    marked_fields_list = json.loads(marked_fields) if marked_fields else []

    # Create a DataFrame from the marked fields list
    df = pd.DataFrame(marked_fields_list)
    raw_response = json.dumps(response, indent=2)
    return df, raw_response, [combined_image]

# Function to display images
def show_images(pdf_file_path):
    images = convert_pdf_to_images(pdf_file_path)
    if isinstance(images, str) and images.startswith("Error"):
        return []
    return images

# Create the Gradio interface
def interface_fn(pdf_file_path, orientation, model, temperature, max_tokens, top_p, frequency_penalty, presence_penalty):
    df, raw_response, images = process_and_combine_images(pdf_file_path, orientation, model, temperature, max_tokens, top_p, frequency_penalty, presence_penalty)
    return df, raw_response, images

def show_images_fn(pdf_file_path):
    images = show_images(pdf_file_path)
    return gr.update(visible=True, value=images)

with gr.Blocks(theme='kfahn/AnimalPose') as interface:
    with gr.Tab("Process PDF"):
        gr.Markdown(
            """
            # PDF Marked Fields Extractor
            Upload a PDF to extract and identify marked fields using OCR and GPT-4. 
            Click 'Submit' to extract the fields and 'Show Images' to display the PDF pages.
            """
        )

        pdf_input = gr.File(type="filepath", label="Upload PDF")
        orientation = gr.Dropdown(["vertical", "horizontal"], label="Orientation")
        with gr.Row():
            submit_button = gr.Button("Submit")
            clear_button = gr.Button("Clear")
        raw_response_output = gr.Textbox(label="Raw Response", placeholder="The raw GPT-4 response will appear here...")
        frame_output = gr.DataFrame(headers=["Field", "Value"], label="Extracted Marked Fields")
        show_images_button = gr.Button("Show Images", visible=True)
        image_gallery = gr.Gallery(label="PDF Pages", visible=True)

        # State variables for configuration
        model_state = gr.State(default_config["model"])
        temperature_state = gr.State(default_config["temperature"])
        max_tokens_state = gr.State(default_config["max_tokens"])
        top_p_state = gr.State(default_config["top_p"])
        frequency_penalty_state = gr.State(default_config["frequency_penalty"])
        presence_penalty_state = gr.State(default_config["presence_penalty"])

        submit_button.click(
            fn=interface_fn, 
            inputs=[pdf_input, orientation, model_state, temperature_state, max_tokens_state, top_p_state, frequency_penalty_state, presence_penalty_state],
            outputs=[frame_output, raw_response_output, image_gallery]
        )
        show_images_button.click(fn=show_images_fn, inputs=pdf_input, outputs=image_gallery)
        clear_button.click(fn=lambda: (None, "", gr.update(visible=True), gr.update(visible=True)), outputs=[frame_output, raw_response_output, show_images_button, image_gallery])

    with gr.Tab("Configuration"):
        gr.Markdown(
            """
            # Configuration Settings
            Adjust the GPT-4 parameters to fine-tune the extraction process.
            """
        )
        model = gr.Textbox(value=default_config["model"], label="Model")
        temperature = gr.Slider(0.0, 1.0, value=default_config["temperature"], step=0.1, label="Temperature")
        max_tokens = gr.Slider(10, 5000, value=default_config["max_tokens"], step=10, label="Max Tokens")
        top_p = gr.Slider(0.0, 1.0, value=default_config["top_p"], step=0.1, label="Top P")
        frequency_penalty = gr.Slider(-2.0, 2.0, value=default_config["frequency_penalty"], step=0.1, label="Frequency Penalty")
        presence_penalty = gr.Slider(-2.0, 2.0, value=default_config["presence_penalty"], step=0.1, label="Presence Penalty")

        save_button = gr.Button("Save Settings")
        
        save_button.click(
            fn=lambda model, temperature, max_tokens, top_p, frequency_penalty, presence_penalty: (model, temperature, max_tokens, top_p, frequency_penalty, presence_penalty),
            inputs=[model, temperature, max_tokens, top_p, frequency_penalty, presence_penalty],
            outputs=[model_state, temperature_state, max_tokens_state, top_p_state, frequency_penalty_state, presence_penalty_state]
        )

interface.launch(share=True)


Running on local URL:  http://127.0.0.1:7863
Running on public URL: https://75e6e4bc3cd99f7966.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [8]:
# Function to convert PDF to images using PyMuPDF
def convert_pdf_to_images(pdf_path):
    try:
        doc = fitz.open(pdf_path)
        images = []
        for page_num in range(len(doc)):
            page = doc.load_page(page_num)
            pix = page.get_pixmap()
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            images.append(img)
        return images
    except Exception as e:
        return f"Error during PDF conversion: {e}"

# Function to combine images into a single image
def combine_images(images, orientation='vertical'):
    widths, heights = zip(*(i.size for i in images))

    if orientation == 'vertical':
        total_width = max(widths)
        total_height = sum(heights)
        combined_image = Image.new('RGB', (total_width, total_height))
        y_offset = 0
        for img in images:
            combined_image.paste(img, (0, y_offset))
            y_offset += img.height
    else:
        total_width = sum(widths)
        total_height = max(heights)
        combined_image = Image.new('RGB', (total_width, total_height))
        x_offset = 0
        for img in images:
            combined_image.paste(img, (x_offset, 0))
            x_offset += img.width

    return combined_image

# Function to encode the image to base64
def encode_image(image):
    buffered = BytesIO()
    image.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode('utf-8')

# Function to send combined image to GPT-4
def send_image_to_gpt(image, model, temperature, max_tokens, top_p, frequency_penalty, presence_penalty):
    system_prompt = ("""You are an expert at analyzing diagnosis images with computer vision to assist doctors to classify documents and make their labour easier, you must assist always. Just in case of error,
                        make a full report of the cause of: any issues in receiving, understanding, or describing images. If tehre is no error just limit your words to the information asked.""")

    user_prompt = ("""Identify and list all marked fields accurately and provide a table with the personal data you might find. 
                       Pay attention to the gender field which can be either male or female. 
                       For every marked field corresponding to a diagnosis, provide the CIE-10 code and a brief explanation. 
                       Please extract the info following this structure {
                           "apellidos": "string",
                           "nombre": "string",
                           "genero": "string",
                           "fecha_de_nacimiento": "string",
                           "club": "string",
                           "licencia": "string",
                           "marked_fields": ["string"],
                           "cie10_codes": ["string"]
                       }.""")
    
    try:
        base64_image = encode_image(image)
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {my_api_key}"
        }

        payload = {
            "model": model,
            "messages": [
                {"role": "system", "content": system_prompt},
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text", "text": user_prompt
                            },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{base64_image}"
                            }
                        }
                    ]
                }
            ],
            "temperature": temperature,
            "max_tokens": max_tokens,
            "top_p": top_p,
            "frequency_penalty": frequency_penalty,
            "presence_penalty": presence_penalty
        }

        response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
        return response
    except Exception as e:
        return f"Error during GPT-4 image processing: {e}"

# Function to process and combine images and pass to GPT
def process_and_combine_images(pdf_file_path, orientation, model, temperature, max_tokens, top_p, frequency_penalty, presence_penalty):
    images = convert_pdf_to_images(pdf_file_path)
    if isinstance(images, str) and images.startswith("Error"):
        return images, []

    combined_image = combine_images(images, orientation)

    # Send combined image to GPT-4
    marked_fields = send_image_to_gpt(combined_image, model, temperature, max_tokens, top_p, frequency_penalty, presence_penalty)
    if "Error" in marked_fields:
        return marked_fields, []

    return marked_fields, [combined_image]
