In [4]:
import os
import json
import base64
import requests
import ast
from io import BytesIO
from PIL import Image
import pandas as pd
import pypdfium2 as pdfium
from dotenv import load_dotenv
load_dotenv()
from openai import OpenAI

# Create the OpenAI client using the API key exposed through the environment.
client = OpenAI(api_key=os.environ.get('OPEN_AI_KEY'))

# Load the prompt text from the current working directory.
# NOTE(review): a later cell rebuilds `client` and reads 'prompt_9_dropdown.txt' into
# `prompt_txt` as well, so whichever of the two cells ran last decides the prompt in use.
with open(os.path.join(os.getcwd(), 'prompt_8names.txt'), mode='r') as f:
    prompt_txt = f.read()

In [1]:
import ast
import base64
import json
import os
import time
from io import BytesIO

import pandas as pd
import pypdfium2 as pdfium
import requests
from dotenv import load_dotenv
from openai import OpenAI
from PIL import Image

# Load variables from the .env file into the process environment.
load_dotenv()

# Build the API client; the key is looked up in the environment (load_dotenv() was called above).
client = OpenAI(api_key=os.environ.get('OPEN_AI_KEY'))

# Load the prompt template. Point this at the correct location of the prompt txt file.
with open(os.path.join(os.getcwd(), 'prompt_9_dropdown.txt'), mode='r') as f:
    prompt_txt = f.read()
    
def encode_page(image):
    '''
    Serialise a page image to PNG in memory and return it as base64 text.
    Input: PIL image
    Output: str, the base64 encoding of the PNG bytes
    '''
    with BytesIO() as png_buffer:
        # Write the PNG bytes into RAM rather than onto disk.
        image.save(png_buffer, format="png")
        png_bytes = png_buffer.getvalue()

    # b64encode yields bytes; decode to get an ordinary string.
    return base64.b64encode(png_bytes).decode('utf-8')

def parse_pdf_fetch_img(pdf_path, page_num):
    '''
    Render a single page of a pdf (Datasheet) as an image at 3x scale.
    Input: pdf_path, page_num (used directly as the index into the document)
    Output: the image produced by the page's render_topil call
    '''
    # Open the document and pick out the requested page.
    document = pdfium.PdfDocument(pdf_path)
    selected_page = document[page_num]

    # Rendering settings: 3x scale, no rotation, no crop, colour, no optimisation.
    render_options = dict(
        scale=3,
        rotation=0,
        crop=(0, 0, 0, 0),
        greyscale=False,
        optimise_mode=pdfium.OptimiseMode.NONE,
    )
    return selected_page.render_topil(**render_options)

def _strip_code_fence(text):
    '''
    Return the payload of the first ``` fenced block found in text, or the whole
    (whitespace-stripped) text when no fence is present.
    '''
    fence = "```"
    start = text.find(fence)
    if start == -1:
        return text.strip()

    body = text[start + len(fence):]
    end = body.find(fence)
    if end != -1:
        body = body[:end]
    # Drop only the language tag directly after the opening fence (```json); any
    # "json" that appears inside the payload itself must be left untouched.
    if body[:4].lower() == "json":
        body = body[4:]
    return body.strip()


def _parse_model_content(content):
    '''
    Turn the model's text answer into a Python object, falling back to the cleaned text.
    '''
    cleaned = _strip_code_fence(content)
    # Strict JSON first, so that null / true / false are understood.
    try:
        return json.loads(cleaned)
    except (ValueError, RecursionError):
        pass
    # Then Python-literal syntax (single quotes, None, True, False).
    try:
        return ast.literal_eval(cleaned)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Not parseable either way: hand back the cleaned text as-is.
        return cleaned


def call_openai_model(content_payload, max_retries=3, retry_wait=20):
    '''
    Given the content, this function fetches the response from the OpenAI api endpoint.
    The content from the response is either converted to a JSON object or returned as a JSON string.

    Input:
        content_payload: list of message parts (contains prompt and base64 image)
        max_retries: how many extra attempts to make when the endpoint answers 429 (rate limited)
        retry_wait: seconds to sleep before each retry
    Output:
        - the parsed object (dict / list / ...) when the answer is valid JSON or a Python literal
        - the answer text with its code fence removed when it cannot be parsed
        - None when the request did not succeed or the answer carried no text
    '''
    # create headers
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {os.getenv('OPEN_AI_KEY')}"
    }

    # initialise payload and set params
    payload = {
        "model": "gpt-4o",
        "messages": [
            {
                "role": "user",
                "content": content_payload
            }
        ],
        "max_tokens": 1024,
        "temperature": 0.01
    }

    # get the response, retrying only while the endpoint reports rate limiting
    total_attempts = max(0, max_retries) + 1
    for attempt in range(1, total_attempts + 1):
        response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
        if response.status_code != 429:
            break
        if attempt == total_attempts:
            print(f"Rate limit reached ({response.status_code}); giving up after {attempt} attempt(s)")
            return None
        print(f"Rate limit reached ({response.status_code}); retrying in {retry_wait} seconds")
        time.sleep(retry_wait)

    # extract the content and convert it to json or json string
    if response.status_code == 200:
        content = response.json()['choices'][0]['message']['content']
        if content is None:
            # Nothing to parse when the message has no text content.
            print("Response contained no text content")
            return None
        return _parse_model_content(content)

    if response.status_code == 500:
        print("Issue in request..")
    else:
        # Previously ignored silently; report it so failures are visible in the notebook.
        print(f"Request failed with status code {response.status_code}")
    return None

def fetch_attribute_json(model_num, pdf_path, page_num):
    '''
    Run the full extraction for one model number: render the datasheet page that holds the order info table,
    base64-encode it, append the model number to the prompt, and send both to the OpenAI endpoint.
    Input: model_num, pdf_path, page_num
    Output: whatever call_openai_model returns for the assembled payload
    '''
    # Page image -> base64 string.
    encoded_page = encode_page(parse_pdf_fetch_img(pdf_path, page_num))

    # Text part: the shared prompt followed by the model number.
    text_part = {"type": "text", "text": prompt_txt + model_num}

    # Image part: the page embedded as a PNG data URL, requested at high detail.
    image_part = {
        "type": "image_url",
        "image_url": {
            "url": f"data:image/png;base64,{encoded_page}",
            "detail": "high",
        },
    }

    # Send prompt and image together and pass the answer straight back.
    return call_openai_model([text_part, image_part])

In [2]:
# Inputs for a single extraction run: the datasheet, the page index to render, and the model number.
pdf_path = '/home/sriteja-code/info_table_extraction/1.pdf'
page_num = 1
model_num = 'GALN-SA4C-740-U-T4FT-WM-XX'

# Render the page, send it with the prompt to the model, and keep the response for inspection.
x = fetch_attribute_json(model_num, pdf_path, page_num)

In [3]:
# Display the response together with its type, to see whether it was parsed into a dict
# or came back as a raw string (or None when the request did not succeed).
x, type(x)

({'Series Name': {'GALN': 'GALN'},
  'CCT': {'40': '4000K'},
  'CRI': {'7': '70'},
  'Voltage': {'U': 'Universal (120-277V)'},
  'Lumens': None,
  'Environment': None,
  'Mounting': {'WM': 'Wall Mount'},
  'Dimming Protocol': None},
 dict)