In [None]:
import google.generativeai as genai
from pdf2jpg import pdf2jpg
from dotenv import load_dotenv
import os

# Let python-dotenv populate the process environment, then read the Gemini API
# key from it. The lookup yields None when the variable is not set.
load_dotenv()
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")


In [None]:
# PDF to process; later cells derive the rendered page-image path from this name.
filename = "example.pdf"

In [None]:
def converttoimg(filename, destname):
    """Convert every page of a PDF into JPG images.

    Thin wrapper around ``pdf2jpg.convert_pdf2jpg`` called with ``pages="ALL"``.

    Args:
        filename: Path of the PDF passed to pdf2jpg as the input.
        destname: Output location passed to pdf2jpg.

    Returns:
        None. Whatever pdf2jpg returns is discarded.
    """
    pdf2jpg.convert_pdf2jpg(filename, destname, pages="ALL")


# Convert the configured PDF, using the current directory as the destination.
converttoimg(filename=filename, destname="./")

In [None]:
# Hand the API key read from the environment to the google.generativeai client.
genai.configure(api_key=GOOGLE_API_KEY)

#### LIST OF MODELS

In [None]:
# Print the name of each available model that supports `generateContent`;
# models lacking that generation method are skipped.
for m in genai.list_models():
  if 'generateContent' not in m.supported_generation_methods:
    continue
  print(m.name)


In [None]:
# Generation settings passed to the model as `generation_config`.
MODEL_CONFIG = dict(
  temperature=0.2,
  top_p=1,
  top_k=32,
  max_output_tokens=4096,
)

# Safety settings passed to the model: every listed harm category uses the
# same blocking threshold, so the entries are generated from the category names.
safety_settings = [
  {"category": category, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
  for category in (
    "HARM_CATEGORY_HARASSMENT",
    "HARM_CATEGORY_HATE_SPEECH",
    "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "HARM_CATEGORY_DANGEROUS_CONTENT",
  )
]

## LOAD GEMINI MODEL WITH MODEL CONFIGURATIONS

In [None]:
# Build the module-level Gemini model object that gemini_output() calls,
# using the generation and safety settings defined in the configuration cell.
# NOTE(review): the model name is hard-coded — confirm "gemini-pro-vision" still
# appears in the list_models() output above, since served model names change.
model = genai.GenerativeModel(model_name = "gemini-pro-vision",
                              generation_config = MODEL_CONFIG,
                              safety_settings = safety_settings)



## DEFINE THE IMAGE FORMAT FOR GEMINI INPUT

In [None]:
from pathlib import Path

def image_format(image_path):
    """Read an image file and wrap it in the inline-data structure sent to Gemini.

    The MIME type is derived from the file suffix instead of always being
    reported as PNG, so the JPG pages rendered from the PDF are labelled
    ``image/jpeg``. Files whose suffix is not recognized as an image type keep
    the previous ``image/png`` label.

    Args:
        image_path: Path (``str`` or ``Path``) of the image file to load.

    Returns:
        A one-element list holding a dict with ``"mime_type"`` (str) and
        ``"data"`` (the raw file bytes).

    Raises:
        FileNotFoundError: If ``image_path`` does not exist.
    """
    import mimetypes  # local import keeps this cell self-contained

    img = Path(image_path)

    if not img.exists():
        raise FileNotFoundError(f"Could not find image: {img}")

    # Explicit table first: not every Python version / platform has all of
    # these registered in `mimetypes` (WEBP in particular).
    known_types = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".webp": "image/webp",
    }
    mime_type = known_types.get(img.suffix.lower())
    if mime_type is None:
        guessed, _ = mimetypes.guess_type(img.name)
        # Fall back to the historical default when the suffix is unknown or
        # does not map to an image type.
        if guessed and guessed.startswith("image/"):
            mime_type = guessed
        else:
            mime_type = "image/png"

    image_parts = [
        {
            "mime_type": mime_type,
            "data": img.read_bytes()
        }
    ]
    return image_parts


## GEMINI MODEL OUTPUT

In [None]:
def gemini_output(image_path, system_prompt, user_prompt):
    """Send one image plus two prompts to the Gemini model and return its text.

    The image is loaded through ``image_format`` and the request is sent as
    ``[system_prompt, image part, user_prompt]`` to the module-level ``model``.

    Args:
        image_path: Path of the image to include in the request.
        system_prompt: Instruction text placed before the image.
        user_prompt: Request text placed after the image.

    Returns:
        The ``text`` of the model response; it is also printed.
    """
    image_part = image_format(image_path)[0]
    response = model.generate_content([system_prompt, image_part, user_prompt])
    text = response.text
    print(text)
    return text

### EXTRACTING ALL INVOICE DATA AS JSON

In [None]:
# Instruction placed before the image in the request. The stray trailing
# double-quote that used to end the last sentence has been removed so it is no
# longer sent to the model as part of the prompt.
system_prompt = """
               You are a specialist in comprehending invoices.
               Input images in the form of invoices will be provided to you,
               and your task is to Convert Invoice data into JSON format with appropriate json tags as required for the data in image
               """
# First rendered page of the PDF.
# NOTE(review): assumes pdf2jpg wrote page 0 to "<filename>_dir/0_<filename>.jpg"
# relative to the working directory — confirm against the converter output.
image_path = f"{filename}_dir/0_{filename}.jpg"
# Request placed after the image in the request.
user_prompt = "Convert Invoice data into JSON format with appropriate json tags as required for the data in image "

In [None]:
# Run the extraction. Only Exception subclasses are caught, so KeyboardInterrupt
# and SystemExit still stop the cell. The failure is reported with its type and
# message and then re-raised, so later cells never run against a missing or
# stale `output` left over from a previous execution.
try:
    output = gemini_output(image_path, system_prompt, user_prompt)
except Exception as exc:
    print(f"Error: {type(exc).__name__}: {exc}")
    raise

In [None]:
# Display the raw response string stored by the extraction cell.
output

In [None]:
from IPython.display import Markdown
# Render the response as Markdown instead of showing the raw string repr.
Markdown(output)