In [1]:
!pip install google-generativeai

Collecting protobuf (from google-generativeai)
  Using cached protobuf-5.28.2-cp310-abi3-win_amd64.whl.metadata (592 bytes)
Using cached protobuf-5.28.2-cp310-abi3-win_amd64.whl (431 kB)
Installing collected packages: protobuf
  Attempting uninstall: protobuf
    Found existing installation: protobuf 4.25.5
    Uninstalling protobuf-4.25.5:
      Successfully uninstalled protobuf-4.25.5
Successfully installed protobuf-5.28.2


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow-intel 2.17.0 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 5.28.2 which is incompatible.


In [2]:
# Importing GenerativeAI
import google.generativeai as genai

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
genai.configure(api_key='AIzaSyAwxsQslayMrRVEx5f2CgtPkc5gGYWU90M')

In [4]:
for m in genai.list_models():
  if 'generateContent' in m.supported_generation_methods:
    print(m.name)

models/gemini-1.0-pro-latest
models/gemini-1.0-pro
models/gemini-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-vision-latest
models/gemini-pro-vision
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-001
models/gemini-1.5-pro-002
models/gemini-1.5-pro
models/gemini-1.5-pro-exp-0801
models/gemini-1.5-pro-exp-0827
models/gemini-1.5-flash-latest
models/gemini-1.5-flash-001
models/gemini-1.5-flash-001-tuning
models/gemini-1.5-flash
models/gemini-1.5-flash-exp-0827
models/gemini-1.5-flash-002
models/gemini-1.5-flash-8b
models/gemini-1.5-flash-8b-001
models/gemini-1.5-flash-8b-latest
models/gemini-1.5-flash-8b-exp-0827
models/gemini-1.5-flash-8b-exp-0924


In [5]:
# Model Configuration
MODEL_CONFIG = {
  "temperature": 0.2,
  "top_p": 1,
  "top_k": 32,
  "max_output_tokens": 4096,
}

## Safety Settings of Model
safety_settings = [
  {
    "category": "HARM_CATEGORY_HARASSMENT",
    "threshold": "BLOCK_MEDIUM_AND_ABOVE"
  },
  {
    "category": "HARM_CATEGORY_HATE_SPEECH",
    "threshold": "BLOCK_MEDIUM_AND_ABOVE"
  },
  {
    "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "threshold": "BLOCK_MEDIUM_AND_ABOVE"
  },
  {
    "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
    "threshold": "BLOCK_MEDIUM_AND_ABOVE"
  }
]

In [6]:
# Load Gemini Pro Vision model
model = genai.GenerativeModel(model_name = "gemini-1.5-flash",
                              generation_config = MODEL_CONFIG,
                              safety_settings = safety_settings)

In [7]:
# Setting Image Format
from pathlib import Path

def image_format(image_path):
    img = Path(image_path)

    if not img.exists():
        raise FileNotFoundError(f"Could not find image: {img}")

    image_parts = [
        {
            "mime_type": "image/png", ## Mime type are PNG - image/png. JPEG - image/jpeg. WEBP - image/webp
            "data": img.read_bytes()
        }
    ]
    return image_parts

In [8]:
# Model Output
def gemini_output(image_path, system_prompt, user_prompt):

    image_info = image_format(image_path)
    input_prompt= [system_prompt, image_info[0], user_prompt]
    response = model.generate_content(input_prompt)
    return response.text

In [9]:
system_prompt = """
               You are a specialist in comprehending receipts.
               Input images in the form of receipts will be provided to you,
               and your task is to respond to questions based on the content of the input image.
               """

image_path = r"c:/Users/edominer/Python Project/Extracting Text from Invoice/Edominer-1201_page-1.jpg"

user_prompt = "What is the name of buyer in the image?"

gemini_output(image_path, system_prompt, user_prompt)

"The buyer's name is EDominer Technologies Pvt. Ltd."

In [10]:
user_prompt = "What is the name of purchased item in the image?"

gemini_output(image_path, system_prompt, user_prompt)

'The purchased item is a Dedicated Server.'

In [11]:
user_prompt = "What is the name of seller name in the image?"

gemini_output(image_path, system_prompt, user_prompt)

'The seller name is DIADEM TECHNOLOGIES PRIVATE LIMITED.'

In [12]:
user_prompt = "Convert Invoice data into json format with appropriate json tags as required for the data in image "
output=gemini_output(image_path, system_prompt, user_prompt)

In [13]:
output

'```json\n{"IRN": "5b8fe4c4fee658aa423aaa8f98aa900970832600c70c3fa182417192479502", "Ack No.": "182417192479502", "Ack Date": "20-Sep-24", "Invoice No.": "DTPL/2425/1201", "Dated": "20-Sep-24", "Buyer (Bill to)": "EDominer Technologies Pvt. Ltd.", "Address": "304, PS Continental\\n83/2/1, Topsia Road (South)\\nKolkata - 700046", "GSTIN/UIN": "19AAJCA4591Q1ZT", "State Name": "West Bengal, Code : 19", "Place of Supply": "West Bengal", "Description of Services": "Dedicated Server\\nWindows - Dedicated\\nProcessor: E5-2680 V4 - 2.4 Ghz\\n- 14 Cores / 28 Threads\\nHDDs: 960 GB x 2 SSD in RAID +\\n1 x 240 GB m.2 nvme SSD\\nRAM: 128 GB DDR4 RAM\\nRAID: NA\\nCPU Cache: 35 MB Smart cache\\nNumber of IPs: 1 Public IP Addresses\\nwith Firewall Protection\\nBandwidth: 1 TB per month\\nIPs: 1 Public IP Addresses with Fortinet IPS Protection\\nOS: Windows Server 2019 Std Edition with HyperV\\nVMs: 2 Windows 2019 VMs included\\nFirewall: Fortigate Network Protection\\nwith NAT enabled IP\\nServer Bac

In [14]:
output=output[8:].split("\n```")[0]

In [15]:
output

'{"IRN": "5b8fe4c4fee658aa423aaa8f98aa900970832600c70c3fa182417192479502", "Ack No.": "182417192479502", "Ack Date": "20-Sep-24", "Invoice No.": "DTPL/2425/1201", "Dated": "20-Sep-24", "Buyer (Bill to)": "EDominer Technologies Pvt. Ltd.", "Address": "304, PS Continental\\n83/2/1, Topsia Road (South)\\nKolkata - 700046", "GSTIN/UIN": "19AAJCA4591Q1ZT", "State Name": "West Bengal, Code : 19", "Place of Supply": "West Bengal", "Description of Services": "Dedicated Server\\nWindows - Dedicated\\nProcessor: E5-2680 V4 - 2.4 Ghz\\n- 14 Cores / 28 Threads\\nHDDs: 960 GB x 2 SSD in RAID +\\n1 x 240 GB m.2 nvme SSD\\nRAM: 128 GB DDR4 RAM\\nRAID: NA\\nCPU Cache: 35 MB Smart cache\\nNumber of IPs: 1 Public IP Addresses\\nwith Firewall Protection\\nBandwidth: 1 TB per month\\nIPs: 1 Public IP Addresses with Fortinet IPS Protection\\nOS: Windows Server 2019 Std Edition with HyperV\\nVMs: 2 Windows 2019 VMs included\\nFirewall: Fortigate Network Protection\\nwith NAT enabled IP\\nServer Backup: Dail

In [16]:
import ast

# Convert string to dictionary
data = ast.literal_eval(output)

# Print the dictionary
print(data)


{'IRN': '5b8fe4c4fee658aa423aaa8f98aa900970832600c70c3fa182417192479502', 'Ack No.': '182417192479502', 'Ack Date': '20-Sep-24', 'Invoice No.': 'DTPL/2425/1201', 'Dated': '20-Sep-24', 'Buyer (Bill to)': 'EDominer Technologies Pvt. Ltd.', 'Address': '304, PS Continental\n83/2/1, Topsia Road (South)\nKolkata - 700046', 'GSTIN/UIN': '19AAJCA4591Q1ZT', 'State Name': 'West Bengal, Code : 19', 'Place of Supply': 'West Bengal', 'Description of Services': 'Dedicated Server\nWindows - Dedicated\nProcessor: E5-2680 V4 - 2.4 Ghz\n- 14 Cores / 28 Threads\nHDDs: 960 GB x 2 SSD in RAID +\n1 x 240 GB m.2 nvme SSD\nRAM: 128 GB DDR4 RAM\nRAID: NA\nCPU Cache: 35 MB Smart cache\nNumber of IPs: 1 Public IP Addresses\nwith Firewall Protection\nBandwidth: 1 TB per month\nIPs: 1 Public IP Addresses with Fortinet IPS Protection\nOS: Windows Server 2019 Std Edition with HyperV\nVMs: 2 Windows 2019 VMs included\nFirewall: Fortigate Network Protection\nwith NAT enabled IP\nServer Backup: Daily automated backups\

In [17]:
import json

json_object = json.dumps(data)

print(json_object)

{"IRN": "5b8fe4c4fee658aa423aaa8f98aa900970832600c70c3fa182417192479502", "Ack No.": "182417192479502", "Ack Date": "20-Sep-24", "Invoice No.": "DTPL/2425/1201", "Dated": "20-Sep-24", "Buyer (Bill to)": "EDominer Technologies Pvt. Ltd.", "Address": "304, PS Continental\n83/2/1, Topsia Road (South)\nKolkata - 700046", "GSTIN/UIN": "19AAJCA4591Q1ZT", "State Name": "West Bengal, Code : 19", "Place of Supply": "West Bengal", "Description of Services": "Dedicated Server\nWindows - Dedicated\nProcessor: E5-2680 V4 - 2.4 Ghz\n- 14 Cores / 28 Threads\nHDDs: 960 GB x 2 SSD in RAID +\n1 x 240 GB m.2 nvme SSD\nRAM: 128 GB DDR4 RAM\nRAID: NA\nCPU Cache: 35 MB Smart cache\nNumber of IPs: 1 Public IP Addresses\nwith Firewall Protection\nBandwidth: 1 TB per month\nIPs: 1 Public IP Addresses with Fortinet IPS Protection\nOS: Windows Server 2019 Std Edition with HyperV\nVMs: 2 Windows 2019 VMs included\nFirewall: Fortigate Network Protection\nwith NAT enabled IP\nServer Backup: Daily automated backups\

In [18]:
user_prompt = "What is the total amount in the image?"

gemini_output(image_path, system_prompt, user_prompt)

'The total amount in the image is 7000.00.'

In [19]:
user_prompt = "What is the place of supply in the image?"

gemini_output(image_path, system_prompt, user_prompt)

'The place of supply is West Bengal.'

# Extracting from PDF

In [20]:
from pathlib import Path

def pdf_format(pdf_path):
    pdf = Path(pdf_path)
    if not pdf.exists():
        raise FileNotFoundError(f"Could not find PDF: {pdf}")
    
    pdf_parts = [
        {
            "mime_type": "application/pdf",  # MIME type for PDF
            "data": pdf.read_bytes()
        }
    ]
    return pdf_parts

# Example usage
pdf_path = r"c:/Users/edominer/Python Project/Extracting Text from Invoice/Edominer-1201.pdf"
try:
    formatted_pdf = pdf_format(pdf_path)
    print("Formatted PDF:", formatted_pdf)
except FileNotFoundError as e:
    print(e)


Formatted PDF: [{'mime_type': 'application/pdf', 'data': b'%PDF-1.5\n%\xa7\xe3\xf1\xf1\n2 0 obj\n<<\n/Type /Catalog\n/Pages 4 0 R\n/AcroForm 5 0 R\n/Version /1#2E5\n>>\nendobj\n10 0 obj\n<<\n/Filter /FlateDecode\n/Length 10\n>>\nstream\r\nx\x9c+\xe4\x02\x00\x00\xee\x00|\r\nendstream\nendobj\n11 0 obj\n<<\n/Filter /FlateDecode\n/Length 10\n>>\nstream\r\nx\x9c+\xe4\x02\x00\x00\xee\x00|\r\nendstream\nendobj\n12 0 obj\n<<\n/Filter /FlateDecode\n/Length 10\n>>\nstream\r\nx\x9c+\xe4\x02\x00\x00\xee\x00|\r\nendstream\nendobj\n13 0 obj\n<<\n/Filter /FlateDecode\n/Length 10\n>>\nstream\r\nx\x9c+\xe4\x02\x00\x00\xee\x00|\r\nendstream\nendobj\n14 0 obj\n<<\n/Filter /FlateDecode\n/Length 1752\n>>\nstream\r\nx\x9c\x95Y\xdfW\xe2\xc8\x12~\xe7\xaf\xa8G\xf7\\\x81\xeeN\'\x9d\xf0\x06\x84q2\xa3\xc8\x92\xe8\xdc\xb3;\xf7!\x86Vs\x84\x84\rA\xd7\xfd\xebou@\x87\x84\x0e\xcd\x1e=*\x9d\xae\xaf\xab\xaa\xbf\xfa\x15GQ\xa7\xff\x85\x02e\x10=v(\x10\xf5\xd5\xf3\x80:\x1c\\jA\xb4\xeaTKP<u\\\x17\xa2\x7f:\x7f^D\xc3\xffB0\xbd

In [21]:
# Model Output
def gemini_output(pdf_path, system_prompt, user_prompt):

    pdf_info = pdf_format(pdf_path)
    input_prompt= [system_prompt, pdf_info[0], user_prompt]
    response = model.generate_content(input_prompt)
    return response.text

In [22]:
system_prompt = """
               You are a specialist in comprehending receipts.
               Input images in the form of receipts will be provided to you,
               and your task is to respond to questions based on the content of the input image.
               """

user_prompt = "What is the name of buyer in the image?"

gemini_output(pdf_path, system_prompt, user_prompt)

"The buyer's name is EDominer Technologies Pvt. Ltd."

In [23]:
user_prompt = "What are the name of purchased item in the image?"

gemini_output(pdf_path, system_prompt, user_prompt)

'The purchased items are:\n- Dedicated Server\n- SQL Server Web Edition\n- Windows VPS\n- Additional Windows VMs\n- 1 x Automated Agentless VM Backups\n- 1 x IPv4 Address\n- 1 Windows OS'

In [24]:
user_prompt = "What is the total amount in the image?"

gemini_output(pdf_path, system_prompt, user_prompt)

'The total amount is 9,817.60.'

In [25]:
user_prompt = "Convert Invoice data into json format with appropriate json tags as required for the data in image "
output=gemini_output(pdf_path, system_prompt, user_prompt)

In [26]:
output=output[8:].split("\n```")[0]

In [27]:
output

'{\n  "invoice_data": {\n    "invoice_number": "DTPL/2425/1201",\n    "invoice_date": "20-Sep-24",\n    "supplier_name": "DIADEM TECHNOLOGIES PRIVATE LIMITED",\n    "supplier_address": "DIADEM Primarc Tower, Suite No. 502/503\\n5th Floor, DN - 36, Sector - V\\nSalt Lake, Kolkata - 700091",\n    "supplier_gst": "19AABCD9005E1ZW",\n    "supplier_state": "West Bengal",\n    "supplier_state_code": "19",\n    "supplier_cin": "U72200WB2003PTC096904",\n    "supplier_contact": "6634 1414",\n    "supplier_email": "sales@diadam.in",\n    "buyer_name": "EDominer Technologies Pvt. Ltd.",\n    "buyer_address": "304, PS Continental\\n83/2/1, Topsia Road (South)\\nKolkata - 700046",\n    "buyer_gst": "19AAJCA4591Q1ZT",\n    "buyer_state": "West Bengal",\n    "buyer_state_code": "19",\n    "buyer_place_of_supply": "West Bengal",\n    "items": [\n      {\n        "description": "Dedicated Server\\nWindows - Dedicated\\nProcessor: E5-2680 v4 - 2.4\\nGhz\\n- 14 Cores / 28 Threads\\nHDDs: 960 GB x 2 SSD i

In [28]:
import ast

# Convert string to dictionary
data = ast.literal_eval(output)

# Print the dictionary
print(data)

ValueError: malformed node or string on line 27: <ast.Name object at 0x0000014A3410E140>