<a href="https://colab.research.google.com/github/Leptons-Multiconcept/invoice-extraction/blob/main/InvoiceExtraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q -U google-generativeai


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/160.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━[0m [32m122.9/160.8 kB[0m [31m4.1 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m160.8/160.8 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/760.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m757.8/760.0 kB[0m [31m31.9 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m760.0/760.0 kB[0m [31m18.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import google.generativeai as genai

In [5]:
from google.colab import userdata

# Read the API key from google.colab's userdata store instead of hard-coding it
# in the notebook.
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')
# Register the key with the google.generativeai client for all later calls.
genai.configure(api_key=GOOGLE_API_KEY)

In [6]:
# Print the name of every model that advertises the 'generateContent' method;
# models without it are skipped.
for m in genai.list_models():
  if 'generateContent' not in m.supported_generation_methods:
    continue
  print(m.name)

models/gemini-1.0-pro-latest
models/gemini-1.0-pro
models/gemini-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-vision-latest
models/gemini-pro-vision
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-001
models/gemini-1.5-pro-002
models/gemini-1.5-pro
models/gemini-1.5-pro-exp-0801
models/gemini-1.5-pro-exp-0827
models/gemini-1.5-flash-latest
models/gemini-1.5-flash-001
models/gemini-1.5-flash-001-tuning
models/gemini-1.5-flash
models/gemini-1.5-flash-exp-0827
models/gemini-1.5-flash-002
models/gemini-1.5-flash-8b
models/gemini-1.5-flash-8b-001
models/gemini-1.5-flash-8b-latest
models/gemini-1.5-flash-8b-exp-0827
models/gemini-1.5-flash-8b-exp-0924


In [12]:
# Model Configuration

# Generation parameters handed to genai.GenerativeModel as `generation_config`.
MODEL_CONFIG = {
    'temperature': 0.2,
    'top_k': 32,
    'top_p': 1,
    'max_output_tokens': 4096,
}

# Backward-compatible alias: the constant was first introduced under this
# misspelled name and other cells still refer to it.
MODEL_CONVIG = MODEL_CONFIG

## Safety Settings of Model
# Every harm category uses the same blocking threshold, so the list is built
# from the category names instead of spelling out four near-identical dicts.
safety_settings = [
    {"category": harm_category, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
    for harm_category in (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]

## Load Gemini Model with Model Configuration

In [17]:
# Create the Gemini model handle used by the rest of the notebook, wiring in
# the generation parameters and safety settings defined in the cell above.
model = genai.GenerativeModel(
    model_name='gemini-1.5-flash',
    safety_settings=safety_settings,
    generation_config=MODEL_CONVIG,
)

## Define the Image Input Format for Gemini

In [18]:
from pathlib import Path

def image_format(image_path):
  """Read an image file and wrap it as an inline-data part for the model.

  Args:
    image_path: Location of the image file (string or path-like).

  Returns:
    A one-element list containing a dict with two keys: 'mime_type' (derived
    from the file extension, 'image/jpeg' when it cannot be determined) and
    'data' (the raw bytes of the file).

  Raises:
    FileNotFoundError: If nothing exists at image_path.
  """
  # Local import so this cell works without touching the notebook's import cells.
  import mimetypes

  img = Path(image_path)

  if not img.exists():
    raise FileNotFoundError(f"Could not find image: {img}")

  # The MIME type used to be hard-coded to JPEG, which mislabelled PNG (and any
  # other non-JPEG) input. Guess it from the extension and keep JPEG only as
  # the fallback for unknown or missing extensions.
  mime_type = mimetypes.guess_type(img.name)[0] or 'image/jpeg'

  image_paths = [
      {
          'mime_type': mime_type,
          'data': img.read_bytes()
      }
  ]
  return image_paths

## Gemini Model Output

In [16]:
def gemini_output(image_path, system_prompt, user_prompt):
  """Send one image plus two prompts to the model and return its text reply.

  Args:
    image_path: Path of the image file, forwarded to image_format().
    system_prompt: Text placed first in the request, ahead of the image.
    user_prompt: Text placed last in the request, after the image.

  Returns:
    The `text` attribute of the response from model.generate_content().
  """
  # image_format() returns a list; only its first entry is sent to the model.
  image_part = image_format(image_path)[0]
  reply = model.generate_content([system_prompt, image_part, user_prompt])
  return reply.text

## Extracting Part of the Information From Invoice

In [23]:
# Instructions placed ahead of the image in the request (spelling of
# "receipts" corrected in the text sent to the model).
system_prompt = """
                You are a specialist in comprehending receipts.
                Input images in the form of receipts will be provided to you,
                and your task is to respond to questions based on the content of the input image.
                """
# Invoice image to query.
image_path = '/content/invoice.png'

# Single question to ask about the invoice.
user_prompt = 'What is the invoice number?'

# Ask the question about the invoice image; as the last expression in the cell,
# the returned text is shown as the cell output.
gemini_output(image_path, system_prompt, user_prompt)

'The invoice number is 12345.'

## Extracting Whole Data in JSON from Invoice

In [26]:
# Instructions placed ahead of the image in the request, now also asking for
# JSON output (spelling of "receipts" and "appropriate" corrected in the text
# sent to the model).
system_prompt = """
                You are a specialist in comprehending receipts.
                Input images in the form of receipts will be provided to you,
                and your task is to respond to questions based on the content of the input image.
                Convert invoice data into JSON format with appropriate JSON tags as required for the data in image.
                """
# Invoice image to convert.
image_path = '/content/invoice.png'

# Request for the whole invoice as JSON.
user_prompt = 'Convert invoice data into JSON format with appropriate JSON tags as required.'


In [25]:
# Run the JSON-conversion prompts against the invoice image and keep the
# model's text reply for display in a later cell.
output = gemini_output(image_path, system_prompt, user_prompt)

In [27]:
from IPython.display import Markdown
# Render the reply as Markdown so the fenced JSON block in it is displayed
# formatted rather than as an escaped string.
Markdown(output)

```json
{
  "invoice_number": "12345",
  "invoice_date": "16 June 2025",
  "bill_to": {
    "name": "Imani Olowe",
    "phone": "+123-456-7890",
    "address": "63 Ivy Road, Hawkville, GA, USA 31036"
  },
  "items": [
    {
      "item": "Eggshell Camisole Top",
      "quantity": 1,
      "unit_price": 123,
      "total": 123
    },
    {
      "item": "Cuban Collar Shirt",
      "quantity": 2,
      "unit_price": 127,
      "total": 254
    },
    {
      "item": "Floral Cotton Dress",
      "quantity": 1,
      "unit_price": 123,
      "total": 123
    }
  ],
  "subtotal": 500,
  "tax": 0,
  "total": 500,
  "payment_information": {
    "bank": "Briard Bank",
    "account_name": "Samira Hadid",
    "account_number": "123-456-7890",
    "due_date": "5 July 2025"
  },
  "seller": {
    "name": "Samira Hadid",
    "address": "123 Anywhere St., Any City, ST 12345"
  }
}
```