# GetSwipe LLM Assignment

## Santhosh Krishna R - mail2santhoshkrishnaoffl@gmail.com

Built the following invoice information extractor using Gemini 1.5 Pro. The same model is used for both input types, with separate loading functions for PDF and PNG files. The extracted information is printed as JSON for better readability and usability.

In [57]:
#importing necessary packages

import google.generativeai as genai
import os

In [58]:
# Providing key access.
# The API key is read from the GOOGLE_API_KEY environment variable instead of
# being hard-coded, so the secret is never stored in the notebook or in version
# control. NOTE: any key that was previously committed in this cell should be
# treated as compromised and rotated.

_api_key = os.environ.get("GOOGLE_API_KEY")
if not _api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; export it before running this notebook."
    )
genai.configure(api_key=_api_key)

In [59]:
# Print the name of every available model that supports `generateContent`.

content_models = (
    candidate.name
    for candidate in genai.list_models()
    if "generateContent" in candidate.supported_generation_methods
)
for listed_name in content_models:
    print(listed_name)

models/gemini-1.0-pro-latest
models/gemini-1.0-pro
models/gemini-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-vision-latest
models/gemini-pro-vision
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-001
models/gemini-1.5-pro
models/gemini-1.5-pro-exp-0801
models/gemini-1.5-flash-latest
models/gemini-1.5-flash-001
models/gemini-1.5-flash
models/gemini-1.5-flash-001-tuning


In [60]:
# Generation settings handed to the model when it is constructed.

MODEL_CONFIG = dict(
    temperature=0.1,
    top_p=1,
    top_k=32,
    max_output_tokens=4096,
)

In [61]:
# Build the gemini-1.5-pro model object with the generation settings in MODEL_CONFIG.

model = genai.GenerativeModel(
    generation_config=MODEL_CONFIG,
    model_name="gemini-1.5-pro",
)

## For PNG

In [62]:
#Function to read the image as input and store it in bytes

from pathlib import Path

# MIME types for the image formats this notebook handles, keyed by the
# lower-cased file extension.
_IMAGE_MIME_TYPES = {
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".webp": "image/webp",
}


def image_format(image_path):
    """Load an image file into the inline-data structure sent to the model.

    Args:
        image_path: Location of the image, as a string or ``Path``.

    Returns:
        A one-element list containing a dict with the image's ``mime_type``
        and its raw ``data`` bytes.

    Raises:
        FileNotFoundError: If ``image_path`` does not exist.
    """
    img = Path(image_path)

    if not img.exists():
        raise FileNotFoundError(f"Could not find image: {img}")

    # Pick the MIME type from the extension so JPEG/WEBP files are not
    # mislabelled as PNG. Unknown extensions keep the historical "image/png"
    # default so existing callers see no change.
    mime_type = _IMAGE_MIME_TYPES.get(img.suffix.lower(), "image/png")

    return [
        {
            "mime_type": mime_type,
            "data": img.read_bytes(),  # Image is stored in bytes
        }
    ]

In [63]:
#Function to provide output

def gemini_output(image_path, system_prompt):
    """Ask the model to process one image and return its text reply.

    Args:
        image_path: Location of the image file to send.
        system_prompt: Instruction text sent ahead of the image.

    Returns:
        The ``text`` attribute of the model's response.
    """
    # image_format returns a list of parts; only the first one is sent.
    first_part = image_format(image_path)[0]
    reply = model.generate_content([system_prompt, first_part])
    return reply.text

In [64]:
# Invoice image to analyse (absolute path on the author's machine).
image_path = r"C:\Users\SANTHOSH\Downloads\Sample Invoice image.png"

# Instruction sent with the image: which invoice fields to extract and that
# the answer should be JSON.
system_prompt = """
               Input images in the form of receipts will be provided,
               and your task is to just print the customer details (with their name, address, phone number and email as seperate sub tags), products, the tax paid and total amount in JSON format with necessary tags.
               """

# Query the model and keep its text reply for display in the next cell.
output = gemini_output(image_path, system_prompt)

In [65]:
# Render the model's reply through IPython's Markdown display so the JSON it
# returned is shown as formatted notebook output, ready for reuse elsewhere.
# `Markdown(output)` must stay the last expression in the cell to be rendered.

from IPython.display import Markdown
Markdown(output)

```json
{"customer_details": {"name": "TEST", "address": "Hyderabad, TELANGANA, 500089", "phone number": "9108239284", "email": "test@gmail.com"}, "products": [{"item": "WASTE AND SCRAP OF STAINLESS STEEL", "HSN": "72042190", "Rate": "95.00", "Quantity": "6,790 KGS", "Total Amount": "6,45,050.00"}], "tax paid": "1,16,109.00", "total amount": "7,68,771.00"}
```

## For PDF

In [66]:
from pathlib import Path

def pdf_format(pdf_path):
    """Load a PDF file into the inline-data structure sent to the model.

    Args:
        pdf_path: Location of the PDF, as a string or ``Path``.

    Returns:
        A one-element list containing a dict with ``mime_type`` set to
        ``application/pdf`` and the file's raw bytes under ``data``.

    Raises:
        FileNotFoundError: If ``pdf_path`` does not exist.
    """
    source = Path(pdf_path)

    if source.exists():
        payload = {
            "mime_type": "application/pdf",
            "data": source.read_bytes(),
        }
        return [payload]

    raise FileNotFoundError(f"Could not find PDF: {source}")

In [67]:
#Function to provide output

def gemini_output(pdf_path, system_prompt):
    """Ask the model to process one document and return its text reply.

    This definition replaces the earlier image-only ``gemini_output`` as soon
    as the cell runs. To keep image calls working after that point, files
    with a known image extension are loaded with ``image_format``; every
    other path is loaded with ``pdf_format``, exactly as before.

    Args:
        pdf_path: Location of the file to send (PDF, or an image with a
            ``.png``/``.jpg``/``.jpeg``/``.webp`` extension).
        system_prompt: Instruction text sent ahead of the file.

    Returns:
        The ``text`` attribute of the model's response.

    Raises:
        FileNotFoundError: Propagated from the loader if the file is missing.
    """
    image_suffixes = {".png", ".jpg", ".jpeg", ".webp"}

    # Without this dispatch an image would be sent tagged as application/pdf.
    if Path(pdf_path).suffix.lower() in image_suffixes:
        file_parts = image_format(pdf_path)
    else:
        file_parts = pdf_format(pdf_path)

    input_prompt = [system_prompt, file_parts[0]]
    response = model.generate_content(input_prompt)
    return response.text

In [68]:
# Invoice PDF to analyse (absolute path on the author's machine).
pdf_path = r"C:\Users\SANTHOSH\Downloads\Sample Invoice.pdf"

# Instruction sent with the PDF: which invoice fields to extract and that the
# answer should be JSON. This rebinds the prompt used for the image run.
system_prompt = """
               Input pdf in the form of receipts will be provided,
               and your task is to just print the customer details (with their name, address, phone number and email as seperate sub tags), products, the tax paid and total amount in JSON format with necessary tags.
               """

# Query the model and keep its text reply for display in the next cell.
output = gemini_output(pdf_path, system_prompt)

In [69]:
# Render the model's reply through IPython's Markdown display so the JSON it
# returned is shown as formatted notebook output, ready for reuse elsewhere.
# `Markdown(output)` must stay the last expression in the cell to be rendered.

from IPython.display import Markdown
Markdown(output)

```json
{
 "customer_details": {
  "name": "TEST",
  "address": "Hyderabad, TELANGANA, 500089",
  "phone_number": "9108239284",
  "email": "test@gmail.com"
 },
 "products": [
  {
   "description": "WASTE AND SCRAP OF STAINLESS STEEL",
   "hsn_sac": "72042190",
   "rate": 95.00,
   "quantity": "6,790 KGS",
   "amount": 645050.00,
   "igst": "1,16,109.00",
   "total_amount": 761159.00
  }
 ],
 "tax_paid": 116109.00,
 "total_amount": 768771.00
}
```

In [70]:
# Run the extraction on a second PDF, reusing the current `system_prompt`.
# Both `pdf_path` and `output` are rebound, replacing the previous run's values.
pdf_path = r"C:\Users\SANTHOSH\Downloads\c669abb4-f485-4880-8973-cc7fdfeee22e.pdf"


output = gemini_output(pdf_path, system_prompt)

In [71]:
# Render the latest model reply as Markdown; the expression must stay last in
# the cell for the notebook to display it.
from IPython.display import Markdown
Markdown(output)

```json
{"customer_details": {"name": "TEST", "address": "Test\nHyderabad, TELANGANA, 500089", "phone_number": "9108239284", "email": "test@gmail.com"}, "products": [{"IMEI_NO": "1", "Item": "WASTE AND SCRAP OF STAINLESS STEEL", "HSN": "72042190", "Rate": "95.00", "Quantity": "6,790 KGS", "Total Amount": "6,45,050.00"}], "tax_paid": "1,16,109.00", "total_amount": "7,68,771.00"}
```