In [1]:
if __name__ == "__main__":
    import os

    # Step the working directory up one level when run directly.
    # NOTE(review): presumably so project-relative paths such as "data/..."
    # resolve from the repository root - confirm. Re-running this cell moves
    # up one more level each time.
    os.chdir(os.pardir)

This `if` block only runs when the file is executed directly, not when it is imported as a module.
Its job is to change the working directory to the parent of the current working directory.

# Import libraries

In [2]:
from langchain_google_genai import ChatGoogleGenerativeAI
from dotenv import load_dotenv
from langchain.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate
import base64
from utils.img64 import read_image_base64

1. `langchain_google_genai` lets you use Google Generative AI (Gemini) models through LangChain.
2. `dotenv` loads environment variables (such as API keys) from a `.env` file.
3. `langchain.prompts` provides prompt templates, including few-shot templates, for structuring the messages sent to the LLM.
4. `utils.img64` converts images to base64, the format in which images are passed to the model in this notebook.

# Loading our environment variables (API keys)

In [3]:
# Load key/value pairs from a .env file into the process environment.
# NOTE(review): presumably this supplies the API key the LLM client below needs - confirm.
load_dotenv()

True

# Identifying our LLM + Adjusting its settings for our needs

In [4]:
# Chat model client shared by the rest of the notebook (used by process_invoice).
llm = ChatGoogleGenerativeAI(
    model="models/gemini-1.5-pro-latest",
    temperature=0  # minimum sampling randomness, for repeatable extraction results
)


# Creating the InvoiceData classes + Identifying our output parser

Pydantic is used here as it is a validation library, to ensure the correct format and input of our data. It lets us define data models using Python classes, where each field has a type (like str, int, or List). When data is passed in, Pydantic automatically checks if the types match and throws an error if something’s off. This makes your code safer and helps catch bugs early. It's super useful when working with LLMs or APIs because it ensures the model’s output matches what your app expects.

In [5]:
from langchain_core.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
from typing import List, Optional

# Schema for a single line item of an invoice.
# (Documented with comments rather than class docstrings: a docstring would be
# emitted as a "description" in the generated JSON schema and so alter the
# format instructions sent to the model.)
class InvoiceItem(BaseModel):
    item_name: str = Field(..., description="Name of the item")
    quantity: int = Field(..., description="Quantity of the item")
    # `...` keeps the field required; Optional only permits an explicit null value.
    taxes: Optional[float] = Field(..., description="Taxes applied to the item")
    total_price: float = Field(..., description="Total price for the item")

# Schema for a whole invoice: buyer, date and the list of line items.
class InvoiceData(BaseModel):
    buyer_name: str = Field(..., description="Name of the buyer")
    # Kept as a plain string; no date parsing or validation is applied here.
    invoice_date : str = Field(..., description="Date of the invoice")
    items: List[InvoiceItem] = Field(..., description="List of items in the invoice")

# Parser bound to InvoiceData; process_invoice uses its format instructions
# to tell the model which JSON structure to return.
parser = PydanticOutputParser(pydantic_object=InvoiceData)

    

# Creating our ground_truth_examples 

Ground truth examples are the correct input-output pairs we manually define. These are super important because they help the model understand what kind of answer we expect. For example, if we feed it an invoice image and the correct extracted data (like the buyer name, invoice date, and line items), the model can match that pattern. In few-shot prompting, we give the model a few of these examples directly in the prompt so it can imitate the format. This improves accuracy and helps reduce wrong or inconsistent outputs, especially when the input is messy like scanned documents.

## Images example

In [6]:
# Ground-truth pair for the image few-shot prompt: the base64-encoded invoice
# scan together with the structured data expected for it.
example = [
    {
        # Forward slashes resolve on Windows, Linux and macOS alike.
        "invoice_img": read_image_base64("data/invoices/FACTU2016020050.jpg"),
        "invoice_data": InvoiceData(
            buyer_name="Gemini Furniture",
            invoice_date="2016-02-02",
            items=[
                InvoiceItem(
                    item_name="Bureau personnalisable",
                    # 94 units: consistent with total_price=47000 at a unit
                    # price of 500, and with the extraction result printed
                    # later in this notebook (Quantity: 94, Unit Price: $500.00).
                    # NOTE(review): confirm against the scanned invoice.
                    quantity=94,
                    taxes=0.2,
                    total_price=47000
                ),
            ],
        ),
    },
]


## Text examples

In [7]:
# Ground-truth pair for the text few-shot prompt: raw invoice text together
# with the structured data expected for it.
# NOTE(review): taxes below are currency amounts (21.60, 72.00), while the
# image example uses a rate (0.2) and format_invoice_data renders
# taxes * 100 as a percentage - confirm which unit is intended.
text_example = [
    {
        "invoice_text": """
            Invoice Date: 2023-09-15
            Buyer: Alpha Supplies Co.
            
            Items:
            - Item: Desk Chair, Quantity: 3, Unit Price: $120.00, Tax: $21.60, Total: $381.60
            - Item: Standing Desk, Quantity: 2, Unit Price: $450.00, Tax: $72.00, Total: $972.00
        """,
        "invoice_data": InvoiceData(
            buyer_name="Alpha Supplies Co.",
            invoice_date="2023-09-15",
            items=[
                InvoiceItem(
                    item_name="Desk Chair",
                    # Pass real ints: the field is declared as int, so do not
                    # rely on Pydantic coercing numeric strings.
                    quantity=3,
                    taxes=21.60,
                    total_price=381.60
                ),
                InvoiceItem(
                    item_name="Standing Desk",
                    quantity=2,
                    taxes=72.00,
                    total_price=972.00
                ),
            ],
        ),
    },
]

# Identifying our Few shot examples

Few-shot learning is a technique in machine learning where a model performs a task after seeing only a very limited amount of labeled data. Instead of needing large datasets, like traditional machine learning models, the model generalizes from just a few examples. In this notebook the examples are not used for training: they are placed directly in the prompt (few-shot prompting).

In [8]:
# Template applied to every entry of `example`: a user turn carrying the
# invoice image, followed by the assistant turn with the expected data.
few_shot_examples = ChatPromptTemplate.from_messages([
    {
        "role": "user",
        "content": [
            {
                "type": "image",
                "source_type": "base64",
                # Filled from each example's "invoice_img" entry, so every
                # example shows its own image and the file is read only once.
                "data": "{invoice_img}",
                "mime_type": "image/jpeg"
            },
        ],
    },
    ("assistant", "{invoice_data}"),
])

# Expands the template above once per ground-truth example.
few_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt=few_shot_examples,
    examples=example
)

if __name__ == "__main__":
    # The loop variable is deliberately not called `example`: that name holds
    # the module-level list of examples and must survive a re-run of this cell.
    for message in few_shot_prompt.invoke({}).to_messages():
        message.pretty_print()



[{'type': 'image', 'source_type': 'base64', 'data': '/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAoHBwgHBgoICAgLCgoLDhgQDg0NDh0VFhEYIx8lJCIfIiEmKzcvJik0KSEiMEExNDk7Pj4+JS5ESUM8SDc9Pjv/2wBDAQoLCw4NDhwQEBw7KCIoOzs7Ozs7Ozs7Ozs7Ozs7Ozs7Ozs7Ozs7Ozs7Ozs7Ozs7Ozs7Ozs7Ozs7Ozs7Ozs7Ozv/wAARCAQAAtQDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD2aiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACi

## Text Examples

In [9]:
# Template applied to every entry of `text_example`: the raw invoice text as
# the user turn, followed by the assistant turn with the expected data.
few_shot_text_examples = ChatPromptTemplate.from_messages([
    {
        "role": "user",
        "content": "{invoice_text}"
    },
    ("assistant", "{invoice_data}")
])

# Expands the template above once per text ground-truth example.
few_shot_prompt_text = FewShotChatMessagePromptTemplate(
    example_prompt=few_shot_text_examples,
    examples=text_example
)

if __name__ == "__main__":
    # The loop variable is deliberately not called `example`, so the
    # module-level `example` list is not overwritten by a message object.
    for message in few_shot_prompt_text.invoke({}).to_messages():
        message.pretty_print()



            Invoice Date: 2023-09-15
            Buyer: Alpha Supplies Co.
            
            Items:
            - Item: Desk Chair, Quantity: 3, Unit Price: $120.00, Tax: $21.60, Total: $381.60
            - Item: Standing Desk, Quantity: 2, Unit Price: $450.00, Tax: $72.00, Total: $972.00
        

buyer_name='Alpha Supplies Co.' invoice_date='2023-09-15' items=[InvoiceItem(item_name='Desk Chair', quantity=3, taxes=21.6, total_price=381.6), InvoiceItem(item_name='Standing Desk', quantity=2, taxes=72.0, total_price=972.0)]


# Multimodal Invoice Extraction with Few-Shot Prompting

In [10]:
from langchain_core.messages import HumanMessage, SystemMessage
import json

def process_invoice(image_path=None, invoice_text=None):
    """Send an invoice (image and/or text) to the LLM and return its raw reply.

    The prompt consists of a system message containing the parser's format
    instructions, the image few-shot example messages, and then one user
    message per supplied input.

    Args:
        image_path: Path of an invoice image; it is read via
            read_image_base64 and sent as an "image/jpeg" content block.
        invoice_text: Invoice content as plain text.

    Returns:
        Whatever ``llm.invoke`` returns for the assembled messages.

    Raises:
        ValueError: If neither input is given, or if the image could not be
            read or encoded.
    """
    # Fail fast: without any input the model would only see the examples.
    if not image_path and not invoice_text:
        raise ValueError("Provide image_path and/or invoice_text to process.")

    # System message with the extraction instructions and the output schema.
    messages = [
        SystemMessage(
            content=f"Extract the data from this invoice. Follow this output format: {parser.get_format_instructions()}"
        )
    ]

    # Few-shot demonstrations go between the instructions and the real input.
    messages.extend(few_shot_prompt.invoke({}).to_messages())

    if image_path:
        image_data = read_image_base64(image_path)
        if not image_data:
            raise ValueError("Image could not be read or encoded.")

        messages.append(HumanMessage(
            content=[{
                "type": "image",
                "source_type": "base64",
                "data": image_data,
                "mime_type": "image/jpeg",
            }]))

    if invoice_text:
        messages.append(HumanMessage(content=invoice_text))

    return llm.invoke(messages)


# Cleanly Display Parsed Model Output

In [11]:
def _strip_code_fence(text):
    """Return ``text`` without a surrounding Markdown code fence, if present."""
    cleaned = text.strip()
    if cleaned.startswith("```"):
        first_line, newline, remainder = cleaned.partition("\n")
        language_tag = first_line[3:].strip()
        if newline and (not language_tag or language_tag.isalnum()):
            # Opening line is just the fence (optionally "```json"): drop it whole.
            cleaned = remainder
        else:
            # Payload starts right after the backticks: drop only the backticks.
            cleaned = cleaned[3:]
        cleaned = cleaned.rstrip()
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
    return cleaned.strip()


def format_invoice_data(output):
    """Render the model's JSON reply as a human-readable invoice summary.

    Args:
        output: Object whose ``content`` attribute holds the JSON text,
            optionally wrapped in a Markdown code fence such as "```json".

    Returns:
        The formatted summary, or a string starting with
        "Error processing output:" if the reply cannot be parsed or lacks
        the expected keys. Errors are reported this way instead of raised.
    """
    try:
        # str.strip("```json\n") would strip a *set of characters*, not a
        # prefix, and fails when whitespace surrounds the fence; remove the
        # fence explicitly instead.
        raw_json_string = _strip_code_fence(output.content)
        invoice_data = json.loads(raw_json_string)

        sections = [
            f"Buyer: {invoice_data['buyer_name']}\n"
            f"Invoice Date: {invoice_data['invoice_date']}\n"
            "\n"
            "Items:"
        ]

        for item in invoice_data['items']:
            # Missing, null or zero quantity falls back to 1 (avoids division by zero).
            qty = item.get("quantity", 1) or 1
            tax = item.get("taxes")
            # taxes is treated as a rate (0.2 is shown as 20.00%).
            tax_display = f"{tax * 100:.2f}%" if tax is not None else "N/A"
            unit_price = item['total_price'] / qty

            sections.append(
                "\n"
                f"- Item: {item['item_name']}\n"
                f"  Quantity: {qty}\n"
                f"  Unit Price: ${unit_price:.2f}\n"
                f"  Tax: {tax_display}\n"
                f"  Total Price: ${item['total_price']:.2f}\n"
            )

        return "".join(sections)

    except Exception as e:
        return f"Error processing output: {str(e)}"


## Testing our output

In [12]:
# Run the extraction on a plain-text invoice (no image is passed here).
# Note: this call runs whenever the cell/module executes; only the print
# below is guarded by the __main__ check.
output = process_invoice(invoice_text="""Invoice Date: 2023-09-15
Buyer: Alpha Supplies Co.
Items:
- Item: Desk Chair, Quantity: 3, Unit Price: $120.00, Tax: 0.216, Total: $381.60
- Item: Standing Desk, Quantity: 2, Unit Price: $450.00, Tax: 0.72, Total: $972.00
""")

# Turn the model's JSON reply into human-readable text
# (an "Error processing output: ..." string is returned if parsing fails).
formatted_output = format_invoice_data(output)

# Show the result only when run directly, not when imported.
if __name__ == "__main__":
    print(formatted_output)

Buyer: Alpha Supplies Co.
Invoice Date: 2023-09-15

Items:
- Item: Desk Chair
  Quantity: 3
  Unit Price: $127.20
  Tax: 21.60%
  Total Price: $381.60

- Item: Standing Desk
  Quantity: 2
  Unit Price: $486.00
  Tax: 72.00%
  Total Price: $972.00



In [13]:
# Run the extraction on a scanned invoice image.
# Forward slashes keep the path valid on Windows, Linux and macOS alike.
output = process_invoice(image_path="data/invoices/FACTU2016020050.jpg")

# Turn the model's JSON reply into human-readable text
# (an "Error processing output: ..." string is returned if parsing fails).
formatted_output = format_invoice_data(output)

# Show the result only when run directly, not when imported.
if __name__ == "__main__":
    print(formatted_output)

Buyer: Gemini Furniture
Invoice Date: 2016-02-02

Items:
- Item: Bureau personnalisable
  Quantity: 94
  Unit Price: $500.00
  Tax: 20.00%
  Total Price: $47000.00



## Taxes Barchart graph generator

In [14]:
import matplotlib.pyplot as plt

def plot_tax_per_item(formatted_output):
    """Build a bar chart of the tax percentage of each invoice item.

    Args:
        formatted_output: Text in the layout produced by format_invoice_data,
            i.e. "- Item: <name>" lines followed by "Tax: <value>%" lines.

    Returns:
        The matplotlib Figure containing the chart. Items without a
        percentage tax line (for example "Tax: N/A") are reported on stdout
        and left out of the chart.

    Raises:
        ValueError: If parsing or plotting fails; the original exception is
            attached as the cause.
    """
    try:
        # Rebuild the item records from the formatted text.
        items = []
        current_item = {}

        for raw_line in formatted_output.strip().split('\n'):
            line = raw_line.strip()
            if line.startswith('- Item:'):
                if current_item:
                    items.append(current_item)
                # Split only on the first marker so a name that itself
                # contains "Item:" is kept whole.
                current_item = {'item_name': line.split('Item:', 1)[1].strip()}
            elif 'Tax:' in line and '%' in line:
                # Drop the percent sign and convert the remainder to float.
                current_item['taxes'] = float(
                    line.split('Tax:', 1)[1].strip().replace('%', '')
                )

        if current_item:
            items.append(current_item)

        # Keep only the items that carry a numeric tax value.
        names = []
        taxes = []
        for item in items:
            name = item.get("item_name", "Unknown")
            tax = item.get("taxes")
            if isinstance(tax, (int, float)):
                names.append(name)
                taxes.append(tax)
            else:
                print(f"Skipping item without valid tax: {name}")

        # Plot at numeric positions rather than by name: identical names would
        # otherwise share one categorical slot and misalign the tick labels.
        positions = list(range(len(names)))

        fig, ax = plt.subplots(figsize=(10, 6))
        ax.bar(positions, taxes, color="skyblue")
        ax.set_xlabel("Item")
        ax.set_ylabel("Tax Percentage")
        ax.set_title("Tax per Invoice Item")
        ax.set_xticks(positions)
        ax.set_xticklabels(names, rotation=45)
        fig.tight_layout()

        return fig  # Return figure to be used with st.pyplot(fig)

    except Exception as e:
        raise ValueError(f"Error processing data: {str(e)}\nMake sure the formatted output contains valid item and tax information.") from e
