In [1]:
from ai_tools import ask_ai
from IPython.display import Markdown

In [None]:
def extract_invoice_data(invoice_contents):
    """Ask the AI helper for an invoice's date and total amount.

    Builds a prompt that embeds ``invoice_contents`` together with a few
    example lines of the requested output shape, sends it through ``ask_ai``
    and returns the reply with surrounding whitespace removed.

    Args:
        invoice_contents: Raw text of a single invoice.

    Returns:
        The reply from ``ask_ai`` with leading/trailing whitespace stripped.
        The prompt requests the shape ``<date> - <total amount>`` (for
        example ``2024-04-18 - $48,727.50``), but the reply is not validated
        here.

    Note:
        ``ask_ai`` is expected to return a ``str``; the stripping below
        relies on that.
    """
    extraction_prompt = f"""Extract the following information from this invoice:
    - Date
    - Total Amount
    
    Invoice contents:
    {invoice_contents}
    
    Return only the date and total amount separated by dashes
    like this:
    
    2024-04-15 - $18,900.00
    2024-04-18 - $48,727.50 
    2023-12-31 - €2,350.44
    
    Extracted information:
    """

    extracted_data = ask_ai(extraction_prompt)
    # The reply may carry a trailing newline or padding; strip it so callers
    # that split on " - " get clean date/amount fields.
    return extracted_data.strip()

# Sample invoice text used to try the extractor on a single document.
# It shows an invoice date (April 18, 2024) and a total ($48,727.50).
example_data = """
TECH SOLUTIONS INC.
789 Innovation Drive
Seattle, WA 98101
Tax ID: 98-7654321

INVOICE

Bill To:                                    Invoice No: INV-2024-0103 
Sarah Johnson                               Date: April 18, 2024
789 Enterprise Road                         Due Date: May 18, 2024
Chicago, IL 60601

Description                     Quantity    Rate        Amount
-----------------------------------------------------------------
AI Model Development              120      $200.00    $24,000.00
Data Processing Services           80      $125.00    $10,000.00
System Integration                 40      $175.00     $7,000.00
Hardware Configuration             1     $3,500.00     $3,500.00
                                                    ------------
                                           Subtotal:  $44,500.00
                                           Tax (9.5%): $4,227.50
                                           Total:     $48,727.50

"""

# Bare call as the last expression of the cell, so the notebook displays
# the string returned by the extractor.
extract_invoice_data(example_data)

'2024-04-18 - $48,727.50'

Now, let's apply this to all the invoice data we have!

In [None]:
# Run the extractor over every invoice file and collect one record
# (file name, date, amount) per invoice.
folder_with_invoices = "./assets-resources/fake-invoices/"
invoice_files = ["invoice1.txt", "invoice2.txt", "invoice3.txt"]

invoice_data_list = []
for invoice_file in invoice_files:
    file_path = folder_with_invoices + invoice_file
    # Decode explicitly instead of relying on the platform default: the
    # invoices include non-ASCII currency symbols such as "€".
    # NOTE(review): assumes the invoice files are saved as UTF-8 - confirm.
    with open(file_path, "r", encoding="utf-8") as f:
        invoice_contents = f.read()

    extracted_data = extract_invoice_data(invoice_contents)
    # Split on the first " - " only and tolerate surrounding whitespace, so a
    # padded reply or a stray second separator does not break the unpacking.
    date, separator, amount = extracted_data.strip().partition(" - ")
    if not separator:
        # Same exception type the failed tuple-unpacking used to raise, but
        # with enough context to identify the offending invoice.
        raise ValueError(
            f"Unexpected extraction result for {invoice_file}: {extracted_data!r}"
        )
    invoice_data = {
        "file": invoice_file,
        "date": date.strip(),
        "amount": amount.strip(),
    }

    invoice_data_list.append(invoice_data)

invoice_data_list

[{'file': 'invoice1.txt', 'date': '2023-12-31', 'amount': '€2,350.44'},
 {'file': 'invoice2.txt', 'date': '2024-04-15', 'amount': '$18,900.00'},
 {'file': 'invoice3.txt', 'date': '2024-04-18', 'amount': '$48,727.50'}]

In [None]:
# Render the extracted invoice records as a Markdown summary:
# one level-2 section per file, with its date and amount in bold labels.
summary_parts = ["# Invoice Data Summary\n\n"]
for invoice in invoice_data_list:
    summary_parts.append(
        f"## {invoice['file']}\n\n"
        f"**Date:** {invoice['date']}\n\n"
        f"**Amount:** {invoice['amount']}\n\n"
    )
markdown_content = "".join(summary_parts)

Markdown(markdown_content)

# Invoice Data Summary

## invoice1.txt

**Date:** 2023-12-31

**Amount:** €2,350.44

## invoice2.txt

**Date:** 2024-04-15

**Amount:** $18,900.00

## invoice3.txt

**Date:** 2024-04-18

**Amount:** $48,727.50



## Practical Example: Document Analysis System

Let's create a system that analyzes text documents and extracts key information:

In [None]:
def analyze_document(filename, encoding="utf-8"):
    """
    Analyzes a document and extracts key metrics.

    Args:
        filename: Path of the text file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding.

    Returns:
        A dict with the keys ``filename`` (the argument as given),
        ``total_chars`` (number of decoded characters), ``total_words``
        (whitespace-separated tokens), ``total_lines`` (per
        ``str.splitlines``) and ``unique_words`` (distinct lower-cased
        tokens; punctuation attached to a token is kept as part of it).

    Raises:
        FileNotFoundError: If ``filename`` does not exist.
        UnicodeDecodeError: If the file is not valid in ``encoding``.
    """
    with open(filename, "r", encoding=encoding) as file:
        content = file.read()

    # Tokenize once for the word count instead of splitting the text twice.
    words = content.split()

    # Basic metrics
    metrics = {
        "filename": filename,
        "total_chars": len(content),
        "total_words": len(words),
        "total_lines": len(content.splitlines()),
        "unique_words": len(set(content.lower().split())),
    }

    return metrics

def generate_report(metrics):
    """
    Render document metrics as a plain-text report.

    Expects ``metrics`` to provide the keys ``filename``, ``total_chars``,
    ``total_words``, ``total_lines`` and ``unique_words``; a missing key
    raises ``KeyError``. Returns the report as one newline-terminated string:
    a title, an underline, then one ``Label: value`` line per metric.
    """
    # (label shown in the report, key looked up in ``metrics``), in print order
    labelled_fields = (
        ("Filename", "filename"),
        ("Character Count", "total_chars"),
        ("Word Count", "total_words"),
        ("Line Count", "total_lines"),
        ("Unique Words", "unique_words"),
    )
    header_lines = ["Document Analysis Report", "========================"]
    metric_lines = [f"{label}: {metrics[key]}" for label, key in labelled_fields]
    return "\n".join(header_lines + metric_lines) + "\n"

# Demo: write a small three-line document to disk, analyze it,
# and print the formatted report.
sample_text = "\n".join([
    "This is a sample document.",
    "It contains multiple lines of text.",
    "We will analyze this document.",
])

with open("sample_doc.txt", "w") as sample_file:
    sample_file.write(sample_text)

metrics = analyze_document("sample_doc.txt")
report = generate_report(metrics)
print(report)

Document Analysis Report
========================
Filename: sample_doc.txt
Character Count: 93
Word Count: 16
Line Count: 3
Unique Words: 14



## Working with Multiple Files

Here's how to process a list of files with a single, reusable processor function:

In [None]:
def batch_process_files(file_list, processor_func):
    """
    Process multiple files using a given processor function.

    Args:
        file_list: Iterable of file paths to read, in order.
        processor_func: Callable that receives each file's full text and
            returns the value to store for that file.

    Returns:
        A list of ``{"filename": ..., "result": ...}`` dicts, one per file
        that could be read, in input order. Files that do not exist are
        reported on stdout and skipped.

    Raises:
        Whatever ``processor_func`` raises. Only a missing *input* file is
        handled here; an error inside the processor is not mistaken for one.
    """
    results = []
    for filename in file_list:
        # Guard only the read. Keeping processor_func outside the try block
        # means a FileNotFoundError raised by the processor itself is no
        # longer swallowed and misreported as a missing input file.
        try:
            with open(filename, "r") as file:
                content = file.read()
        except FileNotFoundError:
            print(f"Could not find file: {filename}")
            continue

        results.append({
            "filename": filename,
            "result": processor_func(content),
        })
    return results

# Example processor function
def count_words(content):
    """Return how many whitespace-separated tokens ``content`` contains."""
    tokens = content.split()
    return sum(1 for _ in tokens)

# Demo: count the words in three numbered text files; any file that does
# not exist is reported by batch_process_files and left out of the results.
files = [f"file{number}.txt" for number in (1, 2, 3)]
word_counts = batch_process_files(file_list=files, processor_func=count_words)

Could not find file: file1.txt
Could not find file: file2.txt
Could not find file: file3.txt


## Creating a Simple Log System

Let's implement a basic logging system:

In [None]:
from datetime import datetime

class SimpleLogger:
    """Minimal file-backed logger.

    Each call to ``log`` appends one line of the form
    ``[YYYY-MM-DD HH:MM:SS] LEVEL: message`` to ``log_file``;
    ``read_logs`` returns everything written so far.
    """

    def __init__(self, log_file):
        # Path of the file that entries are appended to and read back from.
        self.log_file = log_file

    def log(self, message, level="INFO"):
        """Append a timestamped entry for ``message`` at the given ``level``."""
        stamp = f"{datetime.now():%Y-%m-%d %H:%M:%S}"
        entry = "[{}] {}: {}\n".format(stamp, level, message)

        # Append mode creates the file on first use and keeps earlier entries.
        with open(self.log_file, "a") as sink:
            sink.write(entry)

    def read_logs(self):
        """Return the whole log as a string, or "No logs found" if the file is missing."""
        try:
            source = open(self.log_file, "r")
        except FileNotFoundError:
            return "No logs found"
        with source:
            return source.read()

# Demo: append three entries at different levels to app.log,
# then print everything the file contains.
logger = SimpleLogger("app.log")
for level, message in (
    ("INFO", "Application started"),
    ("DEBUG", "Processing data..."),
    ("ERROR", "Error in data processing"),
):
    logger.log(message, level)
print("\nLog contents:")
print(logger.read_logs())


Log contents:
[2025-01-30 11:10:57] INFO: Application started
[2025-01-30 11:10:57] DEBUG: Processing data...
[2025-01-30 11:10:57] ERROR: Error in data processing

