Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/.funcignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
.git*
.vscode
__azurite_db*__.json
__blobstorage__
__queuestorage__
local.settings.json
test
.venv
135 changes: 135 additions & 0 deletions src/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don’t work, or not
# install all needed dependencies.
#Pipfile.lock

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# Azure Functions artifacts
bin
obj
appsettings.json
local.settings.json

# Azurite artifacts
__blobstorage__
__queuestorage__
__azurite_db*__.json
.python_packages
6 changes: 6 additions & 0 deletions src/.vscode/extensions.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"recommendations": [
"ms-azuretools.vscode-azurefunctions",
"ms-python.python"
]
}
15 changes: 15 additions & 0 deletions src/.vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "Attach to Python Functions",
"type": "debugpy",
"request": "attach",
"connect": {
"host": "localhost",
"port": 9091
},
"preLaunchTask": "func: host start"
}
]
}
9 changes: 9 additions & 0 deletions src/.vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"azureFunctions.deploySubpath": ".",
"azureFunctions.scmDoBuildDuringDeployment": true,
"azureFunctions.pythonVenv": ".venv",
"azureFunctions.projectLanguage": "Python",
"azureFunctions.projectRuntime": "~4",
"debug.internalConsoleOptions": "neverOpen",
"azureFunctions.projectLanguageModel": 2
}
27 changes: 27 additions & 0 deletions src/.vscode/tasks.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"version": "2.0.0",
"tasks": [
{
"type": "func",
"label": "func: host start",
"command": "host start",
"problemMatcher": "$func-python-watch",
"isBackground": true,
"dependsOn": "pip install (functions)"
},
{
"label": "pip install (functions)",
"type": "shell",
"osx": {
"command": "${config:azureFunctions.pythonVenv}/bin/python -m pip install -r requirements.txt"
},
"windows": {
"command": "${config:azureFunctions.pythonVenv}\\Scripts\\python -m pip install -r requirements.txt"
},
"linux": {
"command": "${config:azureFunctions.pythonVenv}/bin/python -m pip install -r requirements.txt"
},
"problemMatcher": []
}
]
}
125 changes: 125 additions & 0 deletions src/function_app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import azure.functions as func
import logging
import json
import os
import uuid
import io
from pdfminer.high_level import extract_text
from azure.cosmos import CosmosClient, PartitionKey

# Function app object that the trigger decorators in this module register on.
# The default HTTP auth level is set to FUNCTION; no HTTP-triggered function is
# visible in this file, so it presumably only matters if one is added later.
app = func.FunctionApp(http_auth_level=func.AuthLevel.FUNCTION)

def read_pdf_content(myblob):
    """Return the text extracted from a PDF blob.

    Args:
        myblob: Object exposing ``read()`` that returns the PDF's raw bytes
            (the blob trigger passes its input stream here).

    Returns:
        The text produced by ``extract_text`` for the whole document.
    """
    # The whole blob is read into memory and handed to extract_text as an
    # in-memory binary stream.
    return extract_text(io.BytesIO(myblob.read()))

def extract_invoice_data(text):
    """Parse the plain text of an invoice into a dictionary.

    The parser is purely line-based:

    * The three lines after a line containing ``"BILL TO:"`` are taken as
      customer name, e-mail and address.
    * The three lines after a line containing ``"Company Information:"`` are
      taken as company name, phone and address.
    * Every line after a line containing ``"Rental Date"``, up to the first
      blank line, is a rental row of whitespace-separated tokens:
      ``<date> <title words...> <description> <quantity> <total_price>``.

    Args:
        text: Invoice text, lines separated by ``"\\n"``.

    Returns:
        A dict with a fresh UUID string under ``"id"``, the six
        customer/company string fields (empty string when not found) and a
        ``"rentals"`` list of dicts with the keys ``rental_date``, ``title``,
        ``description``, ``quantity`` and ``total_price`` (all strings).
    """
    lines = text.split('\n')

    def line_at(index):
        # A header near the end of the text may not be followed by all three
        # detail lines; treat the missing ones as empty instead of raising.
        return lines[index].strip() if index < len(lines) else ""

    invoice_data = {
        # Same value generate_id() produces; inlined so the parser has no
        # dependency on other module-level helpers.
        "id": str(uuid.uuid4()),
        "customer_name": "",
        "customer_email": "",
        "customer_address": "",
        "company_name": "",
        "company_phone": "",
        "company_address": "",
        "rentals": [],
    }

    for i, line in enumerate(lines):
        if "BILL TO:" in line:
            invoice_data["customer_name"] = line_at(i + 1)
            invoice_data["customer_email"] = line_at(i + 2)
            invoice_data["customer_address"] = line_at(i + 3)
        elif "Company Information:" in line:
            invoice_data["company_name"] = line_at(i + 1)
            invoice_data["company_phone"] = line_at(i + 2)
            invoice_data["company_address"] = line_at(i + 3)
        elif "Rental Date" in line:
            for row in lines[i + 1:]:
                fields = row.split()
                if not fields:
                    # A blank line terminates the rentals table.
                    break
                if len(fields) < 4:
                    # Date, description, quantity and price are mandatory;
                    # with fewer tokens the negative indexes would either
                    # raise or alias the date as the description.
                    logging.warning("Skipping malformed rental row: %r", row)
                    continue
                # Everything between the date and the last three tokens is
                # the (possibly multi-word, possibly empty) title.
                rental_date, *title_words, description, quantity, total_price = fields
                invoice_data["rentals"].append({
                    "rental_date": rental_date,
                    "title": " ".join(title_words),
                    "description": description,
                    "quantity": quantity,
                    "total_price": total_price,
                })

    logging.info("Successfully extracted invoice data.")
    return invoice_data

def save_invoice_data_to_cosmos(invoice_data, blob_name):
    """Upsert one parsed invoice into the Cosmos DB ``Invoices`` container.

    Connection settings are read from the ``COSMOS_DB_ENDPOINT`` and
    ``COSMOS_DB_KEY`` environment variables. The ``ContosoDBAIDemo`` database
    and the ``Invoices`` container are created when they do not exist yet.
    Every failure is logged (with traceback) and reported through the return
    value; nothing is raised to the caller.

    Args:
        invoice_data: The item to upsert (the dict built by
            ``extract_invoice_data``).
        blob_name: Name of the source blob; used only to correlate log lines.

    Returns:
        ``True`` when the item was upserted, ``False`` when configuration is
        missing or any Cosmos DB step failed.
    """
    endpoint = os.getenv("COSMOS_DB_ENDPOINT")
    key = os.getenv("COSMOS_DB_KEY")
    if not endpoint or not key:
        # Fail early with an actionable message instead of letting the SDK
        # choke on None credentials.
        logging.error(
            "Error connecting to Cosmos DB: COSMOS_DB_ENDPOINT and "
            "COSMOS_DB_KEY must both be set (blob: %s)",
            blob_name,
        )
        return False

    try:
        client = CosmosClient(endpoint, key)
        logging.info("Successfully connected to Cosmos DB.")
    except Exception as e:
        logging.exception("Error connecting to Cosmos DB: %s", e)
        return False

    database_name = 'ContosoDBAIDemo'
    container_name = 'Invoices'

    try:
        database = client.create_database_if_not_exists(id=database_name)
        # NOTE(review): the partition key path is /invoice_number, but the
        # dict produced by extract_invoice_data has no "invoice_number"
        # field - confirm this is intended before relying on partitioning.
        container = database.create_container_if_not_exists(
            id=container_name,
            partition_key=PartitionKey(path="/invoice_number"),
            offer_throughput=400
        )
        logging.info("Successfully ensured database and container exist.")
    except Exception as e:
        logging.exception("Error creating database or container: %s", e)
        return False

    try:
        response = container.upsert_item(invoice_data)
    except Exception as e:
        logging.exception(
            "Error inserting item into Cosmos DB (blob: %s): %s", blob_name, e
        )
        return False

    logging.info(f"Saved processed invoice data to Cosmos DB: {response}")
    return True

def generate_id():
    """Return a new random (version 4) UUID in its canonical string form."""
    new_id = uuid.uuid4()
    return str(new_id)

@app.blob_trigger(arg_name="myblob", path="pdfinvoices/{name}",
                  connection="contosostorageaidemo_STORAGE")
def BlobTriggerContosoPDFInvoicesRaw(myblob: func.InputStream):
    """Process one PDF invoice blob: extract text, parse it, store the result.

    Bound to blobs under ``pdfinvoices/`` via the blob trigger decorator.
    Each stage (read, parse, save) is guarded separately; on failure the
    error is logged with its traceback and the function returns normally
    without running the later stages.

    Args:
        myblob: Input stream of the blob that fired the trigger.
    """
    logging.info(f"Python blob trigger function processed blob\n"
                 f"Name: {myblob.name}\n"
                 f"Blob Size: {myblob.length} bytes")

    try:
        text = read_pdf_content(myblob)
    except Exception as e:
        logging.exception("Error reading PDF: %s", e)
        return
    logging.info("Successfully read and extracted text from PDF.")

    # The raw text holds customer contact details, so only its size is
    # logged at INFO; the content itself is available at DEBUG.
    logging.info("Extracted %d characters of text from PDF.", len(text))
    logging.debug("Extracted text from PDF: %s", text)

    try:
        invoice_data = extract_invoice_data(text)
    except Exception as e:
        logging.exception("Error extracting invoice data: %s", e)
        return
    # Same privacy consideration as above: summary at INFO, payload at DEBUG.
    logging.info(
        "Extracted invoice data: id=%s, rentals=%d",
        invoice_data["id"], len(invoice_data["rentals"]),
    )
    logging.debug("Extracted invoice data: %s", invoice_data)

    try:
        save_invoice_data_to_cosmos(invoice_data, myblob.name)
    except Exception as e:
        logging.exception("Error saving invoice data to Cosmos DB: %s", e)
        return

    # save_invoice_data_to_cosmos catches Cosmos DB errors itself and logs
    # its own outcome, so reaching this line does not prove the item was
    # stored - do not claim success here.
    logging.info("Finished Cosmos DB save step for blob %s.", myblob.name)
15 changes: 15 additions & 0 deletions src/host.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"version": "2.0",
"logging": {
"applicationInsights": {
"samplingSettings": {
"isEnabled": true,
"excludedTypes": "Request"
}
}
},
"extensionBundle": {
"id": "Microsoft.Azure.Functions.ExtensionBundle",
"version": "[4.*, 5.0.0)"
}
}
7 changes: 7 additions & 0 deletions src/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# DO NOT include azure-functions-worker in this file
# The Python Worker is managed by Azure Functions platform
# Manually managing azure-functions-worker may cause unexpected issues

azure-functions
pdfminer.six
azure-cosmos==4.3.0