# Azure Document Intelligence Docker ReadAPI & phi3

<img src="container.png" width=200>

https://learn.microsoft.com/en-us/azure/ai-services/document-intelligence/containers/install-run?view=doc-intel-3.0.0&tabs=read

## 1. Libraries

In [1]:
#%pip install azure-ai-formrecognizer

In [1]:
# Pull every public name of the local helper module into the notebook namespace.
# NOTE(review): presumably this module exposes the same helpers that are defined
# later in this notebook — confirm.
from doc_intelligences_container import *
# Start the containers described by the compose file (the return value is discarded here).
start_docker_compose("docker-compose.yml")
# Derive the local service endpoint from the same compose file.
endpoint=get_endpoint_from_docker_compose("docker-compose.yml")

In [None]:
import sys
import os

# Show the current working directory (sanity check for the import below)
print(f"Répertoire de travail courant : {os.getcwd()}")

# Import the helper script directly (it is expected to sit next to the notebook)
try:
    from doc_intelligences_container import *
    print("Module doc_intelligences_container importé avec succès.")
except ImportError as e:
    print(f"Erreur d'importation : {e}")
    print("Vérifiez que doc_intelligences_container.py est dans le même répertoire que le notebook.")
    print(f"Contenu de sys.path : {sys.path}")  # Dump sys.path to help debugging
    # Exit with a non-zero status: a bare sys.exit() would report success (0)
    # even though the required module could not be imported.
    sys.exit(1)


In [None]:
# ... the rest of your code (using the functions from doc_intelligences_container)
yml_path = "docker-compose.yml"
azure_env_path = "azure.env"
document_file = "../document/Phi4-TechReport.pdf"

# Use the imported module, with error handling.
# NOTE(review): `time` is used below but is not imported in this cell; it is
# presumably provided by the star import or by an earlier run of the imports
# cell — confirm.
try:
    result_start_docker = start_docker_compose(yml_path)
    print(result_start_docker)

    endpoint = get_endpoint_from_docker_compose(yml_path)
    key = get_key(azure_env_path)

    # Only proceed when an http:// URL came back; anything else is printed as-is below.
    if endpoint.startswith("http://"):
        azure_document_intelligence_client = get_document_intelligence_client(endpoint, key)

        start = time.time()
        # The whole document is read into memory and submitted to the "prebuilt-read" model.
        with open(document_file, "rb") as file:
            poller = azure_document_intelligence_client.begin_analyze_document(
                "prebuilt-read",
                file.read())
            result = poller.result()
        elapsed = time.time() - start
        # Elapsed time as HH:MM:SS followed by the digits of the fractional second.
        print(f"Done in {time.strftime('%H:%M:%S.' + str(elapsed % 1)[2:15], time.gmtime(elapsed))}")
        get_results(result)
    else:
        print(endpoint)  # Print the error message
except AttributeError as e:
    print(f"Erreur d'attribut : {e}. Vérifiez que la fonction existe dans le module importé.")
except FileNotFoundError as e:
    print(f"Erreur de fichier non trouvé : {e}. Vérifiez le chemin du fichier.")
except Exception as e:
    print(f"Une erreur inattendue s'est produite : {e}")

In [29]:
import datetime
import os
#import openai
import sys
import platform
import time

from azure.ai.formrecognizer import DocumentAnalysisClient
from azure.core.credentials import AzureKeyCredential
from dotenv import load_dotenv
from PIL import Image
import subprocess
from pathlib import Path
import yaml


In [None]:
def start_docker_compose(yml_path: str):
    """
    Launch, in detached mode, the containers described by a compose file.

    Runs ``docker-compose -f <file> up -d``. Failures are never raised; they
    are reported through the returned message instead.

    Args:
        yml_path (str): Path to the docker-compose.yml file.

    Returns:
        str: A success message, or a message describing what went wrong.
    """
    try:
        compose_file = Path(yml_path)
        if compose_file.is_file():
            command = ["docker-compose", "-f", str(compose_file), "up", "-d"]
            # check=True turns a non-zero exit status into CalledProcessError.
            subprocess.run(command, check=True)
            return f"Containers started successfully using the file '{yml_path}'."
        # Missing path, or a path that is not a regular file.
        return f"Error: The specified file '{yml_path}' does not exist or is not a valid file."
    except subprocess.CalledProcessError as failure:
        return f"Error while starting containers: {failure}"
    except Exception as failure:
        return f"An unexpected error occurred: {failure}"


In [None]:
# Start the containers and show the status message returned by the helper.
result = start_docker_compose(r"docker-compose.yml")
print(result)


In [None]:
# Report the installed SDK version.
# NOTE(review): the client used in this notebook is imported from
# azure.ai.formrecognizer, while this cell reports azure.ai.documentintelligence —
# confirm which package is intended.
import azure.ai.documentintelligence
print("Azure Document Intelligence version: ", azure.ai.documentintelligence.__version__)

In [None]:
# Display the version string of the running Python interpreter.
sys.version

In [None]:
# Print the current local date and time (day-month-year hours:minutes:seconds).
print(f"Today is {datetime.datetime.today().strftime('%d-%b-%Y %H:%M:%S')}")

In [None]:
def get_system_info():
    """
    Collect basic platform details about the machine running the notebook.

    Returns:
        dict: Mapping with the keys "System", "Machine" and "Processor", each
        holding the value reported by the matching ``platform`` function.
    """
    return {
        "System": platform.system(),
        "Machine": platform.machine(),
        "Processor": platform.processor(),
    }


info = get_system_info()
# The loop variables are deliberately not named `key` / `value`: at notebook
# level they are globals, and `key` is the name that holds the Azure API key.
for info_name, info_value in info.items():
    print(f"{info_name}: {info_value}")

## 2. Settings for Azure Document Intelligence in a connected container

In [34]:
def _host_port_for_container_port(port_mapping, container_port="5000"):
    """
    Return the host port that one ``ports`` entry publishes for ``container_port``.

    Supports the Compose short syntax
    ``[HOST_IP:]HOST_PORT:CONTAINER_PORT[/PROTOCOL]`` and the long (mapping)
    syntax with ``target`` / ``published`` keys.

    Returns:
        str | None: The host port as a string, or ``None`` when the entry does
        not publish ``container_port`` on an explicit host port.
    """
    if isinstance(port_mapping, dict):
        published = port_mapping.get("published")
        if str(port_mapping.get("target")) == container_port and published is not None:
            return str(published)
        return None

    if not isinstance(port_mapping, str) or ":" not in port_mapping:
        return None

    # Drop an optional "/tcp" or "/udp" suffix, then read from the right so an
    # optional leading host IP (which may itself contain ':') is ignored.
    without_protocol = port_mapping.split("/", 1)[0]
    parts = without_protocol.rsplit(":", 2)
    host_port, target_port = parts[-2].strip(), parts[-1].strip()
    if target_port == container_port and host_port:
        return host_port
    return None


def get_endpoint_from_docker_compose(yml_path: str) -> str:
    """
    Extracts the port from a docker-compose.yml file and constructs the endpoint URL.

    The first service entry that publishes container port 5000 determines the
    host port used in the URL. Problems are reported through the returned
    string rather than raised, so callers should check that the result starts
    with "http://".

    Args:
        yml_path (str): Path to the docker-compose.yml file.

    Returns:
        str: The constructed endpoint (e.g., "http://localhost:5000"), or an
        error message.
    """
    try:
        # Check if the provided path exists and is a valid file
        yml_file = Path(yml_path)
        if not yml_file.is_file():
            return f"Error: The specified file '{yml_path}' does not exist or is not a valid file."

        # Load the docker-compose.yml file; an empty document parses to None.
        with open(yml_file, 'r', encoding="utf-8") as file:
            compose_data = yaml.safe_load(file) or {}

        # A service without a body or without ports is simply skipped.
        services = compose_data.get('services') or {}
        for service_config in services.values():
            ports = (service_config or {}).get('ports') or []
            for port_mapping in ports:
                host_port = _host_port_for_container_port(port_mapping)
                if host_port is not None:
                    return f"http://localhost:{host_port}"

        return "Error: No service exposing port 5000 found in the docker-compose.yml file."

    except yaml.YAMLError as e:
        return f"Error parsing YAML file: {e}"
    except Exception as e:
        return f"An unexpected error occurred: {e}"


In [None]:
# Resolve the endpoint from the compose file and display it (an error message
# is printed instead when no endpoint could be derived).
yml_path = r"docker-compose.yml"
endpoint = get_endpoint_from_docker_compose(yml_path)
print(endpoint)


# Manual override, kept for reference:
#endpoint = "http://localhost:5000"

In [None]:
# Display the current working directory; the relative paths used in this
# notebook are resolved against it.
import os
os.getcwd()

In [None]:
def get_key(azure_env_path: str) -> str:
    """
    Load a dotenv file and return the Document Intelligence key it defines.

    Args:
        azure_env_path (str): Path of the .env file handed to ``load_dotenv``.

    Returns:
        str: Value of the ``azure_doc_intelligence_key`` environment variable
        (``os.getenv`` yields ``None`` when the variable is not set).
    """
    load_dotenv(azure_env_path)
    return os.getenv("azure_doc_intelligence_key")

key = get_key("azure.env")

http://localhost:5000

http://localhost:5000/api-docs/index.html

In [None]:
def get_document_intelligence_client(endpoint, key):
    """
    Build a ``DocumentAnalysisClient`` for the given endpoint.

    Args:
        endpoint: URL of the service.
        key: API key, wrapped in an ``AzureKeyCredential``.

    Returns:
        DocumentAnalysisClient: The configured client.
    """
    credential = AzureKeyCredential(key)
    return DocumentAnalysisClient(endpoint=endpoint, credential=credential)

azure_document_intelligence_client = get_document_intelligence_client(endpoint, key)

## 3. Functions

In [26]:
def get_words(page, line):
    """
    Collect the words of a page that belong to a given line.

    A word belongs to the line when ``_in_span`` reports it as lying inside
    one of the line's spans.

    Args:
        page (Page): Page object whose ``words`` are scanned.
        line (Line): Line object whose ``spans`` delimit the words to keep.

    Returns:
        list: The matching words, in page order.
    """
    return [candidate for candidate in page.words if _in_span(candidate, line.spans)]

In [27]:
def count_words_in_file(file_path, encoding="utf-8"):
    """
    Counts the number of whitespace-separated words in a text file.

    Args:
        file_path (str): The path to the text file.
        encoding (str, optional): Encoding used to decode the file. Defaults
            to "utf-8", matching how this notebook writes its OCR output, so
            the count does not depend on the platform's default encoding.

    Returns:
        int: The number of words in the file (0 for an empty file).
    """
    with open(file_path, 'r', encoding=encoding) as file:
        return len(file.read().split())

In [28]:
def _in_span(word, spans):
    """
    Checks if a word falls within any of the specified spans.

    Args:
        word (Word): The word object to be checked.
        spans (list): A list of span objects that define the boundaries for the word.

    Returns:
        bool: True if the word is within any of the spans, False otherwise.
    """
    for span in spans:
        if word.span.offset >= span.offset and (
                word.span.offset + word.span.length) <= (span.offset +
                                                         span.length):
            return True

    return False

In [16]:
def get_results(result):
    """
    Prints the Azure Document Intelligence results.

    Reports whether any style is flagged as handwritten, then for every page
    its dimensions, its lines (with the words and confidences inside each
    line), its selection marks and its barcodes, and finally every table with
    its cells and the bounding regions of the table and of each cell.

    Args:
        result: Analysis result exposing ``styles``, ``pages`` and ``tables``.

    Returns:
        None
    """
    print("Document Layout Analysis:")
    # ANSI escape sequence that changes the colour of the output that follows.
    print("\033[1;31;34m")

    if result.styles and any(style.is_handwritten for style in result.styles):
        print("Document contains handwritten content\n")
    else:
        print("Document does not contain handwritten content\n")

    for page in result.pages:
        print(f"*** Analyzing layout from page #{page.page_number} ***")
        print(
            f"Page has width: {page.width} and height: {page.height}, measured with unit: {page.unit}"
        )
        print()

        if page.lines:
            for line_idx, line in enumerate(page.lines):
                words = get_words(page, line)
                print(
                    f"\n- Line # {line_idx} has word count {len(words)} and text '{line.content}' "
                    f"within bounding polygon '{line.polygon}'")

                for word in words:
                    print(
                        f"\tWord '{word.content}' has a confidence of {word.confidence}"
                    )

        if page.selection_marks:
            for selection_mark in page.selection_marks:
                print(
                    f"Selection mark is '{selection_mark.state}' within bounding polygon "
                    f"'{selection_mark.polygon}' and has a confidence of {selection_mark.confidence}"
                )

        if page.barcodes:
            print(f"Detected {len(page.barcodes)} barcodes:")
            for barcode_idx, barcode in enumerate(page.barcodes):
                print(f"Barcode #{barcode_idx}: {barcode.value}")
                print(f"\tKind: {barcode.kind}")
                print(f"\tConfidence: {barcode.confidence}")
                print(f"\tBounding regions: {barcode.polygon}")

    if result.tables:
        for table_idx, table in enumerate(result.tables):
            print(f"Table # {table_idx} has {table.row_count} rows and "
                  f"{table.column_count} columns")
            if table.bounding_regions:
                for region in table.bounding_regions:
                    print(
                        f"Table # {table_idx} location on page: {region.page_number} is {region.polygon}"
                    )
            for cell in table.cells:
                print(
                    f"Cell[{cell.row_index}][{cell.column_index}] has text '{cell.content}'"
                )
                # Location of this cell. It must be reported inside the loop:
                # outside it, only the last cell would be covered and a table
                # without cells would hit an undefined `cell`.
                if cell.bounding_regions:
                    for region in cell.bounding_regions:
                        print(
                            f"content on page {region.page_number} is within bounding polygon '{region.polygon}'"
                        )

In [17]:
def azure_document_intelligence_costs(pages: int) -> float:
    """
    Estimate the Azure Document Intelligence bill for a number of pages.

    Uses the rate stated for Prebuilt Models (Layout): $10 per 1,000 pages.

    Args:
        pages (int): Number of pages processed.

    Returns:
        float: Cost in USD.
    """
    usd_per_thousand_pages = 10
    thousands_of_pages = pages / 1_000
    return usd_per_thousand_pages * thousands_of_pages

## 4. Tests

### Test 1

In [None]:
import os
# Document analysed in Test 1 (path is relative to the working directory).
document_file = "../document/Phi4-TechReport.pdf"


# Check that the file is reachable before submitting it.
os.path.exists(document_file)

In [None]:
# Time the whole analysis, including reading the file.
start = time.time()

# The document is read fully into memory and submitted to the "prebuilt-read"
# model; the analysis result is then taken from the returned poller.
with open(document_file, "rb") as file:
    poller = azure_document_intelligence_client.begin_analyze_document(
        "prebuilt-read",
        file.read())
    result = poller.result()

elapsed = time.time() - start
# Elapsed time as HH:MM:SS followed by the digits of the fractional second.
print(
    f"Done in {time.strftime('%H:%M:%S.' + str(elapsed % 1)[2:15], time.gmtime(elapsed))}"
)

In [37]:
def _first_bounding_region(paragraph):
    """Return the first bounding region of a paragraph, or None if it has none.

    Accepts both attribute-style objects (``bounding_regions``) and
    mapping-style objects (``get("boundingRegions")``).
    """
    regions = getattr(paragraph, "bounding_regions", None)
    if regions is None and hasattr(paragraph, "get"):
        regions = paragraph.get("boundingRegions")
    return regions[0] if regions else None


def _region_value(region, attribute, key):
    """Read one field of a bounding region by attribute, falling back to key lookup."""
    if region is None:
        return None
    if hasattr(region, attribute):
        return getattr(region, attribute)
    return region[key]


def get_paragraphs(result):
    """
    Flatten the paragraphs of an analysis result into plain dictionaries.

    Args:
        result: Analysis result exposing ``paragraphs`` (may be None or empty).

    Returns:
        list[dict]: One dict per paragraph with the keys "id"
        ("/paragraphs/<index>"), "content", "role" (both "" when unset),
        "polygon" and "pageNumber" (both taken from the paragraph's first
        bounding region, or None when the paragraph has no region).
    """
    paragraphs = []
    for idx, paragraph in enumerate(result.paragraphs or []):
        region = _first_bounding_region(paragraph)
        paragraphs.append({
            "id": "/paragraphs/" + str(idx),
            "content": paragraph.content if paragraph.content else "",
            "role": paragraph.role if paragraph.role else "",
            "polygon": _region_value(region, "polygon", "polygon"),
            "pageNumber": _region_value(region, "page_number", "pageNumber"),
        })
    return paragraphs

In [38]:
def get_tables(result):
    """
    Flatten the tables of an analysis result into plain dictionaries.

    Args:
        result: Analysis result exposing ``tables`` (may be None or empty).

    Returns:
        list[dict]: One dict per table with "row_count", "column_count" and
        "cells"; each cell is a dict with "row_index", "column_index" and
        "content". An empty list is returned when there are no tables.
    """
    tables = []
    for table in result.tables or []:
        cells = [
            {
                "row_index": cell.row_index,
                "column_index": cell.column_index,
                "content": cell.content,
            }
            for cell in table.cells
        ]
        tables.append({
            "row_count": table.row_count,
            "column_count": table.column_count,
            "cells": cells,
        })
    # Return only after every table has been collected.
    return tables

In [None]:
# Show the raw analysis result object.
print(result)

In [None]:
# Print the detailed page / line / word / table report for the Test 1 document.
get_results(result)

### Exporting results to a .txt file

In [20]:
# Destination of the exported OCR text.
ocr_file = "../document/ocr.txt"

In [21]:
# Write the recognised text as UTF-8, one OCR line per text line, page after page.
with open(ocr_file, "w", encoding="utf-8") as file:
    for page in result.pages:
        for line in page.lines:
            file.write(f"{line.content}\n")

In [None]:
# Count the whitespace-separated words of the exported OCR text.
word_count = count_words_in_file(ocr_file)
print(f"The number of words in the file {ocr_file} is = {word_count}")

In [None]:
# Reading file results
with open(ocr_file, 'r', encoding="utf-8") as f:
    print(f.read())

In [None]:
# ANSI escape sequence that changes the colour of the output that follows.
print("\033[1;31;32m")
# Print the text of every recognised line, page after page.
for page in result.pages:
    for line in page.lines:
        print(f"{line.content}")

### Test 2

In [26]:
# Image analysed in Test 2.
# NOTE(review): Test 1 reads from "../document/" while this path is under
# "documents/" — confirm that both locations are intended.
document_file = "documents/letter.png"

In [None]:
# Open the image with PIL; leaving `img` as the last expression displays it in the notebook.
img = Image.open(document_file)
img

In [None]:
# Time the whole analysis, including reading the file.
start = time.time()

# Identifier of the model used for the analysis call below.
model = "prebuilt-read"

# The image is read fully into memory and submitted to the selected model;
# the analysis result is then taken from the returned poller.
with open(document_file, "rb") as file:
    poller = azure_document_intelligence_client.begin_analyze_document(model, file.read())
    result = poller.result()

elapsed = time.time() - start
# Elapsed time as HH:MM:SS followed by the digits of the fractional second.
print(f"Done in {time.strftime('%H:%M:%S.' + str(elapsed % 1)[2:15], time.gmtime(elapsed))}")

In [None]:
# Print the detailed page / line / word / table report for the Test 2 image.
get_results(result)

In [None]:
# ANSI escape sequence that changes the colour of the output that follows.
print("\033[1;31;32m")
# Print the text of every recognised line, page after page.
for page in result.pages:
    for line in page.lines:
        print(f"{line.content}")

## 5. GenAI using Phi-3

In [None]:
!ollama

In [None]:
!ollama --version

In [None]:
!ollama list

In [None]:
!ollama show phi3:medium

### With ollama client

In [None]:
# NOTE(review): the export cell earlier in this notebook writes to
# "../document/ocr.txt", while this cell reads "documents/ocr.txt" — confirm
# that this is the intended file.
ocr_file = "documents/ocr.txt"

# Decode explicitly as UTF-8 (the encoding used when the OCR text is exported)
# instead of relying on the platform's default encoding.
with open(ocr_file, 'r', encoding="utf-8") as file:
    ocr_text = file.read()

word_count = count_words_in_file(ocr_file)
print(f"The number of words in the file {ocr_file} is: {word_count}")

In [36]:
# The top-level `import openai` in the imports cell is commented out, so the
# package is imported here, where it is actually needed.
import openai

# OpenAI-compatible client pointed at the local Ollama server; the API key is
# a placeholder value.
ollama_client = openai.OpenAI(
    base_url="http://localhost:11434/v1",
    api_key="nokeyneeded",
)

In [37]:
def phi3_ollama_model(prompt, temperature=0.7, model="phi3"):
    """
    Ask a Phi-3 model a question through the OpenAI-compatible Ollama client.

    Args:
        prompt (str): The user message to send.
        temperature (float, optional): Sampling temperature. Defaults to 0.7.
        model (str, optional): Name of the model to query. Defaults to "phi3".

    Returns:
        str: Content of the first returned choice, or None when any error
        occurs (the error is printed, not raised).
    """
    conversation = [
        {"role": "system", "content": "You are an AI helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    try:
        completion = ollama_client.chat.completions.create(
            model=model,
            temperature=temperature,
            n=1,
            messages=conversation,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as e:
        print(f"[ERROR] An error occurred: {e}")
        return None

### Tests

In [None]:
# Ask the phi3:medium model for a summary of the OCR text.
prompt = f"Can you summarize this text: {ocr_text}"

answer = phi3_ollama_model(prompt, model="phi3:medium")
# ANSI escape sequence that changes the colour of the output that follows.
print("\033[1;31;34m")
print(answer)

In [None]:
# Ask the phi3 model for a one-line description of the OCR text.
prompt = f"Generate a description in one line for this text: {ocr_text}"

answer = phi3_ollama_model(prompt, model="phi3")
# ANSI escape sequence that changes the colour of the output that follows.
print("\033[1;31;34m")
print(answer)

In [None]:
# Ask the phi3 model for a one-line description with keywords and emojis.
prompt = f"Generate a description in one line for this text with some keywords and emojis: {ocr_text}"

answer = phi3_ollama_model(prompt, model="phi3")
# ANSI escape sequence that changes the colour of the output that follows.
print("\033[1;31;34m")
print(answer)

In [None]:
# Ask the phi3 model which companies the OCR text mentions.
prompt = f"What are the companies mentionned in this text: {ocr_text}"

answer = phi3_ollama_model(prompt, model="phi3")
# ANSI escape sequence that changes the colour of the output that follows.
print("\033[1;31;34m")
print(answer)

### Using ollama lib

In [52]:
import ollama

In [53]:
def phi3_ollama_lib_model(prompt, model="phi3"):
    """
    Sends a prompt to a model through the Ollama chat API and returns the response content.

    Args:
        prompt (str): The input prompt, sent as a single user message.
        model (str, optional): Name of the model to query. Defaults to "phi3".

    Returns:
        str: The content of the response message, or None when any error
        occurs (the error is printed, not raised).
    """
    try:
        response = ollama.chat(
            model,
            messages=[{
                'role': 'user',
                'content': prompt,
            }],
        )
        # Subscript access works whether the library returns a plain dict or
        # a response object that supports item access.
        return response['message']['content']

    except Exception as e:
        print(f"[ERROR] An error occurred: {e}")
        return None

In [None]:
# Ask the phi3:medium model (through the ollama library) for a one-line description.
prompt = f"Generate a one line description for this text: {ocr_text}"

answer = phi3_ollama_lib_model(prompt, model="phi3:medium")
print(answer)

In [None]:
# Ask the default model for keywords describing the OCR text.
prompt = f"Generate some keywords to describe this text: {ocr_text}"

answer = phi3_ollama_lib_model(prompt)
# ANSI escape sequence that changes the colour of the output that follows.
print("\033[1;31;34m")
print(answer)

In [None]:
# Ask the default model to classify the OCR text into one of the listed categories.
prompt = f"Classify this text into one of these categories: [ART], [TRAVEL], [FINANCE], [IT], [SPORTS] {ocr_text}"

answer = phi3_ollama_lib_model(prompt)
# ANSI escape sequence that changes the colour of the output that follows.
print("\033[1;31;34m")
print(answer)

In [None]:
# Ask the default model which URLs the OCR text mentions.
prompt = f"What are the urls mentionned in this text: {ocr_text}"

answer = phi3_ollama_lib_model(prompt)
# ANSI escape sequence that changes the colour of the output that follows.
print("\033[1;31;34m")
print(answer)