Import libraries.

In [None]:
import os
from azure.ai.vision.imageanalysis import ImageAnalysisClient
from azure.ai.vision.imageanalysis.models import VisualFeatures
from azure.core.credentials import AzureKeyCredential
from pdf2image import convert_from_path

Helper function for converting the first page of a PDF to a JPG image.

In [None]:
def pdf_to_jpg(pdf_path, output_path="converted_image.jpg"):
    """
    Render page one of a PDF and store it as a JPEG file.

    Args:
        pdf_path (str): Location of the PDF file to read.
        output_path (str): Where the JPEG is written
            (defaults to "converted_image.jpg").

    Returns:
        str: The ``output_path`` the image was saved to.

    Raises:
        ValueError: If the conversion yields no page images.
    """
    # Only page 1 is requested from the converter.
    rendered_pages = convert_from_path(pdf_path, first_page=1, last_page=1)

    # Guard clause: nothing to save when the converter returned no pages.
    if not rendered_pages:
        raise ValueError("No pages found in the PDF file.")

    first_page = rendered_pages[0]
    first_page.save(output_path, format="JPEG")
    return output_path

Set up the values for the endpoint and API key.

In [None]:
# Read the service endpoint and API key from the environment instead of
# hard-coding them, so credentials never end up saved in the notebook.
# Expected variables: VISION_ENDPOINT and VISION_KEY.
try:
    endpoint = os.environ["VISION_ENDPOINT"]
    key = os.environ["VISION_KEY"]
except KeyError:
    print("Missing environment variable 'VISION_ENDPOINT' or 'VISION_KEY'")
    print("Set them before running this sample.")
    # Stop here: without both values the client cannot be configured, and
    # continuing would only fail later with a less helpful error.
    raise

Set up an Image Analysis client.

In [None]:
# Key-based authentication: the API key is wrapped in an AzureKeyCredential
# and handed to the client together with the service endpoint.
client = ImageAnalysisClient(endpoint=endpoint, credential=AzureKeyCredential(key))

Load the image: convert the first page of the PDF to a JPG and read its bytes.

In [None]:
# To analyze an existing JPG instead of a PDF, skip the conversion and set:
#   image_path = "200.jpg"
image_path = pdf_to_jpg("200_tegnforklaring.pdf")

# Read the file in binary mode so the image bytes are kept exactly as stored.
with open(image_path, "rb") as image_file:
    image_data = image_file.read()

Define the visual features you want to analyze.

In [None]:
# Features requested in a single analyze call; each one maps to a section
# of the result that is printed further down.
visual_features = [
    VisualFeatures.TAGS,            # tag names with confidence
    VisualFeatures.OBJECTS,         # detected objects with bounding boxes
    VisualFeatures.CAPTION,         # one caption for the whole image
    VisualFeatures.DENSE_CAPTIONS,  # captions with bounding boxes
    VisualFeatures.READ,            # text lines and words
    VisualFeatures.SMART_CROPS,     # crop regions per aspect ratio
    VisualFeatures.PEOPLE,          # detected people with bounding boxes
]

Analyze the image using the analyze method.

In [None]:
# Send the image bytes and the requested feature list to the service.
# NOTE(review): gender_neutral_caption presumably only affects the caption
# features - confirm against the SDK documentation.
result = client.analyze(
    image_data=image_data,
    visual_features=visual_features,
    language="en",
    gender_neutral_caption=True,
)

Print all the analysis results.

In [None]:
# Print every section of the analysis result. A section is only printed
# when its attribute on the result is not None.
print("Image analysis results:")

if result.caption is not None:
    print(" Caption:")
    print(f"   '{result.caption.text}', Confidence {result.caption.confidence:.4f}")

if result.dense_captions is not None:
    print(" Dense Captions:")
    for caption in result.dense_captions.list:
        print(f"   '{caption.text}', {caption.bounding_box}, Confidence: {caption.confidence:.4f}")

if result.read is not None:
    print(" Read:")
    # Walk every text block instead of indexing blocks[0]: this avoids an
    # IndexError when the block list is empty and does not silently drop
    # any blocks after the first one.
    for block in result.read.blocks:
        for line in block.lines:
            print(f"   Line: '{line.text}', Bounding box {line.bounding_polygon}")
            for word in line.words:
                print(f"     Word: '{word.text}', Bounding polygon {word.bounding_polygon}, Confidence {word.confidence:.4f}")

if result.tags is not None:
    print(" Tags:")
    for tag in result.tags.list:
        print(f"   '{tag.name}', Confidence {tag.confidence:.4f}")

if result.objects is not None:
    print(" Objects:")
    # Loop variable renamed from `object` so the builtin is not shadowed.
    for detected_object in result.objects.list:
        # Only the first tag of each detected object is reported.
        top_tag = detected_object.tags[0]
        print(f"   '{top_tag.name}', {detected_object.bounding_box}, Confidence: {top_tag.confidence:.4f}")

if result.people is not None:
    print(" People:")
    for person in result.people.list:
        print(f"   {person.bounding_box}, Confidence {person.confidence:.4f}")

if result.smart_crops is not None:
    print(" Smart Cropping:")
    for smart_crop in result.smart_crops.list:
        print(f"   Aspect ratio {smart_crop.aspect_ratio}: Smart crop {smart_crop.bounding_box}")

# Metadata and model version are printed without a None check.
print(f" Image height: {result.metadata.height}")
print(f" Image width: {result.metadata.width}")
print(f" Model version: {result.model_version}")