# Import libraries and environment keys

In [1]:
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment.
# As the last expression of the cell, the call's return value is what the
# notebook displays as the cell output.
load_dotenv()

True

# Helper functions
Used to load images either as base64 strings (together with their MIME type) or as raw bytes.

In [2]:
import base64
from pathlib import Path
import mimetypes

def load_image_as_base64(path: str) -> tuple[str, str]:
    """Read an image file and return ``(base64_text, mime_type)``.

    The file content is base64-encoded and decoded to a UTF-8 string.
    The MIME type is guessed from the file name; ``"image/png"`` is used
    when no type can be guessed.
    """
    with open(path, "rb") as handle:
        raw = handle.read()
    guessed_mime, _encoding = mimetypes.guess_type(path)
    encoded_text = base64.b64encode(raw).decode("utf-8")
    if not guessed_mime:
        guessed_mime = "image/png"
    return encoded_text, guessed_mime

def load_image_from_path(path: str) -> bytes:
    """Read the file at ``path`` in binary mode and return its raw bytes."""
    return Path(path).read_bytes()


# Native Client Integrations

## Amazon and Anthropic Models




### Amazon Bedrock Models - text only

In [3]:
from gen_ai_hub.proxy.native.amazon.clients import Session

# Sampling settings used by the request below
temperature = 0.6
max_Tokens = 1000
model = "anthropic--claude-4-sonnet"

# Client bound to the chosen model
bedrock = Session().client(model_name=model)

# A single user turn carrying one text content item
messages = [
    {"role": "user", "content": [{"text": "What is the capital of France?"}]},
]

# Send the conversation and print the raw response dictionary
response = bedrock.converse(
    messages=messages,
    inferenceConfig={"maxTokens": max_Tokens, "temperature": temperature},
)
print(response)


{'ResponseMetadata': {'RequestId': '1f57e1d3-640a-48ec-999b-3890c2efe1f1', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Thu, 08 Jan 2026 14:18:35 GMT', 'content-type': 'application/json', 'content-length': '345', 'x-aicore-request-id': '57d50cce-0b45-99a4-98bb-a5c016e83ccf', 'x-amzn-requestid': '1f57e1d3-640a-48ec-999b-3890c2efe1f1', 'x-upstream-service-time': '1639'}, 'RetryAttempts': 0}, 'output': {'message': {'role': 'assistant', 'content': [{'text': 'The capital of France is Paris.'}]}}, 'stopReason': 'end_turn', 'usage': {'inputTokens': 14, 'outputTokens': 10, 'totalTokens': 24, 'cacheReadInputTokens': 0, 'cacheWriteInputTokens': 0}, 'metrics': {'latencyMs': 1490}}


In [4]:
# The reply text sits at output -> message -> content[0] -> text in the
# response dictionary printed by the previous cell.
print(response['output']['message']['content'][0]['text'])

The capital of France is Paris.


### Amazon Bedrock Models - text and images

In [5]:
from gen_ai_hub.proxy.native.amazon.clients import Session

# Sampling settings used by the request below
temperature = 0.6
max_Tokens = 1000
model = "anthropic--claude-4-sonnet"

# Client bound to the chosen model
bedrock = Session().client(model_name=model)

# Read the image as raw bytes (no base64 step here); the request passes
# them under the "bytes" key of the image source.
image_path = "SAP_logo.png"
fmt = "png"  # Format of the image
image_data = load_image_from_path(image_path)

# One user turn with a text question followed by the image
messages = [
    {
        "role": "user",
        "content": [
            {"text": "What is the content of the image?"},
            {"image": {"format": fmt, "source": {"bytes": image_data}}},
        ],
    }
]

response = bedrock.converse(
    messages=messages,
    inferenceConfig={"maxTokens": max_Tokens, "temperature": temperature},
)
print(response)

{'ResponseMetadata': {'RequestId': 'c2a60192-ef65-4d0f-926e-96987031644c', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Thu, 08 Jan 2026 14:18:39 GMT', 'content-type': 'application/json', 'content-length': '752', 'x-aicore-request-id': '5cd315ec-7135-9e17-bc02-34ba812db30d', 'x-amzn-requestid': 'c2a60192-ef65-4d0f-926e-96987031644c', 'x-upstream-service-time': '3925'}, 'RetryAttempts': 0}, 'output': {'message': {'role': 'assistant', 'content': [{'text': 'The image shows the SAP logo on a grid background. The logo consists of the letters "SAP" in large white text displayed on a blue geometric shape that resembles a parallelogram or angular banner. The background appears to be graph paper or a technical drawing grid with fine lines. This is the recognizable corporate logo of SAP SE, the German multinational software corporation known for enterprise software and business solutions.'}]}}, 'stopReason': 'end_turn', 'usage': {'inputTokens': 1194, 'outputTokens': 88, 'totalTokens': 1282, '

In [6]:
# Extract only the generated text (output -> message -> content[0] -> text)
# from the response dictionary printed by the previous cell.
print(response['output']['message']['content'][0]['text'])

The image shows the SAP logo on a grid background. The logo consists of the letters "SAP" in large white text displayed on a blue geometric shape that resembles a parallelogram or angular banner. The background appears to be graph paper or a technical drawing grid with fine lines. This is the recognizable corporate logo of SAP SE, the German multinational software corporation known for enterprise software and business solutions.


## OpenAI Models



### OpenAI Models - text only

In [7]:
from gen_ai_hub.proxy.native.openai import chat

# Request settings
temperature = 0.6
max_Tokens = 1000
model = "gpt-4o"  # Also compatible with Meta models like meta-llama3.1-70b-instruct

# A system instruction followed by the user's question
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the capital of France?"},
]

# Send the chat completion request and print the raw response object
response = chat.completions.create(
    model=model,
    messages=messages,
    max_tokens=max_Tokens,
    temperature=temperature,
)
print(response)


ChatCompletion(id='chatcmpl-Cvl8SEl6DKcBkNyKHd7GGl3ovlTbC', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='The capital of France is Paris.', refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None), content_filter_results={'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}})], created=1767881920, model='gpt-4o-2024-05-13', object='chat.completion', service_tier=None, system_fingerprint='fp_ee1d74bde0', usage=CompletionUsage(completion_tokens=8, prompt_tokens=24, total_tokens=32, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)), prompt_filter_results=[{'content_filter_res

In [8]:
# Answer text of the first choice
print(response.choices[0].message.content)

# Token accounting is printed only when the response exposes a truthy `usage`
usage = getattr(response, "usage", None)
if usage:
    print(
        f"Prompt tokens: {usage.prompt_tokens}",
        f"Completion tokens: {usage.completion_tokens}",
        f"Total tokens: {usage.total_tokens}",
        sep="\n",
    )

The capital of France is Paris.
Prompt tokens: 24
Completion tokens: 8
Total tokens: 32


### OpenAI Models - text and images

In [9]:
# Re-import the OpenAI chat client so this cell does not depend on which cell
# last assigned the name `chat` (the Google Vertex AI cells further down
# rebind `chat` to a GenerativeModel instance).
from gen_ai_hub.proxy.native.openai import chat

# Model parameters (same sampling temperature as the text-only example)
temperature = 0.6
model = "gpt-4o"

# Encode the image as base64 and embed it in a data URL
image_path = "SAP_logo.png"
base64_data, mime_type = load_image_as_base64(image_path)
messages = [
    {
        "role": "system",
        "content": "You are a helpful assistant that can answer questions and help with tasks."
    },
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "What is the content of the image?"
            },
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:{mime_type};base64,{base64_data}"
                }
            }
        ]
    }
]


response = chat.completions.create(
    messages=messages,
    model=model,
    temperature=temperature,
)

# Print the answer and, when the response exposes usage data, the prompt size
print(response.choices[0].message.content)
usage = getattr(response, "usage", None)
if usage:
    print(f"Prompt tokens: {usage.prompt_tokens}")

The image contains the SAP logo. The logo features the letters "SAP" in white, set against a blue background that forms a triangular shape.
Prompt tokens: 1138


### OpenAI Models - Reasoning models
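For reasoning models, the `reasoning_effort` parameter controls how much "thinking" the model does before it answers. The cell below sends the same question with each effort level and compares the response times.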

In [10]:
import time

# Re-import the OpenAI chat client so this cell does not depend on which cell
# last assigned the name `chat` (the Google Vertex AI cells further down
# rebind `chat` to a GenerativeModel instance).
from gen_ai_hub.proxy.native.openai import chat

# Model parameters
model = "gpt-5"
messages = [
    {"role": "user", "content": "What is Green theorem?"}
]

# Reasoning-effort levels to compare
reasoning_efforts = ["minimal", "low", "medium", "high"]
responses = {}

# Time the response for each reasoning effort.
# time.perf_counter() is a monotonic clock meant for measuring intervals,
# so the elapsed time is not affected by system clock adjustments.
for effort in reasoning_efforts:
    start_time = time.perf_counter()
    response = chat.completions.create(messages=messages, model=model, reasoning_effort=effort)  # OpenAI reasoning models do not allow temperature or max_tokens settings
    end_time = time.perf_counter()
    elapsed = end_time - start_time
    responses[effort] = {
        "response": response,
        "time_seconds": elapsed
    }
    print(f"Reasoning effort: {effort}, Time taken: {elapsed:.3f} seconds")

# Preview the first 300 characters of the answer from the last effort level
# in the list; `response` still holds the final loop iteration's result.
print(response.choices[0].message.content[:300] + '...')


Reasoning effort: minimal, Time taken: 2.674 seconds
Reasoning effort: low, Time taken: 10.278 seconds
Reasoning effort: medium, Time taken: 18.988 seconds
Reasoning effort: high, Time taken: 27.993 seconds
Green’s theorem is a fundamental result in plane vector calculus that relates a line integral around a closed curve to a double integral over the region it encloses.

Statement (circulation form):
If C is a positively oriented (counterclockwise), simple, closed, piecewise-smooth curve bounding a reg...


## Google Vertex AI Models

### Google Vertex AI Models - text only

In [11]:
from gen_ai_hub.proxy.native.google_vertexai.clients import GenerativeModel

model_name = "gemini-2.5-flash"
prompt = "What is the capital of France?"
generation_config = {"temperature": 0.0}

# NOTE: this rebinds `chat`, which the OpenAI cells imported as the OpenAI
# chat client; re-run that import before re-executing any OpenAI cell.
chat = GenerativeModel(model_name)

# Send the plain-text prompt and print the text of the reply
response = chat.generate_content(
    contents=prompt,
    generation_config=generation_config,
)
print(response.text)




  from google.cloud.aiplatform.utils import gcs_utils


The capital of France is **Paris**.


### Google Vertex AI Models - text and images

In [12]:
from gen_ai_hub.proxy.native.google_vertexai.clients import GenerativeModel

model_name = "gemini-2.5-flash"
generation_config = {"temperature": 0.0}

# Base64-encode the image and pick up its MIME type for the inline part
image_path = "SAP_logo.png"
base64_data, mime_type = load_image_as_base64(image_path)

# NOTE: this rebinds `chat`, which the OpenAI cells imported as the OpenAI
# chat client; re-run that import before re-executing any OpenAI cell.
chat = GenerativeModel(model_name)

# One user turn: a text question followed by the inline image data
contents = [
    {
        "role": "user",
        "parts": [
            {"text": "What is the content of the image?"},
            {"inline_data": {"mime_type": mime_type, "data": base64_data}},
        ],
    }
]

response = chat.generate_content(
    contents=contents,
    generation_config=generation_config,
)
print(response.text)



The image displays the **SAP logo** prominently against a grid background.

Here's a breakdown of the content:

1.  **SAP Logo:** The word "SAP" is written in large, bold, white, sans-serif capital letters.
2.  **Blue Gradient Background:** The "SAP" text is set against a blue gradient shape. This shape transitions from a lighter, brighter blue on the left to a darker blue on the right. The shape is rectangular on the left and bottom, but features a sharp diagonal cut on its right side, giving it a dynamic, almost arrow-like or flag-like appearance.
3.  **Grid Background:** The entire logo is placed on a light gray background with a fine white grid pattern, resembling graph paper or a technical design layout.
4.  **Ruler Markings:** Along the top and bottom edges of the image, there are subtle ruler-like markings, further emphasizing a design or measurement context.

In essence, it's a clean, professional depiction of the SAP logo, presented as if on a design blueprint or grid.


### Google Vertex AI Models - Multi-modal models
Gemini models accept not only text and images as input, but also audio and video, each combined with a text prompt.

In [13]:
from gen_ai_hub.proxy.native.google_vertexai.clients import GenerativeModel
import base64
model_name = "gemini-2.5-flash"

# Load the media file and base64-encode its content.
# The `with` block closes the file handle as soon as the bytes have been
# read, instead of leaving it open for the lifetime of the kernel.
with open("output.mp4", "rb") as media_file:
    encoded_media = base64.b64encode(media_file.read()).decode("utf-8")

# Detect the MIME type of the media file
def get_mime_type(file_path: str) -> str:
    """Determine the MIME type of a media file from its file extension.

    Args:
        file_path: Path or file name of the media file. Only the name is
            inspected; the file itself is never opened.

    Returns:
        The MIME type from the built-in video/audio table when the
        (case-insensitive) extension is listed there. Otherwise the type
        guessed by the standard ``mimetypes`` module, or
        ``"application/octet-stream"`` when nothing can be guessed, for
        example when the path has no extension.
    """
    # Local imports keep the function usable regardless of which other
    # notebook cells have been executed.
    import mimetypes
    from pathlib import Path

    mime_types = {
        'mp4': 'video/mp4',
        'avi': 'video/avi',
        'mov': 'video/mov',
        'webm': 'video/webm',
        'mp3': 'audio/mpeg',
        'wav': 'audio/wav',
        'flac': 'audio/flac',
        'm4a': 'audio/mp4',
        'ogg': 'audio/ogg'
    }

    # Path.suffix looks only at the final path component, so a dot in a
    # directory name ("release.v2/clip") is not mistaken for an extension.
    extension = Path(file_path).suffix.lstrip('.').lower()
    if extension in mime_types:
        return mime_types[extension]

    # Unknown or missing extension: prefer a registered type over an
    # invented "application/<extension>" value.
    guessed_type, _encoding = mimetypes.guess_type(file_path)
    return guessed_type or 'application/octet-stream'

# MIME type for the clip, derived from its file name
mime_type = get_mime_type("output.mp4")

generation_config = {"temperature": 0.0}

# One user turn: the safety questions followed by the inline media data
contents = [
    {
        "role": "user",
        "parts": [
            {"text": "1. Are there any safety violations in the video? 2. Are the railings visible on the stairs? If not, is it dangerous? 3. What safety measures should be taken based on what I saw? 4. List all safety violation and seconds"},
            {"inline_data": {"mime_type": mime_type, "data": encoded_media}},
        ],
    }
]

# NOTE: this rebinds `chat`, which the OpenAI cells imported as the OpenAI
# chat client; re-run that import before re-executing any OpenAI cell.
chat = GenerativeModel(model_name)

response = chat.generate_content(
    contents=contents,
    generation_config=generation_config,
)
print(response.text)

Based on the video provided, here's an analysis of the safety aspects:

1.  **Are there any safety violations in the video?**
    Yes, there are several significant safety violations visible in the video.

2.  **Are the railings visible on the stairs? If not, is it dangerous?**
    No, the stairs leading up to the elevated platform where the drilling rods are stacked **do not have visible handrails or guardrails**. This is **extremely dangerous**. Without railings, there is a high risk of falls, especially when carrying equipment, in wet or slippery conditions, or if a worker loses balance.

3.  **What safety measures should be taken based on what I saw?**
    Based on the observations, the following safety measures should be implemented:
    *   **Install Handrails and Guardrails:** All stairs, elevated platforms, and open-sided floors or platforms where there is a fall hazard should be equipped with standard handrails and guardrails.
    *   **Fall Protection:** For any work at heigh