Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 23 additions & 10 deletions sdk/python/agentfield/media_providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@

# Fal image size presets
FalImageSize = Literal[
"square_hd", # 1024x1024
"square", # 512x512
"portrait_4_3", # 768x1024
"square_hd", # 1024x1024
"square", # 512x512
"portrait_4_3", # 768x1024
"portrait_16_9", # 576x1024
"landscape_4_3", # 1024x768
"landscape_16_9", # 1024x576
"landscape_16_9", # 1024x576
]


Expand Down Expand Up @@ -191,6 +191,7 @@ def _get_client(self):

if self._api_key:
import os

os.environ["FAL_KEY"] = self._api_key

self._client = fal_client
Expand All @@ -200,9 +201,7 @@ def _get_client(self):
)
return self._client

def _parse_image_size(
self, size: str
) -> Union[str, Dict[str, int]]:
def _parse_image_size(self, size: str) -> Union[str, Dict[str, int]]:
"""
Parse image size into fal format.

Expand All @@ -214,8 +213,12 @@ def _parse_image_size(
"""
# Check if it's a fal preset
fal_presets = {
"square_hd", "square", "portrait_4_3", "portrait_16_9",
"landscape_4_3", "landscape_16_9"
"square_hd",
"square",
"portrait_4_3",
"portrait_16_9",
"landscape_4_3",
"landscape_16_9",
}
if size in fal_presets:
return size
Expand Down Expand Up @@ -346,6 +349,7 @@ async def generate_image(

except Exception as e:
from agentfield.logger import log_error

log_error(f"Fal image generation failed: {e}")
raise

Expand Down Expand Up @@ -434,6 +438,7 @@ async def generate_audio(

except Exception as e:
from agentfield.logger import log_error

log_error(f"Fal audio generation failed: {e}")
raise

Expand Down Expand Up @@ -532,6 +537,7 @@ async def generate_video(

except Exception as e:
from agentfield.logger import log_error

log_error(f"Fal video generation failed: {e}")
raise

Expand Down Expand Up @@ -587,6 +593,7 @@ async def transcribe_audio(

except Exception as e:
from agentfield.logger import log_error

log_error(f"Fal transcription failed: {e}")
raise

Expand Down Expand Up @@ -730,9 +737,14 @@ async def generate_image(
model: Optional[str] = None,
size: str = "1024x1024",
quality: str = "standard",
image_config: Optional[Dict[str, Any]] = None,
**kwargs,
) -> MultimodalResponse:
"""Generate image using OpenRouter's chat completions API."""
"""Generate image using OpenRouter's chat completions API.

Note: image_config is an OpenRouter-specific extension not present
in the base MediaProvider.generate_image() interface.
"""
from agentfield import vision

model = model or "openrouter/google/gemini-2.5-flash-image-preview"
Expand All @@ -748,6 +760,7 @@ async def generate_image(
quality=quality,
style=None,
response_format="url",
image_config=image_config,
**kwargs,
)

Expand Down
18 changes: 11 additions & 7 deletions sdk/python/agentfield/vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import asyncio
import os
from typing import Any, Optional
from typing import Any, Dict, Optional
from agentfield.logger import log_error


Expand Down Expand Up @@ -92,6 +92,7 @@ async def generate_image_openrouter(
quality: str,
style: Optional[str],
response_format: str,
image_config: Optional[Dict[str, Any]] = None,
**kwargs,
) -> Any:
"""
Expand All @@ -112,18 +113,17 @@ async def generate_image_openrouter(
quality: Image quality (may not be used by all OpenRouter models)
style: Image style (may not be used by all OpenRouter models)
response_format: Response format (may not be used by all OpenRouter models)
**kwargs: Additional OpenRouter-specific parameters (e.g., image_config)
image_config: Optional dict of OpenRouter image generation settings
(e.g., {"aspect_ratio": "16:9"}). Pass an empty dict to use
provider defaults explicitly.
**kwargs: Additional OpenRouter-specific parameters

Returns:
MultimodalResponse with generated image(s)

Raises:
ImportError: If litellm is not installed
Exception: If image generation fails

Note:
OpenRouter-specific parameters like `image_config` should be passed via kwargs.
Example: image_config={"aspect_ratio": "16:9"}
"""
try:
import litellm
Expand All @@ -143,9 +143,13 @@ async def generate_image_openrouter(
"model": model,
"messages": messages,
"modalities": ["image", "text"],
**kwargs, # Pass through any additional kwargs (e.g., image_config)
**kwargs,
}

# Add image_config if provided
if image_config is not None:
completion_params["image_config"] = image_config

try:
# Use LiteLLM's completion function (OpenRouter uses chat API)
# Wrap with timeout to prevent silent hangs
Expand Down
58 changes: 58 additions & 0 deletions sdk/python/tests/test_image_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import pytest
from unittest.mock import AsyncMock, MagicMock, patch


def _mock_openrouter_response():
"""Create a mock OpenRouter/litellm response."""
mock_response = MagicMock()
mock_response.choices = [MagicMock()]
mock_response.choices[0].message.content = "test"
mock_response.choices[0].message.images = []
return mock_response


@pytest.mark.asyncio
async def test_openrouter_image_config_passthrough():
    """Check that an explicit image_config is forwarded to litellm.acompletion."""
    expected_config = {
        "aspect_ratio": "16:9",
        "image_size": "4K",
    }
    fake_acompletion = AsyncMock(return_value=_mock_openrouter_response())

    with patch("litellm.acompletion", fake_acompletion):
        from agentfield.vision import generate_image_openrouter

        # Hand over a copy so the comparison below is made against an
        # independent dict rather than the very object that was passed in.
        await generate_image_openrouter(
            prompt="A landscape",
            model="openrouter/google/gemini-3.1-flash-image-preview",
            size="1024x1024",
            quality="standard",
            style=None,
            response_format="url",
            image_config=dict(expected_config),
        )

        # Inspect the keyword arguments of the most recent call on the mock.
        forwarded = fake_acompletion.call_args.kwargs
        assert forwarded.get("image_config") == expected_config


@pytest.mark.asyncio
async def test_openrouter_image_without_config():
    """Check that omitting image_config keeps the key out of the litellm call."""
    fake_acompletion = AsyncMock(return_value=_mock_openrouter_response())

    # Every argument except image_config is supplied.
    request = {
        "prompt": "A sunset",
        "model": "openrouter/google/gemini-2.5-flash-image-preview",
        "size": "1024x1024",
        "quality": "standard",
        "style": None,
        "response_format": "url",
    }

    with patch("litellm.acompletion", fake_acompletion):
        from agentfield.vision import generate_image_openrouter

        await generate_image_openrouter(**request)

        # The key must be absent from the forwarded keyword arguments.
        forwarded = fake_acompletion.call_args.kwargs
        assert "image_config" not in forwarded
Loading