From 398690f8caa4c56c54e615d54032e49d6e10c9df Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Wed, 10 Sep 2025 21:18:26 -0700 Subject: [PATCH 01/53] feat: update object detection test cases. --- tests/test_object_detection.py | 158 +++++++++++++++++++++++++++------ 1 file changed, 130 insertions(+), 28 deletions(-) diff --git a/tests/test_object_detection.py b/tests/test_object_detection.py index 521189c..031459f 100644 --- a/tests/test_object_detection.py +++ b/tests/test_object_detection.py @@ -1,43 +1,145 @@ -from unittest.mock import MagicMock -import unittest +import requests from jigsawstack.exceptions import JigsawStackError import jigsawstack import pytest -import asyncio import logging - +from dotenv import load_dotenv +load_dotenv() +import os logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack() -async_jigsaw = jigsawstack.AsyncJigsawStack() +jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) + +IMAGE_URL = "https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg" -def test_object_detection_response(): - try: - result = jigsaw.vision.object_detection( - { - "url": "https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg" - } - ) - print(result) - assert result["success"] == True - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") +TEST_CASES = [ + { + "name": "with_url_only", + "params": { + "url": IMAGE_URL + }, + "blob": None, + "options": None, + }, + { + "name": "with_blob_only", + "params": None, + "blob": IMAGE_URL, + "options": None, + }, + { + "name": "annotated_image_true", + "blob": IMAGE_URL, + "options": { + "annotated_image": True + }, + }, + { + "name": "with_annotated_image_false", + "blob": IMAGE_URL, + "options": { + "annotated_image": False + }, + }, + 
{ + "name": "with_blob_both_features", + "blob": IMAGE_URL, + "options": { + "features": ["object_detection", "gui"], + "annotated_image": True, + "return_type": "url" + }, + }, + { + "name": "with_blob_gui_features", + "blob": IMAGE_URL, + "options": { + "features": ["gui"], + "annotated_image": False + }, + }, + { + "name": "with_blob_object_detection_features", + "blob": IMAGE_URL, + "options": { + "features": ["object_detection"], + "annotated_image": True, + "return_type": "base64" + }, + }, + { + "name": "with_prompts", + "blob": IMAGE_URL, + "options": { + "prompts": ["castle", "tree"], + "annotated_image": True, + }, + }, + { + "name": "with_all_options", + "blob": IMAGE_URL, + "options": { + "features": ["object_detection", "gui"], + "prompts": ["car", "road", "tree"], + "annotated_image": True, + "return_type": "base64", + "return_masks": False, + }, + }, + ] -def test_object_detection_response_async(): - async def _test(): - client = jigsawstack.AsyncJigsawStack() +class TestObjectDetectionSync: + """Test synchronous object detection methods""" + + sync_test_cases = TEST_CASES + + @pytest.mark.parametrize("test_case", sync_test_cases, ids=[tc["name"] for tc in sync_test_cases]) + def test_object_detection(self, test_case): + """Test synchronous object detection with various inputs""" try: - result = await client.vision.object_detection( - { - "url": "https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg" - } - ) - print(result) + if test_case.get("blob"): + # Download blob content + blob_content = requests.get(test_case["blob"]).content + result = jigsaw.vision.object_detection( + blob_content, + test_case.get("options", {}) + ) + else: + # Use params directly + result = jigsaw.vision.object_detection(test_case["params"]) + + print(f"Test {test_case['name']}: {result}") assert result["success"] == True except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") + pytest.fail(f"Unexpected 
JigsawStackError in {test_case['name']}: {e}") + + +class TestObjectDetectionAsync: + """Test asynchronous object detection methods""" - asyncio.run(_test()) + async_test_cases = TEST_CASES + + @pytest.mark.parametrize("test_case", async_test_cases, ids=[tc["name"] for tc in async_test_cases]) + @pytest.mark.asyncio + async def test_object_detection_async(self, test_case): + """Test asynchronous object detection with various inputs""" + try: + if test_case.get("blob"): + # Download blob content + blob_content = requests.get(test_case["blob"]).content + result = await async_jigsaw.vision.object_detection( + blob_content, + test_case.get("options", {}) + ) + else: + # Use params directly + result = await async_jigsaw.vision.object_detection(test_case["params"]) + + print(f"Test {test_case['name']}: {result}") + assert result["success"] == True + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") \ No newline at end of file From fc18d4e0f787bff26037bf061b2095982bd433d4 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Wed, 10 Sep 2025 21:23:56 -0700 Subject: [PATCH 02/53] feat: updating testcases for file_store api. 
--- tests/test_file_store.py | 177 +++++++++++++++++++++++++++++---------- 1 file changed, 132 insertions(+), 45 deletions(-) diff --git a/tests/test_file_store.py b/tests/test_file_store.py index daef198..c44090b 100644 --- a/tests/test_file_store.py +++ b/tests/test_file_store.py @@ -1,64 +1,151 @@ -from unittest.mock import MagicMock -import unittest +import requests from jigsawstack.exceptions import JigsawStackError -from jigsawstack import JigsawStack - +import jigsawstack import pytest +import logging +from dotenv import load_dotenv +import os +import uuid -# flake8: noqa +load_dotenv() +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) -client = JigsawStack() +jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +TEXT_FILE_CONTENT = b"This is a test file content for JigsawStack storage" +JSON_FILE_CONTENT = b'{"test": "data", "key": "value"}' +BINARY_FILE_CONTENT = requests.get("https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg").content -@pytest.mark.skip(reason="Skipping TestStoreAPI class for now") -class TestStoreAPI(unittest.TestCase): - def test_upload_success_response(self) -> None: - # Sample file content as bytes - file_content = b"This is a test file content" - options = { - "key": "test-file.txt", +TEST_CASES_UPLOAD = [ + { + "name": "upload_text_file_with_key", + "file": TEXT_FILE_CONTENT, + "options": { + "key": "sample_file.txt", "content_type": "text/plain", "overwrite": True, + }, + }, + { + "name": "upload_image_with_temp_url", + "file": BINARY_FILE_CONTENT, + "options": { + "key": f"test_image.jpg", + "content_type": "image/jpeg", + "overwrite": True, "temp_public_url": True, - } - try: - result = client.store.upload(file_content, options) - assert result["success"] == True - except JigsawStackError as e: - assert e.message == "Failed to parse API response. 
Please try again." + }, + }, + { + "name": "upload_binary_file", + "file": BINARY_FILE_CONTENT, + "options": { + "overwrite": True, + }, + }, + { + "name": "upload_file_no_options", + "file": TEXT_FILE_CONTENT, + "options": None, + }, +] - def test_get_success_response(self) -> None: - key = "test-file.txt" + +class TestFileStoreSync: + """Test synchronous file store operations""" + + uploaded_keys = [] # Track uploaded files for cleanup + + @pytest.mark.parametrize("test_case", TEST_CASES_UPLOAD, ids=[tc["name"] for tc in TEST_CASES_UPLOAD]) + def test_file_upload(self, test_case): + """Test synchronous file upload with various options""" try: - result = client.store.get(key) - # For file retrieval, we expect the actual file content - assert result is not None + result = jigsaw.store.upload(test_case["file"], test_case["options"]) + + print(f"Upload test {test_case['name']}: {result}") + assert result.get("key") is not None + assert result.get("url") is not None + assert result.get("size") > 0 + + # Check temp_public_url if requested + if test_case.get("options") and test_case["options"].get("temp_public_url"): + assert result.get("temp_public_url") is not None + + # Store key for cleanup + self.uploaded_keys.append(result["key"]) + except JigsawStackError as e: - assert e.message == "Failed to parse API response. Please try again." 
- - def test_delete_success_response(self) -> None: - key = "test-file.txt" + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + def test_file_get(self): + """Test synchronous file retrieval""" + # First upload a file to retrieve + test_key = f"test-get-{uuid.uuid4().hex[:8]}.txt" try: - result = client.store.delete(key) - assert result["success"] == True + upload_result = jigsaw.store.upload( + TEXT_FILE_CONTENT, + {"key": test_key, "content_type": "text/plain"} + ) + + # Now retrieve it + file_content = jigsaw.store.get(upload_result["key"]) + assert file_content is not None + print(f"Retrieved file with key {upload_result['key']}") + + # Cleanup + self.uploaded_keys.append(upload_result["key"]) + except JigsawStackError as e: - assert e.message == "Failed to parse API response. Please try again." + pytest.fail(f"Unexpected JigsawStackError in file get: {e}") - def test_upload_without_options_success_response(self) -> None: - # Test upload without optional parameters - file_content = b"This is another test file content" + +class TestFileStoreAsync: + """Test asynchronous file store operations""" + + uploaded_keys = [] # Track uploaded files for cleanup + + @pytest.mark.parametrize("test_case", TEST_CASES_UPLOAD, ids=[tc["name"] for tc in TEST_CASES_UPLOAD]) + @pytest.mark.asyncio + async def test_file_upload_async(self, test_case): + """Test asynchronous file upload with various options""" try: - result = client.store.upload(file_content) - assert result["success"] == True + result = await async_jigsaw.store.upload(test_case["file"], test_case["options"]) + + print(f"Async upload test {test_case['name']}: {result}") + assert result.get("key") is not None + assert result.get("url") is not None + assert result.get("size") > 0 + + # Check temp_public_url if requested + if test_case.get("options") and test_case["options"].get("temp_public_url"): + assert result.get("temp_public_url") is not None + + # Store key for cleanup + 
self.uploaded_keys.append(result["key"]) + except JigsawStackError as e: - assert e.message == "Failed to parse API response. Please try again." - - def test_upload_with_partial_options_success_response(self) -> None: - # Test upload with partial options - file_content = b"This is a test file with partial options" - options = {"key": "partial-test-file.txt", "overwrite": False} + pytest.fail(f"Unexpected JigsawStackError in async {test_case['name']}: {e}") + + @pytest.mark.asyncio + async def test_file_get_async(self): + """Test asynchronous file retrieval""" + # First upload a file to retrieve + test_key = f"test-async-get-{uuid.uuid4().hex[:8]}.txt" try: - result = client.store.upload(file_content, options) - assert result["success"] == True + upload_result = await async_jigsaw.store.upload( + TEXT_FILE_CONTENT, + {"key": test_key, "content_type": "text/plain"} + ) + + # Now retrieve it + file_content = await async_jigsaw.store.get(upload_result["key"]) + assert file_content is not None + print(f"Async retrieved file with key {upload_result['key']}") + + # Cleanup + self.uploaded_keys.append(upload_result["key"]) + except JigsawStackError as e: - assert e.message == "Failed to parse API response. Please try again." 
+ pytest.fail(f"Unexpected JigsawStackError in async file get: {e}") \ No newline at end of file From 7792bb1efa59ec48416eb7e69d67c1aaaf8b2c8f Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Wed, 10 Sep 2025 21:36:22 -0700 Subject: [PATCH 03/53] test: dropping test cases for geo service (merged/deprecated with v3 in April) --- tests/test_geo.py | 38 -------------------------------------- 1 file changed, 38 deletions(-) delete mode 100644 tests/test_geo.py diff --git a/tests/test_geo.py b/tests/test_geo.py deleted file mode 100644 index e97e3fb..0000000 --- a/tests/test_geo.py +++ /dev/null @@ -1,38 +0,0 @@ -from unittest.mock import MagicMock -import unittest -from jigsawstack.exceptions import JigsawStackError -from jigsawstack import AsyncJigsawStack -import pytest -import asyncio -import logging - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -@pytest.mark.skip(reason="Skipping TestWebAPI class for now") -def test_async_country_response(): - async def _test(): - client = AsyncJigsawStack() - try: - result = await client.geo.country({"country_code": "SGP"}) - logger.info(result) - assert result["success"] == True - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") - - asyncio.run(_test()) - - -@pytest.mark.skip(reason="Skipping TestWebAPI class for now") -def test_async_search_response(): - async def _test(): - client = AsyncJigsawStack() - try: - result = await client.geo.search({"search_value": "Nigeria"}) - logger.info(result) - assert result["success"] == True - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") - - asyncio.run(_test()) From c57091356df64f6a2ee2df7583378bb51177cde9 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Wed, 10 Sep 2025 21:39:43 -0700 Subject: [PATCH 04/53] refactor: del custom_types dot py, utlized by text to speech. 
--- jigsawstack/custom_typing.py | 574 ----------------------------------- 1 file changed, 574 deletions(-) delete mode 100644 jigsawstack/custom_typing.py diff --git a/jigsawstack/custom_typing.py b/jigsawstack/custom_typing.py deleted file mode 100644 index e77adde..0000000 --- a/jigsawstack/custom_typing.py +++ /dev/null @@ -1,574 +0,0 @@ -from typing import Literal - -SupportedAccents = Literal[ - "af-ZA-female-1", - "af-ZA-male-1", - "am-ET-female-1", - "am-ET-male-1", - "ar-AE-female-1", - "ar-AE-male-1", - "ar-BH-female-1", - "ar-BH-male-1", - "ar-DZ-female-1", - "ar-DZ-male-1", - "ar-EG-female-1", - "ar-EG-male-1", - "ar-IQ-female-1", - "ar-IQ-male-1", - "ar-JO-female-1", - "ar-JO-male-1", - "ar-KW-female-1", - "ar-KW-male-1", - "ar-LB-female-1", - "ar-LB-male-1", - "ar-LY-female-1", - "ar-LY-male-1", - "ar-MA-female-1", - "ar-MA-male-1", - "ar-OM-female-1", - "ar-OM-male-1", - "ar-QA-female-1", - "ar-QA-male-1", - "ar-SA-female-1", - "ar-SA-male-1", - "ar-SY-female-1", - "ar-SY-male-1", - "ar-TN-female-1", - "ar-TN-male-1", - "ar-YE-female-1", - "ar-YE-male-1", - "as-IN-male-1", - "as-IN-female-1", - "az-AZ-female-1", - "az-AZ-male-1", - "bg-BG-female-1", - "bg-BG-male-1", - "bn-BD-female-1", - "bn-BD-male-1", - "bn-IN-female-1", - "bn-IN-male-1", - "bs-BA-female-1", - "bs-BA-male-1", - "ca-ES-female-1", - "ca-ES-male-1", - "ca-ES-female-2", - "cs-CZ-female-1", - "cs-CZ-male-1", - "cy-GB-female-1", - "cy-GB-male-1", - "da-DK-female-1", - "da-DK-male-1", - "de-AT-female-1", - "de-AT-male-1", - "de-CH-female-1", - "de-CH-male-1", - "de-DE-female-1", - "de-DE-male-1", - "de-DE-female-2", - "de-DE-male-2", - "de-DE-male-3", - "de-DE-female-3", - "de-DE-male-4", - "de-DE-male-5", - "de-DE-female-4", - "de-DE-male-6", - "de-DE-male-7", - "de-DE-female-5", - "de-DE-male-8", - "de-DE-female-6", - "de-DE-female-7", - "de-DE-male-9", - "de-DE-female-8", - "de-DE-female-9", - "de-DE-female-10", - "el-GR-female-2", - "el-GR-male-2", - "en-AU-female-2", - 
"en-AU-male-2", - "en-AU-female-3", - "en-AU-female-4", - "en-AU-male-3", - "en-AU-male-4", - "en-AU-female-5", - "en-AU-female-6", - "en-AU-female-7", - "en-AU-male-5", - "en-AU-female-8", - "en-AU-male-6", - "en-AU-male-7", - "en-AU-female-9", - "en-CA-female-2", - "en-CA-male-2", - "en-GB-female-2", - "en-GB-male-2", - "en-GB-female-3", - "en-GB-female-4", - "en-GB-male-3", - "en-GB-female-5", - "en-GB-male-4", - "en-GB-male-5", - "en-GB-female-6", - "en-GB-female-7", - "en-GB-male-6", - "en-GB-male-7", - "en-GB-female-8", - "en-GB-male-8", - "en-GB-female-9", - "en-GB-female-10", - "en-GB-male-9", - "en-GB-male-10", - "en-GB-female-11", - "en-HK-female-1", - "en-HK-male-1", - "en-IE-female-3", - "en-IE-male-3", - "en-IN-female-3", - "en-IN-male-3", - "en-IN-male-4", - "en-IN-female-4", - "en-IN-female-5", - "en-IN-female-6", - "en-IN-male-5", - "en-IN-male-6", - "en-KE-female-1", - "en-KE-male-1", - "en-NG-female-1", - "en-NG-male-1", - "en-NZ-female-1", - "en-NZ-male-1", - "en-PH-female-1", - "en-PH-male-1", - "en-SG-female-1", - "en-SG-male-1", - "en-TZ-female-1", - "en-TZ-male-1", - "en-US-female-3", - "en-US-female-4", - "en-US-male-3", - "en-US-male-4", - "en-US-female-5", - "en-US-female-6", - "en-US-male-5", - "en-US-male-6", - "en-US-female-7", - "en-US-male-7", - "en-US-female-8", - "en-US-male-8", - "en-US-female-9", - "en-US-male-9", - "en-US-female-10", - "en-US-male-10", - "en-US-female-11", - "en-US-male-11", - "en-US-female-12", - "en-US-male-12", - "en-US-female-13", - "en-US-female-14", - "en-US-female-15", - "en-US-female-16", - "en-US-male-13", - "en-US-male-14", - "en-US-female-17", - "en-US-female-18", - "en-US-male-15", - "en-US-male-16", - "en-US-female-19", - "en-US-female-20", - "en-US-female-21", - "en-US-female-22", - "en-US-male-17", - "en-US-male-18", - "en-US-male-19", - "en-US-male-20", - "en-US-male-21", - "en-US-female-23", - "en-US-male-22", - "en-US-male-23", - "en-US-neutral-1", - "en-US-male-24", - "en-US-male-25", - 
"en-US-male-26", - "en-US-male-27", - "en-US-female-24", - "en-US-female-25", - "en-US-female-26", - "en-US-female-27", - "en-US-male-28", - "en-US-female-28", - "en-US-female-29", - "en-US-female-30", - "en-US-male-29", - "en-US-male-30", - "en-ZA-female-1", - "en-ZA-male-1", - "es-AR-female-1", - "es-AR-male-1", - "es-BO-female-1", - "es-BO-male-1", - "es-CL-female-1", - "es-CL-male-1", - "es-CO-female-1", - "es-CO-male-1", - "es-CR-female-1", - "es-CR-male-1", - "es-CU-female-1", - "es-CU-male-1", - "es-DO-female-1", - "es-DO-male-1", - "es-EC-female-1", - "es-EC-male-1", - "es-ES-female-9", - "es-ES-male-10", - "es-ES-female-10", - "es-ES-male-11", - "es-ES-male-12", - "es-ES-male-13", - "es-ES-female-11", - "es-ES-female-12", - "es-ES-female-13", - "es-ES-female-14", - "es-ES-male-14", - "es-ES-male-15", - "es-ES-male-16", - "es-ES-female-15", - "es-ES-female-16", - "es-ES-female-17", - "es-ES-female-18", - "es-ES-female-19", - "es-ES-female-20", - "es-ES-female-21", - "es-ES-male-17", - "es-ES-male-18", - "es-ES-female-22", - "es-ES-female-23", - "es-GQ-female-1", - "es-GQ-male-1", - "es-GT-female-1", - "es-GT-male-1", - "es-HN-female-1", - "es-HN-male-1", - "es-MX-female-12", - "es-MX-male-11", - "es-MX-female-13", - "es-MX-female-14", - "es-MX-female-15", - "es-MX-male-12", - "es-MX-male-13", - "es-MX-female-16", - "es-MX-male-14", - "es-MX-male-15", - "es-MX-female-17", - "es-MX-female-18", - "es-MX-male-16", - "es-MX-female-19", - "es-MX-male-17", - "es-NI-female-1", - "es-NI-male-1", - "es-PA-female-1", - "es-PA-male-1", - "es-PE-female-1", - "es-PE-male-1", - "es-PR-female-1", - "es-PR-male-1", - "es-PY-female-1", - "es-PY-male-1", - "es-SV-female-1", - "es-SV-male-1", - "es-US-female-1", - "es-US-male-1", - "es-UY-female-1", - "es-UY-male-1", - "es-VE-female-1", - "es-VE-male-1", - "et-EE-female-11", - "et-EE-male-10", - "eu-ES-female-11", - "eu-ES-male-10", - "fa-IR-female-11", - "fa-IR-male-10", - "fi-FI-female-12", - "fi-FI-male-11", - 
"fi-FI-female-13", - "fil-PH-female-11", - "fil-PH-male-10", - "fr-BE-female-12", - "fr-BE-male-11", - "fr-CA-female-12", - "fr-CA-male-11", - "fr-CA-male-12", - "fr-CA-male-13", - "fr-CH-female-12", - "fr-CH-male-11", - "fr-FR-female-12", - "fr-FR-male-11", - "fr-FR-male-12", - "fr-FR-female-13", - "fr-FR-female-14", - "fr-FR-male-13", - "fr-FR-female-15", - "fr-FR-female-16", - "fr-FR-female-17", - "fr-FR-male-14", - "fr-FR-female-18", - "fr-FR-male-15", - "fr-FR-male-16", - "fr-FR-male-17", - "fr-FR-female-19", - "fr-FR-female-20", - "fr-FR-male-18", - "fr-FR-female-21", - "fr-FR-male-19", - "fr-FR-male-20", - "ga-IE-female-12", - "ga-IE-male-12", - "gl-ES-female-12", - "gl-ES-male-12", - "gu-IN-female-1", - "gu-IN-male-1", - "he-IL-female-12", - "he-IL-male-12", - "hi-IN-female-13", - "hi-IN-male-13", - "hi-IN-male-14", - "hi-IN-female-14", - "hi-IN-female-15", - "hi-IN-male-15", - "hi-IN-male-16", - "hr-HR-female-12", - "hr-HR-male-12", - "hu-HU-female-13", - "hu-HU-male-13", - "hy-AM-female-12", - "hy-AM-male-12", - "id-ID-female-13", - "id-ID-male-13", - "is-IS-female-12", - "is-IS-male-12", - "it-IT-female-13", - "it-IT-female-14", - "it-IT-male-13", - "it-IT-male-14", - "it-IT-male-15", - "it-IT-male-16", - "it-IT-female-15", - "it-IT-female-16", - "it-IT-male-17", - "it-IT-male-18", - "it-IT-female-17", - "it-IT-female-18", - "it-IT-male-19", - "it-IT-female-19", - "it-IT-female-20", - "it-IT-male-20", - "it-IT-male-21", - "it-IT-male-22", - "it-IT-male-23", - "it-IT-male-24", - "it-IT-female-21", - "it-IT-female-22", - "it-IT-male-25", - "it-IT-male-26", - "iu-Cans-CA-female-1", - "iu-Cans-CA-male-1", - "iu-Latn-CA-female-1", - "iu-Latn-CA-male-1", - "ja-JP-female-14", - "ja-JP-male-16", - "ja-JP-female-15", - "ja-JP-male-17", - "ja-JP-female-16", - "ja-JP-male-18", - "ja-JP-female-17", - "ja-JP-male-19", - "ja-JP-male-20", - "jv-ID-female-13", - "jv-ID-male-16", - "ka-GE-female-13", - "ka-GE-male-16", - "kk-KZ-female-13", - "kk-KZ-male-16", - 
"km-KH-female-13", - "km-KH-male-16", - "kn-IN-female-13", - "kn-IN-male-16", - "ko-KR-female-14", - "ko-KR-male-17", - "ko-KR-male-18", - "ko-KR-male-19", - "ko-KR-male-20", - "ko-KR-female-15", - "ko-KR-female-16", - "ko-KR-female-17", - "ko-KR-female-18", - "ko-KR-male-21", - "ko-KR-male-22", - "lo-LA-female-13", - "lo-LA-male-17", - "lt-LT-female-13", - "lt-LT-male-17", - "lv-LV-female-13", - "lv-LV-male-17", - "mk-MK-female-13", - "mk-MK-male-17", - "ml-IN-female-13", - "ml-IN-male-17", - "mn-MN-female-13", - "mn-MN-male-17", - "mr-IN-female-1", - "mr-IN-male-1", - "ms-MY-female-13", - "ms-MY-male-17", - "mt-MT-female-13", - "mt-MT-male-17", - "my-MM-female-13", - "my-MM-male-17", - "nb-NO-female-14", - "nb-NO-male-18", - "nb-NO-female-15", - "ne-NP-female-13", - "ne-NP-male-17", - "nl-BE-female-14", - "nl-BE-male-18", - "nl-NL-female-14", - "nl-NL-male-18", - "nl-NL-female-15", - "or-IN-female-1", - "or-IN-male-1", - "pa-IN-male-1", - "pa-IN-female-1", - "pl-PL-female-14", - "pl-PL-male-18", - "pl-PL-female-15", - "ps-AF-female-13", - "ps-AF-male-17", - "pt-BR-female-14", - "pt-BR-male-18", - "pt-BR-female-15", - "pt-BR-male-19", - "pt-BR-female-16", - "pt-BR-male-20", - "pt-BR-female-17", - "pt-BR-male-21", - "pt-BR-male-22", - "pt-BR-female-18", - "pt-BR-female-19", - "pt-BR-female-20", - "pt-BR-male-23", - "pt-BR-female-21", - "pt-BR-male-24", - "pt-BR-female-22", - "pt-BR-male-25", - "pt-BR-male-26", - "pt-BR-female-23", - "pt-BR-female-24", - "pt-PT-female-15", - "pt-PT-male-19", - "pt-PT-female-16", - "ro-RO-female-14", - "ro-RO-male-18", - "ru-RU-female-15", - "ru-RU-male-19", - "ru-RU-female-16", - "si-LK-female-14", - "si-LK-male-18", - "sk-SK-female-14", - "sk-SK-male-18", - "sl-SI-female-14", - "sl-SI-male-18", - "so-SO-female-14", - "so-SO-male-18", - "sq-AL-female-14", - "sq-AL-male-18", - "sr-Latn-RS-male-1", - "sr-Latn-RS-female-1", - "sr-RS-female-14", - "sr-RS-male-18", - "su-ID-female-14", - "su-ID-male-18", - "sv-SE-female-15", - 
"sv-SE-male-19", - "sv-SE-female-16", - "sw-KE-female-14", - "sw-KE-male-18", - "sw-TZ-female-1", - "sw-TZ-male-1", - "ta-IN-female-14", - "ta-IN-male-18", - "ta-LK-female-1", - "ta-LK-male-1", - "ta-MY-female-1", - "ta-MY-male-1", - "ta-SG-female-1", - "ta-SG-male-1", - "te-IN-female-14", - "te-IN-male-18", - "th-TH-female-15", - "th-TH-male-19", - "th-TH-female-16", - "tr-TR-female-15", - "tr-TR-male-19", - "uk-UA-female-14", - "uk-UA-male-18", - "ur-IN-female-1", - "ur-IN-male-1", - "ur-PK-female-14", - "ur-PK-male-18", - "uz-UZ-female-14", - "uz-UZ-male-18", - "vi-VN-female-14", - "vi-VN-male-18", - "wuu-CN-female-1", - "wuu-CN-male-1", - "yue-CN-female-1", - "yue-CN-male-1", - "zh-CN-female-15", - "zh-CN-male-19", - "zh-CN-male-20", - "zh-CN-female-16", - "zh-CN-male-21", - "zh-CN-female-17", - "zh-CN-female-18", - "zh-CN-female-19", - "zh-CN-female-20", - "zh-CN-female-21", - "zh-CN-female-22", - "zh-CN-female-23", - "zh-CN-female-24", - "zh-CN-female-25", - "zh-CN-female-26", - "zh-CN-female-27", - "zh-CN-female-28", - "zh-CN-female-29", - "zh-CN-female-30", - "zh-CN-female-31", - "zh-CN-female-32", - "zh-CN-female-33", - "zh-CN-female-34", - "zh-CN-male-22", - "zh-CN-male-23", - "zh-CN-male-24", - "zh-CN-male-25", - "zh-CN-male-26", - "zh-CN-male-27", - "zh-CN-male-28", - "zh-CN-male-29", - "zh-CN-male-30", - "zh-CN-male-31", - "zh-CN-male-32", - "zh-CN-male-33", - "zh-CN-guangxi-male-1", - "zh-CN-henan-male-1", - "zh-CN-liaoning-female-2", - "zh-CN-liaoning-male-1", - "zh-CN-shaanxi-female-2", - "zh-CN-shandong-male-1", - "zh-CN-sichuan-male-1", - "zh-HK-female-18", - "zh-HK-male-22", - "zh-HK-female-19", - "zh-TW-female-19", - "zh-TW-male-22", - "zh-TW-female-20", - "zu-ZA-female-17", - "zu-ZA-male-21", -] From 43fc3c57a0b3ba8bba248a4ec4f65a2de570e689 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Wed, 10 Sep 2025 22:23:40 -0700 Subject: [PATCH 05/53] fix: updating response types, and formatting for image_generation endpoint. 
--- jigsawstack/image_generation.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/jigsawstack/image_generation.py b/jigsawstack/image_generation.py index b868ada..d615b6d 100644 --- a/jigsawstack/image_generation.py +++ b/jigsawstack/image_generation.py @@ -1,9 +1,8 @@ -from typing import Any, Dict, List, Union, cast +from typing import Any, Dict, Union, cast from typing_extensions import NotRequired, TypedDict, Literal, Required from .request import Request, RequestConfig from .async_request import AsyncRequest -from typing import List, Union from ._config import ClientConfig @@ -77,9 +76,9 @@ class ImageGenerationResponse(TypedDict): """ Indicates whether the image generation was successful. """ - image: bytes + url: NotRequired[str] """ - The generated image as a blob. + The generated image as a URL or base64 string. """ @@ -103,7 +102,7 @@ def __init__( def image_generation( self, params: ImageGenerationParams - ) -> ImageGenerationResponse: + ) -> Union[ImageGenerationResponse, bytes]: path = "/ai/image_generation" resp = Request( config=self.config, @@ -134,7 +133,7 @@ def __init__( async def image_generation( self, params: ImageGenerationParams - ) -> ImageGenerationResponse: + ) -> Union[ImageGenerationResponse, bytes]: path = "/ai/image_generation" resp = await AsyncRequest( config=self.config, From 3421bf44cc7e5b6eccecaa9518818ee11cf0ec78 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Wed, 10 Sep 2025 22:24:28 -0700 Subject: [PATCH 06/53] test: updating testcases for image-generation service. 
--- tests/test_image_generation.py | 247 ++++++++++++++++++++++++++++----- 1 file changed, 209 insertions(+), 38 deletions(-) diff --git a/tests/test_image_generation.py b/tests/test_image_generation.py index 6cf275a..fe2dc79 100644 --- a/tests/test_image_generation.py +++ b/tests/test_image_generation.py @@ -1,57 +1,228 @@ -from unittest.mock import MagicMock -import unittest +import requests from jigsawstack.exceptions import JigsawStackError import jigsawstack import pytest -import asyncio import logging -import io +from dotenv import load_dotenv +import os +import base64 +load_dotenv() logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack() -async_jigsaw = jigsawstack.AsyncJigsawStack() +jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +IMAGE_URL = "https://images.unsplash.com/photo-1494588024300-e9df7ff98d78?q=80&w=1284&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" +FILE_STORE_KEY = jigsaw.store.upload(requests.get(IMAGE_URL).content, { + "filename": "test_image.jpg", + "content_type": "image/jpeg", + "overwrite": True + }) -def test_image_generation_response(): - async def _test(): - client = jigsawstack.AsyncJigsawStack() +TEST_CASES = [ + { + "name": "basic_generation_with_prompt", + "params": { + "prompt": "A beautiful mountain landscape at sunset", + }, + }, + { + "name": "with_aspect_ratio", + "params": { + "prompt": "A serene lake with mountains in the background", + "aspect_ratio": "16:9" + }, + }, + { + "name": "with_custom_dimensions", + "params": { + "prompt": "A futuristic city skyline", + "width": 1024, + "height": 768 + }, + }, + { + "name": "with_output_format_png", + "params": { + "prompt": "A colorful abstract painting", + "output_format": "png" + }, + }, + { + "name": "with_advanced_config", + "params": { + "prompt": "A 
realistic portrait of a person", + "advance_config": { + "negative_prompt": "blurry, low quality, distorted", + "guidance": 7, + "seed": 42 + } + }, + }, + { + "name": "with_steps", + "params": { + "prompt": "A detailed botanical illustration", + "steps": 30, + "aspect_ratio": "3:4", + "return_type": "base64" + }, + }, + { + "name": "with_return_type_url", + "params": { + "prompt": "A vintage car on a desert road", + "return_type": "url" + }, + }, + { + "name": "with_return_type_base64", + "params": { + "prompt": "A fantasy castle on a hill", + "return_type": "base64" + } + }, + { + "name": "with_all_options", + "params": { + "prompt": "An intricate steampunk clockwork mechanism", + "aspect_ratio": "4:3", + "steps": 25, + "output_format": "png", + "advance_config": { + "negative_prompt": "simple, plain, boring", + "guidance": 8, + "seed": 12345 + }, + "return_type": "base64" + }, + }, +] + +# Test cases for image-to-image generation (using existing images as input) +IMAGE_TO_IMAGE_TEST_CASES = [ + { + "name": "with_url", + "params": { + "prompt": "Add snow effects to this image", + "url": IMAGE_URL, + "return_type": "base64" + }, + }, + { + "name": "with_file_store_key", + "params": { + "prompt": "Apply a cyberpunk style to this image", + "file_store_key": FILE_STORE_KEY, + }, + } +] + + +class TestImageGenerationSync: + """Test synchronous image generation methods""" + + @pytest.mark.parametrize("test_case", TEST_CASES, ids=[tc["name"] for tc in TEST_CASES]) + def test_image_generation(self, test_case): + """Test synchronous image generation with various parameters""" try: - result = await client.image_generation( - { - "prompt": "A beautiful mountain landscape at sunset", - "aspect_ratio": "16:9", - } - ) - # Just check if we got some data back + result = jigsaw.image_generation(test_case["params"]) + + print(type(result)) + + if isinstance(result, dict): + print(result) + # Check response structure assert result is not None - assert len(result) > 0 - except 
JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") - asyncio.run(_test()) + if type(result) is dict: + # Check for image data based on return_type + if test_case["params"].get("return_type") == "url": + assert result.get("url") is not None + assert requests.get(result["url"]).status_code == 200 + assert isinstance(result["url"], str) + elif test_case["params"].get("return_type") == "base64": + assert result.get("url") is not None + elif test_case["params"].get("return_type") == "url": + assert result.get("url") is not None + assert requests.get(result["url"]).status_code == 200 + else: + assert isinstance(result, bytes) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + @pytest.mark.parametrize("test_case", IMAGE_TO_IMAGE_TEST_CASES[:1], ids=[tc["name"] for tc in IMAGE_TO_IMAGE_TEST_CASES[:1]]) + def test_image_to_image_generation(self, test_case): + """Test image-to-image generation with URL input""" + try: + + result = jigsaw.image_generation(test_case["params"]) + + print(f"Test {test_case['name']}: Generated image from input") + assert result is not None + + if type(result) is dict: + assert result.get("success") == True + assert result.get("url") is not None + elif type(result) is bytes: + assert isinstance(result, bytes) + else: + pytest.fail(f"Unexpected result type in {test_case['name']}: {type(result)}") + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") -def test_image_generation_with_advanced_config(): - async def _test(): - client = jigsawstack.AsyncJigsawStack() + +class TestImageGenerationAsync: + """Test asynchronous image generation methods""" + + @pytest.mark.parametrize("test_case", TEST_CASES, ids=[tc["name"] for tc in TEST_CASES]) + @pytest.mark.asyncio + async def test_image_generation_async(self, test_case): + """Test asynchronous image generation with various parameters""" try: - result = await 
client.image_generation( - { - "prompt": "A beautiful mountain landscape at sunset", - "output_format": "png", - "advance_config": { - "negative_prompt": "blurry, low quality", - "guidance": 7, - "seed": 42, - }, - } - ) - # Just check if we got some data back + result = await async_jigsaw.image_generation(test_case["params"]) + + print(f"Async test {test_case['name']}: Generated image") + + # Check response structure assert result is not None - assert len(result) > 0 + if type(result) is dict: + # Check for image data based on return_type + if test_case["params"].get("return_type") == "url": + assert result.get("url") is not None + assert requests.get(result["url"]).status_code == 200 + assert isinstance(result["url"], str) + assert result["url"].startswith("http") + elif test_case["params"].get("return_type") == "base64": + assert result.get("url") is not None + elif test_case["params"].get("return_type") == "url": + assert result.get("url") is not None + assert requests.get(result["url"]).status_code == 200 + else: + assert isinstance(result, bytes) + except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") + pytest.fail(f"Unexpected JigsawStackError in async {test_case['name']}: {e}") + + @pytest.mark.parametrize("test_case", IMAGE_TO_IMAGE_TEST_CASES[:1], ids=[tc["name"] for tc in IMAGE_TO_IMAGE_TEST_CASES[:1]]) + @pytest.mark.asyncio + async def test_image_to_image_generation_async(self, test_case): + """Test asynchronous image-to-image generation with URL input""" + try: + result = await async_jigsaw.image_generation(test_case["params"]) - asyncio.run(_test()) + assert result is not None + if type(result) is dict: + assert result.get("success") == True + assert result.get("url") is not None + elif type(result) is bytes: + assert isinstance(result, bytes) + else: + pytest.fail(f"Unexpected result type in {test_case['name']}: {type(result)}") + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in async 
{test_case['name']}: {e}") \ No newline at end of file From 2b7f91ecd0a73648bdd49a40e11fdee8f693333c Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 07:18:22 -0700 Subject: [PATCH 07/53] test: adding new test cases for STT. --- tests/test_audio.py | 262 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 262 insertions(+) create mode 100644 tests/test_audio.py diff --git a/tests/test_audio.py b/tests/test_audio.py new file mode 100644 index 0000000..96169d2 --- /dev/null +++ b/tests/test_audio.py @@ -0,0 +1,262 @@ +import requests +from jigsawstack.exceptions import JigsawStackError +import jigsawstack +import pytest +import logging +from dotenv import load_dotenv +import os + +load_dotenv() +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) + +# Sample audio URLs for testing +AUDIO_URL = AUDIO_URL_LONG = "https://jigsawstack.com/preview/stt-example.wav" + + +TEST_CASES = [ + { + "name": "with_url_only", + "params": { + "url": AUDIO_URL + }, + "blob": None, + "options": None, + }, + { + "name": "with_url_and_language", + "params": { + "url": AUDIO_URL, + "language": "en" + }, + "blob": None, + "options": None, + }, + { + "name": "with_url_auto_detect_language", + "params": { + "url": AUDIO_URL, + "language": "auto" + }, + "blob": None, + "options": None, + }, + { + "name": "with_url_and_translate", + "params": { + "url": AUDIO_URL, + "translate": True + }, + "blob": None, + "options": None, + }, + { + "name": "with_blob_only", + "params": None, + "blob": AUDIO_URL, + "options": None, + }, + { + "name": "with_blob_and_language", + "params": None, + "blob": AUDIO_URL, + "options": { + "language": "en" + }, + }, + { + "name": "with_blob_auto_detect", + "params": None, + "blob": AUDIO_URL, + "options": { + "language": "auto" + }, + }, + { + 
"name": "with_blob_and_translate", + "params": None, + "blob": AUDIO_URL, + "options": { + "translate": True, + "language": "en" + }, + }, + { + "name": "with_by_speaker", + "params": { + "url": AUDIO_URL_LONG, + "by_speaker": True + }, + "blob": None, + "options": None, + }, + { + "name": "with_chunk_settings", + "params": { + "url": AUDIO_URL, + "batch_size": 5, + "chunk_duration": 15 + }, + "blob": None, + "options": None, + }, + { + "name": "with_all_options", + "params": None, + "blob": AUDIO_URL_LONG, + "options": { + "language": "auto", + "translate": False, + "by_speaker": True, + "batch_size": 10, + "chunk_duration": 15 + }, + }, +] + +# Test cases with webhook (separate as they return different response) +WEBHOOK_TEST_CASES = [ + { + "name": "with_webhook_url", + "params": { + "url": AUDIO_URL, + "webhook_url": "https://webhook.site/test-webhook" + }, + "blob": None, + "options": None, + }, + { + "name": "with_blob_and_webhook", + "params": None, + "blob": AUDIO_URL, + "options": { + "webhook_url": "https://webhook.site/test-webhook", + "language": "en" + }, + }, +] + + +class TestAudioSync: + """Test synchronous audio speech-to-text methods""" + + @pytest.mark.parametrize("test_case", TEST_CASES, ids=[tc["name"] for tc in TEST_CASES]) + def test_speech_to_text(self, test_case): + """Test synchronous speech-to-text with various inputs""" + try: + if test_case.get("blob"): + # Download audio content + blob_content = requests.get(test_case["blob"]).content + result = jigsaw.audio.speech_to_text( + blob_content, + test_case.get("options", {}) + ) + else: + # Use params directly + result = jigsaw.audio.speech_to_text(test_case["params"]) + # Verify response structure + assert result["success"] == True + assert result.get("text", None) is not None and isinstance(result["text"], str) + + # Check for chunks + if result.get("chunks", None): + assert isinstance(result["chunks"], list) + + # Check for speaker diarization if requested + if result.get("speakers", 
None): + assert isinstance(result["speakers"], list) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + @pytest.mark.parametrize("test_case", WEBHOOK_TEST_CASES, ids=[tc["name"] for tc in WEBHOOK_TEST_CASES]) + def test_speech_to_text_webhook(self, test_case): + """Test synchronous speech-to-text with webhook""" + try: + if test_case.get("blob"): + # Download audio content + blob_content = requests.get(test_case["blob"]).content + result = jigsaw.audio.speech_to_text( + blob_content, + test_case.get("options", {}) + ) + else: + # Use params directly + result = jigsaw.audio.speech_to_text(test_case["params"]) + + print(f"Test {test_case['name']}: Webhook response") + + # Verify webhook response structure + assert result["success"] == True + assert result.get("status") in ["processing", "error"] + assert "id" in result + assert isinstance(result["id"], str) + + except JigsawStackError as e: + # Webhook URLs might fail if invalid + print(f"Expected possible error for webhook test {test_case['name']}: {e}") + + +class TestAudioAsync: + """Test asynchronous audio speech-to-text methods""" + + @pytest.mark.parametrize("test_case", TEST_CASES, ids=[tc["name"] for tc in TEST_CASES]) + @pytest.mark.asyncio + async def test_speech_to_text_async(self, test_case): + """Test asynchronous speech-to-text with various inputs""" + try: + if test_case.get("blob"): + # Download audio content + blob_content = requests.get(test_case["blob"]).content + result = await async_jigsaw.audio.speech_to_text( + blob_content, + test_case.get("options", {}) + ) + else: + # Use params directly + result = await async_jigsaw.audio.speech_to_text(test_case["params"]) + + # Verify response structure + assert result["success"] == True + assert result.get("text", None) is not None and isinstance(result["text"], str) + + # Check for chunks + if result.get("chunks", None): + assert isinstance(result["chunks"], list) + + # Check for speaker 
diarization if requested + if result.get("speakers", None): + assert isinstance(result["speakers"], list) + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in async {test_case['name']}: {e}") + + @pytest.mark.parametrize("test_case", WEBHOOK_TEST_CASES, ids=[tc["name"] for tc in WEBHOOK_TEST_CASES]) + @pytest.mark.asyncio + async def test_speech_to_text_webhook_async(self, test_case): + """Test asynchronous speech-to-text with webhook""" + try: + if test_case.get("blob"): + # Download audio content + blob_content = requests.get(test_case["blob"]).content + result = await async_jigsaw.audio.speech_to_text( + blob_content, + test_case.get("options", {}) + ) + else: + # Use params directly + result = await async_jigsaw.audio.speech_to_text(test_case["params"]) + + print(f"Async test {test_case['name']}: Webhook response") + + # Verify webhook response structure + assert result["success"] == True + assert result.get("status") in ["processing", "error"] + assert "id" in result + assert isinstance(result["id"], str) + + except JigsawStackError as e: + # Webhook URLs might fail if invalid + print(f"Expected possible error for async webhook test {test_case['name']}: {e}") \ No newline at end of file From 17eb2de96fde5d548448b5c91f7824b2d4fd4f60 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 08:19:58 -0700 Subject: [PATCH 08/53] fix: formatting, unused imports and param encoding. 
--- jigsawstack/audio.py | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/jigsawstack/audio.py b/jigsawstack/audio.py index 2046c58..cb4f199 100644 --- a/jigsawstack/audio.py +++ b/jigsawstack/audio.py @@ -3,9 +3,7 @@ from .request import Request, RequestConfig from .async_request import AsyncRequest, AsyncRequestConfig from ._config import ClientConfig -from typing import Any, Dict, List, cast -from typing_extensions import NotRequired, TypedDict, Literal -from .custom_typing import SupportedAccents +from typing_extensions import Literal from .helpers import build_path from ._types import BaseResponse @@ -80,22 +78,21 @@ def speech_to_text( blob: Union[SpeechToTextParams, bytes], options: Optional[SpeechToTextParams] = None, ) -> Union[SpeechToTextResponse, SpeechToTextWebhookResponse]: + options = options or {} + path = "/ai/transcribe" + content_type = options.get("content_type", "application/octet-stream") + headers = {"Content-Type": content_type} if isinstance( blob, dict ): # If params is provided as a dict, we assume it's the first argument resp = Request( config=self.config, - path="/ai/transcribe", + path=path, params=cast(Dict[Any, Any], blob), verb="post", ).perform_with_content() return resp - options = options or {} - path = build_path(base_path="/ai/transcribe", params=options) - content_type = options.get("content_type", "application/octet-stream") - headers = {"Content-Type": content_type} - resp = Request( config=self.config, path=path, @@ -137,20 +134,19 @@ async def speech_to_text( blob: Union[SpeechToTextParams, bytes], options: Optional[SpeechToTextParams] = None, ) -> Union[SpeechToTextResponse, SpeechToTextWebhookResponse]: + options = options or {} + path = "/ai/transcribe" + content_type = options.get("content_type", "application/octet-stream") + headers = {"Content-Type": content_type} if isinstance(blob, dict): resp = await AsyncRequest( config=self.config, - path="/ai/transcribe", + 
path=path, params=cast(Dict[Any, Any], blob), verb="post", ).perform_with_content() return resp - options = options or {} - path = build_path(base_path="/ai/transcribe", params=options) - content_type = options.get("content_type", "application/octet-stream") - headers = {"Content-Type": content_type} - resp = await AsyncRequest( config=self.config, path=path, From 88388918b6944f6d8fe323b451d32ef07f38d24e Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 08:21:42 -0700 Subject: [PATCH 09/53] fix: form requests for multipart blob + params async req. --- jigsawstack/async_request.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/jigsawstack/async_request.py b/jigsawstack/async_request.py index 033b39b..b87ca1b 100644 --- a/jigsawstack/async_request.py +++ b/jigsawstack/async_request.py @@ -3,6 +3,7 @@ from typing_extensions import Literal, TypeVar from .exceptions import NoContentError, raise_for_code_and_type import json +from io import BytesIO RequestVerb = Literal["get", "post", "put", "patch", "delete"] @@ -243,12 +244,26 @@ async def make_request( ) else: if data is not None: + form_data = aiohttp.FormData() + form_data.add_field('file', BytesIO(data), content_type=headers.get("Content-Type", "application/octet-stream"), filename="file") + + if self.params and isinstance(self.params, dict): + for key, value in self.params.items(): + if isinstance(value, bool): + form_data.add_field(key, str(value).lower()) + elif isinstance(value, (list, dict, tuple, int, float)): + form_data.add_field(key, json.dumps(value)) + else: + form_data.add_field(key, str(value)) + + multipart_headers = headers.copy() + multipart_headers.pop('Content-Type', None) + return await session.request( verb, url, - data=data, - params=converted_params, # Use converted params - headers=headers, + data=form_data, + headers=multipart_headers, ) else: return await session.request( From 05dcdc97b4475e51de1aa998932e6acdd94b3ce0 Mon 
Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 08:23:01 -0700 Subject: [PATCH 10/53] fix: param encoding for vision endpoints. --- jigsawstack/vision.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/jigsawstack/vision.py b/jigsawstack/vision.py index 4bb6ff5..49191af 100644 --- a/jigsawstack/vision.py +++ b/jigsawstack/vision.py @@ -3,7 +3,6 @@ from .request import Request, RequestConfig from .async_request import AsyncRequest, AsyncRequestConfig from ._config import ClientConfig -from .helpers import build_path from ._types import BaseResponse @@ -190,6 +189,8 @@ def vocr( blob: Union[VOCRParams, bytes], options: VOCRParams = None, ) -> OCRResponse: + path = "/vocr" + options = options or {} if isinstance( blob, dict ): # If params is provided as a dict, we assume it's the first argument @@ -201,8 +202,6 @@ def vocr( ).perform_with_content() return resp - options = options or {} - path = build_path(base_path="/vocr", params=options) content_type = options.get("content_type", "application/octet-stream") headers = {"Content-Type": content_type} @@ -230,17 +229,17 @@ def object_detection( blob: Union[ObjectDetectionParams, bytes], options: ObjectDetectionParams = None, ) -> ObjectDetectionResponse: + path = "/object_detection" + options = options or {} if isinstance(blob, dict): resp = Request( config=self.config, - path="/object_detection", + path=path, params=cast(Dict[Any, Any], blob), verb="post", ).perform_with_content() return resp - - options = options or {} - path = build_path(base_path="/object_detection", params=options) + content_type = options.get("content_type", "application/octet-stream") headers = {"Content-Type": content_type} @@ -281,17 +280,17 @@ async def vocr( blob: Union[VOCRParams, bytes], options: VOCRParams = None, ) -> OCRResponse: + path = "/vocr" + options = options or {} if isinstance(blob, dict): resp = await AsyncRequest( config=self.config, - path="/vocr", + 
path=path, params=cast(Dict[Any, Any], blob), verb="post", ).perform_with_content() return resp - options = options or {} - path = build_path(base_path="/vocr", params=options) content_type = options.get("content_type", "application/octet-stream") headers = {"Content-Type": content_type} @@ -319,19 +318,19 @@ async def object_detection( blob: Union[ObjectDetectionParams, bytes], options: ObjectDetectionParams = None, ) -> ObjectDetectionResponse: + path = "/object_detection" + options = options or {} if isinstance( blob, dict ): # If params is provided as a dict, we assume it's the first argument resp = await AsyncRequest( config=self.config, - path="/object_detection", + path=path, params=cast(Dict[Any, Any], blob), verb="post", ).perform_with_content() return resp - options = options or {} - path = build_path(base_path="/object_detection", params=options) content_type = options.get("content_type", "application/octet-stream") headers = {"Content-Type": content_type} From 6819aedfde0226c83ba4ace4cf79a25f11b63510 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 08:28:07 -0700 Subject: [PATCH 11/53] test: defining test cases for STT with format changes. 
--- tests/test_audio.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/tests/test_audio.py b/tests/test_audio.py index 96169d2..1345621 100644 --- a/tests/test_audio.py +++ b/tests/test_audio.py @@ -158,7 +158,7 @@ def test_speech_to_text(self, test_case): # Use params directly result = jigsaw.audio.speech_to_text(test_case["params"]) # Verify response structure - assert result["success"] == True + assert result["success"] assert result.get("text", None) is not None and isinstance(result["text"], str) # Check for chunks @@ -186,14 +186,8 @@ def test_speech_to_text_webhook(self, test_case): else: # Use params directly result = jigsaw.audio.speech_to_text(test_case["params"]) - - print(f"Test {test_case['name']}: Webhook response") - # Verify webhook response structure - assert result["success"] == True - assert result.get("status") in ["processing", "error"] - assert "id" in result - assert isinstance(result["id"], str) + assert result["success"] except JigsawStackError as e: # Webhook URLs might fail if invalid @@ -220,7 +214,7 @@ async def test_speech_to_text_async(self, test_case): result = await async_jigsaw.audio.speech_to_text(test_case["params"]) # Verify response structure - assert result["success"] == True + assert result["success"] assert result.get("text", None) is not None and isinstance(result["text"], str) # Check for chunks @@ -252,11 +246,8 @@ async def test_speech_to_text_webhook_async(self, test_case): print(f"Async test {test_case['name']}: Webhook response") # Verify webhook response structure - assert result["success"] == True - assert result.get("status") in ["processing", "error"] - assert "id" in result - assert isinstance(result["id"], str) - + assert result["success"] + except JigsawStackError as e: # Webhook URLs might fail if invalid print(f"Expected possible error for async webhook test {test_case['name']}: {e}") \ No newline at end of file From 7c8c46768fa7d444ee91de334de9d7e9d822afe7 Mon Sep 17 
00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 13:10:43 -0700 Subject: [PATCH 12/53] test: defining test cases for classification endpoint. --- tests/test_classification.py | 190 ++++++++++++++++++++++++----------- 1 file changed, 134 insertions(+), 56 deletions(-) diff --git a/tests/test_classification.py b/tests/test_classification.py index 6c301c5..98ce725 100644 --- a/tests/test_classification.py +++ b/tests/test_classification.py @@ -1,75 +1,118 @@ from jigsawstack.exceptions import JigsawStackError -from jigsawstack import JigsawStack - +import jigsawstack import pytest +import logging +from dotenv import load_dotenv +import os -# flake8: noqa +load_dotenv() -client = JigsawStack() +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) +jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) -@pytest.mark.parametrize( - "dataset,labels", - [ - ( - [ +TEST_CASES = [ + { + "name": "text_classification_programming", + "params": { + "dataset": [ {"type": "text", "value": "I love programming"}, {"type": "text", "value": "I love reading books"}, {"type": "text", "value": "I love watching movies"}, {"type": "text", "value": "I love playing games"}, ], - [ + "labels": [ {"type": "text", "value": "programming"}, {"type": "text", "value": "reading"}, {"type": "text", "value": "watching"}, {"type": "text", "value": "playing"}, ], - ), - ( - [ + }, + }, + { + "name": "text_classification_sentiment", + "params": { + "dataset": [ {"type": "text", "value": "This is awesome!"}, {"type": "text", "value": "I hate this product"}, {"type": "text", "value": "It's okay, nothing special"}, ], - [ + "labels": [ {"type": "text", "value": "positive"}, {"type": "text", "value": "negative"}, {"type": "text", "value": "neutral"}, ], - ), - ( - [ + }, + }, + { + "name": "text_classification_weather", + "params": { + "dataset": [ {"type": 
"text", "value": "The weather is sunny today"}, {"type": "text", "value": "It's raining heavily outside"}, {"type": "text", "value": "Snow is falling gently"}, ], - [ + "labels": [ {"type": "text", "value": "sunny"}, {"type": "text", "value": "rainy"}, {"type": "text", "value": "snowy"}, ], - ), - ], -) -def test_classification_text_success_response(dataset, labels) -> None: - params = { - "dataset": dataset, - "labels": labels, - } - try: - result = client.classification.text(params) - print(result) - assert result["success"] == True - except JigsawStackError as e: - print(str(e)) - assert e.message == "Failed to parse API response. Please try again." - - -@pytest.mark.parametrize( - "dataset,labels", - [ - ( - [ + }, + }, + { + "name": "image_classification_fruits", + "params": { + "dataset": [ + { + "type": "image", + "value": "https://as2.ftcdn.net/v2/jpg/02/24/11/57/1000_F_224115780_2ssvcCoTfQrx68Qsl5NxtVIDFWKtAgq2.jpg", + }, + { + "type": "image", + "value": "https://t3.ftcdn.net/jpg/02/95/44/22/240_F_295442295_OXsXOmLmqBUfZreTnGo9PREuAPSLQhff.jpg", + }, + { + "type": "image", + "value": "https://as1.ftcdn.net/v2/jpg/05/54/94/46/1000_F_554944613_okdr3fBwcE9kTOgbLp4BrtVi8zcKFWdP.jpg", + }, + ], + "labels": [ + {"type": "text", "value": "banana"}, + { + "type": "image", + "value": "https://upload.wikimedia.org/wikipedia/commons/8/8a/Banana-Single.jpg", + }, + {"type": "text", "value": "kisses"}, + ], + }, + }, + { + "name": "text_classification_multiple_labels", + "params": { + "dataset": [ + { + "type": "text", + "value": "Python is a great programming language for data science", + }, + { + "type": "text", + "value": "JavaScript is essential for web development", + }, + ], + "labels": [ + {"type": "text", "value": "programming"}, + {"type": "text", "value": "data science"}, + {"type": "text", "value": "web development"}, + ], + "multiple_labels": True, + }, + }, + { + "name": "image_classification_with_multiple_labels", + "params": { + "dataset": [ { "type": 
"image", "value": "https://as2.ftcdn.net/v2/jpg/02/24/11/57/1000_F_224115780_2ssvcCoTfQrx68Qsl5NxtVIDFWKtAgq2.jpg", @@ -83,7 +126,7 @@ def test_classification_text_success_response(dataset, labels) -> None: "value": "https://as1.ftcdn.net/v2/jpg/05/54/94/46/1000_F_554944613_okdr3fBwcE9kTOgbLp4BrtVi8zcKFWdP.jpg", }, ], - [ + "labels": [ {"type": "text", "value": "banana"}, { "type": "image", @@ -91,18 +134,53 @@ def test_classification_text_success_response(dataset, labels) -> None: }, {"type": "text", "value": "kisses"}, ], - ), - ], -) -def test_classification_image_success_response(dataset, labels) -> None: - params = { - "dataset": dataset, - "labels": labels, - } - try: - result = client.classification.image(params) - print(result) - assert result["success"] == True - except JigsawStackError as e: - print(str(e)) - assert e.message == "Failed to parse API response. Please try again." + }, + }, +] + + +class TestClassificationSync: + """Test synchronous classification methods""" + + sync_test_cases = TEST_CASES + + @pytest.mark.parametrize( + "test_case", sync_test_cases, ids=[tc["name"] for tc in sync_test_cases] + ) + def test_classification(self, test_case): + """Test synchronous classification with various inputs""" + try: + result = jigsaw.classification(test_case["params"]) + assert result["success"] == True + assert "predictions" in result + if test_case.get("multiple_labels"): + # Ensure predictions are lists when multiple_labels is True + for prediction in result["predictions"]: + assert isinstance(prediction, list) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestClassificationAsync: + """Test asynchronous classification methods""" + + async_test_cases = TEST_CASES + + @pytest.mark.parametrize( + "test_case", async_test_cases, ids=[tc["name"] for tc in async_test_cases] + ) + @pytest.mark.asyncio + async def test_classification_async(self, test_case): + """Test asynchronous 
classification with various inputs""" + try: + result = await async_jigsaw.classification(test_case["params"]) + assert result["success"] == True + assert "predictions" in result + + if test_case.get("multiple_labels"): + # Ensure predictions are lists when multiple_labels is True + for prediction in result["predictions"]: + assert isinstance(prediction, list) + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") From 199eb27bfb0c0979cfffe3c06bc39edc373c802e Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 13:13:25 -0700 Subject: [PATCH 13/53] tests: formatting previously defined test cases. --- tests/test_classification.py | 4 +- tests/test_object_detection.py | 164 ++++++++++++++++----------------- 2 files changed, 82 insertions(+), 86 deletions(-) diff --git a/tests/test_classification.py b/tests/test_classification.py index 98ce725..a5cf66c 100644 --- a/tests/test_classification.py +++ b/tests/test_classification.py @@ -151,7 +151,7 @@ def test_classification(self, test_case): """Test synchronous classification with various inputs""" try: result = jigsaw.classification(test_case["params"]) - assert result["success"] == True + assert result["success"] assert "predictions" in result if test_case.get("multiple_labels"): # Ensure predictions are lists when multiple_labels is True @@ -175,7 +175,7 @@ async def test_classification_async(self, test_case): """Test asynchronous classification with various inputs""" try: result = await async_jigsaw.classification(test_case["params"]) - assert result["success"] == True + assert result["success"] assert "predictions" in result if test_case.get("multiple_labels"): diff --git a/tests/test_object_detection.py b/tests/test_object_detection.py index 031459f..8c0f409 100644 --- a/tests/test_object_detection.py +++ b/tests/test_object_detection.py @@ -4,8 +4,11 @@ import pytest import logging from dotenv import load_dotenv -load_dotenv() import os + 
+load_dotenv() + + logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -16,88 +19,81 @@ IMAGE_URL = "https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg" TEST_CASES = [ - { - "name": "with_url_only", - "params": { - "url": IMAGE_URL - }, - "blob": None, - "options": None, - }, - { - "name": "with_blob_only", - "params": None, - "blob": IMAGE_URL, - "options": None, - }, - { - "name": "annotated_image_true", - "blob": IMAGE_URL, - "options": { - "annotated_image": True - }, - }, - { - "name": "with_annotated_image_false", - "blob": IMAGE_URL, - "options": { - "annotated_image": False - }, + { + "name": "with_url_only", + "params": {"url": IMAGE_URL}, + "blob": None, + "options": None, + }, + { + "name": "with_blob_only", + "params": None, + "blob": IMAGE_URL, + "options": None, + }, + { + "name": "annotated_image_true", + "blob": IMAGE_URL, + "options": {"annotated_image": True}, + }, + { + "name": "with_annotated_image_false", + "blob": IMAGE_URL, + "options": {"annotated_image": False}, + }, + { + "name": "with_blob_both_features", + "blob": IMAGE_URL, + "options": { + "features": ["object_detection", "gui"], + "annotated_image": True, + "return_type": "url", }, - { - "name": "with_blob_both_features", - "blob": IMAGE_URL, - "options": { - "features": ["object_detection", "gui"], - "annotated_image": True, - "return_type": "url" - }, + }, + { + "name": "with_blob_gui_features", + "blob": IMAGE_URL, + "options": {"features": ["gui"], "annotated_image": False}, + }, + { + "name": "with_blob_object_detection_features", + "blob": IMAGE_URL, + "options": { + "features": ["object_detection"], + "annotated_image": True, + "return_type": "base64", }, - { - "name": "with_blob_gui_features", - "blob": IMAGE_URL, - "options": { - "features": ["gui"], - "annotated_image": False - }, + }, + { + "name": "with_prompts", + "blob": IMAGE_URL, + "options": { + "prompts": ["castle", "tree"], + "annotated_image": 
True, }, - { - "name": "with_blob_object_detection_features", - "blob": IMAGE_URL, - "options": { - "features": ["object_detection"], - "annotated_image": True, - "return_type": "base64" - }, + }, + { + "name": "with_all_options", + "blob": IMAGE_URL, + "options": { + "features": ["object_detection", "gui"], + "prompts": ["car", "road", "tree"], + "annotated_image": True, + "return_type": "base64", + "return_masks": False, }, - { - "name": "with_prompts", - "blob": IMAGE_URL, - "options": { - "prompts": ["castle", "tree"], - "annotated_image": True, - }, - }, - { - "name": "with_all_options", - "blob": IMAGE_URL, - "options": { - "features": ["object_detection", "gui"], - "prompts": ["car", "road", "tree"], - "annotated_image": True, - "return_type": "base64", - "return_masks": False, - }, - }, - ] + }, +] class TestObjectDetectionSync: """Test synchronous object detection methods""" - + sync_test_cases = TEST_CASES - - @pytest.mark.parametrize("test_case", sync_test_cases, ids=[tc["name"] for tc in sync_test_cases]) + + @pytest.mark.parametrize( + "test_case", sync_test_cases, ids=[tc["name"] for tc in sync_test_cases] + ) def test_object_detection(self, test_case): """Test synchronous object detection with various inputs""" try: @@ -105,15 +101,14 @@ def test_object_detection(self, test_case): # Download blob content blob_content = requests.get(test_case["blob"]).content result = jigsaw.vision.object_detection( - blob_content, - test_case.get("options", {}) + blob_content, test_case.get("options", {}) ) else: # Use params directly result = jigsaw.vision.object_detection(test_case["params"]) - + print(f"Test {test_case['name']}: {result}") - assert result["success"] == True + assert result["success"] except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") @@ -123,7 +118,9 @@ class TestObjectDetectionAsync: async_test_cases = TEST_CASES - @pytest.mark.parametrize("test_case", async_test_cases, ids=[tc["name"] for tc in 
async_test_cases]) + @pytest.mark.parametrize( + "test_case", async_test_cases, ids=[tc["name"] for tc in async_test_cases] + ) @pytest.mark.asyncio async def test_object_detection_async(self, test_case): """Test asynchronous object detection with various inputs""" @@ -132,14 +129,13 @@ async def test_object_detection_async(self, test_case): # Download blob content blob_content = requests.get(test_case["blob"]).content result = await async_jigsaw.vision.object_detection( - blob_content, - test_case.get("options", {}) + blob_content, test_case.get("options", {}) ) else: # Use params directly result = await async_jigsaw.vision.object_detection(test_case["params"]) - + print(f"Test {test_case['name']}: {result}") - assert result["success"] == True + assert result["success"] except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") \ No newline at end of file + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") From b695d3648518bf4759baf085e0adfcc4483efcc6 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 13:15:18 -0700 Subject: [PATCH 14/53] chore: clean up redudant test file, actual store test file is test_file_store. --- tests/test_store.py | 51 --------------------------------------------- 1 file changed, 51 deletions(-) delete mode 100644 tests/test_store.py diff --git a/tests/test_store.py b/tests/test_store.py deleted file mode 100644 index 4d59ac7..0000000 --- a/tests/test_store.py +++ /dev/null @@ -1,51 +0,0 @@ -from unittest.mock import MagicMock -import unittest -from jigsawstack.exceptions import JigsawStackError -from jigsawstack import AsyncJigsawStack -import pytest -import asyncio -import logging - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -@pytest.mark.skip(reason="Skipping TestWebAPI class for now") -class TestAsyncFileOperations: - """ - Test class for async file operations. - Add your file operation tests here. 
- """ - - def test_async_file_upload(self): - # Template for future file upload tests - pass - - def test_async_file_retrieval(self): - # Template for future file retrieval tests - pass - - def test_async_file_deletion(self): - # Template for future file deletion tests - pass - - -# Example file upload test -# Uncomment and modify as needed -""" -def test_async_file_upload_example(): - async def _test(): - client = AsyncJigsawStack() - try: - file_content = b"test file content" - result = await client.store.upload( - file_content, - {"filename": "test.txt", "overwrite": True} - ) - logger.info(result) - assert result["success"] == True - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") - - asyncio.run(_test()) -""" From 621d7ba30d56d94e0c0637f0673fea973a38358f Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 16:24:41 -0700 Subject: [PATCH 15/53] fix: naming convention and drop deprecated overflow mode chunk. --- jigsawstack/{embeddingV2.py => embedding_v2.py} | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) rename jigsawstack/{embeddingV2.py => embedding_v2.py} (97%) diff --git a/jigsawstack/embeddingV2.py b/jigsawstack/embedding_v2.py similarity index 97% rename from jigsawstack/embeddingV2.py rename to jigsawstack/embedding_v2.py index d7559bb..64c7d11 100644 --- a/jigsawstack/embeddingV2.py +++ b/jigsawstack/embedding_v2.py @@ -2,7 +2,6 @@ from typing_extensions import NotRequired, TypedDict from .request import Request, RequestConfig from .async_request import AsyncRequest -from typing import List, Union from ._config import ClientConfig from .helpers import build_path from .embedding import Chunk @@ -14,7 +13,7 @@ class EmbeddingV2Params(TypedDict): type: Literal["text", "text-other", "image", "audio", "pdf"] url: NotRequired[str] file_store_key: NotRequired[str] - token_overflow_mode: NotRequired[Literal["truncate", "chunk", "error"]] = "chunk" + token_overflow_mode: 
NotRequired[Literal["truncate", "error"]] speaker_fingerprint: NotRequired[bool] From 91f8e0fdfaf49c13792cf02b3dabd1b5d2ea2a27 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 16:33:00 -0700 Subject: [PATCH 16/53] test: defining test cases for embedding v1 & v2 --- tests/test_embedding.py | 327 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 327 insertions(+) create mode 100644 tests/test_embedding.py diff --git a/tests/test_embedding.py b/tests/test_embedding.py new file mode 100644 index 0000000..0106751 --- /dev/null +++ b/tests/test_embedding.py @@ -0,0 +1,327 @@ +import requests +from jigsawstack.exceptions import JigsawStackError +import jigsawstack +import pytest +import logging +from dotenv import load_dotenv +import os + +load_dotenv() + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +jigsaw = jigsawstack.JigsawStack( + api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") +) +async_jigsaw = jigsawstack.AsyncJigsawStack( + api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") +) + +SAMPLE_TEXT = "The quick brown fox jumps over the lazy dog. This is a sample text for embedding generation." 
+SAMPLE_IMAGE_URL = "https://images.unsplash.com/photo-1542931287-023b922fa89b?q=80&w=2574&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" +SAMPLE_AUDIO_URL = "https://jigsawstack.com/preview/stt-example.wav" +SAMPLE_PDF_URL = ( + "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf" +) + +# Test cases for Embedding V1 +EMBEDDING_V1_TEST_CASES = [ + { + "name": "text_embedding_basic", + "params": { + "type": "text", + "text": SAMPLE_TEXT, + }, + }, + { + "name": "text_embedding_with_truncate", + "params": { + "type": "text", + "text": SAMPLE_TEXT * 100, # Long text to test truncation + "token_overflow_mode": "truncate", + }, + }, + { + "name": "text_embedding_with_error_mode", + "params": { + "type": "text", + "text": SAMPLE_TEXT, + "token_overflow_mode": "error", + }, + }, + { + "name": "image_embedding_from_url", + "params": { + "type": "image", + "url": SAMPLE_IMAGE_URL, + }, + }, + { + "name": "audio_embedding_from_url", + "params": { + "type": "audio", + "url": SAMPLE_AUDIO_URL, + }, + }, + { + "name": "pdf_embedding_from_url", + "params": { + "type": "pdf", + "url": SAMPLE_PDF_URL, + }, + }, + { + "name": "text_other_type", + "params": { + "type": "text-other", + "text": "This is a different text type for embedding", + }, + }, +] + +# Test cases for Embedding V2 +EMBEDDING_V2_TEST_CASES = [ + { + "name": "text_embedding_v2_basic", + "params": { + "type": "text", + "text": SAMPLE_TEXT, + }, + }, + { + "name": "text_embedding_v2_with_error", + "params": { + "type": "text", + "text": SAMPLE_TEXT * 100, # Long text to test chunking + "token_overflow_mode": "error", + }, + }, + { + "name": "text_embedding_v2_with_truncate", + "params": { + "type": "text", + "text": SAMPLE_TEXT * 100, + "token_overflow_mode": "truncate", + }, + }, + { + "name": "text_embedding_v2_with_error_mode", + "params": { + "type": "text", + "text": SAMPLE_TEXT, + "token_overflow_mode": "error", + }, + }, + { + "name": 
"image_embedding_v2_from_url", + "params": { + "type": "image", + "url": SAMPLE_IMAGE_URL, + }, + }, + { + "name": "audio_embedding_v2_basic", + "params": { + "type": "audio", + "url": SAMPLE_AUDIO_URL, + }, + }, + { + "name": "audio_embedding_v2_with_speaker_fingerprint", + "params": { + "type": "audio", + "url": SAMPLE_AUDIO_URL, + "speaker_fingerprint": True, + }, + }, + { + "name": "pdf_embedding_v2_from_url", + "params": { + "type": "pdf", + "url": SAMPLE_PDF_URL, + }, + }, +] + +# Test cases for blob inputs +BLOB_TEST_CASES = [ + { + "name": "image_blob_embedding", + "blob_url": SAMPLE_IMAGE_URL, + "options": { + "type": "image", + }, + }, + { + "name": "pdf_blob_embedding", + "blob_url": SAMPLE_PDF_URL, + "options": { + "type": "pdf", + }, + }, +] + + +class TestEmbeddingV1Sync: + """Test synchronous Embedding V1 methods""" + + sync_test_cases = EMBEDDING_V1_TEST_CASES + + @pytest.mark.parametrize( + "test_case", sync_test_cases, ids=[tc["name"] for tc in sync_test_cases] + ) + def test_embedding_v1(self, test_case): + """Test synchronous embedding v1 with various inputs""" + try: + result = jigsaw.embedding(test_case["params"]) + assert result["success"] + assert "embeddings" in result + assert isinstance(result["embeddings"], list) + if "chunks" in result: + assert isinstance(result["chunks"], list) + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + @pytest.mark.parametrize( + "test_case", BLOB_TEST_CASES, ids=[tc["name"] for tc in BLOB_TEST_CASES] + ) + def test_embedding_v1_blob(self, test_case): + """Test synchronous embedding v1 with blob inputs""" + try: + # Download blob content + blob_content = requests.get(test_case["blob_url"]).content + result = jigsaw.embedding(blob_content, test_case["options"]) + assert result["success"] + assert "embeddings" in result + assert isinstance(result["embeddings"], list) + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in 
{test_case['name']}: {e}") + + +class TestEmbeddingV1Async: + """Test asynchronous Embedding V1 methods""" + + async_test_cases = EMBEDDING_V1_TEST_CASES + + @pytest.mark.parametrize( + "test_case", async_test_cases, ids=[tc["name"] for tc in async_test_cases] + ) + @pytest.mark.asyncio + async def test_embedding_v1_async(self, test_case): + """Test asynchronous embedding v1 with various inputs""" + try: + result = await async_jigsaw.embedding(test_case["params"]) + assert result["success"] + assert "embeddings" in result + assert isinstance(result["embeddings"], list) + if "chunks" in result: + assert isinstance(result["chunks"], list) + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + @pytest.mark.parametrize( + "test_case", BLOB_TEST_CASES, ids=[tc["name"] for tc in BLOB_TEST_CASES] + ) + @pytest.mark.asyncio + async def test_embedding_v1_blob_async(self, test_case): + """Test asynchronous embedding v1 with blob inputs""" + try: + # Download blob content + blob_content = requests.get(test_case["blob_url"]).content + result = await async_jigsaw.embedding(blob_content, test_case["options"]) + assert result["success"] + assert "embeddings" in result + assert isinstance(result["embeddings"], list) + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestEmbeddingV2Sync: + """Test synchronous Embedding V2 methods""" + + sync_test_cases = EMBEDDING_V2_TEST_CASES + + @pytest.mark.parametrize( + "test_case", sync_test_cases, ids=[tc["name"] for tc in sync_test_cases] + ) + def test_embedding_v2(self, test_case): + """Test synchronous embedding v2 with various inputs""" + try: + result = jigsaw.embeddingV2(test_case["params"]) + assert result["success"] + assert "embeddings" in result + assert isinstance(result["embeddings"], list) + + # Check for chunks when chunking mode is used + if test_case["params"].get("token_overflow_mode") == "error": + 
assert "chunks" in result + assert isinstance(result["chunks"], list) + + # Check for speaker embeddings when speaker fingerprint is requested + if test_case["params"].get("speaker_fingerprint"): + assert "speaker_embeddings" in result + assert isinstance(result["speaker_embeddings"], list) + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + @pytest.mark.parametrize( + "test_case", BLOB_TEST_CASES, ids=[tc["name"] for tc in BLOB_TEST_CASES] + ) + def test_embedding_v2_blob(self, test_case): + """Test synchronous embedding v2 with blob inputs""" + try: + # Download blob content + blob_content = requests.get(test_case["blob_url"]).content + result = jigsaw.embeddingV2(blob_content, test_case["options"]) + assert result["success"] + assert "embeddings" in result + assert isinstance(result["embeddings"], list) + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestEmbeddingV2Async: + """Test asynchronous Embedding V2 methods""" + + async_test_cases = EMBEDDING_V2_TEST_CASES + + @pytest.mark.parametrize( + "test_case", async_test_cases, ids=[tc["name"] for tc in async_test_cases] + ) + @pytest.mark.asyncio + async def test_embedding_v2_async(self, test_case): + """Test asynchronous embedding v2 with various inputs""" + try: + result = await async_jigsaw.embeddingV2(test_case["params"]) + assert result["success"] + assert "embeddings" in result + assert isinstance(result["embeddings"], list) + + # Check for chunks when chunking mode is used + if test_case["params"].get("token_overflow_mode") == "error": + assert "chunks" in result + assert isinstance(result["chunks"], list) + + # Check for speaker embeddings when speaker fingerprint is requested + if test_case["params"].get("speaker_fingerprint"): + assert "speaker_embeddings" in result + assert isinstance(result["speaker_embeddings"], list) + except JigsawStackError as e: + pytest.fail(f"Unexpected 
JigsawStackError in {test_case['name']}: {e}") + + @pytest.mark.parametrize( + "test_case", BLOB_TEST_CASES, ids=[tc["name"] for tc in BLOB_TEST_CASES] + ) + @pytest.mark.asyncio + async def test_embedding_v2_blob_async(self, test_case): + """Test asynchronous embedding v2 with blob inputs""" + try: + # Download blob content + blob_content = requests.get(test_case["blob_url"]).content + result = await async_jigsaw.embeddingV2(blob_content, test_case["options"]) + assert result["success"] + assert "embeddings" in result + assert isinstance(result["embeddings"], list) + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") From a629fa25f5344522a850a04adf162b9ae31f3b40 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 16:45:26 -0700 Subject: [PATCH 17/53] feat: format fixes and new testcases for sentiment. --- jigsawstack/sentiment.py | 3 +- tests/test_sentiment.py | 145 +++++++++++++++++++++++++++++++++++---- 2 files changed, 134 insertions(+), 14 deletions(-) diff --git a/jigsawstack/sentiment.py b/jigsawstack/sentiment.py index 8970110..805dd80 100644 --- a/jigsawstack/sentiment.py +++ b/jigsawstack/sentiment.py @@ -1,8 +1,7 @@ from typing import Any, Dict, List, Union, cast -from typing_extensions import NotRequired, TypedDict +from typing_extensions import TypedDict from .request import Request, RequestConfig from .async_request import AsyncRequest -from typing import List, Union from ._config import ClientConfig from ._types import BaseResponse diff --git a/tests/test_sentiment.py b/tests/test_sentiment.py index cd3c602..e46e44f 100644 --- a/tests/test_sentiment.py +++ b/tests/test_sentiment.py @@ -1,21 +1,142 @@ -from unittest.mock import MagicMock -import unittest from jigsawstack.exceptions import JigsawStackError import jigsawstack - import pytest +import logging +from dotenv import load_dotenv +import os + +load_dotenv() + +logging.basicConfig(level=logging.INFO) +logger = 
logging.getLogger(__name__) + +jigsaw = jigsawstack.JigsawStack( + api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") +) +async_jigsaw = jigsawstack.AsyncJigsawStack( + api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") +) + +TEST_CASES = [ + { + "name": "positive_sentiment_excited", + "params": { + "text": "I am so excited about this new product! It's absolutely amazing and I can't wait to use it every day." + }, + }, + { + "name": "negative_sentiment_disappointed", + "params": { + "text": "I'm really disappointed with this purchase. The quality is terrible and it broke after just one day." + }, + }, + { + "name": "neutral_sentiment_factual", + "params": { + "text": "The meeting is scheduled for 3 PM tomorrow in conference room B." + }, + }, + { + "name": "mixed_sentiment_paragraph", + "params": { + "text": "The product arrived on time which was great. However, the packaging was damaged. The item itself works fine, but the instructions were confusing." + }, + }, + { + "name": "positive_sentiment_love", + "params": { + "text": "I absolutely love this! Best purchase I've made all year. Highly recommend to everyone!" + }, + }, + { + "name": "negative_sentiment_angry", + "params": { + "text": "This is unacceptable! I want a refund immediately. Worst customer service ever!" + }, + }, + { + "name": "single_sentence_positive", + "params": {"text": "This made my day!"}, + }, + { + "name": "single_sentence_negative", + "params": {"text": "I hate this."}, + }, + { + "name": "complex_multi_sentence", + "params": { + "text": "The first part of the movie was boring and I almost fell asleep. But then it got really exciting! The ending was spectacular and now it's one of my favorites." + }, + }, + { + "name": "question_sentiment", + "params": { + "text": "Why is this product so amazing? I can't believe how well it works!" 
+ }, + }, +] -# flake8: noqa -client = jigsawstack.JigsawStack() +class TestSentimentSync: + """Test synchronous sentiment analysis methods""" + sync_test_cases = TEST_CASES -@pytest.mark.skip(reason="Skipping TestWebAPI class for now") -class TestSentimentAPI(unittest.TestCase): - def test_sentiment_response_success(self) -> None: - params = {"text": "I am so excited"} + @pytest.mark.parametrize( + "test_case", sync_test_cases, ids=[tc["name"] for tc in sync_test_cases] + ) + def test_sentiment_analysis(self, test_case): + """Test synchronous sentiment analysis with various inputs""" try: - result = client.sentiment(params) - assert result["success"] == True + result = jigsaw.sentiment(test_case["params"]) + + assert result["success"] + assert "sentiment" in result + assert "emotion" in result["sentiment"] + assert "sentiment" in result["sentiment"] + assert "score" in result["sentiment"] + + # Check if sentences analysis is included + if "sentences" in result["sentiment"]: + assert isinstance(result["sentiment"]["sentences"], list) + for sentence in result["sentiment"]["sentences"]: + assert "text" in sentence + assert "sentiment" in sentence + assert "emotion" in sentence + assert "score" in sentence + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestSentimentAsync: + """Test asynchronous sentiment analysis methods""" + + async_test_cases = TEST_CASES + + @pytest.mark.parametrize( + "test_case", async_test_cases, ids=[tc["name"] for tc in async_test_cases] + ) + @pytest.mark.asyncio + async def test_sentiment_analysis_async(self, test_case): + """Test asynchronous sentiment analysis with various inputs""" + try: + result = await async_jigsaw.sentiment(test_case["params"]) + + assert result["success"] + assert "sentiment" in result + assert "emotion" in result["sentiment"] + assert "sentiment" in result["sentiment"] + assert "score" in result["sentiment"] + + # Check if sentences analysis is 
included + if "sentences" in result["sentiment"]: + assert isinstance(result["sentiment"]["sentences"], list) + for sentence in result["sentiment"]["sentences"]: + assert "text" in sentence + assert "sentiment" in sentence + assert "emotion" in sentence + assert "score" in sentence + except JigsawStackError as e: - assert e.message == "Failed to parse API response. Please try again." + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") From 5ea1c3d0e9c14e58896e58676546bb83995a3814 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 16:46:33 -0700 Subject: [PATCH 18/53] chore: deleting outdated vision test file. --- tests/test_vision.py | 28 ---------------------------- 1 file changed, 28 deletions(-) delete mode 100644 tests/test_vision.py diff --git a/tests/test_vision.py b/tests/test_vision.py deleted file mode 100644 index 7d8fcf0..0000000 --- a/tests/test_vision.py +++ /dev/null @@ -1,28 +0,0 @@ -from unittest.mock import MagicMock -import unittest -from jigsawstack.exceptions import JigsawStackError -from jigsawstack import AsyncJigsawStack -import pytest -import asyncio -import logging - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -def test_async_vocr_response(): - async def _test(): - client = AsyncJigsawStack() - try: - result = await client.vision.vocr( - { - "url": "https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg?t=2024-03-22T09%3A22%3A48.442Z", - "prompt": ["Hello"], - } - ) - - assert result["success"] == True - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") - - asyncio.run(_test()) From 0e8bdb392bf5f25c3d81503f24aee413260bad6e Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 16:50:36 -0700 Subject: [PATCH 19/53] feat: updated formatting for JigsawStack module.
--- jigsawstack/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/jigsawstack/__init__.py b/jigsawstack/__init__.py index e860936..2c5d775 100644 --- a/jigsawstack/__init__.py +++ b/jigsawstack/__init__.py @@ -16,7 +16,7 @@ from .image_generation import ImageGeneration, AsyncImageGeneration from .classification import Classification, AsyncClassification from .prompt_engine import PromptEngine, AsyncPromptEngine -from .embeddingV2 import EmbeddingV2, AsyncEmbeddingV2 +from .embedding_v2 import EmbeddingV2, AsyncEmbeddingV2 class JigsawStack: @@ -51,7 +51,7 @@ def __init__( if api_url is None: api_url = os.environ.get("JIGSAWSTACK_API_URL") if api_url is None: - api_url = f"https://api.jigsawstack.com/" + api_url = "https://api.jigsawstack.com/" self.api_key = api_key self.api_url = api_url @@ -171,7 +171,7 @@ def __init__( if api_url is None: api_url = os.environ.get("JIGSAWSTACK_API_URL") if api_url is None: - api_url = f"https://api.jigsawstack.com/" + api_url = "https://api.jigsawstack.com/" self.api_key = api_key self.api_url = api_url From 24fc7913cb1b96237e9f511a14183e50c4cdb270 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 16:51:23 -0700 Subject: [PATCH 20/53] feat: updated formatting for async_request.py --- jigsawstack/async_request.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/jigsawstack/async_request.py b/jigsawstack/async_request.py index b87ca1b..4f90a2c 100644 --- a/jigsawstack/async_request.py +++ b/jigsawstack/async_request.py @@ -245,20 +245,23 @@ async def make_request( else: if data is not None: form_data = aiohttp.FormData() - form_data.add_field('file', BytesIO(data), content_type=headers.get("Content-Type", "application/octet-stream"), filename="file") - + form_data.add_field( + "file", + BytesIO(data), + content_type=headers.get( + "Content-Type", "application/octet-stream" + ), + filename="file", + ) + if self.params and 
isinstance(self.params, dict): - for key, value in self.params.items(): - if isinstance(value, bool): - form_data.add_field(key, str(value).lower()) - elif isinstance(value, (list, dict, tuple, int, float)): - form_data.add_field(key, json.dumps(value)) - else: - form_data.add_field(key, str(value)) - + form_data.add_field( + "body", json.dumps(self.params), content_type="application/json" + ) + multipart_headers = headers.copy() - multipart_headers.pop('Content-Type', None) - + multipart_headers.pop("Content-Type", None) + return await session.request( verb, url, From b7bc5befdfea05916123add9b669750a4950e25c Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 16:52:14 -0700 Subject: [PATCH 21/53] chore: deleting outdated async test cases for embedding. --- tests/test_embedding_async.py | 23 ----------------------- 1 file changed, 23 deletions(-) delete mode 100644 tests/test_embedding_async.py diff --git a/tests/test_embedding_async.py b/tests/test_embedding_async.py deleted file mode 100644 index bf2e1e6..0000000 --- a/tests/test_embedding_async.py +++ /dev/null @@ -1,23 +0,0 @@ -from unittest.mock import MagicMock -import unittest -from jigsawstack.exceptions import JigsawStackError -from jigsawstack import AsyncJigsawStack -import pytest -import asyncio -import logging - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -def test_async_embedding_generation_response(): - async def _test(): - client = AsyncJigsawStack() - try: - result = await client.embedding({"text": "Hello, World!", "type": "text"}) - logger.info(result) - assert result["success"] == True - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") - - asyncio.run(_test()) From 3974f32f835324adc35c55693ed08946c76afc1e Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 16:52:56 -0700 Subject: [PATCH 22/53] feat: updating demo url across jigsawstack-python. 
--- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index edf4020..e13c6bf 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,7 @@ VOCR: ```py params = { - "url": "https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg?t=2024-03-22T09%3A22%3A48.442Z" + "url": "https://images.unsplash.com/photo-1542931287-023b922fa89b?q=80&w=2574&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D?t=2024-03-22T09%3A22%3A48.442Z" } result = jigsaw.vision.vocr(params) ``` From efcff9c1c7b03a28b07f61b980c8970ce77a588a Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 17:27:31 -0700 Subject: [PATCH 23/53] feat: updating type to accept float for prediction, introducing new test cases for prediction endpoint. --- jigsawstack/prediction.py | 9 +- tests/test_prediction.py | 191 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 197 insertions(+), 3 deletions(-) create mode 100644 tests/test_prediction.py diff --git a/jigsawstack/prediction.py b/jigsawstack/prediction.py index d24168b..84bfbf9 100644 --- a/jigsawstack/prediction.py +++ b/jigsawstack/prediction.py @@ -1,15 +1,14 @@ from typing import Any, Dict, List, Union, cast -from typing_extensions import NotRequired, TypedDict +from typing_extensions import TypedDict from .request import Request, RequestConfig from .async_request import AsyncRequest -from typing import List, Union from ._config import ClientConfig from ._types import BaseResponse class Dataset(TypedDict): - value: Union[int, str] + value: Union[int, float, str] """ The value of the dataset. """ @@ -32,6 +31,10 @@ class PredictionParams(TypedDict): class PredictionResponse(BaseResponse): + steps: int + """ + The number of steps predicted. + """ prediction: List[Dataset] """ The predictions made on the dataset.
diff --git a/tests/test_prediction.py b/tests/test_prediction.py new file mode 100644 index 0000000..6069140 --- /dev/null +++ b/tests/test_prediction.py @@ -0,0 +1,191 @@ +from jigsawstack.exceptions import JigsawStackError +import jigsawstack +import pytest +import logging +from dotenv import load_dotenv +import os +from datetime import datetime, timedelta + +load_dotenv() + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +jigsaw = jigsawstack.JigsawStack( + api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") +) +async_jigsaw = jigsawstack.AsyncJigsawStack( + api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") +) + + +def generate_dates(start_date, num_days): + dates = [] + for i in range(num_days): + date = start_date + timedelta(days=i) + dates.append(date.strftime("%Y-%m-%d %H:%M:%S")) + return dates + + +start = datetime(2024, 1, 1) +dates = generate_dates(start, 30) +dates = [str(date) for date in dates] + +TEST_CASES = [ + { + "name": "linear_growth_pattern", + "params": { + "dataset": [{"date": dates[i], "value": 100 + (i * 10)} for i in range(10)], + "steps": 5, + }, + }, + { + "name": "exponential_growth_pattern", + "params": { + "dataset": [{"date": dates[i], "value": 100 * (1.1**i)} for i in range(10)], + "steps": 3, + }, + }, + { + "name": "seasonal_pattern", + "params": { + "dataset": [ + {"date": dates[i], "value": 100 + (50 * (i % 7))} for i in range(21) + ], + "steps": 7, + }, + }, + { + "name": "single_step_prediction", + "params": { + "dataset": [{"date": dates[i], "value": 200 + (i * 5)} for i in range(15)], + "steps": 1, + }, + }, + { + "name": "large_dataset_prediction", + "params": { + "dataset": [ + {"date": dates[i], "value": 1000 + (i * 20)} for i in range(30) + ], + "steps": 10, + }, + }, + { + "name": "declining_trend", + "params": { + "dataset": [{"date": dates[i], "value": 500 - (i * 10)} for i in range(10)], + "steps": 5, + }, + }, + { + "name": 
"volatile_data", + "params": { + "dataset": [ + {"date": dates[0], "value": 100}, + {"date": dates[1], "value": 150}, + {"date": dates[2], "value": 80}, + {"date": dates[3], "value": 200}, + {"date": dates[4], "value": 120}, + {"date": dates[5], "value": 180}, + {"date": dates[6], "value": 90}, + {"date": dates[7], "value": 160}, + ], + "steps": 4, + }, + }, + { + "name": "constant_values", + "params": { + "dataset": [{"date": dates[i], "value": 100} for i in range(10)], + "steps": 3, + }, + }, + { + "name": "string_values_prediction", + "params": { + "dataset": [ + {"date": dates[0], "value": "33.4"}, + {"date": dates[1], "value": "33.6"}, + {"date": dates[2], "value": "33.6"}, + {"date": dates[3], "value": "33.0"}, + {"date": dates[4], "value": "265.0"}, + {"date": dates[5], "value": "80"}, + {"date": dates[6], "value": "90.45"}, + ], + "steps": 3, + }, + }, + { + "name": "minimal_dataset", + "params": { + "dataset": [ + {"date": dates[0], "value": 50}, + {"date": dates[1], "value": 60}, + {"date": dates[2], "value": 70}, + {"date": dates[3], "value": 80}, + {"date": dates[4], "value": 90}, + ], + "steps": 2, + }, + }, +] + + +class TestPredictionSync: + """Test synchronous prediction methods""" + + sync_test_cases = TEST_CASES + + @pytest.mark.parametrize( + "test_case", sync_test_cases, ids=[tc["name"] for tc in sync_test_cases] + ) + def test_prediction(self, test_case): + """Test synchronous prediction with various inputs""" + try: + result = jigsaw.prediction(test_case["params"]) + + assert result["success"] + assert "prediction" in result + assert isinstance(result["prediction"], list) + + # Verify the number of predictions matches the requested steps + assert len(result["prediction"]) == test_case["params"]["steps"] + + # Verify each prediction has the required fields + for prediction in result["prediction"]: + assert "date" in prediction + assert "value" in prediction + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in 
{test_case['name']}: {e}") + + +class TestPredictionAsync: + """Test asynchronous prediction methods""" + + async_test_cases = TEST_CASES + + @pytest.mark.parametrize( + "test_case", async_test_cases, ids=[tc["name"] for tc in async_test_cases] + ) + @pytest.mark.asyncio + async def test_prediction_async(self, test_case): + """Test asynchronous prediction with various inputs""" + try: + result = await async_jigsaw.prediction(test_case["params"]) + + assert result["success"] + assert "prediction" in result + assert isinstance(result["prediction"], list) + + # Verify the number of predictions matches the requested steps + assert len(result["prediction"]) == test_case["params"]["steps"] + + # Verify each prediction has the required fields + for prediction in result["prediction"]: + assert "date" in prediction + assert "value" in prediction + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") From 2a47fd0c411a54e4bb227804334a00eebc1362cf Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 17:37:49 -0700 Subject: [PATCH 24/53] test: defining new test cases for summary endpoint. 
--- tests/test_summary.py | 189 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 189 insertions(+) create mode 100644 tests/test_summary.py diff --git a/tests/test_summary.py b/tests/test_summary.py new file mode 100644 index 0000000..46b5229 --- /dev/null +++ b/tests/test_summary.py @@ -0,0 +1,189 @@ +from jigsawstack.exceptions import JigsawStackError +import jigsawstack +import pytest +import logging +from dotenv import load_dotenv +import os + +load_dotenv() + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +jigsaw = jigsawstack.JigsawStack( + api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") +) +async_jigsaw = jigsawstack.AsyncJigsawStack( + api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") +) + +LONG_TEXT = """ +Artificial Intelligence (AI) has become one of the most transformative technologies of the 21st century. +From healthcare to finance, transportation to entertainment, AI is reshaping industries and changing the way we live and work. +Machine learning algorithms can now diagnose diseases with remarkable accuracy, predict market trends, and even create art. +Natural language processing has enabled computers to understand and generate human language, leading to the development of sophisticated chatbots and virtual assistants. +Computer vision systems can identify objects, faces, and activities in images and videos with superhuman precision. +However, the rapid advancement of AI also raises important ethical questions about privacy, job displacement, and the potential for bias in algorithmic decision-making. +As we continue to develop more powerful AI systems, it's crucial that we consider their societal impact and work to ensure that the benefits of AI are distributed equitably. 
+The future of AI holds immense promise, but it will require careful planning, regulation, and collaboration between technologists, policymakers, and society at large to realize its full potential while mitigating its risks. +""" + +ARTICLE_URL = "https://en.wikipedia.org/wiki/Artificial_intelligence" +PDF_URL = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf" + +TEST_CASES = [ + { + "name": "text_summary_default", + "params": { + "text": LONG_TEXT, + }, + }, + { + "name": "text_summary_with_text_type", + "params": { + "text": LONG_TEXT, + "type": "text", + }, + }, + { + "name": "text_summary_with_points_type", + "params": { + "text": LONG_TEXT, + "type": "points", + }, + }, + { + "name": "text_summary_with_max_points", + "params": { + "text": LONG_TEXT, + "type": "points", + "max_points": 5, + }, + }, + { + "name": "text_summary_with_max_characters", + "params": { + "text": LONG_TEXT, + "type": "text", + "max_characters": 200, + }, + }, + { + "name": "short_text_summary", + "params": { + "text": "This is a short text that doesn't need much summarization.", + }, + }, + { + "name": "url_summary_default", + "params": { + "url": ARTICLE_URL, + }, + }, + { + "name": "url_summary_with_text_type", + "params": { + "url": ARTICLE_URL, + "type": "text", + }, + }, + { + "name": "url_summary_with_points_type", + "params": { + "url": ARTICLE_URL, + "type": "points", + "max_points": 7, + }, + }, + { + "name": "pdf_url_summary", + "params": { + "url": PDF_URL, + "type": "text", + }, + }, + { + "name": "complex_text_with_points_and_limit", + "params": { + "text": LONG_TEXT * 3, # Triple the text for more content + "type": "points", + "max_points": 10, + }, + }, + { + "name": "technical_text_summary", + "params": { + "text": """ + Machine learning is a subset of artificial intelligence that focuses on the development of algorithms and statistical models that enable computer systems to improve their performance on a specific task through experience. 
+ Deep learning, a subfield of machine learning, uses artificial neural networks with multiple layers to progressively extract higher-level features from raw input. + Supervised learning involves training models on labeled data, while unsupervised learning discovers patterns in unlabeled data. + Reinforcement learning enables agents to learn optimal behaviors through trial and error interactions with an environment. + """, + "type": "points", + "max_points": 4, + }, + }, +] + + +class TestSummarySync: + """Test synchronous summary methods""" + + sync_test_cases = TEST_CASES + + @pytest.mark.parametrize( + "test_case", sync_test_cases, ids=[tc["name"] for tc in sync_test_cases] + ) + def test_summary(self, test_case): + """Test synchronous summary with various inputs""" + try: + result = jigsaw.summary(test_case["params"]) + + assert result["success"] + assert "summary" in result + + if test_case["params"].get("type") == "points": + assert isinstance(result["summary"], list) + if "max_points" in test_case["params"]: + assert len(result["summary"]) <= test_case["params"]["max_points"] + else: + assert isinstance(result["summary"], str) + if "max_characters" in test_case["params"]: + assert ( + len(result["summary"]) <= test_case["params"]["max_characters"] + ) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestSummaryAsync: + """Test asynchronous summary methods""" + + async_test_cases = TEST_CASES + + @pytest.mark.parametrize( + "test_case", async_test_cases, ids=[tc["name"] for tc in async_test_cases] + ) + @pytest.mark.asyncio + async def test_summary_async(self, test_case): + """Test asynchronous summary with various inputs""" + try: + result = await async_jigsaw.summary(test_case["params"]) + + assert result["success"] + assert "summary" in result + + if test_case["params"].get("type") == "points": + assert isinstance(result["summary"], list) + if "max_points" in test_case["params"]: + 
assert len(result["summary"]) <= test_case["params"]["max_points"] + else: + assert isinstance(result["summary"], str) + if "max_characters" in test_case["params"]: + assert ( + len(result["summary"]) <= test_case["params"]["max_characters"] + ) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") From 7771416c08b171c8242c0fd1aeb0dad17687f204 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 17:47:59 -0700 Subject: [PATCH 25/53] feat: defining version for requirements.txt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 0a1a976..351d200 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ requests>=2.31.0 typing_extensions -aiohttp \ No newline at end of file +aiohttp>=3.12.15 \ No newline at end of file From 27cb92dff0dd2c6e982cbc70a4de493386586386 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 17:52:19 -0700 Subject: [PATCH 26/53] test: defining test cases for text_2_sql service. 
--- tests/test_sql.py | 272 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 272 insertions(+) create mode 100644 tests/test_sql.py diff --git a/tests/test_sql.py b/tests/test_sql.py new file mode 100644 index 0000000..397b855 --- /dev/null +++ b/tests/test_sql.py @@ -0,0 +1,272 @@ +from jigsawstack.exceptions import JigsawStackError +import jigsawstack +import pytest +import logging +from dotenv import load_dotenv +import os + +load_dotenv() + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +jigsaw = jigsawstack.JigsawStack( + api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") +) +async_jigsaw = jigsawstack.AsyncJigsawStack( + api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") +) + +# Sample schemas for different databases +MYSQL_SCHEMA = """ +CREATE TABLE users ( + id INT PRIMARY KEY AUTO_INCREMENT, + username VARCHAR(255) NOT NULL, + email VARCHAR(255) UNIQUE NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +CREATE TABLE orders ( + id INT PRIMARY KEY AUTO_INCREMENT, + user_id INT, + product_name VARCHAR(255), + quantity INT, + price DECIMAL(10, 2), + order_date DATE, + FOREIGN KEY (user_id) REFERENCES users(id) +); +""" + +POSTGRESQL_SCHEMA = """ +CREATE TABLE employees ( + id SERIAL PRIMARY KEY, + name VARCHAR(100) NOT NULL, + department VARCHAR(50), + salary NUMERIC(10, 2), + hire_date DATE, + is_active BOOLEAN DEFAULT true +); + +CREATE TABLE departments ( + id SERIAL PRIMARY KEY, + name VARCHAR(50) UNIQUE NOT NULL, + budget NUMERIC(12, 2), + manager_id INTEGER REFERENCES employees(id) +); +""" + +SQLITE_SCHEMA = """ +CREATE TABLE products ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + category TEXT, + price REAL, + stock_quantity INTEGER DEFAULT 0 +); + +CREATE TABLE sales ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + product_id INTEGER, + quantity INTEGER, + sale_date TEXT, + total_amount REAL, + FOREIGN KEY (product_id) 
REFERENCES products(id) +); +""" + +TEST_CASES = [ + { + "name": "mysql_simple_select", + "params": { + "prompt": "Get all users from the users table", + "sql_schema": MYSQL_SCHEMA, + "database": "mysql", + }, + }, + { + "name": "mysql_join_query", + "params": { + "prompt": "Get all orders with user information for orders placed in the last 30 days", + "sql_schema": MYSQL_SCHEMA, + "database": "mysql", + }, + }, + { + "name": "mysql_aggregate_query", + "params": { + "prompt": "Calculate the total revenue per user", + "sql_schema": MYSQL_SCHEMA, + "database": "mysql", + }, + }, + { + "name": "postgresql_simple_select", + "params": { + "prompt": "Find all active employees", + "sql_schema": POSTGRESQL_SCHEMA, + "database": "postgresql", + }, + }, + { + "name": "postgresql_complex_join", + "params": { + "prompt": "Get all departments with their manager names and department budgets greater than 100000", + "sql_schema": POSTGRESQL_SCHEMA, + "database": "postgresql", + }, + }, + { + "name": "postgresql_window_function", + "params": { + "prompt": "Rank employees by salary within each department", + "sql_schema": POSTGRESQL_SCHEMA, + "database": "postgresql", + }, + }, + { + "name": "sqlite_simple_query", + "params": { + "prompt": "List all products in the electronics category", + "sql_schema": SQLITE_SCHEMA, + "database": "sqlite", + }, + }, + { + "name": "sqlite_aggregate_with_group", + "params": { + "prompt": "Calculate total sales amount for each product", + "sql_schema": SQLITE_SCHEMA, + "database": "sqlite", + }, + }, + { + "name": "default_database_type", + "params": { + "prompt": "Select all records from users table where email contains 'example.com'", + "sql_schema": MYSQL_SCHEMA, + # No database specified, should use default + }, + }, + { + "name": "complex_multi_table_query", + "params": { + "prompt": "Find users who have placed more than 5 orders with total value exceeding 1000", + "sql_schema": MYSQL_SCHEMA, + "database": "mysql", + }, + }, + { + "name": 
"insert_query", + "params": { + "prompt": "Insert a new user with username 'john_doe' and email 'john@example.com'", + "sql_schema": MYSQL_SCHEMA, + "database": "mysql", + }, + }, + { + "name": "update_query", + "params": { + "prompt": "Update the salary of all employees in the IT department by 10%", + "sql_schema": POSTGRESQL_SCHEMA, + "database": "postgresql", + }, + }, + { + "name": "delete_query", + "params": { + "prompt": "Delete all products with zero stock quantity", + "sql_schema": SQLITE_SCHEMA, + "database": "sqlite", + }, + }, + { + "name": "subquery_example", + "params": { + "prompt": "Find all users who have never placed an order", + "sql_schema": MYSQL_SCHEMA, + "database": "mysql", + }, + }, + { + "name": "date_filtering", + "params": { + "prompt": "Get all employees hired in the last year", + "sql_schema": POSTGRESQL_SCHEMA, + "database": "postgresql", + }, + }, +] + + +class TestSQLSync: + """Test synchronous SQL text-to-sql methods""" + + sync_test_cases = TEST_CASES + + @pytest.mark.parametrize( + "test_case", sync_test_cases, ids=[tc["name"] for tc in sync_test_cases] + ) + def test_text_to_sql(self, test_case): + """Test synchronous text-to-sql with various inputs""" + try: + result = jigsaw.text_to_sql(test_case["params"]) + + assert result["success"] + assert "sql" in result + assert isinstance(result["sql"], str) + assert len(result["sql"]) > 0 + + # Basic SQL validation - check if it contains SQL keywords + sql_lower = result["sql"].lower() + sql_keywords = [ + "select", + "insert", + "update", + "delete", + "create", + "alter", + "drop", + ] + assert any(keyword in sql_lower for keyword in sql_keywords), ( + "Generated SQL should contain valid SQL keywords" + ) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestSQLAsync: + """Test asynchronous SQL text-to-sql methods""" + + async_test_cases = TEST_CASES + + @pytest.mark.parametrize( + "test_case", async_test_cases, 
ids=[tc["name"] for tc in async_test_cases] + ) + @pytest.mark.asyncio + async def test_text_to_sql_async(self, test_case): + """Test asynchronous text-to-sql with various inputs""" + try: + result = await async_jigsaw.text_to_sql(test_case["params"]) + + assert result["success"] + assert "sql" in result + assert isinstance(result["sql"], str) + assert len(result["sql"]) > 0 + + sql_lower = result["sql"].lower() + sql_keywords = [ + "select", + "insert", + "update", + "delete", + "create", + "alter", + "drop", + ] + assert any(keyword in sql_lower for keyword in sql_keywords), ( + "Generated SQL should contain valid SQL keywords" + ) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") From 974f46fba58674d950d34c2e995ed189e9108080 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 17:54:16 -0700 Subject: [PATCH 27/53] feat: updating formating for sql dot py --- jigsawstack/sql.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/jigsawstack/sql.py b/jigsawstack/sql.py index d2dfc3b..efac7be 100644 --- a/jigsawstack/sql.py +++ b/jigsawstack/sql.py @@ -1,8 +1,7 @@ -from typing import Any, Dict, List, Union, cast, Literal +from typing import Any, Dict, Union, cast, Literal from typing_extensions import NotRequired, TypedDict from .request import Request, RequestConfig from .async_request import AsyncRequest -from typing import List, Union from ._config import ClientConfig from ._types import BaseResponse From c1b6df1a900730b5eb00964de30e047085c190fe Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 18:03:20 -0700 Subject: [PATCH 28/53] test: defining test cases for validation services. 
--- tests/test_validate.py | 457 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 431 insertions(+), 26 deletions(-) diff --git a/tests/test_validate.py b/tests/test_validate.py index 51b8d3d..f6219b1 100644 --- a/tests/test_validate.py +++ b/tests/test_validate.py @@ -1,42 +1,447 @@ -from unittest.mock import MagicMock -import unittest +import requests from jigsawstack.exceptions import JigsawStackError -from jigsawstack import AsyncJigsawStack +import jigsawstack import pytest -import asyncio import logging +from dotenv import load_dotenv +import os + +load_dotenv() logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) +jigsaw = jigsawstack.JigsawStack( + api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") +) +async_jigsaw = jigsawstack.AsyncJigsawStack( + api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") +) + +# Sample URLs for NSFW testing +SAFE_IMAGE_URL = ( + "https://images.unsplash.com/photo-1506905925346-21bda4d32df4?q=80&w=2070" +) +POTENTIALLY_NSFW_URL = "https://images.unsplash.com/photo-1512310604669-443f26c35f52?q=80&w=868&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" + +SPAM_CHECK_TEST_CASES = [ + { + "name": "single_text_not_spam", + "params": { + "text": "I had a great experience with your product. The customer service was excellent!" + }, + }, + { + "name": "single_text_potential_spam", + "params": { + "text": "CLICK HERE NOW!!! FREE MONEY!!! Win $1000000 instantly! No credit card required! Act NOW!" + }, + }, + { + "name": "multiple_texts_mixed", + "params": { + "text": [ + "Thank you for your email. I'll get back to you soon.", + "BUY NOW! LIMITED TIME OFFER! 90% OFF EVERYTHING!", + "The meeting is scheduled for 3 PM tomorrow.", + ] + }, + }, + { + "name": "professional_email", + "params": { + "text": "Dear John, I hope this email finds you well. I wanted to follow up on our discussion from yesterday." 
+ }, + }, + { + "name": "marketing_spam", + "params": { + "text": "Congratulations! You've been selected as our lucky winner! Claim your prize now at this link: bit.ly/win" + }, + }, +] + +# Spell Check Test Cases +SPELL_CHECK_TEST_CASES = [ + { + "name": "text_with_no_errors", + "params": {"text": "The quick brown fox jumps over the lazy dog."}, + }, + { + "name": "text_with_spelling_errors", + "params": {"text": "Thiss sentense has severel speling erors in it."}, + }, + { + "name": "text_with_language_code", + "params": {"text": "I recieved the pacakge yesterday.", "language_code": "en"}, + }, + { + "name": "mixed_correct_and_incorrect", + "params": { + "text": "The weather is beatiful today, but tommorow might be diferent." + }, + }, + { + "name": "technical_text", + "params": {"text": "The algorythm processes the datbase queries eficiently."}, + }, +] + +# Profanity Test Cases +PROFANITY_TEST_CASES = [ + { + "name": "clean_text", + "params": {"text": "This is a perfectly clean and professional message."}, + }, + { + "name": "text_with_profanity", + "params": { + "text": "This fucking thing is not working properly.", + "censor_replacement": "****", + }, + }, + { + "name": "text_with_custom_censor", + "params": { + "text": "What the fuck is going on here?", + "censor_replacement": "[CENSORED]", + }, + }, + { + "name": "mixed_clean_and_profane", + "params": {"text": "The weather is nice but this damn traffic is terrible."}, + }, + { + "name": "no_censor_replacement", + "params": {"text": "This text might contain some inappropriate words."}, + }, +] -@pytest.mark.skip(reason="Skipping TestWebAPI class for now") -def test_async_spam_check_response(): - async def _test(): - client = AsyncJigsawStack() +# NSFW Test Cases +NSFW_TEST_CASES = [ + { + "name": "safe_image_url", + "params": {"url": SAFE_IMAGE_URL}, + }, + { + "name": "landscape_image_url", + "params": {"url": POTENTIALLY_NSFW_URL}, + }, +] + +# NSFW Blob Test Cases +NSFW_BLOB_TEST_CASES = [ + { + "name": 
"safe_image_blob", + "blob_url": SAFE_IMAGE_URL, + "options": {}, + }, +] + + +class TestSpamCheckSync: + """Test synchronous spam check methods""" + + @pytest.mark.parametrize( + "test_case", + SPAM_CHECK_TEST_CASES, + ids=[tc["name"] for tc in SPAM_CHECK_TEST_CASES], + ) + def test_spam_check(self, test_case): + """Test synchronous spam check with various inputs""" try: - result = await client.validate.spamcheck({"text": "I am happy!"}) - logger.info(result) - assert result["success"] == True + result = jigsaw.validate.spamcheck(test_case["params"]) + + assert result["success"] + assert "check" in result + + # Check structure based on input type + if isinstance(test_case["params"]["text"], list): + assert isinstance(result["check"], list) + for check in result["check"]: + assert "is_spam" in check + assert "score" in check + assert isinstance(check["is_spam"], bool) + assert 0 <= check["score"] <= 1 + else: + assert "is_spam" in result["check"] + assert "score" in result["check"] + assert isinstance(result["check"]["is_spam"], bool) + assert 0 <= result["check"]["score"] <= 1 + except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - asyncio.run(_test()) +class TestSpellCheckSync: + """Test synchronous spell check methods""" -@pytest.mark.skip(reason="Skipping TestWebAPI class for now") -def test_async_spell_check_response(): - async def _test(): - client = AsyncJigsawStack() + @pytest.mark.parametrize( + "test_case", + SPELL_CHECK_TEST_CASES, + ids=[tc["name"] for tc in SPELL_CHECK_TEST_CASES], + ) + def test_spell_check(self, test_case): + """Test synchronous spell check with various inputs""" try: - result = await client.validate.spellcheck( - { - "text": "All the world's a stage, and all the men and women merely players. 
They have their exits and their entrances; And one man in his time plays many parts" - } - ) - logger.info(result) - assert result["success"] == True + result = jigsaw.validate.spellcheck(test_case["params"]) + + assert result["success"] + assert "misspellings_found" in result + assert "misspellings" in result + assert "auto_correct_text" in result + assert isinstance(result["misspellings_found"], bool) + assert isinstance(result["misspellings"], list) + assert isinstance(result["auto_correct_text"], str) + + # Check misspellings structure + for misspelling in result["misspellings"]: + assert "word" in misspelling + assert "startIndex" in misspelling + assert "endIndex" in misspelling + assert "expected" in misspelling + assert "auto_corrected" in misspelling + assert isinstance(misspelling["expected"], list) + assert isinstance(misspelling["auto_corrected"], bool) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestProfanitySync: + """Test synchronous profanity check methods""" + + @pytest.mark.parametrize( + "test_case", + PROFANITY_TEST_CASES, + ids=[tc["name"] for tc in PROFANITY_TEST_CASES], + ) + def test_profanity_check(self, test_case): + """Test synchronous profanity check with various inputs""" + try: + result = jigsaw.validate.profanity(test_case["params"]) + + assert result["success"] + assert "clean_text" in result + assert "profanities" in result + assert "profanities_found" in result + assert isinstance(result["profanities_found"], bool) + assert isinstance(result["profanities"], list) + assert isinstance(result["clean_text"], str) + + # Check profanities structure + for profanity in result["profanities"]: + assert "profanity" in profanity + assert "startIndex" in profanity + assert "endIndex" in profanity + except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class 
TestNSFWSync: + """Test synchronous NSFW check methods""" + + @pytest.mark.parametrize( + "test_case", NSFW_TEST_CASES, ids=[tc["name"] for tc in NSFW_TEST_CASES] + ) + def test_nsfw_check(self, test_case): + """Test synchronous NSFW check with various inputs""" + try: + result = jigsaw.validate.nsfw(test_case["params"]) + + assert result["success"] + assert "nsfw" in result + assert "nudity" in result + assert "gore" in result + assert "nsfw_score" in result + assert "nudity_score" in result + assert "gore_score" in result + + assert isinstance(result["nsfw"], bool) + assert isinstance(result["nudity"], bool) + assert isinstance(result["gore"], bool) + assert 0 <= result["nsfw_score"] <= 1 + assert 0 <= result["nudity_score"] <= 1 + assert 0 <= result["gore_score"] <= 1 + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + @pytest.mark.parametrize( + "test_case", + NSFW_BLOB_TEST_CASES, + ids=[tc["name"] for tc in NSFW_BLOB_TEST_CASES], + ) + def test_nsfw_check_blob(self, test_case): + """Test synchronous NSFW check with blob inputs""" + try: + # Download blob content + blob_content = requests.get(test_case["blob_url"]).content + result = jigsaw.validate.nsfw(blob_content, test_case["options"]) + + assert result["success"] + assert "nsfw" in result + assert "nudity" in result + assert "gore" in result + assert "nsfw_score" in result + assert "nudity_score" in result + assert "gore_score" in result + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + - asyncio.run(_test()) +# Async Test Classes + + +class TestSpamCheckAsync: + """Test asynchronous spam check methods""" + + @pytest.mark.parametrize( + "test_case", + SPAM_CHECK_TEST_CASES, + ids=[tc["name"] for tc in SPAM_CHECK_TEST_CASES], + ) + @pytest.mark.asyncio + async def test_spam_check_async(self, test_case): + """Test asynchronous spam check with various inputs""" + try: + result = await 
async_jigsaw.validate.spamcheck(test_case["params"]) + + assert result["success"] + assert "check" in result + + # Check structure based on input type + if isinstance(test_case["params"]["text"], list): + assert isinstance(result["check"], list) + for check in result["check"]: + assert "is_spam" in check + assert "score" in check + assert isinstance(check["is_spam"], bool) + assert 0 <= check["score"] <= 1 + else: + assert "is_spam" in result["check"] + assert "score" in result["check"] + assert isinstance(result["check"]["is_spam"], bool) + assert 0 <= result["check"]["score"] <= 1 + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestSpellCheckAsync: + """Test asynchronous spell check methods""" + + @pytest.mark.parametrize( + "test_case", + SPELL_CHECK_TEST_CASES, + ids=[tc["name"] for tc in SPELL_CHECK_TEST_CASES], + ) + @pytest.mark.asyncio + async def test_spell_check_async(self, test_case): + """Test asynchronous spell check with various inputs""" + try: + result = await async_jigsaw.validate.spellcheck(test_case["params"]) + + assert result["success"] + assert "misspellings_found" in result + assert "misspellings" in result + assert "auto_correct_text" in result + assert isinstance(result["misspellings_found"], bool) + assert isinstance(result["misspellings"], list) + assert isinstance(result["auto_correct_text"], str) + + # Check misspellings structure + for misspelling in result["misspellings"]: + assert "word" in misspelling + assert "startIndex" in misspelling + assert "endIndex" in misspelling + assert "expected" in misspelling + assert "auto_corrected" in misspelling + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestProfanityAsync: + """Test asynchronous profanity check methods""" + + @pytest.mark.parametrize( + "test_case", + PROFANITY_TEST_CASES, + ids=[tc["name"] for tc in PROFANITY_TEST_CASES], + ) + 
@pytest.mark.asyncio + async def test_profanity_check_async(self, test_case): + """Test asynchronous profanity check with various inputs""" + try: + result = await async_jigsaw.validate.profanity(test_case["params"]) + + assert result["success"] + assert "clean_text" in result + assert "profanities" in result + assert "profanities_found" in result + assert isinstance(result["profanities_found"], bool) + assert isinstance(result["profanities"], list) + assert isinstance(result["clean_text"], str) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestNSFWAsync: + """Test asynchronous NSFW check methods""" + + @pytest.mark.parametrize( + "test_case", NSFW_TEST_CASES, ids=[tc["name"] for tc in NSFW_TEST_CASES] + ) + @pytest.mark.asyncio + async def test_nsfw_check_async(self, test_case): + """Test asynchronous NSFW check with various inputs""" + try: + result = await async_jigsaw.validate.nsfw(test_case["params"]) + + assert result["success"] + assert "nsfw" in result + assert "nudity" in result + assert "gore" in result + assert "nsfw_score" in result + assert "nudity_score" in result + assert "gore_score" in result + + assert isinstance(result["nsfw"], bool) + assert isinstance(result["nudity"], bool) + assert isinstance(result["gore"], bool) + assert 0 <= result["nsfw_score"] <= 1 + assert 0 <= result["nudity_score"] <= 1 + assert 0 <= result["gore_score"] <= 1 + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + @pytest.mark.parametrize( + "test_case", + NSFW_BLOB_TEST_CASES, + ids=[tc["name"] for tc in NSFW_BLOB_TEST_CASES], + ) + @pytest.mark.asyncio + async def test_nsfw_check_blob_async(self, test_case): + """Test asynchronous NSFW check with blob inputs""" + try: + # Download blob content + blob_content = requests.get(test_case["blob_url"]).content + result = await async_jigsaw.validate.nsfw( + blob_content, test_case["options"] + ) + 
+ assert result["success"] + assert "nsfw" in result + assert "nudity" in result + assert "gore" in result + assert "nsfw_score" in result + assert "nudity_score" in result + assert "gore_score" in result + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") From 0a71df1b311ce5192609a115c967f10433172513 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 18:05:52 -0700 Subject: [PATCH 29/53] feat: formatting validate dot py. --- jigsawstack/validate.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/jigsawstack/validate.py b/jigsawstack/validate.py index 1d4f715..3565ac9 100644 --- a/jigsawstack/validate.py +++ b/jigsawstack/validate.py @@ -3,8 +3,6 @@ from .request import Request, RequestConfig from .async_request import AsyncRequest, AsyncRequestConfig from ._config import ClientConfig -from typing import Any, Dict, List, cast -from typing_extensions import NotRequired, TypedDict, Union, Optional from .helpers import build_path from ._types import BaseResponse From ba568f0f92d7e2423c639d14ada98e5ab8fbb6e2 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 18:37:45 -0700 Subject: [PATCH 30/53] feat: formatting search dot py --- jigsawstack/search.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/jigsawstack/search.py b/jigsawstack/search.py index 4b10884..3b80bca 100644 --- a/jigsawstack/search.py +++ b/jigsawstack/search.py @@ -247,7 +247,7 @@ def search(self, params: SearchParams) -> SearchResponse: "spell_check": spell_check, } - path = f"/web/search" + path = "/web/search" resp = Request( config=self.config, path=path, @@ -269,7 +269,7 @@ def suggestions(self, params: SearchSuggestionsParams) -> SearchSuggestionsRespo return resp def deep_research(self, params: DeepResearchParams) -> DeepResearchResponse: - path = f"/web/deep_research" + path = "/web/deep_research" resp = Request( config=self.config, path=path, @@ -296,7 
+296,7 @@ def __init__( ) async def search(self, params: SearchParams) -> SearchResponse: - path = f"/web/search" + path = "/web/search" query = params["query"] ai_overview = params.get("ai_overview", "True") safe_search = params.get("safe_search", "moderate") @@ -331,7 +331,7 @@ async def suggestions( return resp async def deep_research(self, params: DeepResearchParams) -> DeepResearchResponse: - path = f"/web/deep_research" + path = "/web/deep_research" resp = await AsyncRequest( config=self.config, path=path, From b5e2577b37040c7f023a83ea4df5c453cf24a79c Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 19:40:47 -0700 Subject: [PATCH 31/53] test:defining new test cases for web search. --- tests/test_web.py | 555 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 525 insertions(+), 30 deletions(-) diff --git a/tests/test_web.py b/tests/test_web.py index 5191fca..565d1e5 100644 --- a/tests/test_web.py +++ b/tests/test_web.py @@ -1,45 +1,540 @@ -from unittest.mock import MagicMock -import unittest from jigsawstack.exceptions import JigsawStackError -from jigsawstack import JigsawStack - +import jigsawstack import pytest +import logging +from dotenv import load_dotenv +import os + +load_dotenv() + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +jigsaw = jigsawstack.JigsawStack( + api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") +) +async_jigsaw = jigsawstack.AsyncJigsawStack( + api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") +) + +URL = "https://jigsawstack.com" + + +# AI Scrape Test Cases +AI_SCRAPE_TEST_CASES = [ + { + "name": "scrape_with_element_prompts", + "params": { + "url": URL, + "element_prompts": ["title", "main content", "navigation links"], + }, + }, + { + "name": "scrape_with_selectors", + "params": { + "url": URL, + "selectors": ["h1", "p", "a"], + }, + }, + { + "name": "scrape_with_features", + "params": { + "url": 
URL, + "features": ["meta", "link"], + }, + }, + { + "name": "scrape_with_root_element", + "params": { + "url": URL, + "element_prompts": ["content"], + "root_element_selector": "main", + }, + }, + { + "name": "scrape_with_wait_for_selector", + "params": { + "url": URL, + "element_prompts": ["dynamic content"], + "wait_for": {"mode": "selector", "value": ".loaded-content"}, + }, + }, + { + "name": "scrape_with_wait_for_timeout", + "params": { + "url": URL, + "element_prompts": ["content"], + "wait_for": {"mode": "timeout", "value": 3000}, + }, + }, + { + "name": "scrape_mobile_view", + "params": { + "url": URL, + "element_prompts": ["mobile menu"], + "is_mobile": True, + }, + }, + { + "name": "scrape_with_cookies", + "params": { + "url": URL, + "element_prompts": ["user data"], + "cookies": [ + {"name": "session", "value": "test123", "domain": "example.com"} + ], + }, + }, + { + "name": "scrape_with_advance_config", + "params": { + "url": URL, + "element_prompts": ["content"], + "advance_config": {"console": True, "network": True, "cookies": True}, + }, + }, +] + +# HTML to Any Test Cases +HTML_TO_ANY_TEST_CASES = [ + { + "name": "html_to_pdf_url", + "params": { + "url": URL, + "type": "pdf", + "return_type": "url", + }, + }, + { + "name": "html_to_png_base64", + "params": { + "url": URL, + "type": "png", + "return_type": "base64", + }, + }, + { + "name": "html_to_jpeg_binary", + "params": { + "url": URL, + "type": "jpeg", + "return_type": "binary", + }, + }, + { + "name": "html_string_to_pdf", + "params": { + "html": "

Test Document

This is a test.

", + "type": "pdf", + "return_type": "url", + }, + }, + { + "name": "html_to_pdf_with_options", + "params": { + "url": URL, + "type": "pdf", + "return_type": "url", + "pdf_display_header_footer": True, + "pdf_print_background": True, + }, + }, + { + "name": "html_to_png_full_page", + "params": { + "url": URL, + "type": "png", + "full_page": True, + "return_type": "url", + }, + }, + { + "name": "html_to_webp_custom_size", + "params": { + "url": URL, + "type": "webp", + "width": 1920, + "height": 1080, + "return_type": "base64", + }, + }, + { + "name": "html_to_png_mobile", + "params": { + "url": URL, + "type": "png", + "is_mobile": True, + "return_type": "url", + }, + }, + { + "name": "html_to_png_dark_mode", + "params": { + "url": URL, + "type": "png", + "dark_mode": True, + "return_type": "url", + }, + }, +] + +# Search Test Cases +SEARCH_TEST_CASES = [ + { + "name": "basic_search", + "params": { + "query": "artificial intelligence news", + }, + }, + { + "name": "search_with_max_results", + "params": { + "query": "python programming", + "max_results": 5, + }, + }, + { + "name": "search_specific_site", + "params": { + "query": "documentation site:github.com", + }, + }, + { + "name": "search_ai_mode", + "params": { + "query": "explain quantum computing", + "ai": True, + }, + }, +] + +# Search Suggestions Test Cases +SEARCH_SUGGESTIONS_TEST_CASES = [ + { + "name": "basic_suggestions", + "params": { + "query": "machine learn", + }, + }, + { + "name": "programming_suggestions", + "params": { + "query": "python tutor", + }, + }, + { + "name": "partial_query_suggestions", + "params": { + "query": "artifi", + }, + }, +] + +# Deep Research Test Cases +DEEP_RESEARCH_TEST_CASES = [ + { + "name": "basic_deep_research", + "params": { + "query": "climate change effects", + }, + }, + { + "name": "technical_deep_research", + "params": { + "query": "quantum computing applications in cryptography", + }, + }, + { + "name": "deep_research_with_depth", + "params": { + "query": 
"renewable energy sources", + "depth": 3, + }, + }, +] + + +class TestAIScrapeSync: + """Test synchronous AI scrape methods""" + + @pytest.mark.parametrize( + "test_case", + AI_SCRAPE_TEST_CASES, + ids=[tc["name"] for tc in AI_SCRAPE_TEST_CASES], + ) + def test_ai_scrape(self, test_case): + """Test synchronous AI scrape with various inputs""" + try: + result = jigsaw.web.ai_scrape(test_case["params"]) + + assert result["success"] + assert "data" in result + assert isinstance(result["data"], list) + + # Check for optional features + if "meta" in test_case["params"].get("features", []): + assert "meta" in result + if "link" in test_case["params"].get("features", []): + assert "link" in result + assert isinstance(result["link"], list) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestHTMLToAnySync: + """Test synchronous HTML to Any methods""" + + @pytest.mark.parametrize( + "test_case", + HTML_TO_ANY_TEST_CASES, + ids=[tc["name"] for tc in HTML_TO_ANY_TEST_CASES], + ) + def test_html_to_any(self, test_case): + """Test synchronous HTML to Any with various inputs""" + try: + result = jigsaw.web.html_to_any(test_case["params"]) + + return_type = test_case["params"].get("return_type", "url") + + if return_type == "binary": + assert isinstance(result, bytes) + assert len(result) > 0 + else: + assert result["success"] + assert "url" in result + assert isinstance(result["url"], str) + + if return_type == "base64": + # Check if it's a valid base64 string + assert result["url"].startswith("data:") + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestSearchSync: + """Test synchronous search methods""" + + @pytest.mark.parametrize( + "test_case", SEARCH_TEST_CASES, ids=[tc["name"] for tc in SEARCH_TEST_CASES] + ) + def test_search(self, test_case): + """Test synchronous search with various inputs""" + try: + result = 
jigsaw.web.search(test_case["params"]) + + assert result["success"] + assert "results" in result + assert isinstance(result["results"], list) + + if test_case["params"].get("max_results"): + assert len(result["results"]) <= test_case["params"]["max_results"] + + # Check result structure + for item in result["results"]: + assert "title" in item + assert "url" in item + assert "description" in item + + # Check AI mode response + if test_case["params"].get("ai"): + assert "ai_overview" in result + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestSearchSuggestionsSync: + """Test synchronous search suggestions methods""" + + @pytest.mark.parametrize( + "test_case", + SEARCH_SUGGESTIONS_TEST_CASES, + ids=[tc["name"] for tc in SEARCH_SUGGESTIONS_TEST_CASES], + ) + def test_search_suggestions(self, test_case): + """Test synchronous search suggestions with various inputs""" + try: + result = jigsaw.web.search_suggestions(test_case["params"]) -# flake8: noqa + assert result["success"] + assert "suggestions" in result + assert isinstance(result["suggestions"], list) + assert len(result["suggestions"]) > 0 -client = JigsawStack() + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestDeepResearchSync: + """Test synchronous deep research methods""" + + @pytest.mark.parametrize( + "test_case", + DEEP_RESEARCH_TEST_CASES, + ids=[tc["name"] for tc in DEEP_RESEARCH_TEST_CASES], + ) + def test_deep_research(self, test_case): + """Test synchronous deep research with various inputs""" + try: + result = jigsaw.web.deep_research(test_case["params"]) + + assert result["success"] + assert "report" in result + assert isinstance(result["report"], str) + assert len(result["report"]) > 0 + + # Check for sources + if "sources" in result: + assert isinstance(result["sources"], list) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError 
in {test_case['name']}: {e}") + + +# Async Test Classes + + +class TestAIScrapeAsync: + """Test asynchronous AI scrape methods""" + + @pytest.mark.parametrize( + "test_case", + AI_SCRAPE_TEST_CASES, + ids=[tc["name"] for tc in AI_SCRAPE_TEST_CASES], + ) + @pytest.mark.asyncio + async def test_ai_scrape_async(self, test_case): + """Test asynchronous AI scrape with various inputs""" + try: + result = await async_jigsaw.web.ai_scrape(test_case["params"]) + + assert result["success"] + assert "data" in result + assert isinstance(result["data"], list) + + # Check for optional features + if "meta" in test_case["params"].get("features", []): + assert "meta" in result + if "link" in test_case["params"].get("features", []): + assert "link" in result + assert isinstance(result["link"], list) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestHTMLToAnyAsync: + """Test asynchronous HTML to Any methods""" + @pytest.mark.parametrize( + "test_case", + HTML_TO_ANY_TEST_CASES, + ids=[tc["name"] for tc in HTML_TO_ANY_TEST_CASES], + ) + @pytest.mark.asyncio + async def test_html_to_any_async(self, test_case): + """Test asynchronous HTML to Any with various inputs""" + try: + result = await async_jigsaw.web.html_to_any(test_case["params"]) + + return_type = test_case["params"].get("return_type", "url") + + if return_type == "binary": + assert isinstance(result, bytes) + assert len(result) > 0 + else: + assert result["success"] + assert "url" in result + assert isinstance(result["url"], str) + + if return_type == "base64": + # Check if it's a valid base64 string + assert result["url"].startswith("data:") + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") -@pytest.mark.skip(reason="Skipping TestWebAPI class for now") -class TestWebAPI(unittest.TestCase): - def test_ai_scrape_success_response(self) -> None: - params = { - "url": "https://supabase.com/pricing", 
- "element_prompts": ["Plan title", "Plan price"], - } + +class TestSearchAsync: + """Test asynchronous search methods""" + + @pytest.mark.parametrize( + "test_case", SEARCH_TEST_CASES, ids=[tc["name"] for tc in SEARCH_TEST_CASES] + ) + @pytest.mark.asyncio + async def test_search_async(self, test_case): + """Test asynchronous search with various inputs""" try: - result = client.file.upload(params) - assert result["success"] == True + result = await async_jigsaw.web.search(test_case["params"]) + + assert result["success"] + assert "results" in result + assert isinstance(result["results"], list) + + if test_case["params"].get("max_results"): + assert len(result["results"]) <= test_case["params"]["max_results"] + + # Check result structure + for item in result["results"]: + assert "title" in item + assert "url" in item + assert "description" in item + + # Check AI mode response + if test_case["params"].get("ai"): + assert "ai_overview" in result + except JigsawStackError as e: - assert e.message == "Failed to parse API response. Please try again." 
+ pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + - def test_scrape_success_response(self) -> None: - params = { - "url": "https://supabase.com/pricing", - } +class TestSearchSuggestionsAsync: + """Test asynchronous search suggestions methods""" + + @pytest.mark.parametrize( + "test_case", + SEARCH_SUGGESTIONS_TEST_CASES, + ids=[tc["name"] for tc in SEARCH_SUGGESTIONS_TEST_CASES], + ) + @pytest.mark.asyncio + async def test_search_suggestions_async(self, test_case): + """Test asynchronous search suggestions with various inputs""" try: - result = client.web.scrape(params) - assert result["success"] == True + result = await async_jigsaw.web.search_suggestions(test_case["params"]) + + assert result["success"] + assert "suggestions" in result + assert isinstance(result["suggestions"], list) + assert len(result["suggestions"]) > 0 + except JigsawStackError as e: - assert e.message == "Failed to parse API response. Please try again." + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + - def test_dns_success_response(self) -> None: +class TestDeepResearchAsync: + """Test asynchronous deep research methods""" - params = { - "url": "https://supabase.com/pricing", - } + @pytest.mark.parametrize( + "test_case", + DEEP_RESEARCH_TEST_CASES, + ids=[tc["name"] for tc in DEEP_RESEARCH_TEST_CASES], + ) + @pytest.mark.asyncio + async def test_deep_research_async(self, test_case): + """Test asynchronous deep research with various inputs""" try: - result = client.web.dns(params) - assert result["success"] == True + result = await async_jigsaw.web.deep_research(test_case["params"]) + + assert result["success"] + assert "report" in result + assert isinstance(result["report"], str) + assert len(result["report"]) > 0 + + # Check for sources + if "sources" in result: + assert isinstance(result["sources"], list) + except JigsawStackError as e: - assert e.message == "Failed to parse API response. Please try again." 
\ No newline at end of file + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") From 7b213f4c47a8d5240219676afad4fbb44c60c36b Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 20:02:22 -0700 Subject: [PATCH 32/53] fix: update type for image translation. --- jigsawstack/translate.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/jigsawstack/translate.py b/jigsawstack/translate.py index 14d225a..b9fef14 100644 --- a/jigsawstack/translate.py +++ b/jigsawstack/translate.py @@ -50,10 +50,10 @@ class TranslateResponse(BaseResponse): """ -class TranslateImageResponse(TypedDict): - image: bytes +class TranslateImageResponse(BaseResponse): + url: str """ - The image data that was translated. + The URL or base64 of the translated image. """ @@ -83,17 +83,17 @@ def text(self, params: TranslateParams) -> TranslateResponse: return resp @overload - def image(self, params: TranslateImageParams) -> TranslateImageResponse: ... + def image(self, params: TranslateImageParams) -> Union[TranslateImageResponse, bytes]: ... @overload def image( self, blob: bytes, options: TranslateImageParams = None - ) -> TranslateImageParams: ... + ) -> Union[TranslateImageResponse, bytes]: ... 
def image( self, blob: Union[TranslateImageParams, bytes], options: TranslateImageParams = None, - ) -> TranslateImageResponse: + ) -> Union[TranslateImageResponse, bytes]: if isinstance( blob, dict ): # If params is provided as a dict, we assume it's the first argument From b9d2ac1d0d1cc1dd34e944c829febede073c09ae Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 20:03:14 -0700 Subject: [PATCH 33/53] fix: formatting for translate --- jigsawstack/translate.py | 1 - 1 file changed, 1 deletion(-) diff --git a/jigsawstack/translate.py b/jigsawstack/translate.py index b9fef14..4bb51a6 100644 --- a/jigsawstack/translate.py +++ b/jigsawstack/translate.py @@ -2,7 +2,6 @@ from typing_extensions import NotRequired, TypedDict, Literal from .request import Request, RequestConfig from .async_request import AsyncRequest -from typing import List, Union from ._config import ClientConfig from .helpers import build_path from ._types import BaseResponse From 33c895e64743695349e603cc8566653817cb7ea0 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 20:03:44 -0700 Subject: [PATCH 34/53] test: defining test cases for text and image translation. 
--- tests/test_translate.py | 238 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 238 insertions(+) create mode 100644 tests/test_translate.py diff --git a/tests/test_translate.py b/tests/test_translate.py new file mode 100644 index 0000000..7c903c7 --- /dev/null +++ b/tests/test_translate.py @@ -0,0 +1,238 @@ +import requests +from jigsawstack.exceptions import JigsawStackError +import jigsawstack +import pytest +import logging +from dotenv import load_dotenv +import os + +load_dotenv() + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) + +# Sample image URL for translation tests +IMAGE_URL = "https://images.unsplash.com/photo-1580679137870-86ef9f9a03d6?q=80&w=2574&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" + +# Text translation test cases +TEXT_TEST_CASES = [ + { + "name": "translate_single_text_to_spanish", + "params": { + "text": "Hello, how are you?", + "target_language": "es", + }, + }, + { + "name": "translate_single_text_with_current_language", + "params": { + "text": "Bonjour, comment allez-vous?", + "current_language": "fr", + "target_language": "en", + }, + }, + { + "name": "translate_multiple_texts", + "params": { + "text": ["Hello world", "Good morning", "Thank you"], + "target_language": "fr", + }, + }, + { + "name": "translate_to_german", + "params": { + "text": "The weather is beautiful today", + "target_language": "de", + }, + }, + { + "name": "translate_to_japanese", + "params": { + "text": "Welcome to our website", + "target_language": "ja", + }, + }, + { + "name": "translate_multiple_with_source_language", + "params": { + "text": ["Ciao", "Grazie", "Arrivederci"], + "current_language": "it", + "target_language": "en", + }, + }, +] + +# Image translation test cases +IMAGE_TEST_CASES = [ + { + 
"name": "translate_image_with_url", + "params": { + "url": IMAGE_URL, + "target_language": "es", + }, + "blob": None, + "options": None, + }, + { + "name": "translate_image_with_blob", + "params": None, + "blob": IMAGE_URL, + "options": { + "target_language": "fr", + }, + }, + { + "name": "translate_image_with_url_return_base64", + "params": { + "url": IMAGE_URL, + "target_language": "de", + "return_type": "base64", + }, + "blob": None, + "options": None, + }, + { + "name": "translate_image_with_blob_return_url", + "params": None, + "blob": IMAGE_URL, + "options": { + "target_language": "ja", + "return_type": "url", + }, + }, + { + "name": "translate_image_with_blob_return_binary", + "params": None, + "blob": IMAGE_URL, + "options": { + "target_language": "zh", + "return_type": "binary", + }, + }, + { + "name": "translate_image_to_italian", + "params": { + "url": IMAGE_URL, + "target_language": "it", + }, + "blob": None, + "options": None, + }, +] + + +class TestTranslateTextSync: + """Test synchronous text translation methods""" + + sync_test_cases = TEXT_TEST_CASES + + @pytest.mark.parametrize( + "test_case", sync_test_cases, ids=[tc["name"] for tc in sync_test_cases] + ) + def test_translate_text(self, test_case): + """Test synchronous text translation with various inputs""" + try: + result = jigsaw.translate.text(test_case["params"]) + assert result["success"] + assert "translated_text" in result + + # Check if the response structure matches the input + if isinstance(test_case["params"]["text"], list): + assert isinstance(result["translated_text"], list) + assert len(result["translated_text"]) == len(test_case["params"]["text"]) + else: + assert isinstance(result["translated_text"], str) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestTranslateTextAsync: + """Test asynchronous text translation methods""" + + async_test_cases = TEXT_TEST_CASES + + @pytest.mark.parametrize( + "test_case", 
async_test_cases, ids=[tc["name"] for tc in async_test_cases] + ) + @pytest.mark.asyncio + async def test_translate_text_async(self, test_case): + """Test asynchronous text translation with various inputs""" + try: + result = await async_jigsaw.translate.text(test_case["params"]) + assert result["success"] + assert "translated_text" in result + + # Check if the response structure matches the input + if isinstance(test_case["params"]["text"], list): + assert isinstance(result["translated_text"], list) + assert len(result["translated_text"]) == len(test_case["params"]["text"]) + else: + assert isinstance(result["translated_text"], str) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestTranslateImageSync: + """Test synchronous image translation methods""" + + sync_test_cases = IMAGE_TEST_CASES + + @pytest.mark.parametrize( + "test_case", sync_test_cases, ids=[tc["name"] for tc in sync_test_cases] + ) + def test_translate_image(self, test_case): + """Test synchronous image translation with various inputs""" + try: + if test_case.get("blob"): + # Download blob content + blob_content = requests.get(test_case["blob"]).content + result = jigsaw.translate.image( + blob_content, test_case.get("options", {}) + ) + else: + # Use params directly + result = jigsaw.translate.image(test_case["params"]) + assert result is not None + if isinstance(result, dict): + assert "url" in result + else: + assert isinstance(result, bytes) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestTranslateImageAsync: + """Test asynchronous image translation methods""" + + async_test_cases = IMAGE_TEST_CASES + + @pytest.mark.parametrize( + "test_case", async_test_cases, ids=[tc["name"] for tc in async_test_cases] + ) + @pytest.mark.asyncio + async def test_translate_image_async(self, test_case): + """Test asynchronous image translation with various inputs""" 
+ try: + if test_case.get("blob"): + # Download blob content + blob_content = requests.get(test_case["blob"]).content + result = await async_jigsaw.translate.image( + blob_content, test_case.get("options", {}) + ) + else: + # Use params directly + result = await async_jigsaw.translate.image(test_case["params"]) + assert result is not None + if isinstance(result, dict): + assert "url" in result + else: + assert isinstance(result, bytes) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") \ No newline at end of file From b12157816909f114b904830785a9adb08349eb76 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 20:04:54 -0700 Subject: [PATCH 35/53] fix: update type for image translation. --- jigsawstack/translate.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/jigsawstack/translate.py b/jigsawstack/translate.py index 4bb51a6..0b95ef0 100644 --- a/jigsawstack/translate.py +++ b/jigsawstack/translate.py @@ -146,17 +146,17 @@ async def text(self, params: TranslateParams) -> TranslateResponse: return resp @overload - async def image(self, params: TranslateImageParams) -> TranslateImageResponse: ... + async def image(self, params: TranslateImageParams) -> Union[TranslateImageResponse, bytes]: ... @overload async def image( self, blob: bytes, options: TranslateImageParams = None - ) -> TranslateImageParams: ... + ) -> Union[TranslateImageResponse, bytes]: ... async def image( self, blob: Union[TranslateImageParams, bytes], options: TranslateImageParams = None, - ) -> TranslateImageResponse: + ) -> Union[TranslateImageResponse, bytes]: if isinstance(blob, dict): resp = await AsyncRequest( config=self.config, From 1f743da90f56ea4908a920472b7c9174bb87eaac Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 20:28:04 -0700 Subject: [PATCH 36/53] fix: formatting for the files. 
--- jigsawstack/audio.py | 1 - jigsawstack/embedding.py | 1 - jigsawstack/geo.py | 406 ----------------------------------- jigsawstack/prompt_engine.py | 1 - jigsawstack/store.py | 3 +- jigsawstack/summary.py | 1 - 6 files changed, 1 insertion(+), 412 deletions(-) delete mode 100644 jigsawstack/geo.py diff --git a/jigsawstack/audio.py b/jigsawstack/audio.py index cb4f199..9e6dfd0 100644 --- a/jigsawstack/audio.py +++ b/jigsawstack/audio.py @@ -4,7 +4,6 @@ from .async_request import AsyncRequest, AsyncRequestConfig from ._config import ClientConfig from typing_extensions import Literal -from .helpers import build_path from ._types import BaseResponse diff --git a/jigsawstack/embedding.py b/jigsawstack/embedding.py index 70a8359..e37e856 100644 --- a/jigsawstack/embedding.py +++ b/jigsawstack/embedding.py @@ -2,7 +2,6 @@ from typing_extensions import NotRequired, TypedDict from .request import Request, RequestConfig from .async_request import AsyncRequest -from typing import List, Union from ._config import ClientConfig from .helpers import build_path from ._types import BaseResponse diff --git a/jigsawstack/geo.py b/jigsawstack/geo.py deleted file mode 100644 index cd182ba..0000000 --- a/jigsawstack/geo.py +++ /dev/null @@ -1,406 +0,0 @@ -from typing import Any, Dict, List, Union, cast -from typing_extensions import NotRequired, TypedDict -from .request import Request, RequestConfig -from .async_request import AsyncRequestConfig, AsyncRequest -from typing import List, Union -from ._config import ClientConfig - - -class BaseResponse: - success: bool - - -class GeoParams(TypedDict): - search_value: str - lat: str - lng: str - country_code: str - proximity_lat: str - proximity_lng: str - types: str - city_code: str - state_code: str - limit: int - - -class GeoSearchParams(TypedDict): - search_value: str - country_code: NotRequired[str] = None - proximity_lat: NotRequired[str] = None - proximity_lng: NotRequired[str] = None - types: NotRequired[str] = None - - -class 
Geoloc(TypedDict): - type: str - coordinates: List[float] - - -class Region(TypedDict): - name: str - region_code: str - region_code_full: str - - -class Country(TypedDict): - name: str - country_code: str - country_code_alpha_3: str - - -class GeoSearchResult(TypedDict): - type: str - full_address: str - name: str - place_formatted: str - postcode: str - place: str - region: Region - country: Country - language: str - geoloc: Geoloc - poi_category: List[str] - additional_properties: Dict[str, any] - - -class CityResult(TypedDict): - state_code: str - name: str - city_code: str - state: "StateResult" - - -class CountryResult(TypedDict): - country_code: str - name: str - iso2: str - iso3: str - capital: str - phone_code: str - region: str - subregion: str - currency_code: str - geoloc: Geoloc - currency_name: str - currency_symbol: str - tld: str - native: str - emoji: str - emojiU: str - latitude: float - longitude: float - - -class StateResult(TypedDict): - state_code: str - name: str - country_code: str - country: CountryResult - - -class GeoSearchResponse(BaseResponse): - data: List[GeoSearchResult] - - -class GeocodeParams(TypedDict): - search_value: str - lat: str - lng: str - country_code: str - proximity_lat: str - proximity_lng: str - types: str - limit: int - - -class GeoCityParams(TypedDict): - country_code: str - city_code: str - state_code: str - search_value: str - lat: str - lng: str - limit: int - - -class GeoCityResponse(BaseResponse): - city: List[CityResult] - - -class GeoCountryParams(TypedDict): - country_code: str - city_code: str - search_value: str - lat: str - lng: str - limit: int - currency_code: str - - -class GeoCountryResponse(BaseResponse): - country: List[CountryResult] - - -class GeoStateParams(TypedDict): - country_code: str - state_code: str - search_value: str - lat: str - lng: str - limit: int - - -class GeoStateResponse(BaseResponse): - state: List[StateResult] - - -class GeoDistanceParams(TypedDict): - unit: NotRequired[str] = 
None # "K" or "N" - lat1: str - lng1: str - lat2: str - lng2: str - - -class GeoDistanceResponse(BaseResponse): - distance: float - - -class GeoTimezoneParams(TypedDict): - lat: str - lng: str - city_code: NotRequired[str] = None - country_code: NotRequired[str] = None - - -class GeoTimezoneResponse(BaseResponse): - timezone: Dict[str, any] - - -class GeohashParams(TypedDict): - lat: str - lng: str - precision: int - - -class GeohashResponse(BaseResponse): - geohash: str - - -class GeohashDecodeResponse(BaseResponse): - latitude: float - longitude: float - - -class Geo(ClientConfig): - config: RequestConfig - - def __init__( - self, - api_key: str, - api_url: str, - disable_request_logging: Union[bool, None] = False, - ): - super().__init__(api_key, api_url, disable_request_logging) - self.config = RequestConfig( - api_url=api_url, - api_key=api_key, - disable_request_logging=disable_request_logging, - ) - - def search(self, params: GeoSearchParams) -> GeoSearchResponse: - path = "/geo/search" - resp = Request( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params), - verb="get", - ).perform_with_content() - return resp - - def geocode(self, params: GeocodeParams) -> GeohashDecodeResponse: - path = "/geo/geocode" - resp = Request( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params), - verb="get", - ).perform_with_content() - return resp - - def city(self, params: GeoCityParams) -> GeoCityResponse: - path = "/geo/city" - resp = Request( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params), - verb="get", - ).perform_with_content() - return resp - - def country(self, params: GeoCountryParams) -> GeoCountryResponse: - path = "/geo/country" - resp = Request( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params), - verb="get", - ).perform_with_content() - return resp - - def state(self, params: GeoStateParams) -> GeoStateResponse: - path = "/geo/state" - resp = Request( - config=self.config, - 
path=path, - params=cast(Dict[Any, Any], params), - verb="get", - ).perform_with_content() - return resp - - def distance(self, params: GeoDistanceParams) -> GeoDistanceResponse: - path = "/geo/distance" - resp = Request( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params), - verb="get", - ).perform_with_content() - return resp - - def timezone(self, params: GeoTimezoneParams) -> GeoTimezoneResponse: - path = "/geo/timezone" - resp = Request( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params), - verb="get", - ).perform_with_content() - return resp - - def geohash(self, params: GeohashParams) -> GeohashResponse: - path = "/geo/geohash" - resp = Request( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params), - verb="get", - ).perform_with_content() - return resp - - def geohash(self, key: str) -> GeohashDecodeResponse: - path = f"/geo/geohash/decode/{key}" - resp = Request( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params={}), - verb="get", - ).perform_with_content() - return resp - - -class AsyncGeo(ClientConfig): - config: AsyncRequestConfig - - def __init__( - self, - api_key: str, - api_url: str, - disable_request_logging: Union[bool, None] = False, - ): - super().__init__(api_key, api_url, disable_request_logging) - self.config = AsyncRequestConfig( - api_url=api_url, - api_key=api_key, - disable_request_logging=disable_request_logging, - ) - - async def search(self, params: GeoSearchParams) -> GeoSearchResponse: - path = "/geo/search" - resp = await AsyncRequest( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params), - verb="get", - ).perform_with_content() - return resp - - async def geocode(self, params: GeocodeParams) -> GeohashDecodeResponse: - path = "/geo/geocode" - resp = await AsyncRequest( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params), - verb="get", - ).perform_with_content() - return resp - - async def city(self, params: 
GeoCityParams) -> GeoCityResponse: - path = "/geo/city" - resp = await AsyncRequest( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params), - verb="get", - ).perform_with_content() - return resp - - async def country(self, params: GeoCountryParams) -> GeoCountryResponse: - path = "/geo/country" - resp = await AsyncRequest( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params), - verb="get", - ).perform_with_content() - return resp - - async def state(self, params: GeoStateParams) -> GeoStateResponse: - path = "/geo/state" - resp = await AsyncRequest( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params), - verb="get", - ).perform_with_content() - return resp - - async def distance(self, params: GeoDistanceParams) -> GeoDistanceResponse: - path = "/geo/distance" - resp = await AsyncRequest( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params), - verb="get", - ).perform_with_content() - return resp - - async def timezone(self, params: GeoTimezoneParams) -> GeoTimezoneResponse: - path = "/geo/timezone" - resp = await AsyncRequest( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params), - verb="get", - ).perform_with_content() - return resp - - async def geohash(self, params: GeohashParams) -> GeohashResponse: - path = "/geo/geohash" - resp = await AsyncRequest( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params), - verb="get", - ).perform_with_content() - return resp - - async def geohash(self, key: str) -> GeohashDecodeResponse: - path = f"/geo/geohash/decode/{key}" - resp = await AsyncRequest( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params={}), - verb="get", - ).perform_with_content() - return resp diff --git a/jigsawstack/prompt_engine.py b/jigsawstack/prompt_engine.py index 378e9b3..62416e7 100644 --- a/jigsawstack/prompt_engine.py +++ b/jigsawstack/prompt_engine.py @@ -2,7 +2,6 @@ from typing_extensions import NotRequired, 
TypedDict from .request import Request, RequestConfig from .async_request import AsyncRequest -from typing import List, Union from ._config import ClientConfig from .helpers import build_path diff --git a/jigsawstack/store.py b/jigsawstack/store.py index 72bf191..878a767 100644 --- a/jigsawstack/store.py +++ b/jigsawstack/store.py @@ -1,10 +1,9 @@ -from typing import Any, Dict, List, Union, cast +from typing import Any, Union from typing_extensions import NotRequired, TypedDict from .request import Request, RequestConfig from .async_request import AsyncRequest, AsyncRequestConfig from ._config import ClientConfig from .helpers import build_path -from .exceptions import JigsawStackError class FileDeleteResponse(TypedDict): diff --git a/jigsawstack/summary.py b/jigsawstack/summary.py index 898c42b..898312d 100644 --- a/jigsawstack/summary.py +++ b/jigsawstack/summary.py @@ -2,7 +2,6 @@ from typing_extensions import NotRequired, TypedDict from .request import Request, RequestConfig from .async_request import AsyncRequest -from typing import List, Union from ._config import ClientConfig from ._types import BaseResponse From 6706742a06f1b5167bf95038f35d8ea122fb37a5 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 20:29:09 -0700 Subject: [PATCH 37/53] fix: dropping url used for local testing. 
--- tests/test_async_web.py | 36 --------------------------- tests/test_embedding.py | 8 ++---- tests/test_prediction.py | 9 +++---- tests/test_search.py | 53 ---------------------------------------- tests/test_sentiment.py | 8 ++---- tests/test_sql.py | 8 ++---- tests/test_summary.py | 8 ++---- tests/test_validate.py | 8 ++---- 8 files changed, 13 insertions(+), 125 deletions(-) delete mode 100644 tests/test_async_web.py delete mode 100644 tests/test_search.py diff --git a/tests/test_async_web.py b/tests/test_async_web.py deleted file mode 100644 index 99899a8..0000000 --- a/tests/test_async_web.py +++ /dev/null @@ -1,36 +0,0 @@ -from unittest.mock import MagicMock -import unittest -from jigsawstack.exceptions import JigsawStackError -from jigsawstack import AsyncJigsawStack -import pytest -import asyncio -import logging - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -def test_async_web_search_response(): - async def _test(): - client = AsyncJigsawStack() - try: - result = await client.web.search({"query": "JigsawStack fund raising"}) - # logger.info(result) - assert result["success"] == True - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") - - asyncio.run(_test()) - - -def test_async_web_search_suggestion_response(): - async def _test(): - client = AsyncJigsawStack() - try: - result = await client.web.search_suggestion({"query": "Lagos"}) - logger.info(result) - assert result["success"] == True - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") - - asyncio.run(_test()) diff --git a/tests/test_embedding.py b/tests/test_embedding.py index 0106751..c5b08f5 100644 --- a/tests/test_embedding.py +++ b/tests/test_embedding.py @@ -11,12 +11,8 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack( - api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") -) -async_jigsaw = 
jigsawstack.AsyncJigsawStack( - api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") -) +jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) SAMPLE_TEXT = "The quick brown fox jumps over the lazy dog. This is a sample text for embedding generation." SAMPLE_IMAGE_URL = "https://images.unsplash.com/photo-1542931287-023b922fa89b?q=80&w=2574&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" diff --git a/tests/test_prediction.py b/tests/test_prediction.py index 6069140..48ba6f7 100644 --- a/tests/test_prediction.py +++ b/tests/test_prediction.py @@ -11,12 +11,9 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack( - api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") -) -async_jigsaw = jigsawstack.AsyncJigsawStack( - api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") -) +jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) + def generate_dates(start_date, num_days): diff --git a/tests/test_search.py b/tests/test_search.py deleted file mode 100644 index 1ee28f0..0000000 --- a/tests/test_search.py +++ /dev/null @@ -1,53 +0,0 @@ -from unittest.mock import MagicMock -import unittest -from jigsawstack.exceptions import JigsawStackError -import jigsawstack -import pytest -import asyncio -import logging - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - -jigsaw = jigsawstack.JigsawStack() -async_jigsaw = jigsawstack.AsyncJigsawStack() - - -def test_search_suggestion_response(): - try: - result = jigsaw.web.search({"query": "Where is San Francisco"}) - assert result["success"] == True - except JigsawStackError as e: - pytest.fail(f"Unexpected 
JigsawStackError: {e}") - - -def test_ai_search_response(): - try: - result = jigsaw.web.search({"query": "Where is San Francisco"}) - assert result["success"] == True - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") - - -def test_search_suggestion_response_async(): - async def _test(): - client = jigsawstack.AsyncJigsawStack() - try: - result = await client.web.search({"query": "Where is San Francisco"}) - assert result["success"] == True - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") - - asyncio.run(_test()) - - -def test_ai_search_response_async(): - async def _test(): - client = jigsawstack.AsyncJigsawStack() - try: - result = await client.web.search({"query": "Where is San Francisco"}) - assert result["success"] == True - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") - - asyncio.run(_test()) diff --git a/tests/test_sentiment.py b/tests/test_sentiment.py index e46e44f..8967562 100644 --- a/tests/test_sentiment.py +++ b/tests/test_sentiment.py @@ -10,12 +10,8 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack( - api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") -) -async_jigsaw = jigsawstack.AsyncJigsawStack( - api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") -) +jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) TEST_CASES = [ { diff --git a/tests/test_sql.py b/tests/test_sql.py index 397b855..71de82b 100644 --- a/tests/test_sql.py +++ b/tests/test_sql.py @@ -10,12 +10,8 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack( - api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") -) -async_jigsaw = jigsawstack.AsyncJigsawStack( - 
api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") -) +jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) # Sample schemas for different databases MYSQL_SCHEMA = """ diff --git a/tests/test_summary.py b/tests/test_summary.py index 46b5229..12125de 100644 --- a/tests/test_summary.py +++ b/tests/test_summary.py @@ -10,12 +10,8 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack( - api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") -) -async_jigsaw = jigsawstack.AsyncJigsawStack( - api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") -) +jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) LONG_TEXT = """ Artificial Intelligence (AI) has become one of the most transformative technologies of the 21st century. diff --git a/tests/test_validate.py b/tests/test_validate.py index f6219b1..9ad90bc 100644 --- a/tests/test_validate.py +++ b/tests/test_validate.py @@ -11,12 +11,8 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack( - api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") -) -async_jigsaw = jigsawstack.AsyncJigsawStack( - api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") -) +jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) # Sample URLs for NSFW testing SAFE_IMAGE_URL = ( From 408afeb9d92ccade67503329bece0c0e7b42e0aa Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 21:39:06 -0700 Subject: [PATCH 38/53] fix: formatting for test cases. 
--- tests/test_file_store.py | 2 +- tests/test_image_generation.py | 5 ++-- tests/test_web.py | 42 ++++++++++------------------------ 3 files changed, 15 insertions(+), 34 deletions(-) diff --git a/tests/test_file_store.py b/tests/test_file_store.py index c44090b..e48cc15 100644 --- a/tests/test_file_store.py +++ b/tests/test_file_store.py @@ -32,7 +32,7 @@ "name": "upload_image_with_temp_url", "file": BINARY_FILE_CONTENT, "options": { - "key": f"test_image.jpg", + "key": "test_image.jpg", "content_type": "image/jpeg", "overwrite": True, "temp_public_url": True, diff --git a/tests/test_image_generation.py b/tests/test_image_generation.py index fe2dc79..052fd6c 100644 --- a/tests/test_image_generation.py +++ b/tests/test_image_generation.py @@ -5,7 +5,6 @@ import logging from dotenv import load_dotenv import os -import base64 load_dotenv() logging.basicConfig(level=logging.INFO) @@ -166,7 +165,7 @@ def test_image_to_image_generation(self, test_case): assert result is not None if type(result) is dict: - assert result.get("success") == True + assert result.get("success") assert result.get("url") is not None elif type(result) is bytes: assert isinstance(result, bytes) @@ -217,7 +216,7 @@ async def test_image_to_image_generation_async(self, test_case): assert result is not None if type(result) is dict: - assert result.get("success") == True + assert result.get("success") assert result.get("url") is not None elif type(result) is bytes: assert isinstance(result, bytes) diff --git a/tests/test_web.py b/tests/test_web.py index 565d1e5..dda97c5 100644 --- a/tests/test_web.py +++ b/tests/test_web.py @@ -10,12 +10,8 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack( - api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") -) -async_jigsaw = jigsawstack.AsyncJigsawStack( - api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") -) +jigsaw = 
jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) URL = "https://jigsawstack.com" @@ -40,6 +36,7 @@ "name": "scrape_with_features", "params": { "url": URL, + "element_prompts": ["title"], "features": ["meta", "link"], }, }, @@ -51,14 +48,6 @@ "root_element_selector": "main", }, }, - { - "name": "scrape_with_wait_for_selector", - "params": { - "url": URL, - "element_prompts": ["dynamic content"], - "wait_for": {"mode": "selector", "value": ".loaded-content"}, - }, - }, { "name": "scrape_with_wait_for_timeout", "params": { @@ -186,13 +175,6 @@ "query": "artificial intelligence news", }, }, - { - "name": "search_with_max_results", - "params": { - "query": "python programming", - "max_results": 5, - }, - }, { "name": "search_specific_site", "params": { @@ -203,7 +185,7 @@ "name": "search_ai_mode", "params": { "query": "explain quantum computing", - "ai": True, + "ai_overview": True, }, }, ] @@ -248,7 +230,7 @@ "name": "deep_research_with_depth", "params": { "query": "renewable energy sources", - "depth": 3, + "depth": 2, }, }, ] @@ -381,9 +363,9 @@ def test_deep_research(self, test_case): result = jigsaw.web.deep_research(test_case["params"]) assert result["success"] - assert "report" in result - assert isinstance(result["report"], str) - assert len(result["report"]) > 0 + assert "results" in result + assert isinstance(result["results"], str) + assert len(result["results"]) > 0 # Check for sources if "sources" in result: @@ -483,7 +465,7 @@ async def test_search_async(self, test_case): assert "description" in item # Check AI mode response - if test_case["params"].get("ai"): + if test_case["params"].get("ai_overview"): assert "ai_overview" in result except JigsawStackError as e: @@ -528,9 +510,9 @@ async def test_deep_research_async(self, test_case): result = await async_jigsaw.web.deep_research(test_case["params"]) assert result["success"] - assert "report" in result 
- assert isinstance(result["report"], str) - assert len(result["report"]) > 0 + assert "results" in result + assert isinstance(result["results"], str) + assert len(result["results"]) > 0 # Check for sources if "sources" in result: From 7de35498bbc0bf4cc06f269b4a937ec6e1dab8ee Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 21:39:41 -0700 Subject: [PATCH 39/53] chore: rebase linter to ruff --- biome.json | 37 ------------------------------------- 1 file changed, 37 deletions(-) delete mode 100644 biome.json diff --git a/biome.json b/biome.json deleted file mode 100644 index 5ad6df5..0000000 --- a/biome.json +++ /dev/null @@ -1,37 +0,0 @@ -{ - "$schema": "https://biomejs.dev/schemas/1.9.4/schema.json", - "files": { - "ignoreUnknown": false, - "ignore": [] - }, - "formatter": { - "enabled": true, - "useEditorconfig": true, - "formatWithErrors": false, - "indentStyle": "space", - "indentWidth": 2, - "lineEnding": "lf", - "lineWidth": 150, - "attributePosition": "auto", - "bracketSpacing": true - }, - "organizeImports": { - "enabled": true - }, - "linter": { - "enabled": false - }, - "javascript": { - "formatter": { - "jsxQuoteStyle": "double", - "quoteProperties": "asNeeded", - "trailingCommas": "es5", - "semicolons": "always", - "arrowParentheses": "always", - "bracketSameLine": false, - "quoteStyle": "double", - "attributePosition": "auto", - "bracketSpacing": true - } - } -} From 3f920f8bcd5584447ae4e156bc464e53f8303e00 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 22:12:43 -0700 Subject: [PATCH 40/53] chore: setting up ci for formatting checks and testing. 
--- .github/ruff.toml | 24 ++++++++ .github/workflows/ci.yml | 117 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 141 insertions(+) create mode 100644 .github/ruff.toml create mode 100644 .github/workflows/ci.yml diff --git a/.github/ruff.toml b/.github/ruff.toml new file mode 100644 index 0000000..e431c02 --- /dev/null +++ b/.github/ruff.toml @@ -0,0 +1,24 @@ +# Ruff configuration for CI/CD +line-length = 88 +target-version = "py37" + +[lint] +select = [ + "E", # pycodestyle errors + "W", # pycodestyle warnings + "F", # pyflakes + "I", # isort + "B", # flake8-bugbear + "C4", # flake8-comprehensions + "UP", # pyupgrade +] +ignore = [ + "E501", # line too long (handled by formatter) + "B008", # do not perform function calls in argument defaults +] + +[format] +quote-style = "double" +indent-style = "space" +skip-magic-trailing-comma = false +line-ending = "auto" \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..12b1e5c --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,117 @@ +name: CI + +on: + push: + branches: [ main, develop ] + pull_request: + branches: [ main, develop ] + +jobs: + ruff-format-check: + name: Ruff Format Check - ${{ matrix.file }} + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + file: + - __init__.py + - _config.py + - _types.py + - async_request.py + - audio.py + - classification.py + - embedding_v2.py + - embedding.py + - exceptions.py + - helpers.py + - image_generation.py + - prediction.py + - prompt_engine.py + - request.py + - search.py + - sentiment.py + - sql.py + - store.py + - summary.py + - translate.py + - validate.py + - vision.py + - web.py + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.12' + + - name: Install ruff + run: pip install ruff + + - name: Check formatting for ${{ matrix.file }} + run: | + ruff check jigsawstack/${{ matrix.file }} 
--select I,F,E,W + ruff format --check jigsawstack/${{ matrix.file }} + + test: + name: Test - ${{ matrix.test-file }} + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + test-file: + - test_audio.py + - test_classification.py + - test_embedding.py + - test_file_store.py + - test_geo.py + - test_image_generation.py + - test_object_detection.py + - test_prediction.py + - test_sentiment.py + - test_sql.py + - test_summary.py + - test_translate.py + - test_validate.py + - test_vision.py + - test_web.py + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.12' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install pytest pytest-asyncio pytest-cov + pip install -e . + + - name: Run test ${{ matrix.test-file }} + env: + JIGSAWSTACK_API_KEY: ${{ secrets.JIGSAWSTACK_API_KEY }} + run: | + pytest tests/${{ matrix.test-file }} -v + continue-on-error: true + + - name: Check if critical tests passed + if: contains(matrix.test-file, 'test_') && !contains(matrix.test-file, 'skip') + run: | + pytest tests/${{ matrix.test-file }} -v -m "not skip" + + all-checks-passed: + name: All Checks Passed + needs: [ruff-format-check, test] + runs-on: ubuntu-latest + if: always() + steps: + - name: Check if all jobs passed + run: | + if [[ "${{ needs.ruff-format-check.result }}" != "success" || "${{ needs.test.result }}" != "success" ]]; then + echo "One or more checks failed" + exit 1 + fi + echo "All checks passed successfully!" \ No newline at end of file From 592c5648afb4375b30800c54ef22035acdb90003 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 22:23:23 -0700 Subject: [PATCH 41/53] chore: track success count for ci/cd merge. 
--- .github/workflows/ci.yml | 57 +++++++++++++++++++++++++++++++++------- 1 file changed, 47 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 12b1e5c..da6881a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -64,7 +64,6 @@ jobs: - test_classification.py - test_embedding.py - test_file_store.py - - test_geo.py - test_image_generation.py - test_object_detection.py - test_prediction.py @@ -75,6 +74,8 @@ jobs: - test_validate.py - test_vision.py - test_web.py + outputs: + test-result: ${{ steps.test-run.outcome }} steps: - uses: actions/checkout@v4 @@ -91,16 +92,29 @@ jobs: pip install -e . - name: Run test ${{ matrix.test-file }} + id: test-run env: JIGSAWSTACK_API_KEY: ${{ secrets.JIGSAWSTACK_API_KEY }} run: | - pytest tests/${{ matrix.test-file }} -v - continue-on-error: true + pytest tests/${{ matrix.test-file }} -v --json-report --json-report-file=report.json - - name: Check if critical tests passed - if: contains(matrix.test-file, 'test_') && !contains(matrix.test-file, 'skip') + - name: Count passed tests + id: count-tests + if: always() run: | - pytest tests/${{ matrix.test-file }} -v -m "not skip" + if [ -f report.json ]; then + PASSED=$(python -c "import json; data=json.load(open('report.json')); print(data.get('summary', {}).get('passed', 0))") + echo "passed-count=$PASSED" >> $GITHUB_OUTPUT + else + echo "passed-count=0" >> $GITHUB_OUTPUT + fi + + - name: Upload test results + if: always() + uses: actions/upload-artifact@v3 + with: + name: test-results-${{ matrix.test-file }} + path: report.json all-checks-passed: name: All Checks Passed @@ -108,10 +122,33 @@ jobs: runs-on: ubuntu-latest if: always() steps: - - name: Check if all jobs passed + - name: Download all test results + uses: actions/download-artifact@v3 + with: + path: test-results + + - name: Count total passed tests run: | - if [[ "${{ needs.ruff-format-check.result }}" != "success" || "${{ needs.test.result }}" != 
"success" ]]; then - echo "One or more checks failed" + TOTAL_PASSED=0 + for file in test-results/*/report.json; do + if [ -f "$file" ]; then + PASSED=$(python -c "import json; data=json.load(open('$file')); print(data.get('summary', {}).get('passed', 0))") + TOTAL_PASSED=$((TOTAL_PASSED + PASSED)) + fi + done + + echo "Total passed tests: $TOTAL_PASSED" + + if [ $TOTAL_PASSED -lt 327 ]; then + echo "❌ Insufficient tests passed: $TOTAL_PASSED/327" exit 1 + else + echo "✅ Required tests passed: $TOTAL_PASSED/327" fi - echo "All checks passed successfully!" \ No newline at end of file + + - name: Check if ruff passed + run: | + if [[ "${{ needs.ruff-format-check.result }}" != "success" ]]; then + echo "Ruff format check failed" + exit 1 + fi \ No newline at end of file From 083874aed8f4202ecb5aff0596251a9092258777 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 22:25:23 -0700 Subject: [PATCH 42/53] chore: loosen ruff constraints. --- .github/ruff.toml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/.github/ruff.toml b/.github/ruff.toml index e431c02..6b655a6 100644 --- a/.github/ruff.toml +++ b/.github/ruff.toml @@ -13,12 +13,5 @@ select = [ "UP", # pyupgrade ] ignore = [ - "E501", # line too long (handled by formatter) "B008", # do not perform function calls in argument defaults ] - -[format] -quote-style = "double" -indent-style = "space" -skip-magic-trailing-comma = false -line-ending = "auto" \ No newline at end of file From 302d7ad2bb3659998753a29215a42787ec9104b5 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 22:29:06 -0700 Subject: [PATCH 43/53] fix: artifacts to use v4 since v3 upload and download artifacts were deprecated by GitHub earlier this year.
--- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index da6881a..2d0f6df 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -111,7 +111,7 @@ jobs: - name: Upload test results if: always() - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: test-results-${{ matrix.test-file }} path: report.json @@ -123,7 +123,7 @@ jobs: if: always() steps: - name: Download all test results - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: path: test-results From be5c5a7ab8693429536fdb1c98a2554d1bd6d306 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 22:34:18 -0700 Subject: [PATCH 44/53] fix: formatting with ruff. --- jigsawstack/__init__.py | 33 ++++++++++---------- jigsawstack/async_request.py | 8 +++-- jigsawstack/audio.py | 11 ++++--- jigsawstack/classification.py | 8 +++-- jigsawstack/embedding.py | 10 +++--- jigsawstack/embedding_v2.py | 10 +++--- jigsawstack/image_generation.py | 7 +++-- jigsawstack/prediction.py | 5 +-- jigsawstack/prompt_engine.py | 8 +++-- jigsawstack/request.py | 6 ++-- jigsawstack/search.py | 10 +++--- jigsawstack/sentiment.py | 6 ++-- jigsawstack/sql.py | 8 +++-- jigsawstack/store.py | 6 ++-- jigsawstack/summary.py | 8 +++-- jigsawstack/translate.py | 10 +++--- jigsawstack/validate.py | 8 +++-- jigsawstack/vision.py | 12 +++++--- jigsawstack/web.py | 17 ++++++----- tests/test_audio.py | 38 ++++++++++++----------- tests/test_classification.py | 10 +++--- tests/test_embedding.py | 12 +++++--- tests/test_file_store.py | 54 +++++++++++++++++---------------- tests/test_image_generation.py | 36 +++++++++++----------- tests/test_object_detection.py | 12 +++++--- tests/test_prediction.py | 10 +++--- tests/test_sentiment.py | 10 +++--- tests/test_sql.py | 10 +++--- tests/test_summary.py | 10 +++--- tests/test_translate.py | 22 ++++++++------ 
tests/test_validate.py | 12 +++++--- 31 files changed, 242 insertions(+), 185 deletions(-) diff --git a/jigsawstack/__init__.py b/jigsawstack/__init__.py index 2c5d775..091f775 100644 --- a/jigsawstack/__init__.py +++ b/jigsawstack/__init__.py @@ -1,22 +1,23 @@ -from typing import Union, Dict import os -from .audio import Audio, AsyncAudio -from .vision import Vision, AsyncVision +from typing import Dict, Union + +from .audio import AsyncAudio, Audio +from .classification import AsyncClassification, Classification +from .embedding import AsyncEmbedding, Embedding +from .embedding_v2 import AsyncEmbeddingV2, EmbeddingV2 +from .exceptions import JigsawStackError +from .image_generation import AsyncImageGeneration, ImageGeneration +from .prediction import AsyncPrediction, Prediction +from .prompt_engine import AsyncPromptEngine, PromptEngine from .search import Search -from .prediction import Prediction, AsyncPrediction +from .sentiment import AsyncSentiment, Sentiment from .sql import SQL, AsyncSQL -from .store import Store, AsyncStore -from .translate import Translate, AsyncTranslate -from .web import Web, AsyncWeb -from .sentiment import Sentiment, AsyncSentiment -from .validate import Validate, AsyncValidate -from .summary import Summary, AsyncSummary -from .embedding import Embedding, AsyncEmbedding -from .exceptions import JigsawStackError -from .image_generation import ImageGeneration, AsyncImageGeneration -from .classification import Classification, AsyncClassification -from .prompt_engine import PromptEngine, AsyncPromptEngine -from .embedding_v2 import EmbeddingV2, AsyncEmbeddingV2 +from .store import AsyncStore, Store +from .summary import AsyncSummary, Summary +from .translate import AsyncTranslate, Translate +from .validate import AsyncValidate, Validate +from .vision import AsyncVision, Vision +from .web import AsyncWeb, Web class JigsawStack: diff --git a/jigsawstack/async_request.py b/jigsawstack/async_request.py index 4f90a2c..8d7bfb1 100644 --- 
a/jigsawstack/async_request.py +++ b/jigsawstack/async_request.py @@ -1,9 +1,11 @@ -from typing import Any, Dict, Generic, List, Union, cast, TypedDict, AsyncGenerator +import json +from io import BytesIO +from typing import Any, AsyncGenerator, Dict, Generic, List, TypedDict, Union, cast + import aiohttp from typing_extensions import Literal, TypeVar + from .exceptions import NoContentError, raise_for_code_and_type -import json -from io import BytesIO RequestVerb = Literal["get", "post", "put", "patch", "delete"] diff --git a/jigsawstack/audio.py b/jigsawstack/audio.py index 9e6dfd0..cadfd25 100644 --- a/jigsawstack/audio.py +++ b/jigsawstack/audio.py @@ -1,10 +1,11 @@ -from typing import Any, Dict, List, cast, Union, Optional, overload -from typing_extensions import NotRequired, TypedDict -from .request import Request, RequestConfig -from .async_request import AsyncRequest, AsyncRequestConfig +from typing import Any, Dict, List, Optional, Union, cast, overload + +from typing_extensions import Literal, NotRequired, TypedDict + from ._config import ClientConfig -from typing_extensions import Literal from ._types import BaseResponse +from .async_request import AsyncRequest, AsyncRequestConfig +from .request import Request, RequestConfig class SpeechToTextParams(TypedDict): diff --git a/jigsawstack/classification.py b/jigsawstack/classification.py index a53ed87..45407e9 100644 --- a/jigsawstack/classification.py +++ b/jigsawstack/classification.py @@ -1,9 +1,11 @@ from typing import Any, Dict, List, Union, cast -from typing_extensions import NotRequired, TypedDict, Literal -from .request import Request, RequestConfig -from .async_request import AsyncRequest, AsyncRequestConfig + +from typing_extensions import Literal, NotRequired, TypedDict + from ._config import ClientConfig from ._types import BaseResponse +from .async_request import AsyncRequest, AsyncRequestConfig +from .request import Request, RequestConfig class DatasetItem(TypedDict): diff --git 
a/jigsawstack/embedding.py b/jigsawstack/embedding.py index e37e856..4957cde 100644 --- a/jigsawstack/embedding.py +++ b/jigsawstack/embedding.py @@ -1,10 +1,12 @@ -from typing import Any, Dict, List, Union, cast, Literal, overload +from typing import Any, Dict, List, Literal, Union, cast, overload + from typing_extensions import NotRequired, TypedDict -from .request import Request, RequestConfig -from .async_request import AsyncRequest + from ._config import ClientConfig -from .helpers import build_path from ._types import BaseResponse +from .async_request import AsyncRequest +from .helpers import build_path +from .request import Request, RequestConfig class EmbeddingParams(TypedDict): diff --git a/jigsawstack/embedding_v2.py b/jigsawstack/embedding_v2.py index 64c7d11..e944ee4 100644 --- a/jigsawstack/embedding_v2.py +++ b/jigsawstack/embedding_v2.py @@ -1,10 +1,12 @@ -from typing import Any, Dict, List, Union, cast, Literal, overload +from typing import Any, Dict, List, Literal, Union, cast, overload + from typing_extensions import NotRequired, TypedDict -from .request import Request, RequestConfig -from .async_request import AsyncRequest + from ._config import ClientConfig -from .helpers import build_path +from .async_request import AsyncRequest from .embedding import Chunk +from .helpers import build_path +from .request import Request, RequestConfig class EmbeddingV2Params(TypedDict): diff --git a/jigsawstack/image_generation.py b/jigsawstack/image_generation.py index d615b6d..525b653 100644 --- a/jigsawstack/image_generation.py +++ b/jigsawstack/image_generation.py @@ -1,9 +1,10 @@ from typing import Any, Dict, Union, cast -from typing_extensions import NotRequired, TypedDict, Literal, Required -from .request import Request, RequestConfig -from .async_request import AsyncRequest + +from typing_extensions import Literal, NotRequired, Required, TypedDict from ._config import ClientConfig +from .async_request import AsyncRequest +from .request import Request, 
RequestConfig class AdvanceConfig(TypedDict): diff --git a/jigsawstack/prediction.py b/jigsawstack/prediction.py index 84bfbf9..8517bda 100644 --- a/jigsawstack/prediction.py +++ b/jigsawstack/prediction.py @@ -1,10 +1,11 @@ from typing import Any, Dict, List, Union, cast + from typing_extensions import TypedDict -from .request import Request, RequestConfig -from .async_request import AsyncRequest from ._config import ClientConfig from ._types import BaseResponse +from .async_request import AsyncRequest +from .request import Request, RequestConfig class Dataset(TypedDict): diff --git a/jigsawstack/prompt_engine.py b/jigsawstack/prompt_engine.py index 62416e7..6411c66 100644 --- a/jigsawstack/prompt_engine.py +++ b/jigsawstack/prompt_engine.py @@ -1,9 +1,11 @@ -from typing import Any, Dict, List, Union, cast, Generator, Literal +from typing import Any, Dict, Generator, List, Literal, Union, cast + from typing_extensions import NotRequired, TypedDict -from .request import Request, RequestConfig -from .async_request import AsyncRequest + from ._config import ClientConfig +from .async_request import AsyncRequest from .helpers import build_path +from .request import Request, RequestConfig class PromptEngineResult(TypedDict): diff --git a/jigsawstack/request.py b/jigsawstack/request.py index 68ac675..84d9f3d 100644 --- a/jigsawstack/request.py +++ b/jigsawstack/request.py @@ -1,8 +1,10 @@ -from typing import Any, Dict, Generic, List, Union, cast, TypedDict, Generator +import json +from typing import Any, Dict, Generator, Generic, List, TypedDict, Union, cast + import requests from typing_extensions import Literal, TypeVar + from .exceptions import NoContentError, raise_for_code_and_type -import json RequestVerb = Literal["get", "post", "put", "patch", "delete"] diff --git a/jigsawstack/search.py b/jigsawstack/search.py index 3b80bca..a607a1c 100644 --- a/jigsawstack/search.py +++ b/jigsawstack/search.py @@ -1,9 +1,11 @@ -from typing import Any, Dict, List, Union, cast, 
Literal -from typing_extensions import NotRequired, TypedDict, Optional -from .request import Request, RequestConfig -from .async_request import AsyncRequest, AsyncRequestConfig +from typing import Any, Dict, List, Literal, Optional, Union, cast + +from typing_extensions import NotRequired, TypedDict + from ._config import ClientConfig from ._types import BaseResponse +from .async_request import AsyncRequest, AsyncRequestConfig +from .request import Request, RequestConfig class RelatedIndex(TypedDict): diff --git a/jigsawstack/sentiment.py b/jigsawstack/sentiment.py index 805dd80..ef5e9df 100644 --- a/jigsawstack/sentiment.py +++ b/jigsawstack/sentiment.py @@ -1,9 +1,11 @@ from typing import Any, Dict, List, Union, cast + from typing_extensions import TypedDict -from .request import Request, RequestConfig -from .async_request import AsyncRequest + from ._config import ClientConfig from ._types import BaseResponse +from .async_request import AsyncRequest +from .request import Request, RequestConfig class SentimentParams(TypedDict): diff --git a/jigsawstack/sql.py b/jigsawstack/sql.py index efac7be..b895485 100644 --- a/jigsawstack/sql.py +++ b/jigsawstack/sql.py @@ -1,9 +1,11 @@ -from typing import Any, Dict, Union, cast, Literal +from typing import Any, Dict, Literal, Union, cast + from typing_extensions import NotRequired, TypedDict -from .request import Request, RequestConfig -from .async_request import AsyncRequest + from ._config import ClientConfig from ._types import BaseResponse +from .async_request import AsyncRequest +from .request import Request, RequestConfig class SQLParams(TypedDict): diff --git a/jigsawstack/store.py b/jigsawstack/store.py index 878a767..1fe5f33 100644 --- a/jigsawstack/store.py +++ b/jigsawstack/store.py @@ -1,9 +1,11 @@ from typing import Any, Union + from typing_extensions import NotRequired, TypedDict -from .request import Request, RequestConfig -from .async_request import AsyncRequest, AsyncRequestConfig + from ._config import 
ClientConfig +from .async_request import AsyncRequest, AsyncRequestConfig from .helpers import build_path +from .request import Request, RequestConfig class FileDeleteResponse(TypedDict): diff --git a/jigsawstack/summary.py b/jigsawstack/summary.py index 898312d..0d19b39 100644 --- a/jigsawstack/summary.py +++ b/jigsawstack/summary.py @@ -1,9 +1,11 @@ -from typing import Any, Dict, List, Union, cast, Literal +from typing import Any, Dict, List, Literal, Union, cast + from typing_extensions import NotRequired, TypedDict -from .request import Request, RequestConfig -from .async_request import AsyncRequest + from ._config import ClientConfig from ._types import BaseResponse +from .async_request import AsyncRequest +from .request import Request, RequestConfig class SummaryParams(TypedDict): diff --git a/jigsawstack/translate.py b/jigsawstack/translate.py index 0b95ef0..63b7fa5 100644 --- a/jigsawstack/translate.py +++ b/jigsawstack/translate.py @@ -1,10 +1,12 @@ from typing import Any, Dict, List, Union, cast, overload -from typing_extensions import NotRequired, TypedDict, Literal -from .request import Request, RequestConfig -from .async_request import AsyncRequest + +from typing_extensions import Literal, NotRequired, TypedDict + from ._config import ClientConfig -from .helpers import build_path from ._types import BaseResponse +from .async_request import AsyncRequest +from .helpers import build_path +from .request import Request, RequestConfig class TranslateImageParams(TypedDict): diff --git a/jigsawstack/validate.py b/jigsawstack/validate.py index 3565ac9..fc57c3c 100644 --- a/jigsawstack/validate.py +++ b/jigsawstack/validate.py @@ -1,10 +1,12 @@ from typing import Any, Dict, List, Union, cast, overload + from typing_extensions import NotRequired, TypedDict -from .request import Request, RequestConfig -from .async_request import AsyncRequest, AsyncRequestConfig + from ._config import ClientConfig -from .helpers import build_path from ._types import BaseResponse 
+from .async_request import AsyncRequest, AsyncRequestConfig +from .helpers import build_path +from .request import Request, RequestConfig class Spam(TypedDict): diff --git a/jigsawstack/vision.py b/jigsawstack/vision.py index 49191af..452291b 100644 --- a/jigsawstack/vision.py +++ b/jigsawstack/vision.py @@ -1,9 +1,11 @@ -from typing import Any, Dict, List, Union, cast, Optional, overload -from typing_extensions import NotRequired, TypedDict, Literal -from .request import Request, RequestConfig -from .async_request import AsyncRequest, AsyncRequestConfig +from typing import Any, Dict, List, Optional, Union, cast, overload + +from typing_extensions import Literal, NotRequired, TypedDict + from ._config import ClientConfig from ._types import BaseResponse +from .async_request import AsyncRequest, AsyncRequestConfig +from .request import Request, RequestConfig class Point(TypedDict): @@ -239,7 +241,7 @@ def object_detection( verb="post", ).perform_with_content() return resp - + content_type = options.get("content_type", "application/octet-stream") headers = {"Content-Type": content_type} diff --git a/jigsawstack/web.py b/jigsawstack/web.py index 58d9307..5ebf206 100644 --- a/jigsawstack/web.py +++ b/jigsawstack/web.py @@ -1,20 +1,21 @@ -from typing import Any, Dict, List, Union, Optional, cast, Literal, overload +from typing import Any, Dict, List, Literal, Optional, Union, cast, overload + from typing_extensions import NotRequired, TypedDict -from .request import Request, RequestConfig -from .async_request import AsyncRequest, AsyncRequestConfig from ._config import ClientConfig +from ._types import BaseResponse +from .async_request import AsyncRequest, AsyncRequestConfig +from .request import Request, RequestConfig from .search import ( + AsyncSearch, + DeepResearchParams, + DeepResearchResponse, Search, SearchParams, + SearchResponse, SearchSuggestionsParams, SearchSuggestionsResponse, - SearchResponse, - AsyncSearch, - DeepResearchParams, - DeepResearchResponse, 
) -from ._types import BaseResponse class GotoOptions(TypedDict): diff --git a/tests/test_audio.py b/tests/test_audio.py index 1345621..3ba499e 100644 --- a/tests/test_audio.py +++ b/tests/test_audio.py @@ -1,11 +1,13 @@ -import requests -from jigsawstack.exceptions import JigsawStackError -import jigsawstack -import pytest import logging -from dotenv import load_dotenv import os +import pytest +import requests +from dotenv import load_dotenv + +import jigsawstack +from jigsawstack.exceptions import JigsawStackError + load_dotenv() logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -142,7 +144,7 @@ class TestAudioSync: """Test synchronous audio speech-to-text methods""" - + @pytest.mark.parametrize("test_case", TEST_CASES, ids=[tc["name"] for tc in TEST_CASES]) def test_speech_to_text(self, test_case): """Test synchronous speech-to-text with various inputs""" @@ -160,18 +162,18 @@ def test_speech_to_text(self, test_case): # Verify response structure assert result["success"] assert result.get("text", None) is not None and isinstance(result["text"], str) - + # Check for chunks if result.get("chunks", None): assert isinstance(result["chunks"], list) - + # Check for speaker diarization if requested if result.get("speakers", None): assert isinstance(result["speakers"], list) except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - + @pytest.mark.parametrize("test_case", WEBHOOK_TEST_CASES, ids=[tc["name"] for tc in WEBHOOK_TEST_CASES]) def test_speech_to_text_webhook(self, test_case): """Test synchronous speech-to-text with webhook""" @@ -188,7 +190,7 @@ def test_speech_to_text_webhook(self, test_case): result = jigsaw.audio.speech_to_text(test_case["params"]) # Verify webhook response structure assert result["success"] - + except JigsawStackError as e: # Webhook URLs might fail if invalid print(f"Expected possible error for webhook test {test_case['name']}: {e}") @@ -196,7 +198,7 @@ def 
test_speech_to_text_webhook(self, test_case): class TestAudioAsync: """Test asynchronous audio speech-to-text methods""" - + @pytest.mark.parametrize("test_case", TEST_CASES, ids=[tc["name"] for tc in TEST_CASES]) @pytest.mark.asyncio async def test_speech_to_text_async(self, test_case): @@ -212,21 +214,21 @@ async def test_speech_to_text_async(self, test_case): else: # Use params directly result = await async_jigsaw.audio.speech_to_text(test_case["params"]) - + # Verify response structure assert result["success"] assert result.get("text", None) is not None and isinstance(result["text"], str) - + # Check for chunks if result.get("chunks", None): assert isinstance(result["chunks"], list) - + # Check for speaker diarization if requested if result.get("speakers", None): assert isinstance(result["speakers"], list) except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in async {test_case['name']}: {e}") - + @pytest.mark.parametrize("test_case", WEBHOOK_TEST_CASES, ids=[tc["name"] for tc in WEBHOOK_TEST_CASES]) @pytest.mark.asyncio async def test_speech_to_text_webhook_async(self, test_case): @@ -242,12 +244,12 @@ async def test_speech_to_text_webhook_async(self, test_case): else: # Use params directly result = await async_jigsaw.audio.speech_to_text(test_case["params"]) - + print(f"Async test {test_case['name']}: Webhook response") - + # Verify webhook response structure assert result["success"] except JigsawStackError as e: # Webhook URLs might fail if invalid - print(f"Expected possible error for async webhook test {test_case['name']}: {e}") \ No newline at end of file + print(f"Expected possible error for async webhook test {test_case['name']}: {e}") diff --git a/tests/test_classification.py b/tests/test_classification.py index a5cf66c..dba924a 100644 --- a/tests/test_classification.py +++ b/tests/test_classification.py @@ -1,10 +1,12 @@ -from jigsawstack.exceptions import JigsawStackError -import jigsawstack -import pytest import logging -from 
dotenv import load_dotenv import os +import pytest +from dotenv import load_dotenv + +import jigsawstack +from jigsawstack.exceptions import JigsawStackError + load_dotenv() logging.basicConfig(level=logging.INFO) diff --git a/tests/test_embedding.py b/tests/test_embedding.py index c5b08f5..4464ae4 100644 --- a/tests/test_embedding.py +++ b/tests/test_embedding.py @@ -1,11 +1,13 @@ -import requests -from jigsawstack.exceptions import JigsawStackError -import jigsawstack -import pytest import logging -from dotenv import load_dotenv import os +import pytest +import requests +from dotenv import load_dotenv + +import jigsawstack +from jigsawstack.exceptions import JigsawStackError + load_dotenv() logging.basicConfig(level=logging.INFO) diff --git a/tests/test_file_store.py b/tests/test_file_store.py index e48cc15..8cee658 100644 --- a/tests/test_file_store.py +++ b/tests/test_file_store.py @@ -1,12 +1,14 @@ -import requests -from jigsawstack.exceptions import JigsawStackError -import jigsawstack -import pytest import logging -from dotenv import load_dotenv import os import uuid +import pytest +import requests +from dotenv import load_dotenv + +import jigsawstack +from jigsawstack.exceptions import JigsawStackError + load_dotenv() logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -55,30 +57,30 @@ class TestFileStoreSync: """Test synchronous file store operations""" - + uploaded_keys = [] # Track uploaded files for cleanup - + @pytest.mark.parametrize("test_case", TEST_CASES_UPLOAD, ids=[tc["name"] for tc in TEST_CASES_UPLOAD]) def test_file_upload(self, test_case): """Test synchronous file upload with various options""" try: result = jigsaw.store.upload(test_case["file"], test_case["options"]) - + print(f"Upload test {test_case['name']}: {result}") assert result.get("key") is not None assert result.get("url") is not None assert result.get("size") > 0 - + # Check temp_public_url if requested if test_case.get("options") and 
test_case["options"].get("temp_public_url"): assert result.get("temp_public_url") is not None - + # Store key for cleanup self.uploaded_keys.append(result["key"]) - + except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - + def test_file_get(self): """Test synchronous file retrieval""" # First upload a file to retrieve @@ -88,46 +90,46 @@ def test_file_get(self): TEXT_FILE_CONTENT, {"key": test_key, "content_type": "text/plain"} ) - + # Now retrieve it file_content = jigsaw.store.get(upload_result["key"]) assert file_content is not None print(f"Retrieved file with key {upload_result['key']}") - + # Cleanup self.uploaded_keys.append(upload_result["key"]) - + except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in file get: {e}") class TestFileStoreAsync: """Test asynchronous file store operations""" - + uploaded_keys = [] # Track uploaded files for cleanup - + @pytest.mark.parametrize("test_case", TEST_CASES_UPLOAD, ids=[tc["name"] for tc in TEST_CASES_UPLOAD]) @pytest.mark.asyncio async def test_file_upload_async(self, test_case): """Test asynchronous file upload with various options""" try: result = await async_jigsaw.store.upload(test_case["file"], test_case["options"]) - + print(f"Async upload test {test_case['name']}: {result}") assert result.get("key") is not None assert result.get("url") is not None assert result.get("size") > 0 - + # Check temp_public_url if requested if test_case.get("options") and test_case["options"].get("temp_public_url"): assert result.get("temp_public_url") is not None - + # Store key for cleanup self.uploaded_keys.append(result["key"]) - + except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in async {test_case['name']}: {e}") - + @pytest.mark.asyncio async def test_file_get_async(self): """Test asynchronous file retrieval""" @@ -138,14 +140,14 @@ async def test_file_get_async(self): TEXT_FILE_CONTENT, {"key": test_key, "content_type": "text/plain"} 
) - + # Now retrieve it file_content = await async_jigsaw.store.get(upload_result["key"]) assert file_content is not None print(f"Async retrieved file with key {upload_result['key']}") - + # Cleanup self.uploaded_keys.append(upload_result["key"]) - + except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in async file get: {e}") \ No newline at end of file + pytest.fail(f"Unexpected JigsawStackError in async file get: {e}") diff --git a/tests/test_image_generation.py b/tests/test_image_generation.py index 052fd6c..1a4342e 100644 --- a/tests/test_image_generation.py +++ b/tests/test_image_generation.py @@ -1,11 +1,13 @@ -import requests -from jigsawstack.exceptions import JigsawStackError -import jigsawstack -import pytest import logging -from dotenv import load_dotenv import os +import pytest +import requests +from dotenv import load_dotenv + +import jigsawstack +from jigsawstack.exceptions import JigsawStackError + load_dotenv() logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -122,7 +124,7 @@ class TestImageGenerationSync: """Test synchronous image generation methods""" - + @pytest.mark.parametrize("test_case", TEST_CASES, ids=[tc["name"] for tc in TEST_CASES]) def test_image_generation(self, test_case): """Test synchronous image generation with various parameters""" @@ -150,20 +152,20 @@ def test_image_generation(self, test_case): assert requests.get(result["url"]).status_code == 200 else: assert isinstance(result, bytes) - + except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - + @pytest.mark.parametrize("test_case", IMAGE_TO_IMAGE_TEST_CASES[:1], ids=[tc["name"] for tc in IMAGE_TO_IMAGE_TEST_CASES[:1]]) def test_image_to_image_generation(self, test_case): """Test image-to-image generation with URL input""" try: - + result = jigsaw.image_generation(test_case["params"]) - + print(f"Test {test_case['name']}: Generated image from input") assert result is not None - 
+ if type(result) is dict: assert result.get("success") assert result.get("url") is not None @@ -177,16 +179,16 @@ def test_image_to_image_generation(self, test_case): class TestImageGenerationAsync: """Test asynchronous image generation methods""" - + @pytest.mark.parametrize("test_case", TEST_CASES, ids=[tc["name"] for tc in TEST_CASES]) @pytest.mark.asyncio async def test_image_generation_async(self, test_case): """Test asynchronous image generation with various parameters""" try: result = await async_jigsaw.image_generation(test_case["params"]) - + print(f"Async test {test_case['name']}: Generated image") - + # Check response structure assert result is not None if type(result) is dict: @@ -203,10 +205,10 @@ async def test_image_generation_async(self, test_case): assert requests.get(result["url"]).status_code == 200 else: assert isinstance(result, bytes) - + except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in async {test_case['name']}: {e}") - + @pytest.mark.parametrize("test_case", IMAGE_TO_IMAGE_TEST_CASES[:1], ids=[tc["name"] for tc in IMAGE_TO_IMAGE_TEST_CASES[:1]]) @pytest.mark.asyncio async def test_image_to_image_generation_async(self, test_case): @@ -224,4 +226,4 @@ async def test_image_to_image_generation_async(self, test_case): pytest.fail(f"Unexpected result type in {test_case['name']}: {type(result)}") except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in async {test_case['name']}: {e}") \ No newline at end of file + pytest.fail(f"Unexpected JigsawStackError in async {test_case['name']}: {e}") diff --git a/tests/test_object_detection.py b/tests/test_object_detection.py index 8c0f409..b6d8d78 100644 --- a/tests/test_object_detection.py +++ b/tests/test_object_detection.py @@ -1,11 +1,13 @@ -import requests -from jigsawstack.exceptions import JigsawStackError -import jigsawstack -import pytest import logging -from dotenv import load_dotenv import os +import pytest +import requests +from dotenv import 
load_dotenv + +import jigsawstack +from jigsawstack.exceptions import JigsawStackError + load_dotenv() diff --git a/tests/test_prediction.py b/tests/test_prediction.py index 48ba6f7..f38b016 100644 --- a/tests/test_prediction.py +++ b/tests/test_prediction.py @@ -1,11 +1,13 @@ -from jigsawstack.exceptions import JigsawStackError -import jigsawstack -import pytest import logging -from dotenv import load_dotenv import os from datetime import datetime, timedelta +import pytest +from dotenv import load_dotenv + +import jigsawstack +from jigsawstack.exceptions import JigsawStackError + load_dotenv() logging.basicConfig(level=logging.INFO) diff --git a/tests/test_sentiment.py b/tests/test_sentiment.py index 8967562..c184ec1 100644 --- a/tests/test_sentiment.py +++ b/tests/test_sentiment.py @@ -1,10 +1,12 @@ -from jigsawstack.exceptions import JigsawStackError -import jigsawstack -import pytest import logging -from dotenv import load_dotenv import os +import pytest +from dotenv import load_dotenv + +import jigsawstack +from jigsawstack.exceptions import JigsawStackError + load_dotenv() logging.basicConfig(level=logging.INFO) diff --git a/tests/test_sql.py b/tests/test_sql.py index 71de82b..822ae18 100644 --- a/tests/test_sql.py +++ b/tests/test_sql.py @@ -1,10 +1,12 @@ -from jigsawstack.exceptions import JigsawStackError -import jigsawstack -import pytest import logging -from dotenv import load_dotenv import os +import pytest +from dotenv import load_dotenv + +import jigsawstack +from jigsawstack.exceptions import JigsawStackError + load_dotenv() logging.basicConfig(level=logging.INFO) diff --git a/tests/test_summary.py b/tests/test_summary.py index 12125de..5d66335 100644 --- a/tests/test_summary.py +++ b/tests/test_summary.py @@ -1,10 +1,12 @@ -from jigsawstack.exceptions import JigsawStackError -import jigsawstack -import pytest import logging -from dotenv import load_dotenv import os +import pytest +from dotenv import load_dotenv + +import jigsawstack +from 
jigsawstack.exceptions import JigsawStackError + load_dotenv() logging.basicConfig(level=logging.INFO) diff --git a/tests/test_translate.py b/tests/test_translate.py index 7c903c7..f556fca 100644 --- a/tests/test_translate.py +++ b/tests/test_translate.py @@ -1,11 +1,13 @@ -import requests -from jigsawstack.exceptions import JigsawStackError -import jigsawstack -import pytest import logging -from dotenv import load_dotenv import os +import pytest +import requests +from dotenv import load_dotenv + +import jigsawstack +from jigsawstack.exceptions import JigsawStackError + load_dotenv() logging.basicConfig(level=logging.INFO) @@ -138,14 +140,14 @@ def test_translate_text(self, test_case): result = jigsaw.translate.text(test_case["params"]) assert result["success"] assert "translated_text" in result - + # Check if the response structure matches the input if isinstance(test_case["params"]["text"], list): assert isinstance(result["translated_text"], list) assert len(result["translated_text"]) == len(test_case["params"]["text"]) else: assert isinstance(result["translated_text"], str) - + except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") @@ -165,14 +167,14 @@ async def test_translate_text_async(self, test_case): result = await async_jigsaw.translate.text(test_case["params"]) assert result["success"] assert "translated_text" in result - + # Check if the response structure matches the input if isinstance(test_case["params"]["text"], list): assert isinstance(result["translated_text"], list) assert len(result["translated_text"]) == len(test_case["params"]["text"]) else: assert isinstance(result["translated_text"], str) - + except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") @@ -235,4 +237,4 @@ async def test_translate_image_async(self, test_case): assert isinstance(result, bytes) except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: 
{e}") \ No newline at end of file + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") diff --git a/tests/test_validate.py b/tests/test_validate.py index 9ad90bc..c1c0311 100644 --- a/tests/test_validate.py +++ b/tests/test_validate.py @@ -1,11 +1,13 @@ -import requests -from jigsawstack.exceptions import JigsawStackError -import jigsawstack -import pytest import logging -from dotenv import load_dotenv import os +import pytest +import requests +from dotenv import load_dotenv + +import jigsawstack +from jigsawstack.exceptions import JigsawStackError + load_dotenv() logging.basicConfig(level=logging.INFO) From 5ac36799e308bbb81b26af0bdc9d02c6f48e55b0 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 22:39:30 -0700 Subject: [PATCH 45/53] fix: formatting errors and improper initializations. --- .github/ruff.toml | 1 + jigsawstack/async_request.py | 4 ++-- jigsawstack/prediction.py | 2 +- jigsawstack/request.py | 4 ++-- tests/test_summary.py | 20 ++++++++++---------- 5 files changed, 16 insertions(+), 15 deletions(-) diff --git a/.github/ruff.toml b/.github/ruff.toml index 6b655a6..8f2e22f 100644 --- a/.github/ruff.toml +++ b/.github/ruff.toml @@ -13,5 +13,6 @@ select = [ "UP", # pyupgrade ] ignore = [ + "E501", # ignore line too long. 
"B008", # do not perform function calls in argument defaults ] diff --git a/jigsawstack/async_request.py b/jigsawstack/async_request.py index 8d7bfb1..ee8a802 100644 --- a/jigsawstack/async_request.py +++ b/jigsawstack/async_request.py @@ -25,7 +25,7 @@ def __init__( path: str, params: Union[Dict[Any, Any], List[Dict[Any, Any]]], verb: RequestVerb, - headers: Dict[str, str] = {"Content-Type": "application/json"}, + headers: Dict[str, str] = None, data: Union[bytes, None] = None, stream: Union[bool, None] = False, ): @@ -35,7 +35,7 @@ def __init__( self.api_url = config.get("api_url") self.api_key = config.get("api_key") self.data = data - self.headers = headers + self.headers = headers or {"Content-Type": "application/json"} self.disable_request_logging = config.get("disable_request_logging") self.stream = stream diff --git a/jigsawstack/prediction.py b/jigsawstack/prediction.py index 8517bda..ec571a4 100644 --- a/jigsawstack/prediction.py +++ b/jigsawstack/prediction.py @@ -27,7 +27,7 @@ class PredictionParams(TypedDict): """ steps: int """ - The number of predictions to make. The defualt is 5. + The number of predictions to make. The default is 5. 
""" diff --git a/jigsawstack/request.py b/jigsawstack/request.py index 84d9f3d..ea373a6 100644 --- a/jigsawstack/request.py +++ b/jigsawstack/request.py @@ -25,7 +25,7 @@ def __init__( path: str, params: Union[Dict[Any, Any], List[Dict[Any, Any]]], verb: RequestVerb, - headers: Dict[str, str] = {"Content-Type": "application/json"}, + headers: Dict[str, str] = None, data: Union[bytes, None] = None, stream: Union[bool, None] = False, ): @@ -35,7 +35,7 @@ def __init__( self.api_url = config.get("api_url") self.api_key = config.get("api_key") self.data = data - self.headers = headers + self.headers = headers or {"Content-Type": "application/json"} self.disable_request_logging = config.get("disable_request_logging") self.stream = stream diff --git a/tests/test_summary.py b/tests/test_summary.py index 5d66335..e2fb763 100644 --- a/tests/test_summary.py +++ b/tests/test_summary.py @@ -16,13 +16,13 @@ async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) LONG_TEXT = """ -Artificial Intelligence (AI) has become one of the most transformative technologies of the 21st century. -From healthcare to finance, transportation to entertainment, AI is reshaping industries and changing the way we live and work. -Machine learning algorithms can now diagnose diseases with remarkable accuracy, predict market trends, and even create art. -Natural language processing has enabled computers to understand and generate human language, leading to the development of sophisticated chatbots and virtual assistants. -Computer vision systems can identify objects, faces, and activities in images and videos with superhuman precision. -However, the rapid advancement of AI also raises important ethical questions about privacy, job displacement, and the potential for bias in algorithmic decision-making. 
-As we continue to develop more powerful AI systems, it's crucial that we consider their societal impact and work to ensure that the benefits of AI are distributed equitably. +Artificial Intelligence (AI) has become one of the most transformative technologies of the 21st century. +From healthcare to finance, transportation to entertainment, AI is reshaping industries and changing the way we live and work. +Machine learning algorithms can now diagnose diseases with remarkable accuracy, predict market trends, and even create art. +Natural language processing has enabled computers to understand and generate human language, leading to the development of sophisticated chatbots and virtual assistants. +Computer vision systems can identify objects, faces, and activities in images and videos with superhuman precision. +However, the rapid advancement of AI also raises important ethical questions about privacy, job displacement, and the potential for bias in algorithmic decision-making. +As we continue to develop more powerful AI systems, it's crucial that we consider their societal impact and work to ensure that the benefits of AI are distributed equitably. The future of AI holds immense promise, but it will require careful planning, regulation, and collaboration between technologists, policymakers, and society at large to realize its full potential while mitigating its risks. """ @@ -112,9 +112,9 @@ "name": "technical_text_summary", "params": { "text": """ - Machine learning is a subset of artificial intelligence that focuses on the development of algorithms and statistical models that enable computer systems to improve their performance on a specific task through experience. - Deep learning, a subfield of machine learning, uses artificial neural networks with multiple layers to progressively extract higher-level features from raw input. - Supervised learning involves training models on labeled data, while unsupervised learning discovers patterns in unlabeled data. 
+ Machine learning is a subset of artificial intelligence that focuses on the development of algorithms and statistical models that enable computer systems to improve their performance on a specific task through experience. + Deep learning, a subfield of machine learning, uses artificial neural networks with multiple layers to progressively extract higher-level features from raw input. + Supervised learning involves training models on labeled data, while unsupervised learning discovers patterns in unlabeled data. Reinforcement learning enables agents to learn optimal behaviors through trial and error interactions with an environment. """, "type": "points", From dfe3154c96f96a0aa3625d65da6ae583f75b42db Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 22:41:46 -0700 Subject: [PATCH 46/53] chore: update string max lenght --- .github/ruff.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/ruff.toml b/.github/ruff.toml index 8f2e22f..a401c08 100644 --- a/.github/ruff.toml +++ b/.github/ruff.toml @@ -1,5 +1,5 @@ # Ruff configuration for CI/CD -line-length = 88 +line-length = 100 target-version = "py37" [lint] @@ -13,6 +13,6 @@ select = [ "UP", # pyupgrade ] ignore = [ - "E501", # ignore line too long. 
+ "E501", "B008", # do not perform function calls in argument defaults ] From a76d141feec2a5a0c83051bdf04801f5380fa06e Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 22:44:05 -0700 Subject: [PATCH 47/53] chore: update ruff toml --- .github/ruff.toml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/ruff.toml b/.github/ruff.toml index a401c08..3922be1 100644 --- a/.github/ruff.toml +++ b/.github/ruff.toml @@ -4,7 +4,6 @@ target-version = "py37" [lint] select = [ - "E", # pycodestyle errors "W", # pycodestyle warnings "F", # pyflakes "I", # isort @@ -13,6 +12,5 @@ select = [ "UP", # pyupgrade ] ignore = [ - "E501", "B008", # do not perform function calls in argument defaults ] From 026c3d958a687a4eabd0506dae93173a3db1c125 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 22:49:03 -0700 Subject: [PATCH 48/53] feat: update CI ruff config. --- .github/workflows/ci.yml | 63 +++++++++------------------------------- 1 file changed, 14 insertions(+), 49 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2d0f6df..f77b329 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -50,8 +50,8 @@ jobs: - name: Check formatting for ${{ matrix.file }} run: | - ruff check jigsawstack/${{ matrix.file }} --select I,F,E,W - ruff format --check jigsawstack/${{ matrix.file }} + ruff check jigsawstack/${{ matrix.file }} --config .github/ruff.toml + ruff format --check jigsawstack/${{ matrix.file }} --config .github/ruff.toml test: name: Test - ${{ matrix.test-file }} @@ -74,8 +74,6 @@ jobs: - test_validate.py - test_vision.py - test_web.py - outputs: - test-result: ${{ steps.test-run.outcome }} steps: - uses: actions/checkout@v4 @@ -92,29 +90,10 @@ jobs: pip install -e . 
- name: Run test ${{ matrix.test-file }} - id: test-run env: JIGSAWSTACK_API_KEY: ${{ secrets.JIGSAWSTACK_API_KEY }} run: | - pytest tests/${{ matrix.test-file }} -v --json-report --json-report-file=report.json - - - name: Count passed tests - id: count-tests - if: always() - run: | - if [ -f report.json ]; then - PASSED=$(python -c "import json; data=json.load(open('report.json')); print(data.get('summary', {}).get('passed', 0))") - echo "passed-count=$PASSED" >> $GITHUB_OUTPUT - else - echo "passed-count=0" >> $GITHUB_OUTPUT - fi - - - name: Upload test results - if: always() - uses: actions/upload-artifact@v4 - with: - name: test-results-${{ matrix.test-file }} - path: report.json + pytest tests/${{ matrix.test-file }} -v all-checks-passed: name: All Checks Passed @@ -122,33 +101,19 @@ jobs: runs-on: ubuntu-latest if: always() steps: - - name: Download all test results - uses: actions/download-artifact@v4 - with: - path: test-results - - - name: Count total passed tests + - name: Verify all checks passed run: | - TOTAL_PASSED=0 - for file in test-results/*/report.json; do - if [ -f "$file" ]; then - PASSED=$(python -c "import json; data=json.load(open('$file')); print(data.get('summary', {}).get('passed', 0))") - TOTAL_PASSED=$((TOTAL_PASSED + PASSED)) - fi - done - - echo "Total passed tests: $TOTAL_PASSED" + echo "Ruff Format Check: ${{ needs.ruff-format-check.result }}" + echo "Tests: ${{ needs.test.result }}" - if [ $TOTAL_PASSED -lt 327 ]; then - echo "❌ Insufficient tests passed: $TOTAL_PASSED/327" + if [[ "${{ needs.ruff-format-check.result }}" != "success" ]]; then + echo "❌ Ruff format check failed" exit 1 - else - echo "✅ Required tests passed: $TOTAL_PASSED/327" fi - - - name: Check if ruff passed - run: | - if [[ "${{ needs.ruff-format-check.result }}" != "success" ]]; then - echo "Ruff format check failed" + + if [[ "${{ needs.test.result }}" != "success" ]]; then + echo "❌ Tests failed" exit 1 - fi \ No newline at end of file + fi + + echo "✅ All 
checks passed successfully!" \ No newline at end of file From af5c92718ca7ef7c71c9ff58bd6eff560fafe0bb Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 22:50:10 -0700 Subject: [PATCH 49/53] fix: avoid re-formmating gracefully fail for the dev to fix the formatting errors. --- .github/workflows/ci.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f77b329..d576488 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -51,7 +51,6 @@ jobs: - name: Check formatting for ${{ matrix.file }} run: | ruff check jigsawstack/${{ matrix.file }} --config .github/ruff.toml - ruff format --check jigsawstack/${{ matrix.file }} --config .github/ruff.toml test: name: Test - ${{ matrix.test-file }} From 700d8edfb86d1bcc9a34befe1001a4929acfd950 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 22:52:05 -0700 Subject: [PATCH 50/53] fix: missing package in ci --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d576488..6195b72 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -85,7 +85,7 @@ jobs: run: | python -m pip install --upgrade pip pip install -r requirements.txt - pip install pytest pytest-asyncio pytest-cov + pip install pytest pytest-asyncio pytest-cov python-dotenv pip install -e . - name: Run test ${{ matrix.test-file }} From 519f8316a20301922b34d5ff1de9051b6b891a3d Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 23:02:55 -0700 Subject: [PATCH 51/53] fix: drop non-existent-previously-deleted test cases. 
--- .github/workflows/ci.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6195b72..c3b541d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -71,7 +71,6 @@ jobs: - test_summary.py - test_translate.py - test_validate.py - - test_vision.py - test_web.py steps: - uses: actions/checkout@v4 From c3c25410dc4ee7b085dc663d6d16b52064873c30 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Fri, 12 Sep 2025 08:37:36 -0700 Subject: [PATCH 52/53] fix: formatting for ruff. --- jigsawstack/async_request.py | 4 +- jigsawstack/embedding.py | 8 +--- jigsawstack/embedding_v2.py | 4 +- jigsawstack/helpers.py | 8 +--- jigsawstack/image_generation.py | 8 +--- jigsawstack/prompt_engine.py | 16 ++----- jigsawstack/request.py | 5 +-- jigsawstack/search.py | 4 +- jigsawstack/store.py | 4 +- jigsawstack/vision.py | 8 +--- jigsawstack/web.py | 12 ++--- tests/test_audio.py | 79 +++++++++------------------------ tests/test_embedding.py | 8 ++-- tests/test_file_store.py | 18 +++++--- tests/test_image_generation.py | 62 +++++++++++--------------- tests/test_object_detection.py | 8 ++-- tests/test_prediction.py | 9 +--- tests/test_sentiment.py | 8 +--- tests/test_summary.py | 8 +--- tests/test_translate.py | 4 +- tests/test_validate.py | 12 ++--- 21 files changed, 98 insertions(+), 199 deletions(-) diff --git a/jigsawstack/async_request.py b/jigsawstack/async_request.py index ee8a802..26a7e53 100644 --- a/jigsawstack/async_request.py +++ b/jigsawstack/async_request.py @@ -250,9 +250,7 @@ async def make_request( form_data.add_field( "file", BytesIO(data), - content_type=headers.get( - "Content-Type", "application/octet-stream" - ), + content_type=headers.get("Content-Type", "application/octet-stream"), filename="file", ) diff --git a/jigsawstack/embedding.py b/jigsawstack/embedding.py index 4957cde..cd755f0 100644 --- a/jigsawstack/embedding.py +++ b/jigsawstack/embedding.py @@ -47,9 +47,7 @@ def 
__init__( @overload def execute(self, params: EmbeddingParams) -> EmbeddingResponse: ... @overload - def execute( - self, blob: bytes, options: EmbeddingParams = None - ) -> EmbeddingResponse: ... + def execute(self, blob: bytes, options: EmbeddingParams = None) -> EmbeddingResponse: ... def execute( self, @@ -101,9 +99,7 @@ def __init__( @overload async def execute(self, params: EmbeddingParams) -> EmbeddingResponse: ... @overload - async def execute( - self, blob: bytes, options: EmbeddingParams = None - ) -> EmbeddingResponse: ... + async def execute(self, blob: bytes, options: EmbeddingParams = None) -> EmbeddingResponse: ... async def execute( self, diff --git a/jigsawstack/embedding_v2.py b/jigsawstack/embedding_v2.py index e944ee4..fe62f69 100644 --- a/jigsawstack/embedding_v2.py +++ b/jigsawstack/embedding_v2.py @@ -45,9 +45,7 @@ def __init__( @overload def execute(self, params: EmbeddingV2Params) -> EmbeddingV2Response: ... @overload - def execute( - self, blob: bytes, options: EmbeddingV2Params = None - ) -> EmbeddingV2Response: ... + def execute(self, blob: bytes, options: EmbeddingV2Params = None) -> EmbeddingV2Response: ... def execute( self, diff --git a/jigsawstack/helpers.py b/jigsawstack/helpers.py index 1854410..5c1ad6a 100644 --- a/jigsawstack/helpers.py +++ b/jigsawstack/helpers.py @@ -2,9 +2,7 @@ from urllib.parse import urlencode -def build_path( - base_path: str, params: Optional[Dict[str, Union[str, int, bool]]] = None -) -> str: +def build_path(base_path: str, params: Optional[Dict[str, Union[str, int, bool]]] = None) -> str: """ Build an API endpoint path with query parameters. 
@@ -20,9 +18,7 @@ def build_path( # remove None values from the parameters filtered_params = { - k: str(v).lower() if isinstance(v, bool) else v - for k, v in params.items() - if v is not None + k: str(v).lower() if isinstance(v, bool) else v for k, v in params.items() if v is not None } # encode the parameters diff --git a/jigsawstack/image_generation.py b/jigsawstack/image_generation.py index 525b653..9584cf3 100644 --- a/jigsawstack/image_generation.py +++ b/jigsawstack/image_generation.py @@ -92,9 +92,7 @@ def __init__( api_url: str, disable_request_logging: Union[bool, None] = False, ): - super().__init__( - api_key, api_url, disable_request_logging=disable_request_logging - ) + super().__init__(api_key, api_url, disable_request_logging=disable_request_logging) self.config = RequestConfig( api_url=api_url, api_key=api_key, @@ -123,9 +121,7 @@ def __init__( api_url: str, disable_request_logging: Union[bool, None] = False, ): - super().__init__( - api_key, api_url, disable_request_logging=disable_request_logging - ) + super().__init__(api_key, api_url, disable_request_logging=disable_request_logging) self.config = RequestConfig( api_url=api_url, api_key=api_key, diff --git a/jigsawstack/prompt_engine.py b/jigsawstack/prompt_engine.py index 6411c66..3af7fa3 100644 --- a/jigsawstack/prompt_engine.py +++ b/jigsawstack/prompt_engine.py @@ -119,14 +119,10 @@ def create(self, params: PromptEngineCreateParams) -> PromptEngineCreateResponse def get(self, id: str) -> PromptEngineGetResponse: path = f"/prompt_engine/{id}" - resp = Request( - config=self.config, path=path, params={}, verb="get" - ).perform_with_content() + resp = Request(config=self.config, path=path, params={}, verb="get").perform_with_content() return resp - def list( - self, params: Union[PromptEngineListParams, None] = None - ) -> PromptEngineListResponse: + def list(self, params: Union[PromptEngineListParams, None] = None) -> PromptEngineListResponse: if params is None: params = {} @@ -141,9 +137,7 @@ 
def list( base_path="/prompt_engine", params=params, ) - resp = Request( - config=self.config, path=path, params={}, verb="get" - ).perform_with_content() + resp = Request(config=self.config, path=path, params={}, verb="get").perform_with_content() return resp def delete(self, id: str) -> PromptEngineDeleteResponse: @@ -219,9 +213,7 @@ def __init__( disable_request_logging=disable_request_logging, ) - async def create( - self, params: PromptEngineCreateParams - ) -> PromptEngineCreateResponse: + async def create(self, params: PromptEngineCreateParams) -> PromptEngineCreateResponse: path = "/prompt_engine" resp = await AsyncRequest( config=self.config, diff --git a/jigsawstack/request.py b/jigsawstack/request.py index ea373a6..c1967a4 100644 --- a/jigsawstack/request.py +++ b/jigsawstack/request.py @@ -91,10 +91,7 @@ def perform_file(self) -> Union[T, None]: # handle error in case there is a statusCode attr present # and status != 200 and response is a json. - if ( - "application/json" not in resp.headers["content-type"] - and resp.status_code != 200 - ): + if "application/json" not in resp.headers["content-type"] and resp.status_code != 200: raise_for_code_and_type( code=500, message="Failed to parse JigsawStack API response. 
Please try again.", diff --git a/jigsawstack/search.py b/jigsawstack/search.py index a607a1c..21b0187 100644 --- a/jigsawstack/search.py +++ b/jigsawstack/search.py @@ -319,9 +319,7 @@ async def search(self, params: SearchParams) -> SearchResponse: ).perform_with_content() return resp - async def suggestions( - self, params: SearchSuggestionsParams - ) -> SearchSuggestionsResponse: + async def suggestions(self, params: SearchSuggestionsParams) -> SearchSuggestionsResponse: query = params["query"] path = f"/web/search/suggest?query={query}" resp = await AsyncRequest( diff --git a/jigsawstack/store.py b/jigsawstack/store.py index 1fe5f33..0693f49 100644 --- a/jigsawstack/store.py +++ b/jigsawstack/store.py @@ -23,9 +23,7 @@ class FileUploadResponse(TypedDict): key: str url: str size: int - temp_public_url: NotRequired[ - str - ] # Optional, only if temp_public_url is set to True in params + temp_public_url: NotRequired[str] # Optional, only if temp_public_url is set to True in params class Store(ClientConfig): diff --git a/jigsawstack/vision.py b/jigsawstack/vision.py index 452291b..6df4e37 100644 --- a/jigsawstack/vision.py +++ b/jigsawstack/vision.py @@ -218,9 +218,7 @@ def vocr( return resp @overload - def object_detection( - self, params: ObjectDetectionParams - ) -> ObjectDetectionResponse: ... + def object_detection(self, params: ObjectDetectionParams) -> ObjectDetectionResponse: ... @overload def object_detection( self, blob: bytes, options: ObjectDetectionParams = None @@ -307,9 +305,7 @@ async def vocr( return resp @overload - async def object_detection( - self, params: ObjectDetectionParams - ) -> ObjectDetectionResponse: ... + async def object_detection(self, params: ObjectDetectionParams) -> ObjectDetectionResponse: ... 
@overload async def object_detection( self, blob: bytes, options: ObjectDetectionParams = None diff --git a/jigsawstack/web.py b/jigsawstack/web.py index 5ebf206..5d400c3 100644 --- a/jigsawstack/web.py +++ b/jigsawstack/web.py @@ -20,9 +20,7 @@ class GotoOptions(TypedDict): timeout: NotRequired[int] - wait_until: NotRequired[ - Literal["load", "domcontentloaded", "networkidle0", "networkidle2"] - ] + wait_until: NotRequired[Literal["load", "domcontentloaded", "networkidle0", "networkidle2"]] # @@ -257,9 +255,7 @@ def search(self, params: SearchParams) -> SearchResponse: ) return s.search(params) - def search_suggestions( - self, params: SearchSuggestionsParams - ) -> SearchSuggestionsResponse: + def search_suggestions(self, params: SearchSuggestionsParams) -> SearchSuggestionsResponse: s = Search( self.api_key, self.api_url, @@ -309,9 +305,7 @@ async def ai_scrape(self, params: AIScrapeParams) -> AIScrapeResponse: async def html_to_any(self, params: HTMLToAnyURLParams) -> HTMLToAnyURLResponse: ... @overload - async def html_to_any( - self, params: HTMLToAnyBinaryParams - ) -> HTMLToAnyBinaryResponse: ... + async def html_to_any(self, params: HTMLToAnyBinaryParams) -> HTMLToAnyBinaryResponse: ... 
async def html_to_any( self, params: HTMLToAnyParams diff --git a/tests/test_audio.py b/tests/test_audio.py index 3ba499e..037f285 100644 --- a/tests/test_audio.py +++ b/tests/test_audio.py @@ -22,36 +22,25 @@ TEST_CASES = [ { "name": "with_url_only", - "params": { - "url": AUDIO_URL - }, + "params": {"url": AUDIO_URL}, "blob": None, "options": None, }, { "name": "with_url_and_language", - "params": { - "url": AUDIO_URL, - "language": "en" - }, + "params": {"url": AUDIO_URL, "language": "en"}, "blob": None, "options": None, }, { "name": "with_url_auto_detect_language", - "params": { - "url": AUDIO_URL, - "language": "auto" - }, + "params": {"url": AUDIO_URL, "language": "auto"}, "blob": None, "options": None, }, { "name": "with_url_and_translate", - "params": { - "url": AUDIO_URL, - "translate": True - }, + "params": {"url": AUDIO_URL, "translate": True}, "blob": None, "options": None, }, @@ -65,43 +54,29 @@ "name": "with_blob_and_language", "params": None, "blob": AUDIO_URL, - "options": { - "language": "en" - }, + "options": {"language": "en"}, }, { "name": "with_blob_auto_detect", "params": None, "blob": AUDIO_URL, - "options": { - "language": "auto" - }, + "options": {"language": "auto"}, }, { "name": "with_blob_and_translate", "params": None, "blob": AUDIO_URL, - "options": { - "translate": True, - "language": "en" - }, + "options": {"translate": True, "language": "en"}, }, { "name": "with_by_speaker", - "params": { - "url": AUDIO_URL_LONG, - "by_speaker": True - }, + "params": {"url": AUDIO_URL_LONG, "by_speaker": True}, "blob": None, "options": None, }, { "name": "with_chunk_settings", - "params": { - "url": AUDIO_URL, - "batch_size": 5, - "chunk_duration": 15 - }, + "params": {"url": AUDIO_URL, "batch_size": 5, "chunk_duration": 15}, "blob": None, "options": None, }, @@ -114,7 +89,7 @@ "translate": False, "by_speaker": True, "batch_size": 10, - "chunk_duration": 15 + "chunk_duration": 15, }, }, ] @@ -123,10 +98,7 @@ WEBHOOK_TEST_CASES = [ { "name": 
"with_webhook_url", - "params": { - "url": AUDIO_URL, - "webhook_url": "https://webhook.site/test-webhook" - }, + "params": {"url": AUDIO_URL, "webhook_url": "https://webhook.site/test-webhook"}, "blob": None, "options": None, }, @@ -134,10 +106,7 @@ "name": "with_blob_and_webhook", "params": None, "blob": AUDIO_URL, - "options": { - "webhook_url": "https://webhook.site/test-webhook", - "language": "en" - }, + "options": {"webhook_url": "https://webhook.site/test-webhook", "language": "en"}, }, ] @@ -152,10 +121,7 @@ def test_speech_to_text(self, test_case): if test_case.get("blob"): # Download audio content blob_content = requests.get(test_case["blob"]).content - result = jigsaw.audio.speech_to_text( - blob_content, - test_case.get("options", {}) - ) + result = jigsaw.audio.speech_to_text(blob_content, test_case.get("options", {})) else: # Use params directly result = jigsaw.audio.speech_to_text(test_case["params"]) @@ -174,17 +140,16 @@ def test_speech_to_text(self, test_case): except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - @pytest.mark.parametrize("test_case", WEBHOOK_TEST_CASES, ids=[tc["name"] for tc in WEBHOOK_TEST_CASES]) + @pytest.mark.parametrize( + "test_case", WEBHOOK_TEST_CASES, ids=[tc["name"] for tc in WEBHOOK_TEST_CASES] + ) def test_speech_to_text_webhook(self, test_case): """Test synchronous speech-to-text with webhook""" try: if test_case.get("blob"): # Download audio content blob_content = requests.get(test_case["blob"]).content - result = jigsaw.audio.speech_to_text( - blob_content, - test_case.get("options", {}) - ) + result = jigsaw.audio.speech_to_text(blob_content, test_case.get("options", {})) else: # Use params directly result = jigsaw.audio.speech_to_text(test_case["params"]) @@ -208,8 +173,7 @@ async def test_speech_to_text_async(self, test_case): # Download audio content blob_content = requests.get(test_case["blob"]).content result = await async_jigsaw.audio.speech_to_text( - 
blob_content, - test_case.get("options", {}) + blob_content, test_case.get("options", {}) ) else: # Use params directly @@ -229,7 +193,9 @@ async def test_speech_to_text_async(self, test_case): except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in async {test_case['name']}: {e}") - @pytest.mark.parametrize("test_case", WEBHOOK_TEST_CASES, ids=[tc["name"] for tc in WEBHOOK_TEST_CASES]) + @pytest.mark.parametrize( + "test_case", WEBHOOK_TEST_CASES, ids=[tc["name"] for tc in WEBHOOK_TEST_CASES] + ) @pytest.mark.asyncio async def test_speech_to_text_webhook_async(self, test_case): """Test asynchronous speech-to-text with webhook""" @@ -238,8 +204,7 @@ async def test_speech_to_text_webhook_async(self, test_case): # Download audio content blob_content = requests.get(test_case["blob"]).content result = await async_jigsaw.audio.speech_to_text( - blob_content, - test_case.get("options", {}) + blob_content, test_case.get("options", {}) ) else: # Use params directly diff --git a/tests/test_embedding.py b/tests/test_embedding.py index 4464ae4..7b6b368 100644 --- a/tests/test_embedding.py +++ b/tests/test_embedding.py @@ -16,12 +16,12 @@ jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) -SAMPLE_TEXT = "The quick brown fox jumps over the lazy dog. This is a sample text for embedding generation." +SAMPLE_TEXT = ( + "The quick brown fox jumps over the lazy dog. This is a sample text for embedding generation." 
+) SAMPLE_IMAGE_URL = "https://images.unsplash.com/photo-1542931287-023b922fa89b?q=80&w=2574&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" SAMPLE_AUDIO_URL = "https://jigsawstack.com/preview/stt-example.wav" -SAMPLE_PDF_URL = ( - "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf" -) +SAMPLE_PDF_URL = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf" # Test cases for Embedding V1 EMBEDDING_V1_TEST_CASES = [ diff --git a/tests/test_file_store.py b/tests/test_file_store.py index 8cee658..97d07dd 100644 --- a/tests/test_file_store.py +++ b/tests/test_file_store.py @@ -18,7 +18,9 @@ TEXT_FILE_CONTENT = b"This is a test file content for JigsawStack storage" JSON_FILE_CONTENT = b'{"test": "data", "key": "value"}' -BINARY_FILE_CONTENT = requests.get("https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg").content +BINARY_FILE_CONTENT = requests.get( + "https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg" +).content TEST_CASES_UPLOAD = [ { @@ -60,7 +62,9 @@ class TestFileStoreSync: uploaded_keys = [] # Track uploaded files for cleanup - @pytest.mark.parametrize("test_case", TEST_CASES_UPLOAD, ids=[tc["name"] for tc in TEST_CASES_UPLOAD]) + @pytest.mark.parametrize( + "test_case", TEST_CASES_UPLOAD, ids=[tc["name"] for tc in TEST_CASES_UPLOAD] + ) def test_file_upload(self, test_case): """Test synchronous file upload with various options""" try: @@ -87,8 +91,7 @@ def test_file_get(self): test_key = f"test-get-{uuid.uuid4().hex[:8]}.txt" try: upload_result = jigsaw.store.upload( - TEXT_FILE_CONTENT, - {"key": test_key, "content_type": "text/plain"} + TEXT_FILE_CONTENT, {"key": test_key, "content_type": "text/plain"} ) # Now retrieve it @@ -108,7 +111,9 @@ class TestFileStoreAsync: uploaded_keys = [] # Track uploaded files for cleanup - @pytest.mark.parametrize("test_case", TEST_CASES_UPLOAD, 
ids=[tc["name"] for tc in TEST_CASES_UPLOAD]) + @pytest.mark.parametrize( + "test_case", TEST_CASES_UPLOAD, ids=[tc["name"] for tc in TEST_CASES_UPLOAD] + ) @pytest.mark.asyncio async def test_file_upload_async(self, test_case): """Test asynchronous file upload with various options""" @@ -137,8 +142,7 @@ async def test_file_get_async(self): test_key = f"test-async-get-{uuid.uuid4().hex[:8]}.txt" try: upload_result = await async_jigsaw.store.upload( - TEXT_FILE_CONTENT, - {"key": test_key, "content_type": "text/plain"} + TEXT_FILE_CONTENT, {"key": test_key, "content_type": "text/plain"} ) # Now retrieve it diff --git a/tests/test_image_generation.py b/tests/test_image_generation.py index 1a4342e..6b982ba 100644 --- a/tests/test_image_generation.py +++ b/tests/test_image_generation.py @@ -16,11 +16,10 @@ async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) IMAGE_URL = "https://images.unsplash.com/photo-1494588024300-e9df7ff98d78?q=80&w=1284&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" -FILE_STORE_KEY = jigsaw.store.upload(requests.get(IMAGE_URL).content, { - "filename": "test_image.jpg", - "content_type": "image/jpeg", - "overwrite": True - }) +FILE_STORE_KEY = jigsaw.store.upload( + requests.get(IMAGE_URL).content, + {"filename": "test_image.jpg", "content_type": "image/jpeg", "overwrite": True}, +) TEST_CASES = [ { @@ -33,23 +32,16 @@ "name": "with_aspect_ratio", "params": { "prompt": "A serene lake with mountains in the background", - "aspect_ratio": "16:9" + "aspect_ratio": "16:9", }, }, { "name": "with_custom_dimensions", - "params": { - "prompt": "A futuristic city skyline", - "width": 1024, - "height": 768 - }, + "params": {"prompt": "A futuristic city skyline", "width": 1024, "height": 768}, }, { "name": "with_output_format_png", - "params": { - "prompt": "A colorful abstract painting", - "output_format": "png" - }, + "params": {"prompt": "A colorful abstract painting", 
"output_format": "png"}, }, { "name": "with_advanced_config", @@ -58,8 +50,8 @@ "advance_config": { "negative_prompt": "blurry, low quality, distorted", "guidance": 7, - "seed": 42 - } + "seed": 42, + }, }, }, { @@ -68,22 +60,16 @@ "prompt": "A detailed botanical illustration", "steps": 30, "aspect_ratio": "3:4", - "return_type": "base64" + "return_type": "base64", }, }, { "name": "with_return_type_url", - "params": { - "prompt": "A vintage car on a desert road", - "return_type": "url" - }, + "params": {"prompt": "A vintage car on a desert road", "return_type": "url"}, }, { "name": "with_return_type_base64", - "params": { - "prompt": "A fantasy castle on a hill", - "return_type": "base64" - } + "params": {"prompt": "A fantasy castle on a hill", "return_type": "base64"}, }, { "name": "with_all_options", @@ -95,9 +81,9 @@ "advance_config": { "negative_prompt": "simple, plain, boring", "guidance": 8, - "seed": 12345 + "seed": 12345, }, - "return_type": "base64" + "return_type": "base64", }, }, ] @@ -109,7 +95,7 @@ "params": { "prompt": "Add snow effects to this image", "url": IMAGE_URL, - "return_type": "base64" + "return_type": "base64", }, }, { @@ -118,7 +104,7 @@ "prompt": "Apply a cyberpunk style to this image", "file_store_key": FILE_STORE_KEY, }, - } + }, ] @@ -139,7 +125,6 @@ def test_image_generation(self, test_case): assert result is not None if type(result) is dict: - # Check for image data based on return_type if test_case["params"].get("return_type") == "url": assert result.get("url") is not None @@ -156,11 +141,14 @@ def test_image_generation(self, test_case): except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - @pytest.mark.parametrize("test_case", IMAGE_TO_IMAGE_TEST_CASES[:1], ids=[tc["name"] for tc in IMAGE_TO_IMAGE_TEST_CASES[:1]]) + @pytest.mark.parametrize( + "test_case", + IMAGE_TO_IMAGE_TEST_CASES[:1], + ids=[tc["name"] for tc in IMAGE_TO_IMAGE_TEST_CASES[:1]], + ) def 
test_image_to_image_generation(self, test_case): """Test image-to-image generation with URL input""" try: - result = jigsaw.image_generation(test_case["params"]) print(f"Test {test_case['name']}: Generated image from input") @@ -192,7 +180,7 @@ async def test_image_generation_async(self, test_case): # Check response structure assert result is not None if type(result) is dict: - # Check for image data based on return_type + # Check for image data based on return_type if test_case["params"].get("return_type") == "url": assert result.get("url") is not None assert requests.get(result["url"]).status_code == 200 @@ -209,7 +197,11 @@ async def test_image_generation_async(self, test_case): except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in async {test_case['name']}: {e}") - @pytest.mark.parametrize("test_case", IMAGE_TO_IMAGE_TEST_CASES[:1], ids=[tc["name"] for tc in IMAGE_TO_IMAGE_TEST_CASES[:1]]) + @pytest.mark.parametrize( + "test_case", + IMAGE_TO_IMAGE_TEST_CASES[:1], + ids=[tc["name"] for tc in IMAGE_TO_IMAGE_TEST_CASES[:1]], + ) @pytest.mark.asyncio async def test_image_to_image_generation_async(self, test_case): """Test asynchronous image-to-image generation with URL input""" diff --git a/tests/test_object_detection.py b/tests/test_object_detection.py index b6d8d78..1fbd5ca 100644 --- a/tests/test_object_detection.py +++ b/tests/test_object_detection.py @@ -18,7 +18,9 @@ jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) -IMAGE_URL = "https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg" +IMAGE_URL = ( + "https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg" +) TEST_CASES = [ { @@ -102,9 +104,7 @@ def test_object_detection(self, test_case): if test_case.get("blob"): # Download blob content blob_content = requests.get(test_case["blob"]).content - result 
= jigsaw.vision.object_detection( - blob_content, test_case.get("options", {}) - ) + result = jigsaw.vision.object_detection(blob_content, test_case.get("options", {})) else: # Use params directly result = jigsaw.vision.object_detection(test_case["params"]) diff --git a/tests/test_prediction.py b/tests/test_prediction.py index f38b016..a87ccab 100644 --- a/tests/test_prediction.py +++ b/tests/test_prediction.py @@ -17,7 +17,6 @@ async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) - def generate_dates(start_date, num_days): dates = [] for i in range(num_days): @@ -48,9 +47,7 @@ def generate_dates(start_date, num_days): { "name": "seasonal_pattern", "params": { - "dataset": [ - {"date": dates[i], "value": 100 + (50 * (i % 7))} for i in range(21) - ], + "dataset": [{"date": dates[i], "value": 100 + (50 * (i % 7))} for i in range(21)], "steps": 7, }, }, @@ -64,9 +61,7 @@ def generate_dates(start_date, num_days): { "name": "large_dataset_prediction", "params": { - "dataset": [ - {"date": dates[i], "value": 1000 + (i * 20)} for i in range(30) - ], + "dataset": [{"date": dates[i], "value": 1000 + (i * 20)} for i in range(30)], "steps": 10, }, }, diff --git a/tests/test_sentiment.py b/tests/test_sentiment.py index c184ec1..5bb5914 100644 --- a/tests/test_sentiment.py +++ b/tests/test_sentiment.py @@ -30,9 +30,7 @@ }, { "name": "neutral_sentiment_factual", - "params": { - "text": "The meeting is scheduled for 3 PM tomorrow in conference room B." - }, + "params": {"text": "The meeting is scheduled for 3 PM tomorrow in conference room B."}, }, { "name": "mixed_sentiment_paragraph", @@ -68,9 +66,7 @@ }, { "name": "question_sentiment", - "params": { - "text": "Why is this product so amazing? I can't believe how well it works!" - }, + "params": {"text": "Why is this product so amazing? 
I can't believe how well it works!"}, }, ] diff --git a/tests/test_summary.py b/tests/test_summary.py index e2fb763..ab79ea9 100644 --- a/tests/test_summary.py +++ b/tests/test_summary.py @@ -147,9 +147,7 @@ def test_summary(self, test_case): else: assert isinstance(result["summary"], str) if "max_characters" in test_case["params"]: - assert ( - len(result["summary"]) <= test_case["params"]["max_characters"] - ) + assert len(result["summary"]) <= test_case["params"]["max_characters"] except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") @@ -179,9 +177,7 @@ async def test_summary_async(self, test_case): else: assert isinstance(result["summary"], str) if "max_characters" in test_case["params"]: - assert ( - len(result["summary"]) <= test_case["params"]["max_characters"] - ) + assert len(result["summary"]) <= test_case["params"]["max_characters"] except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") diff --git a/tests/test_translate.py b/tests/test_translate.py index f556fca..5b560be 100644 --- a/tests/test_translate.py +++ b/tests/test_translate.py @@ -193,9 +193,7 @@ def test_translate_image(self, test_case): if test_case.get("blob"): # Download blob content blob_content = requests.get(test_case["blob"]).content - result = jigsaw.translate.image( - blob_content, test_case.get("options", {}) - ) + result = jigsaw.translate.image(blob_content, test_case.get("options", {})) else: # Use params directly result = jigsaw.translate.image(test_case["params"]) diff --git a/tests/test_validate.py b/tests/test_validate.py index c1c0311..d0d2c43 100644 --- a/tests/test_validate.py +++ b/tests/test_validate.py @@ -17,9 +17,7 @@ async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) # Sample URLs for NSFW testing -SAFE_IMAGE_URL = ( - "https://images.unsplash.com/photo-1506905925346-21bda4d32df4?q=80&w=2070" -) +SAFE_IMAGE_URL = 
"https://images.unsplash.com/photo-1506905925346-21bda4d32df4?q=80&w=2070" POTENTIALLY_NSFW_URL = "https://images.unsplash.com/photo-1512310604669-443f26c35f52?q=80&w=868&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" SPAM_CHECK_TEST_CASES = [ @@ -75,9 +73,7 @@ }, { "name": "mixed_correct_and_incorrect", - "params": { - "text": "The weather is beatiful today, but tommorow might be diferent." - }, + "params": {"text": "The weather is beatiful today, but tommorow might be diferent."}, }, { "name": "technical_text", @@ -429,9 +425,7 @@ async def test_nsfw_check_blob_async(self, test_case): try: # Download blob content blob_content = requests.get(test_case["blob_url"]).content - result = await async_jigsaw.validate.nsfw( - blob_content, test_case["options"] - ) + result = await async_jigsaw.validate.nsfw(blob_content, test_case["options"]) assert result["success"] assert "nsfw" in result From ba56ab1b30b500f45533870a76f02472773f80f1 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Fri, 12 Sep 2025 09:03:57 -0700 Subject: [PATCH 53/53] feat: seperating test cases for deepresearch and ai_scrape. 
--- .github/workflows/ci.yml | 36 ++----- tests/test_ai_scrape.py | 141 +++++++++++++++++++++++++ tests/test_deep_research.py | 95 +++++++++++++++++ tests/test_web.py | 205 ------------------------------------ 4 files changed, 242 insertions(+), 235 deletions(-) create mode 100644 tests/test_ai_scrape.py create mode 100644 tests/test_deep_research.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c3b541d..b1f5b26 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,35 +8,8 @@ on: jobs: ruff-format-check: - name: Ruff Format Check - ${{ matrix.file }} + name: Ruff Format Check runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - file: - - __init__.py - - _config.py - - _types.py - - async_request.py - - audio.py - - classification.py - - embedding_v2.py - - embedding.py - - exceptions.py - - helpers.py - - image_generation.py - - prediction.py - - prompt_engine.py - - request.py - - search.py - - sentiment.py - - sql.py - - store.py - - summary.py - - translate.py - - validate.py - - vision.py - - web.py steps: - uses: actions/checkout@v4 @@ -48,9 +21,10 @@ jobs: - name: Install ruff run: pip install ruff - - name: Check formatting for ${{ matrix.file }} + - name: Check all files with ruff run: | - ruff check jigsawstack/${{ matrix.file }} --config .github/ruff.toml + ruff check jigsawstack/ --config .github/ruff.toml + ruff format --check jigsawstack/ --config .github/ruff.toml test: name: Test - ${{ matrix.test-file }} @@ -72,6 +46,8 @@ jobs: - test_translate.py - test_validate.py - test_web.py + - test_deep_research.py + - test_ai_scrape.py steps: - uses: actions/checkout@v4 diff --git a/tests/test_ai_scrape.py b/tests/test_ai_scrape.py new file mode 100644 index 0000000..4c30b33 --- /dev/null +++ b/tests/test_ai_scrape.py @@ -0,0 +1,141 @@ +import logging +import os + +import pytest +from dotenv import load_dotenv + +import jigsawstack +from jigsawstack.exceptions import JigsawStackError + +load_dotenv() 
+ +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) + +URL = "https://jigsawstack.com" + +# AI Scrape Test Cases +AI_SCRAPE_TEST_CASES = [ + { + "name": "scrape_with_element_prompts", + "params": { + "url": URL, + "element_prompts": ["title", "main content", "navigation links"], + }, + }, + { + "name": "scrape_with_selectors", + "params": { + "url": URL, + "selectors": ["h1", "p", "a"], + }, + }, + { + "name": "scrape_with_features", + "params": { + "url": URL, + "element_prompts": ["title"], + "features": ["meta", "link"], + }, + }, + { + "name": "scrape_with_root_element", + "params": { + "url": URL, + "element_prompts": ["content"], + "root_element_selector": "main", + }, + }, + { + "name": "scrape_with_wait_for_timeout", + "params": { + "url": URL, + "element_prompts": ["content"], + "wait_for": {"mode": "timeout", "value": 3000}, + }, + }, + { + "name": "scrape_mobile_view", + "params": { + "url": URL, + "element_prompts": ["mobile menu"], + "is_mobile": True, + }, + }, + { + "name": "scrape_with_cookies", + "params": { + "url": URL, + "element_prompts": ["user data"], + "cookies": [{"name": "session", "value": "test123", "domain": "example.com"}], + }, + }, + { + "name": "scrape_with_advance_config", + "params": { + "url": URL, + "element_prompts": ["content"], + "advance_config": {"console": True, "network": True, "cookies": True}, + }, + }, +] + + +class TestAIScrapeSync: + """Test synchronous AI scrape methods""" + + @pytest.mark.parametrize( + "test_case", + AI_SCRAPE_TEST_CASES, + ids=[tc["name"] for tc in AI_SCRAPE_TEST_CASES], + ) + def test_ai_scrape(self, test_case): + """Test synchronous AI scrape with various inputs""" + try: + result = jigsaw.web.ai_scrape(test_case["params"]) + + assert result["success"] + assert "data" in result + assert 
isinstance(result["data"], list) + + # Check for optional features + if "meta" in test_case["params"].get("features", []): + assert "meta" in result + if "link" in test_case["params"].get("features", []): + assert "link" in result + assert isinstance(result["link"], list) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestAIScrapeAsync: + """Test asynchronous AI scrape methods""" + + @pytest.mark.parametrize( + "test_case", + AI_SCRAPE_TEST_CASES, + ids=[tc["name"] for tc in AI_SCRAPE_TEST_CASES], + ) + @pytest.mark.asyncio + async def test_ai_scrape_async(self, test_case): + """Test asynchronous AI scrape with various inputs""" + try: + result = await async_jigsaw.web.ai_scrape(test_case["params"]) + + assert result["success"] + assert "data" in result + assert isinstance(result["data"], list) + + # Check for optional features + if "meta" in test_case["params"].get("features", []): + assert "meta" in result + if "link" in test_case["params"].get("features", []): + assert "link" in result + assert isinstance(result["link"], list) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") diff --git a/tests/test_deep_research.py b/tests/test_deep_research.py new file mode 100644 index 0000000..3d584ab --- /dev/null +++ b/tests/test_deep_research.py @@ -0,0 +1,95 @@ +import logging +import os + +import pytest +from dotenv import load_dotenv + +import jigsawstack +from jigsawstack.exceptions import JigsawStackError + +load_dotenv() + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) + +URL = "https://jigsawstack.com" + + +# Deep Research Test Cases +DEEP_RESEARCH_TEST_CASES = [ + { + "name": "basic_deep_research", + "params": { + "query": "climate change 
effects", + }, + }, + { + "name": "technical_deep_research", + "params": { + "query": "quantum computing applications in cryptography", + }, + }, + { + "name": "deep_research_with_depth", + "params": { + "query": "renewable energy sources", + "depth": 2, + }, + }, +] + + +class TestDeepResearchSync: + """Test synchronous deep research methods""" + + @pytest.mark.parametrize( + "test_case", + DEEP_RESEARCH_TEST_CASES, + ids=[tc["name"] for tc in DEEP_RESEARCH_TEST_CASES], + ) + def test_deep_research(self, test_case): + """Test synchronous deep research with various inputs""" + try: + result = jigsaw.web.deep_research(test_case["params"]) + + assert result["success"] + assert "results" in result + assert isinstance(result["results"], str) + assert len(result["results"]) > 0 + + # Check for sources + if "sources" in result: + assert isinstance(result["sources"], list) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestDeepResearchAsync: + """Test asynchronous deep research methods""" + + @pytest.mark.parametrize( + "test_case", + DEEP_RESEARCH_TEST_CASES, + ids=[tc["name"] for tc in DEEP_RESEARCH_TEST_CASES], + ) + @pytest.mark.asyncio + async def test_deep_research_async(self, test_case): + """Test asynchronous deep research with various inputs""" + try: + result = await async_jigsaw.web.deep_research(test_case["params"]) + + assert result["success"] + assert "results" in result + assert isinstance(result["results"], str) + assert len(result["results"]) > 0 + + # Check for sources + if "sources" in result: + assert isinstance(result["sources"], list) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") diff --git a/tests/test_web.py b/tests/test_web.py index dda97c5..c22ccd7 100644 --- a/tests/test_web.py +++ b/tests/test_web.py @@ -15,75 +15,6 @@ URL = "https://jigsawstack.com" - -# AI Scrape Test Cases -AI_SCRAPE_TEST_CASES = [ - { - 
"name": "scrape_with_element_prompts", - "params": { - "url": URL, - "element_prompts": ["title", "main content", "navigation links"], - }, - }, - { - "name": "scrape_with_selectors", - "params": { - "url": URL, - "selectors": ["h1", "p", "a"], - }, - }, - { - "name": "scrape_with_features", - "params": { - "url": URL, - "element_prompts": ["title"], - "features": ["meta", "link"], - }, - }, - { - "name": "scrape_with_root_element", - "params": { - "url": URL, - "element_prompts": ["content"], - "root_element_selector": "main", - }, - }, - { - "name": "scrape_with_wait_for_timeout", - "params": { - "url": URL, - "element_prompts": ["content"], - "wait_for": {"mode": "timeout", "value": 3000}, - }, - }, - { - "name": "scrape_mobile_view", - "params": { - "url": URL, - "element_prompts": ["mobile menu"], - "is_mobile": True, - }, - }, - { - "name": "scrape_with_cookies", - "params": { - "url": URL, - "element_prompts": ["user data"], - "cookies": [ - {"name": "session", "value": "test123", "domain": "example.com"} - ], - }, - }, - { - "name": "scrape_with_advance_config", - "params": { - "url": URL, - "element_prompts": ["content"], - "advance_config": {"console": True, "network": True, "cookies": True}, - }, - }, -] - # HTML to Any Test Cases HTML_TO_ANY_TEST_CASES = [ { @@ -212,58 +143,6 @@ }, ] -# Deep Research Test Cases -DEEP_RESEARCH_TEST_CASES = [ - { - "name": "basic_deep_research", - "params": { - "query": "climate change effects", - }, - }, - { - "name": "technical_deep_research", - "params": { - "query": "quantum computing applications in cryptography", - }, - }, - { - "name": "deep_research_with_depth", - "params": { - "query": "renewable energy sources", - "depth": 2, - }, - }, -] - - -class TestAIScrapeSync: - """Test synchronous AI scrape methods""" - - @pytest.mark.parametrize( - "test_case", - AI_SCRAPE_TEST_CASES, - ids=[tc["name"] for tc in AI_SCRAPE_TEST_CASES], - ) - def test_ai_scrape(self, test_case): - """Test synchronous AI scrape with 
various inputs""" - try: - result = jigsaw.web.ai_scrape(test_case["params"]) - - assert result["success"] - assert "data" in result - assert isinstance(result["data"], list) - - # Check for optional features - if "meta" in test_case["params"].get("features", []): - assert "meta" in result - if "link" in test_case["params"].get("features", []): - assert "link" in result - assert isinstance(result["link"], list) - - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - - class TestHTMLToAnySync: """Test synchronous HTML to Any methods""" @@ -348,65 +227,8 @@ def test_search_suggestions(self, test_case): except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - -class TestDeepResearchSync: - """Test synchronous deep research methods""" - - @pytest.mark.parametrize( - "test_case", - DEEP_RESEARCH_TEST_CASES, - ids=[tc["name"] for tc in DEEP_RESEARCH_TEST_CASES], - ) - def test_deep_research(self, test_case): - """Test synchronous deep research with various inputs""" - try: - result = jigsaw.web.deep_research(test_case["params"]) - - assert result["success"] - assert "results" in result - assert isinstance(result["results"], str) - assert len(result["results"]) > 0 - - # Check for sources - if "sources" in result: - assert isinstance(result["sources"], list) - - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - - # Async Test Classes - -class TestAIScrapeAsync: - """Test asynchronous AI scrape methods""" - - @pytest.mark.parametrize( - "test_case", - AI_SCRAPE_TEST_CASES, - ids=[tc["name"] for tc in AI_SCRAPE_TEST_CASES], - ) - @pytest.mark.asyncio - async def test_ai_scrape_async(self, test_case): - """Test asynchronous AI scrape with various inputs""" - try: - result = await async_jigsaw.web.ai_scrape(test_case["params"]) - - assert result["success"] - assert "data" in result - assert isinstance(result["data"], 
list) - - # Check for optional features - if "meta" in test_case["params"].get("features", []): - assert "meta" in result - if "link" in test_case["params"].get("features", []): - assert "link" in result - assert isinstance(result["link"], list) - - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - - class TestHTMLToAnyAsync: """Test asynchronous HTML to Any methods""" @@ -493,30 +315,3 @@ async def test_search_suggestions_async(self, test_case): except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - - -class TestDeepResearchAsync: - """Test asynchronous deep research methods""" - - @pytest.mark.parametrize( - "test_case", - DEEP_RESEARCH_TEST_CASES, - ids=[tc["name"] for tc in DEEP_RESEARCH_TEST_CASES], - ) - @pytest.mark.asyncio - async def test_deep_research_async(self, test_case): - """Test asynchronous deep research with various inputs""" - try: - result = await async_jigsaw.web.deep_research(test_case["params"]) - - assert result["success"] - assert "results" in result - assert isinstance(result["results"], str) - assert len(result["results"]) > 0 - - # Check for sources - if "sources" in result: - assert isinstance(result["sources"], list) - - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}")