diff --git a/backend/app/api/v1/routes.py b/backend/app/api/v1/routes.py
index 6644e472..d6bf12b5 100644
--- a/backend/app/api/v1/routes.py
+++ b/backend/app/api/v1/routes.py
@@ -30,7 +30,7 @@ async def _run_agents_in_background(report_id: str, token_id: str):
             break  # Exit the async for loop on failure

 @router.post("/report/generate", response_model=ReportResponse)
-async def generate_report_endpoint(request: ReportRequest, background_tasks: BackgroundTasks, session: AsyncSession = Depends(get_session)):
+async def generate_report_endpoint(request: ReportRequest, background_tasks: BackgroundTasks, session: AsyncSession = Depends(get_db)):
     api_logger.info(f"Received report generation request for token_id: {request.token_id}")
     report_repository = ReportRepository(session)
     report_response = await generate_report(request, report_repository)
@@ -39,7 +39,7 @@ async def generate_report_endpoint(request: ReportRequest, background_tasks: Bac
     return report_response

 @router.get("/reports/{report_id}/status")
-async def get_report_status_endpoint(report_id: str, session: AsyncSession = Depends(get_session)):
+async def get_report_status_endpoint(report_id: str, session: AsyncSession = Depends(get_db)):
     api_logger.info(f"Received status request for report_id: {report_id}")
     report_repository = ReportRepository(session)
     report = await get_report_status(report_id, report_repository)
@@ -49,7 +49,7 @@ async def get_report_status_endpoint(report_id: str, session: AsyncSession = Dep
     return {"report_id": report_id, "status": report["status"]}

 @router.get("/reports/{report_id}/data")
-async def get_report_data_endpoint(report_id: str, session: AsyncSession = Depends(get_session)):
+async def get_report_data_endpoint(report_id: str, session: AsyncSession = Depends(get_db)):
     api_logger.info(f"Received data request for report_id: {report_id}")
     report_repository = ReportRepository(session)
     report_result = await get_report_data(report_id, report_repository)
diff --git a/backend/app/cache/__pycache__/redis_client.cpython-313.pyc b/backend/app/cache/__pycache__/redis_client.cpython-313.pyc
new file mode 100644
index 00000000..9055f162
Binary files /dev/null and b/backend/app/cache/__pycache__/redis_client.cpython-313.pyc differ
diff --git a/backend/app/core/__pycache__/config.cpython-313.pyc b/backend/app/core/__pycache__/config.cpython-313.pyc
index b1120bb1..b469c64a 100644
Binary files a/backend/app/core/__pycache__/config.cpython-313.pyc and b/backend/app/core/__pycache__/config.cpython-313.pyc differ
diff --git a/backend/app/db/database.py b/backend/app/db/database.py
index 3e6181d6..54f8c9f5 100644
--- a/backend/app/db/database.py
+++ b/backend/app/db/database.py
@@ -1,6 +1,5 @@
 from typing import AsyncGenerator
 from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker
-from backend.app.db.base import Base
 from backend.app.core.config import settings

 SQLALCHEMY_DATABASE_URL = settings.DATABASE_URL
diff --git a/backend/app/db/migrations/env.py b/backend/app/db/migrations/env.py
index 27ed9773..5c36b3d1 100644
--- a/backend/app/db/migrations/env.py
+++ b/backend/app/db/migrations/env.py
@@ -1,7 +1,8 @@
+import os
+import sys
 from logging.config import fileConfig

-from sqlalchemy import engine_from_config
-from sqlalchemy import pool
+from sqlalchemy import engine_from_config, pool

 from alembic import context

@@ -16,22 +17,9 @@

 # add your model's MetaData object here
 # for 'autogenerate' support
-import os, sys
 sys.path.append(os.getcwd())
 from backend.app.db.base import Base

-# this is the Alembic Config object, which provides
-# access to the values within the .ini file in use.
-config = context.config
-
-# Interpret the config file for Python logging.
-# This line sets up loggers basically.
-if config.config_file_name is not None:
-    fileConfig(config.config_file_name)
-
-# add your model's MetaData object here
-# for 'autogenerate' support
-# from myapp import mymodel
 target_metadata = Base.metadata

 # other values from the config, defined by the needs of env.py,
diff --git a/backend/app/db/tests/test_connection.py b/backend/app/db/tests/test_connection.py
index cf7a597d..734e667c 100644
--- a/backend/app/db/tests/test_connection.py
+++ b/backend/app/db/tests/test_connection.py
@@ -3,7 +3,6 @@
 from dotenv import load_dotenv
 from backend.app.db.connection import DatabaseConnection, initialize_db_connection, close_db_connection
 import unittest.mock
-import asyncpg

 # Load environment variables from .env file for testing
 load_dotenv()
diff --git a/backend/app/security/__pycache__/__init__.cpython-313.pyc b/backend/app/security/__pycache__/__init__.cpython-313.pyc
new file mode 100644
index 00000000..ca306433
Binary files /dev/null and b/backend/app/security/__pycache__/__init__.cpython-313.pyc differ
diff --git a/backend/app/security/__pycache__/rate_limiter.cpython-313.pyc b/backend/app/security/__pycache__/rate_limiter.cpython-313.pyc
new file mode 100644
index 00000000..31cb40f0
Binary files /dev/null and b/backend/app/security/__pycache__/rate_limiter.cpython-313.pyc differ
diff --git a/backend/app/services/agents/__pycache__/code_audit_agent.cpython-313.pyc b/backend/app/services/agents/__pycache__/code_audit_agent.cpython-313.pyc
index d67c5fb6..481f61c2 100644
Binary files a/backend/app/services/agents/__pycache__/code_audit_agent.cpython-313.pyc and b/backend/app/services/agents/__pycache__/code_audit_agent.cpython-313.pyc differ
diff --git a/backend/app/services/agents/code_audit_agent.py b/backend/app/services/agents/code_audit_agent.py
index dfad7d1b..6396e645 100644
--- a/backend/app/services/agents/code_audit_agent.py
+++ b/backend/app/services/agents/code_audit_agent.py
@@ -1,14 +1,49 @@
 import os
 import re
 import logging
+import json
+import hashlib
 from typing import Dict, Any, List
 import httpx
 from pydantic import BaseModel, Field
 import urllib.parse
 from backend.app.security.rate_limiter import rate_limiter
+from backend.app.utils.cache_utils import cache_request

 logger = logging.getLogger(__name__)

+def serialize_httpx_response(response: httpx.Response) -> str:
+    """Serializes an httpx.Response object to a JSON string."""
+    return json.dumps({
+        "status_code": response.status_code,
+        "headers": dict(response.headers),
+        "text": response.text,
+    })
+
+def deserialize_httpx_response(data_str: str) -> httpx.Response:
+    """Deserializes a JSON string back into a mock httpx.Response object."""
+    data = json.loads(data_str)
+
+    class MockResponse:
+        def __init__(self, status_code, headers, text):
+            self.status_code = status_code
+            self.headers = headers
+            self.text = text
+
+        def json(self):
+            # Attempt to parse text as JSON, raise JSONDecodeError if not valid JSON
+            return json.loads(self.text)
+
+        def raise_for_status(self):
+            if self.status_code >= 400:
+                # Create a dummy request for the HTTPStatusError
+                request = httpx.Request("GET", "http://cached-response/error")
+                raise httpx.HTTPStatusError(
+                    f"Bad response: {self.status_code}", request=request, response=self
+                )
+
+    return MockResponse(data["status_code"], data["headers"], data["text"])
+
 class CommitActivity(BaseModel):
     total: int
     weeks: List[Dict[str, int]]
@@ -102,7 +137,13 @@ def parse_link_header(link_header_str: str, fallback_len: int) -> int:
         if not rate_limiter.check_rate_limit("code_audit_agent"):
             logger.warning("Rate limit exceeded for code_audit_agent (GitHub commits).")
             return repo_data
-        commits_resp = await self.client.get(f"{base_url}/commits?per_page=1", headers=headers)
+        commits_resp = await cache_request(
+            url=f"{base_url}/commits?per_page=1",
+            external_api_call=lambda: self.client.get(f"{base_url}/commits?per_page=1", headers=headers),
+            serializer=serialize_httpx_response,
+            deserializer=deserialize_httpx_response,
+            params={"token_hash": hashlib.sha256(self.github_token.encode()).hexdigest()[:8]} if self.github_token else {}
+        )
         commits_resp.raise_for_status()
         link_header = commits_resp.headers.get('link') or commits_resp.headers.get('Link')
         repo_data['commits_count'] = parse_link_header(link_header, len(commits_resp.json()))
@@ -111,7 +152,13 @@ def parse_link_header(link_header_str: str, fallback_len: int) -> int:
         if not rate_limiter.check_rate_limit("code_audit_agent"):
             logger.warning("Rate limit exceeded for code_audit_agent (GitHub contributors).")
             return repo_data
-        contributors_resp = await self.client.get(f"{base_url}/contributors?per_page=1", headers=headers)
+        contributors_resp = await cache_request(
+            url=f"{base_url}/contributors?per_page=1",
+            external_api_call=lambda: self.client.get(f"{base_url}/contributors?per_page=1", headers=headers),
+            serializer=serialize_httpx_response,
+            deserializer=deserialize_httpx_response,
+            params={"token_hash": hashlib.sha256(self.github_token.encode()).hexdigest()[:8]} if self.github_token else {}
+        )
         contributors_resp.raise_for_status()
         link_header = contributors_resp.headers.get('link') or contributors_resp.headers.get('Link')
         repo_data['contributors_count'] = parse_link_header(link_header, len(contributors_resp.json()))
@@ -120,7 +167,13 @@ def parse_link_header(link_header_str: str, fallback_len: int) -> int:
         if not rate_limiter.check_rate_limit("code_audit_agent"):
             logger.warning("Rate limit exceeded for code_audit_agent (GitHub releases).")
             return repo_data
-        releases_resp = await self.client.get(f"{base_url}/releases/latest", headers=headers)
+        releases_resp = await cache_request(
+            url=f"{base_url}/releases/latest",
+            external_api_call=lambda: self.client.get(f"{base_url}/releases/latest", headers=headers),
+            serializer=serialize_httpx_response,
+            deserializer=deserialize_httpx_response,
+            params={"token_hash": hashlib.sha256(self.github_token.encode()).hexdigest()[:8]} if self.github_token else {}
+        )
         if releases_resp.status_code == 200:
             repo_data['latest_release'] = releases_resp.json().get('tag_name', 'N/A')
         else:
@@ -132,7 +185,13 @@ def parse_link_header(link_header_str: str, fallback_len: int) -> int:
             return repo_data
         search_query = urllib.parse.quote_plus(f"repo:{owner}/{repo}+type:issue")
         search_issues_url = f"https://api.github.com/search/issues?q={search_query}&per_page=1"
-        issues_search_resp = await self.client.get(search_issues_url, headers=headers)
+        issues_search_resp = await cache_request(
+            url=search_issues_url,
+            external_api_call=lambda: self.client.get(search_issues_url, headers=headers),
+            serializer=serialize_httpx_response,
+            deserializer=deserialize_httpx_response,
+            params={"token_hash": hashlib.sha256(self.github_token.encode()).hexdigest()[:8]} if self.github_token else {}
+        )
         issues_search_resp.raise_for_status()
         issues_search_data = issues_search_resp.json()
         repo_data['issues_count'] = issues_search_data.get('total_count', 0)
@@ -141,7 +200,13 @@ def parse_link_header(link_header_str: str, fallback_len: int) -> int:
         if not rate_limiter.check_rate_limit("code_audit_agent"):
             logger.warning("Rate limit exceeded for code_audit_agent (GitHub pull requests).")
             return repo_data
-        pulls_resp = await self.client.get(f"{base_url}/pulls?state=all&per_page=1", headers=headers)
+        pulls_resp = await cache_request(
+            url=f"{base_url}/pulls?state=all&per_page=1",
+            external_api_call=lambda: self.client.get(f"{base_url}/pulls?state=all&per_page=1", headers=headers),
+            serializer=serialize_httpx_response,
+            deserializer=deserialize_httpx_response,
+            params={"token_hash": hashlib.sha256(self.github_token.encode()).hexdigest()[:8]} if self.github_token else {}
+        )
         pulls_resp.raise_for_status()
         link_header = pulls_resp.headers.get('link') or pulls_resp.headers.get('Link')
         repo_data['pull_requests_count'] = parse_link_header(link_header, len(pulls_resp.json()))
@@ -174,7 +239,13 @@ async def _fetch_gitlab_repo_data(self, project_id: str) -> Dict[str, Any]:
         if not rate_limiter.check_rate_limit("code_audit_agent"):
             logger.warning("Rate limit exceeded for code_audit_agent (GitLab commits).")
             return repo_data
-        commits_resp = await self.client.get(f"{base_url}/repository/commits?per_page=1", headers=headers)
+        commits_resp = await cache_request(
+            url=f"{base_url}/repository/commits?per_page=1",
+            external_api_call=lambda: self.client.get(f"{base_url}/repository/commits?per_page=1", headers=headers),
+            serializer=serialize_httpx_response,
+            deserializer=deserialize_httpx_response,
+            params={"token_hash": hashlib.sha256(self.gitlab_token.encode()).hexdigest()[:8]} if self.gitlab_token else {}
+        )
         commits_resp.raise_for_status()
         repo_data['commits_count'] = int(commits_resp.headers.get('x-total', 0))

@@ -182,7 +253,13 @@ async def _fetch_gitlab_repo_data(self, project_id: str) -> Dict[str, Any]:
         if not rate_limiter.check_rate_limit("code_audit_agent"):
             logger.warning("Rate limit exceeded for code_audit_agent (GitLab contributors).")
             return repo_data
-        contributors_resp = await self.client.get(f"{base_url}/repository/contributors?per_page=1", headers=headers)
+        contributors_resp = await cache_request(
+            url=f"{base_url}/repository/contributors?per_page=1",
+            external_api_call=lambda: self.client.get(f"{base_url}/repository/contributors?per_page=1", headers=headers),
+            serializer=serialize_httpx_response,
+            deserializer=deserialize_httpx_response,
+            params={"token_hash": hashlib.sha256(self.gitlab_token.encode()).hexdigest()[:8]} if self.gitlab_token else {}
+        )
         contributors_resp.raise_for_status()
         repo_data['contributors_count'] = int(contributors_resp.headers.get('x-total', 0))

@@ -190,7 +267,13 @@ async def _fetch_gitlab_repo_data(self, project_id: str) -> Dict[str, Any]:
         if not rate_limiter.check_rate_limit("code_audit_agent"):
             logger.warning("Rate limit exceeded for code_audit_agent (GitLab tags).")
             return repo_data
-        tags_resp = await self.client.get(f"{base_url}/repository/tags?per_page=1", headers=headers)
+        tags_resp = await cache_request(
+            url=f"{base_url}/repository/tags?per_page=1",
+            external_api_call=lambda: self.client.get(f"{base_url}/repository/tags?per_page=1", headers=headers),
+            serializer=serialize_httpx_response,
+            deserializer=deserialize_httpx_response,
+            params={"token_hash": hashlib.sha256(self.gitlab_token.encode()).hexdigest()[:8]} if self.gitlab_token else {}
+        )
         if tags_resp.status_code == 200 and tags_resp.json():
             repo_data['latest_release'] = tags_resp.json()[0].get('name', 'N/A')
         else:
@@ -200,7 +283,13 @@ async def _fetch_gitlab_repo_data(self, project_id: str) -> Dict[str, Any]:
         if not rate_limiter.check_rate_limit("code_audit_agent"):
             logger.warning("Rate limit exceeded for code_audit_agent (GitLab issues).")
             return repo_data
-        issues_resp = await self.client.get(f"{base_url}/issues?scope=all&per_page=1", headers=headers)
+        issues_resp = await cache_request(
+            url=f"{base_url}/issues?scope=all&per_page=1",
+            external_api_call=lambda: self.client.get(f"{base_url}/issues?scope=all&per_page=1", headers=headers),
+            serializer=serialize_httpx_response,
+            deserializer=deserialize_httpx_response,
+            params={"token_hash": hashlib.sha256(self.gitlab_token.encode()).hexdigest()[:8]} if self.gitlab_token else {}
+        )
         issues_resp.raise_for_status()
         repo_data['issues_count'] = int(issues_resp.headers.get('x-total', 0))

@@ -208,7 +297,13 @@ async def _fetch_gitlab_repo_data(self, project_id: str) -> Dict[str, Any]:
         if not rate_limiter.check_rate_limit("code_audit_agent"):
             logger.warning("Rate limit exceeded for code_audit_agent (GitLab merge requests).")
             return repo_data
-        merge_requests_resp = await self.client.get(f"{base_url}/merge_requests?scope=all&per_page=1", headers=headers)
+        merge_requests_resp = await cache_request(
+            url=f"{base_url}/merge_requests?scope=all&per_page=1",
+            external_api_call=lambda: self.client.get(f"{base_url}/merge_requests?scope=all&per_page=1", headers=headers),
+            serializer=serialize_httpx_response,
+            deserializer=deserialize_httpx_response,
+            params={"token_hash": hashlib.sha256(self.gitlab_token.encode()).hexdigest()[:8]} if self.gitlab_token else {}
+        )
         merge_requests_resp.raise_for_status()
         repo_data['pull_requests_count'] = int(merge_requests_resp.headers.get('x-total', 0))
diff --git a/backend/app/services/agents/tests/__pycache__/test_code_audit_agent.cpython-313-pytest-8.2.0.pyc b/backend/app/services/agents/tests/__pycache__/test_code_audit_agent.cpython-313-pytest-8.2.0.pyc
new file mode 100644
index 00000000..42adac94
Binary files /dev/null and b/backend/app/services/agents/tests/__pycache__/test_code_audit_agent.cpython-313-pytest-8.2.0.pyc differ
diff --git a/backend/app/services/agents/tests/test_code_audit_agent.py b/backend/app/services/agents/tests/test_code_audit_agent.py
index 563a895b..65836825 100644
--- a/backend/app/services/agents/tests/test_code_audit_agent.py
+++ b/backend/app/services/agents/tests/test_code_audit_agent.py
@@ -2,7 +2,7 @@
 import pytest_asyncio
 import respx
 from httpx import Response, Request, RequestError
-from backend.app.services.agents.code_audit_agent import CodeAuditAgent, CodeMetrics, AuditSummary, CodeAuditResult
+from backend.app.services.agents.code_audit_agent import CodeAuditAgent, CodeMetrics, AuditSummary


 @pytest_asyncio.fixture
diff --git a/backend/app/services/nlg/__init__.py b/backend/app/services/nlg/__init__.py
index d87d725a..e69de29b 100644
--- a/backend/app/services/nlg/__init__.py
+++ b/backend/app/services/nlg/__init__.py
@@ -1,2 +0,0 @@
-from .nlg_engine import NLGEngine
-from .report_nlg_engine import ReportNLGEngine
diff --git a/backend/app/services/nlg/tests/test_nlg_engine.py b/backend/app/services/nlg/tests/test_nlg_engine.py
index 29be220b..4f4de190 100644
--- a/backend/app/services/nlg/tests/test_nlg_engine.py
+++ b/backend/app/services/nlg/tests/test_nlg_engine.py
@@ -2,7 +2,6 @@
 import json
 from unittest.mock import AsyncMock, patch
 from backend.app.services.nlg.nlg_engine import NLGEngine
-from backend.app.services.nlg.llm_client import LLMClient
 from backend.app.services.nlg.prompt_templates import get_template, fill_template

 # Concrete implementation for testing purposes
diff --git a/backend/app/services/nlg/tests/test_report_nlg_engine.py b/backend/app/services/nlg/tests/test_report_nlg_engine.py
index 9cd2a133..1e680247 100644
--- a/backend/app/services/nlg/tests/test_report_nlg_engine.py
+++ b/backend/app/services/nlg/tests/test_report_nlg_engine.py
@@ -2,7 +2,6 @@
 import json
 from unittest.mock import AsyncMock, patch
 from backend.app.services.nlg.report_nlg_engine import ReportNLGEngine
-from backend.app.services.nlg.llm_client import LLMClient
 from backend.app.services.nlg.prompt_templates import get_template, fill_template

 # Mock the LLMClient for all tests in this module
diff --git a/backend/app/services/report_processor.py b/backend/app/services/report_processor.py
index 73c2715f..696cde0b 100644
--- a/backend/app/services/report_processor.py
+++ b/backend/app/services/report_processor.py
@@ -4,7 +4,7 @@
 from backend.app.services.agents.price_agent import run as price_agent_run
 from backend.app.services.agents.trend_agent import run as trend_agent_run
 from backend.app.services.agents.volume_agent import run as volume_agent_run
-from backend.app.core.storage import save_report_data, set_report_status, try_set_processing
+from backend.app.core.storage import try_set_processing
 from backend.app.services.nlg.report_nlg_engine import ReportNLGEngine
 from backend.app.services.summary.report_summary_engine import ReportSummaryEngine
 from backend.app.db.repositories.report_repository import ReportRepository
diff --git a/backend/app/services/summary/__init__.py b/backend/app/services/summary/__init__.py
index b9da1063..e69de29b 100644
--- a/backend/app/services/summary/__init__.py
+++ b/backend/app/services/summary/__init__.py
@@ -1,2 +0,0 @@
-from .summary_engine import SummaryEngine
-from .report_summary_engine import ReportSummaryEngine
diff --git a/backend/app/services/summary/summary_engine.py b/backend/app/services/summary/summary_engine.py
index 67e0841d..d945082c 100644
--- a/backend/app/services/summary/summary_engine.py
+++ b/backend/app/services/summary/summary_engine.py
@@ -3,7 +3,7 @@
 """

 from abc import ABC, abstractmethod
-from typing import Any, Dict, List
+from typing import Any, Dict

 class SummaryEngine(ABC):
     """
diff --git a/backend/app/services/validation/tests/test_validation_engine.py b/backend/app/services/validation/tests/test_validation_engine.py
index d75f5728..0835030f 100644
--- a/backend/app/services/validation/tests/test_validation_engine.py
+++ b/backend/app/services/validation/tests/test_validation_engine.py
@@ -1,4 +1,3 @@
-import pytest
 from backend.app.services.validation.validation_engine import perform_cross_source_checks, normalize_missing

 def test_normalize_missing():
diff --git a/backend/app/services/validation/validation_engine.py b/backend/app/services/validation/validation_engine.py
index 99c4e1b1..03733f3d 100644
--- a/backend/app/services/validation/validation_engine.py
+++ b/backend/app/services/validation/validation_engine.py
@@ -2,7 +2,6 @@
 Validation engine for ensuring data quality and consistency
 before NLG and summary generation.
""" -import re from typing import Dict, Any, Optional, List from copy import deepcopy diff --git a/backend/app/utils/__pycache__/cache_utils.cpython-313.pyc b/backend/app/utils/__pycache__/cache_utils.cpython-313.pyc new file mode 100644 index 00000000..5cccd8a4 Binary files /dev/null and b/backend/app/utils/__pycache__/cache_utils.cpython-313.pyc differ diff --git a/backend/app/utils/cache_utils.py b/backend/app/utils/cache_utils.py new file mode 100644 index 00000000..13dc2958 --- /dev/null +++ b/backend/app/utils/cache_utils.py @@ -0,0 +1,62 @@ +import os +import hashlib +import json +import logging +from typing import Any, Awaitable, Callable, Dict, Optional + +from redis.exceptions import RedisError + +from backend.app.cache.redis_client import redis_client + +logger = logging.getLogger(__name__) + +CACHE_TTL = int(os.getenv("REDIS_CACHE_TTL", 3600)) # Cache time-to-live in seconds (1 hour) + + +def _generate_cache_key(url: str, params: Optional[Dict[str, Any]]) -> str: + """Generates a unique cache key based on the URL and parameters.""" + sorted_params = json.dumps(params, sort_keys=True, default=str) if params else "" + hash_input = f"{url}-{sorted_params}" + return hashlib.sha256(hash_input.encode('utf-8')).hexdigest() + + +async def cache_request( + url: str, + params: Optional[Dict[str, Any]] = None, + external_api_call: Optional[Callable[[], Awaitable[Any]]] = None, + serializer: Callable[[Any], str] = json.dumps, + deserializer: Callable[[str], Any] = json.loads, +) -> Any: + """ + Checks Redis cache before making an external API call. + Stores hashed request keys and responses with TTL values. + Accepts optional `serializer` and `deserializer` callables (defaulting to `json.dumps`/`json.loads`) + to handle complex object types consistently. + If serialization fails, logs the error and skips caching, returning the original response. + """ + cache_key = _generate_cache_key(url, params) + try: + cached_response = redis_client.get_cache(cache_key) + if cached_response: + try: + return deserializer(cached_response) + except json.JSONDecodeError as e: + logger.exception(f"Failed to deserialize cached response for key {cache_key}: {e}") + # If deserialization fails, treat as a cache miss + pass + except RedisError as e: + logger.warning(f"Redis GET error for key {cache_key}: {e}") + + if external_api_call: + response = await external_api_call() + try: + # Attempt to serialize the response before caching + serialized_response = serializer(response) + redis_client.set_cache(cache_key, serialized_response, CACHE_TTL) + except (TypeError, ValueError) as e: + logger.warning(f"Failed to serialize response for caching (key: {cache_key}): {e}. Skipping cache." + ) + except RedisError as e: + logger.warning(f"Redis SETEX error for key {cache_key}: {e}") + return response + return None diff --git a/backend/tests/test_orchestrator_config.py b/backend/tests/test_orchestrator_config.py index 88526767..d1a47cda 100644 --- a/backend/tests/test_orchestrator_config.py +++ b/backend/tests/test_orchestrator_config.py @@ -108,6 +108,8 @@ async def test_create_orchestrator_with_no_urls(): with patch('backend.app.core.orchestrator.orchestrator_logger') as mock_logger: settings.ONCHAIN_METRICS_URL = None orchestrator = create_orchestrator() + agents = orchestrator.get_agents() + assert agents == {} mock_logger.warning.assert_any_call( "Onchain Data Agent will not be registered due to invalid configuration." )