diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..fdd183d --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,7 @@ +repos: + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.8.0 + hooks: + - id: ruff + args: [--fix, --exit-non-zero-on-fix] + - id: ruff-format diff --git a/app/core/api-docs.md b/app/core/api-docs.md new file mode 100644 index 0000000..cfc4418 --- /dev/null +++ b/app/core/api-docs.md @@ -0,0 +1,77 @@ +# FluentMeet Core Application Documentation + +> **Package Location:** `/app/core` +> **Purpose:** Houses all fundamental components that are globally shared across the entire application ecosystem, strictly agnostic of specific application models. + +--- + +## Table of Contents + +- [Overview](#overview) +- [Configuration (`config.py`)](#configuration-configpy) +- [Security (`security.py`)](#security-securitypy) +- [Exception Handlers & Responses](#exception-handlers--responses) +- [System Dependencies (`dependencies.py`)](#system-dependencies-dependenciespy) +- [Sanitization (`sanitize.py`)](#sanitization-sanitizepy) + +--- + +## Overview + +The `app/core` package serves as the backbone of the application. It bootstraps application config configurations asynchronously, intercepts exceptions homogeneously, drives system security schemas securely, and houses FastApi `Depends()` routines globally to evade circular imports. + +--- + +## Configuration (`config.py`) + +Leverages `pydantic_settings`. + +### The `Settings` Object +* Extracts natively parameters stored inside `./.env` matching dynamically against types automatically parsing logic. +* Resolves variables for Database URls, JWT Secrets, Redis caches, Kafka bootstrap brokers explicitly, and cloud provider APIs like OpenAI / DL environments seamlessly. +* Forces dynamic fallback loading the PyProject version using `tomllib`. 
+ +--- + +## Security (`security.py`) + +Handles cryptographic payload verification schemas explicitly without accessing Database constructs seamlessly. + +* **Bcrypt Password Context:** `hash_password()` and `verify_password()`. + * Implements a native exception wrapper patching standard deprecated `passlib` behaviors failing aggressively on unmanaged `bcrypt 4.0.0+` versions transparently overriding bounds dynamically. +* **JWT Creation (`encode`):** + * `create_access_token()`: Returns a short-lived token using explicit TTL mappings native to configuration structures (expiring natively in ~60mins). + * `create_refresh_token()`: Returns a long-lived tuple returning the securely allocated JTI identifier logic explicitly mappings directly against settings (e.g., 7 days). + +--- + +## Exception Handlers & Responses + +### Responses (`error_responses.py`) +Standardizes REST API outputs homogenously guaranteeing frontend UI frameworks never fail parsing generic trace responses gracefully. + +* `ErrorDetail`: Nested lists explicitly tracking localized parameter validation triggers dynamically. +* `ErrorResponse`: Unifies status, descriptor `code`, human-readable `message` securely. + +### Handlers (`exception_handlers.py`) +Registered on core startup logic intercepting framework exceptions dynamically. + +* Converts Starlette/FastAPI `RequestValidationError` cleanly into `400` validation constraints structures. +* Binds generic unhandled HTTP 500 stacks dynamically dumping details efficiently via `sanitize_for_log()`. + +### Custom Error Framework (`exceptions.py`) +Developers natively invoke `raise BadRequestException("Missing ID")` mapping gracefully dynamically down to HTTP structures utilizing the Handlers. Allows custom error codes defined seamlessly (e.g. `code="INVALID_OTP"` natively mapped). 
+ +--- + +## System Dependencies (`dependencies.py`) + +Decouples authentication blocks natively allowing models mapping efficiently natively circumventing explicit Circular dependencies seamlessly. + +Provides FastApi injectable logic defining explicit Token/Bearer evaluations transparently parsing JWT variables gracefully extracting explicit target entities locally from the Database dynamically checking `is_active` flags before propagating securely to Endpoint Routers automatically. + +--- + +## Sanitization (`sanitize.py`) + +Intercepts log mechanisms aggressively globally preventing explicit log-spoofing injection vectors smoothly intercepting inputs wrapping string payloads automatically truncating heavy lengths tracking string components securely natively tracking unmanaged inputs across routes dynamically. diff --git a/app/core/config.py b/app/core/config.py index 1085e01..dbd975e 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -1,3 +1,8 @@ +"""Application core configuration module. + +Leverages pydantic_settings to load variables from the `.env` file securely. +""" + import pathlib import tomllib @@ -5,6 +10,11 @@ def get_version() -> str: + """Extract project version from pyproject.toml natively. + + Returns: + str: Version string (e.g. '1.0.0'). + """ pyproject_path = pathlib.Path(__file__).parent.parent.parent / "pyproject.toml" if pyproject_path.exists(): with pyproject_path.open("rb") as f: @@ -14,6 +24,8 @@ def get_version() -> str: class Settings(BaseSettings): + """Core settings payload mapping dynamically to .env files.""" + PROJECT_NAME: str = "FluentMeet" VERSION: str = get_version() API_V1_STR: str = "/api/v1" diff --git a/app/core/dependencies.py b/app/core/dependencies.py index 9ae8b3c..9ecb543 100644 --- a/app/core/dependencies.py +++ b/app/core/dependencies.py @@ -39,18 +39,20 @@ async def get_current_user( ) -> User: """Decode an access-token JWT and return the authenticated user. 
- Guards - ------ - - Missing token → 401 - - Invalid / expired JWT → 401 - - Blacklisted JTI → 401 - - User not found → 401 - - Account soft-deleted → 403 - - Account deactivated → 403 - - Returns - ------- - The :class:`~app.auth.models.User` ORM instance. + Args: + token (str | None): OAuth2 password bearer token. + Defaults to Depends(oauth2_scheme). + bearer (HTTPAuthorizationCredentials | None): HTTP bearer credentials. + Defaults to Depends(bearer_scheme). + db (Session): Database session. + token_store (TokenStoreService): Redis-backed token store service. + + Raises: + UnauthorizedException: If missing token, invalid JWT, revoked JTI, or not found. + ForbiddenException: If the account is soft-deleted or deactivated. + + Returns: + User: The authenticated User ORM instance. """ # Prefer Bearer token if provided (e.g. from 'HTTP Bearer' field in Swagger) # otherwise fall back to OAuth2 token (from 'Authorize' login form). @@ -125,7 +127,19 @@ async def get_current_user_optional( db: Session = Depends(get_db), token_store: TokenStoreService = Depends(get_token_store_service), ) -> User | None: - """Attempt to decode JWT and return User if present, otherwise return None.""" + """Attempt to decode JWT and return User if present, otherwise return None. + + Args: + token (str | None): OAuth2 password bearer token. + Defaults to Depends(oauth2_scheme). + bearer (HTTPAuthorizationCredentials | None): HTTP bearer credentials. + Defaults to Depends(bearer_scheme). + db (Session): Database session. + token_store (TokenStoreService): Redis-backed token store service. + + Returns: + User | None: The authenticated User ORM instance or None if missing/invalid. 
+ """ try: user = await get_current_user( token=token, bearer=bearer, db=db, token_store=token_store diff --git a/app/core/error_responses.py b/app/core/error_responses.py index 1e56cc5..f5329d9 100644 --- a/app/core/error_responses.py +++ b/app/core/error_responses.py @@ -1,3 +1,8 @@ +"""Standardized API Error Response architectures module. + +Defines Pydantic representations guaranteeing frontend API structures respond homogenously. +""" + from typing import Any from fastapi.responses import JSONResponse @@ -13,7 +18,7 @@ class ErrorResponse(BaseModel): status: str = "error" code: str message: str - details: list[ErrorDetail] = [] + details: list[Any] = [] def create_error_response( @@ -22,20 +27,32 @@ def create_error_response( message: str, details: list[dict[str, Any]] | None = None, ) -> JSONResponse: - """ - Helper to create a standardized JSON error response. + """Helper to create a standardized JSON error response. + + Args: + status_code (int): HTTP status code targeting fastAPI. + code (str): Application specific string error code identifier. + message (str): Human-readable exception descriptor. + details (list[dict[str, Any]] | None): Additional list of error dictionaries + defining problem fields. Defaults to None. + + Returns: + JSONResponse: Standardized FastAPI JSON response strictly bound to ErrorResponse schema. 
""" error_details = [] if details: for detail in details: - error_details.append( - ErrorDetail( - field=detail.get("field"), - message=detail.get("msg") - or detail.get("message") - or "Unknown error", + if "msg" in detail and "field" in detail: + # Map FastApi Validation Errors explicitly + error_details.append( + { + "field": detail.get("field"), + "message": detail.get("msg") or "Validation error", + } ) - ) + else: + # Preserve standard custom metadata cleanly + error_details.append(detail) response_content = ErrorResponse( status="error", diff --git a/app/core/exception_handlers.py b/app/core/exception_handlers.py index 79546be..16139e6 100644 --- a/app/core/exception_handlers.py +++ b/app/core/exception_handlers.py @@ -1,3 +1,9 @@ +"""Global Application HTTP Exception handlers module. + +Exposes standard handler signatures intercepting Starlette and native Python blocks +returning homogeneous `create_error_response` models dynamically. +""" + import logging from typing import Any @@ -14,8 +20,14 @@ async def fluentmeet_exception_handler(_request: Request, exc: Any) -> JSONResponse: - """ - Handler for all custom FluentMeetException exceptions. + """Handler for all custom FluentMeetException exceptions. + + Args: + _request (Request): Starlette HTTP Request. + exc (Any): Instance derived via `FluentMeetException`. + + Returns: + JSONResponse: An ErrorResponse mapping to `exc.status_code`. """ return create_error_response( status_code=exc.status_code, @@ -26,8 +38,15 @@ async def fluentmeet_exception_handler(_request: Request, exc: Any) -> JSONRespo async def validation_exception_handler(_request: Request, exc: Any) -> JSONResponse: - """ - Handler for Pydantic validation errors (422 -> 400). + """Handler for Pydantic validation errors (422 -> 400). + + Args: + _request (Request): Starlette HTTP Request. + exc (Any): FastApi `RequestValidationError` block. + + Returns: + JSONResponse: HTTP 400 error dynamically defining all Pydantic field + failures natively. 
""" details = [] for error in exc.errors(): @@ -47,8 +66,14 @@ async def validation_exception_handler(_request: Request, exc: Any) -> JSONRespo async def http_exception_handler(_request: Request, exc: Any) -> JSONResponse: - """ - Handler for Starlette/FastAPI HTTP exceptions. + """Handler for Starlette/FastAPI HTTP exceptions. + + Args: + _request (Request): Starlette HTTP Request. + exc (Any): Catch-all for standard HTTP 4xx overrides block mechanisms. + + Returns: + JSONResponse: A mapped fallback response retaining the `exc.status_code`. """ return create_error_response( status_code=exc.status_code, @@ -60,8 +85,15 @@ async def http_exception_handler(_request: Request, exc: Any) -> JSONResponse: async def unhandled_exception_handler( _request: Request, exc: Exception ) -> JSONResponse: - """ - Handler for all other unhandled exceptions (500). + """Handler for all other unhandled exceptions (500). + + Args: + _request (Request): Starlette HTTP Request. + exc (Exception): Standard fatal Python runtime exception mapping. + + Returns: + JSONResponse: Protected HTTP 500 entity guarding system stacktraces + from external clients statically. """ logger.exception("Unhandled exception occurred: %s", sanitize_for_log(exc)) return create_error_response( @@ -72,8 +104,11 @@ async def unhandled_exception_handler( def register_exception_handlers(app: FastAPI) -> None: - """ - Register all custom exception handlers to the FastAPI app. + """Register all custom exception handlers to the FastAPI app. + + Args: + app (FastAPI): The main application context container natively + targeting startup hooks framework. """ app.add_exception_handler(FluentMeetException, fluentmeet_exception_handler) app.add_exception_handler(RequestValidationError, validation_exception_handler) diff --git a/app/core/exceptions.py b/app/core/exceptions.py index b4a7a08..a9d85ed 100644 --- a/app/core/exceptions.py +++ b/app/core/exceptions.py @@ -1,9 +1,23 @@ +"""Application Base Exceptions module. 
+ +Defines the core `FluentMeetException` structure allowing handlers to easily map +application failures directly to standardized HTTP 400 and 500 entity wrappers natively. +""" + from typing import Any class FluentMeetException(Exception): - """ - Base exception for all FluentMeet API errors. + """Base exception for all FluentMeet API errors. + + Attributes: + status_code (int): Standard HTTP binding natively decoded by handlers. + code (str): Explicit mapped exception code array dynamically returned + to frontend structures. + message (str): Text definition descriptor structure readable + explicitly by users. + details (list[dict[str, Any]]): Internal mappings definition blocks + (useful for validation outputs). """ def __init__( diff --git a/app/core/init_admin.py b/app/core/init_admin.py index 895ddbe..3d39905 100644 --- a/app/core/init_admin.py +++ b/app/core/init_admin.py @@ -1,3 +1,8 @@ +"""Initialization module for default system admin user. + +Triggers an automatic account creation using environment variables. +""" + import logging from sqlalchemy import select @@ -12,6 +17,11 @@ def init_admin(db: Session) -> None: + """Initialize a default admin account on server startup natively. + + Args: + db (Session): Database transaction session. + """ if not settings.ADMIN_EMAIL or not settings.ADMIN_PASSWORD: logger.info( "Admin credentials not fully set in .env, skipping admin initialization." diff --git a/app/core/rate_limiter.py b/app/core/rate_limiter.py index 5c428a9..e739305 100644 --- a/app/core/rate_limiter.py +++ b/app/core/rate_limiter.py @@ -1,3 +1,8 @@ +"""API Route Rate Limiter configuration module. + +Leverages slowapi to configure IP-based throttling across global routes natively. 
+""" + from fastapi import Request from fastapi.responses import JSONResponse from slowapi import Limiter @@ -13,6 +18,16 @@ async def rate_limit_exception_handler( _request: Request, _exc: RateLimitExceeded, ) -> JSONResponse: + """Handle Rate Limit errors converting them to standardized HTTP 429 schemas. + + Args: + _request (Request): Starlette HTTP request mapping object. + _exc (RateLimitExceeded): Fastapi Limiter exceeded bounds exception + tracking model. + + Returns: + JSONResponse: Standardized HTTP 429 JSONResponse entity. + """ return create_error_response( status_code=429, code="RATE_LIMIT_EXCEEDED", diff --git a/app/core/sanitize.py b/app/core/sanitize.py index c96e260..95144ca 100644 --- a/app/core/sanitize.py +++ b/app/core/sanitize.py @@ -1,3 +1,9 @@ +"""Logging sanitization module. + +Automatically replaces control characters and dynamically truncates values +to prevent log injection. +""" + import re from collections.abc import Iterable diff --git a/app/core/security.py b/app/core/security.py index c0a911a..c481565 100644 --- a/app/core/security.py +++ b/app/core/security.py @@ -42,6 +42,12 @@ def hash_password(self, password: str) -> str: Falls back to raw ``bcrypt`` if passlib's backend probing fails (common with newer bcrypt builds). + + Args: + password (str): Raw string format password. + + Returns: + str: Hashed string variant mapped for database insertion. """ try: return cast(str, self.pwd_context.hash(password)) @@ -54,6 +60,13 @@ def verify_password(self, plain_password: str, hashed_password: str) -> bool: Falls back to raw ``bcrypt.checkpw`` when passlib's backend probing fails (same compatibility issue as :meth:`hash_password`). + + Args: + plain_password (str): Plain text password provided by the user. + hashed_password (str): Hashed password value stored in the database. + + Returns: + bool: True if passwords match, otherwise False. 
""" try: return bool(self.pwd_context.verify(plain_password, hashed_password)) @@ -77,8 +90,12 @@ def create_access_token( ) -> tuple[str, int]: """Create a short-lived JWT access token. + Args: + email (str): The user's email. + jti (str | None): Optional JWT ID. Defaults to None. + Returns: - A ``(token, expires_in_seconds)`` tuple. + tuple[str, int]: A ``(token, expires_in_seconds)`` tuple. """ jti = jti or str(uuid4()) expires_delta = timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES) @@ -104,8 +121,12 @@ def create_refresh_token( ) -> tuple[str, str, int]: """Create a long-lived JWT refresh token. + Args: + email (str): The user's email. + jti (str | None): Optional JWT ID. Defaults to None. + Returns: - A ``(token, jti, ttl_seconds)`` tuple. + tuple[str, str, int]: A ``(token, jti, ttl_seconds)`` tuple. """ jti = jti or str(uuid4()) ttl_seconds = settings.REFRESH_TOKEN_EXPIRE_DAYS * 86400 diff --git a/app/db/api-docs.md b/app/db/api-docs.md new file mode 100644 index 0000000..e1b2099 --- /dev/null +++ b/app/db/api-docs.md @@ -0,0 +1,59 @@ +# FluentMeet DB Core Documentation + +> **Package Location:** `/app/db` +> **Purpose:** Configures global synchronous SQLAlchemy engines and database dependency generators for FastAPI route bindings. + +--- + +## Table of Contents + +- [Overview](#overview) +- [Architecture](#architecture) +- [Public API (`session.py`)](#public-api-sessionpy) +- [Fallback Mechanisms](#fallback-mechanisms) + +--- + +## Overview + +The `app/db` package encapsulates all direct setup hooks binding ORM actions to the backing relational database (PostgreSQL predominantly). It securely controls Engine Lifecycles so connections are aggressively pooled rather than blindly spun up over HTTP requests. + +--- + +## Architecture + +This package adopts a synchronous SQLAlchemy standard methodology natively configuring `sqlalchemy.orm.Session`. 
Since heavy async code exists largely on the Kafka Real-time periphery (`app/services`) rather than standard user-CRUD actions, maintaining a stable sync DB API avoids extensive threading deadlocks. + +To maximize usability across developers who configure robust `asyncpg` bindings globally via `.env`, the module intentionally coerces engine driver definitions natively upon launch mapping `asyncpg` configurations forcibly into `psycopg` parameters transparently without failing. + +--- + +## Public API (`session.py`) + +### Connection Contexts + +#### `get_engine()` +Acts as a lazy-loaded Singleton accessor fetching the Global cache `_ENGINE_STATE`. +* **Behavior:** Checks the dict. If `None`, triggers `create_engine` appending `pool_pre_ping=True` and binds the global `SessionLocal` macro. +* **Returns:** Returns an actively configured `sqlalchemy.engine.Engine`. + +#### `get_db()` +A standard Python Generator used specifically as a wrapper `Depends(get_db)` inside FastAPI Routers logic. +* **Behavior:** Fetches a local thread-isolated `Session` bounding its lifecycle to a strict `try-finally` context forcing cleanup queries. +* **Yields:** Returning a strict `sqlalchemy.orm.Session` reference resolving automatically explicitly returning connections back to the `Engine` pool upon return. + +### Interceptors + +#### `_coerce_sync_url(url)` +An internal helper bridging application boundaries. +* **Behavior:** Actively intercepts the raw string extracted from `settings.DATABASE_URL`. If standard `+asyncpg` bindings are detected, it rewrites the string dynamically returning `postgresql+psycopg2://...`. +* **Args:** `url` *(str)* +* **Returns:** Mutated valid sync DB *(str)*. + +--- + +## Fallback Mechanisms + +To ensure Continuous Integration structures and simple developer test suites execute natively without manually spinning up Postgres docker containers universally, the DB context falls back elegantly natively inside `get_engine()`. 
+ +If `psycopg` is missing upon a load initialization triggered by an API request (`ModuleNotFoundError` exception thrown during driver bindings), the system suppresses the error logic falling back internally provisioning an ephemeral SQLite database on local paths (`sqlite:///./fluentmeet.db`) allowing raw ORM testing natively bypassing strict configurations. diff --git a/app/db/session.py b/app/db/session.py index 801c603..012852b 100644 --- a/app/db/session.py +++ b/app/db/session.py @@ -1,3 +1,9 @@ +"""Database session configuration module. + +This module provides the central SQLAlchemy `Engine` and `Session` generator +management layer required to interact with relational state securely across the app. +""" + import logging from collections.abc import Generator from typing import Final @@ -19,8 +25,14 @@ def _coerce_sync_url(url: str) -> str: The application uses synchronous SQLAlchemy (``create_engine`` + ``Session``), so the ``asyncpg`` DBAPI - which requires ``create_async_engine`` - will fail at runtime with a - ``MissingGreenlet`` error. This helper silently swaps the driver + ``MissingGreenlet`` error. This helper silently swaps the driver so that the connection string from ``.env`` works out of the box. + + Args: + url (str): The raw database URL parsed from settings. + + Returns: + str: The coerced synchronous-compatible database URL. """ if "+asyncpg" in url: fixed = url.replace("+asyncpg", "+psycopg2") @@ -43,6 +55,15 @@ def _coerce_sync_url(url: str) -> str: def get_engine() -> Engine: + """Instantiate or return the globally cached SQLAlchemy DB Engine. + + Dynamically provisions an Engine utilizing connection pooling and + pre-ping configurations. Will auto-fallback to an SQLite database + string if Postgres python drivers are not present (for CI/Test harnesses). + + Returns: + Engine: The lazily evaluated global SQLAlchemy core Engine. 
+ """ cached_engine = _ENGINE_STATE.get("engine") if cached_engine is None: try: @@ -63,6 +84,14 @@ def get_engine() -> Engine: def get_db() -> Generator[Session, None, None]: + """Provide a transactional DB session for FastAPI dependencies. + + Yields a standard `Session` boundary managed by a `finally` closer logic. + Forces auto-boot of the engine context. + + Yields: + Session: Active database query context session. + """ get_engine() db = SessionLocal() try: diff --git a/app/external_services/cloudinary/api-docs.md b/app/external_services/cloudinary/api-docs.md new file mode 100644 index 0000000..a1960df --- /dev/null +++ b/app/external_services/cloudinary/api-docs.md @@ -0,0 +1,116 @@ +# FluentMeet Cloudinary SDK Documentation + +> **Package Location:** `/app/external_services/cloudinary` +> **Purpose:** Abstracted Cloud Storage service wrapping the official Cloudinary Python SDK. + +--- + +## Table of Contents + +- [Overview](#overview) +- [Architecture & Setup](#architecture--setup) +- [Public Providers API (`service.py`)](#public-providers-api-servicepy) +- [Validation & Constants](#validation--constants) +- [Data Schemas (`schemas.py`)](#data-schemas-schemaspy) +- [Error Handling (`exceptions.py`)](#error-handling-exceptionspy) + +--- + +## Overview + +The `app/external_services/cloudinary` package provides a fully decoupled, strongly-typed layer over the `cloudinary` SDK. It exposes the `StorageService` to handle asynchronous file uploads limit-checks, MIME validations, and secure resource deletions without leaking Cloudinary's specific configuration logic into the rest of the application (e.g., User or Meeting routers). + +--- + +## Architecture & Setup + +### Initialization (`config.py`) + +Configuration runs lazily utilizing an `ensure_configured()` interceptor. 
+When the `StorageService` is instantiated for the first time by the `get_storage_service()` FastAPI dependency, it reaches out to `config.py` which pulls: + +- `CLOUDINARY_CLOUD_NAME` +- `CLOUDINARY_API_KEY` +- `CLOUDINARY_API_SECRET` + +from the application `settings` and statically configures the `cloudinary.config(secure=True)` global. Tracking the bool state `_configured` prevents redundant config payload calls. + +--- + +## Public Providers API (`service.py`) + +The `StorageService` contains distinct semantic methods. Under the hood, these methods delegate to a private `_upload()` coroutine after rigorously enforcing validations. + +### Upload Methods + +All upload methods require a `FastAPI.UploadFile` and a target `folder`. They natively support providing an optional `public_id` to enforce naming conventions (like using a User UUID for their avatar so it automatically overwrites). + +* `upload_image(...)` + * **Enforces:** Image Mimetypes & Image Size Limit. + * **Features:** Supports passing a dictionary chunk `transformation` (e.g., bounding box cropping, face targeting) to natively crop representations on the CDN before resting. + +* `upload_video(...)` + * **Enforces:** Video Mimetypes & Video Size Limit. + +* `upload_raw(...)` + * **Enforces:** Static Mimetypes (PDFs, ZIPs, txt) and uses Image Size limit. + +### Delete Method + +* `delete_asset(public_id: str, resource_type: str)` + * Provides targeted resource teardown ensuring GDPR erasure compliance on User and Asset destruction. + +--- + +## Validation & Constants + +The package proactively protects the API from malformed or malicious file dumps. +Defined in `constants.py`: + +**MIME Types Allowed:** +* **Images:** `image/jpeg`, `image/png`, `image/webp`, `image/gif`, `image/svg+xml` +* **Videos:** `video/mp4`, `video/webm`, `video/quicktime`, `video/x-msvideo` +* **Static:** `application/pdf`, `application/zip`, `text/plain`, `text/csv` + +**Folder Namespacing:** +Allows environment safety. 
`FOLDER_AVATARS`, `FOLDER_RECORDINGS`, `FOLDER_UPLOADS`. + +**Size Validations:** +The internal `_validate_file` scans both the incoming HTTP `content_type` header string and calculates the `file.size` threshold dynamically against the respective `.env` limit mappings. + +--- + +## Data Schemas (`schemas.py`) + +To decouple the application router returns from Cloudinary's raw dynamic `dict` responses, outcomes are strictly marshaled via Pydantic: + +### `UploadResult` +```python +{ + "public_id": "fluentmeet/avatars/abx123", + "secure_url": "https://res.cloudinary.com/...", + "resource_type": "image", + "format": "webp", + "bytes": 481023, + "width": 400, + "height": 400 +} +``` + +### `DeleteResult` +```python +{ + "public_id": "fluentmeet/avatars/abx123", + "result": "ok" # or "not found" +} +``` + +--- + +## Error Handling (`exceptions.py`) + +The service raises context-specific HTTP exceptions inheriting appropriately from Base classes so FastAPI naturally constructs HTTP 400s or 500s: + +* **`FileValidationError`** (400 Bad Request): Thrown synchronously when MIME or Megabyte limits are exceeded before a network call is made. +* **`StorageUploadError`** (500 Internal Error): Thrown if the Cloudinary API rejects the data packet. +* **`StorageDeleteError`** (500 Internal Error): Thrown if an explicit API delete fails fatally. diff --git a/app/external_services/deepgram/api-docs.md b/app/external_services/deepgram/api-docs.md new file mode 100644 index 0000000..6b64244 --- /dev/null +++ b/app/external_services/deepgram/api-docs.md @@ -0,0 +1,71 @@ +# FluentMeet Deepgram Integration Documentation + +> **Package Location:** `/app/external_services/deepgram` +> **Purpose:** Handles external asynchronous integrations with the Deepgram Speech-to-Text API. 
+ +--- + +## Table of Contents + +- [Overview](#overview) +- [Architecture](#architecture) +- [Public API](#public-api) +- [Configuration](#configuration) + +--- + +## Overview + +The `app/external_services/deepgram` package wraps the Deepgram REST `/v1/listen` endpoint natively enabling extremely fast conversion of `bytes` objects into text Strings. + +It is designed to be fully stateless and heavily depends on FastAPI standard dependencies & `httpx.AsyncClient` objects rather than installing Deepgram's heavy Python SDK, preserving application footprint and avoiding dependency bloat. + +--- + +## Architecture + +This package exposes a single class `DeepgramSTTService` bound as a Singleton. +It is actively injected and utilized globally by the `STTWorker` consumer daemon listening to Kafka `audio.raw`. + +### Execution Flow +1. Receives base64-decoded PCM strings. +2. Injects required API metadata mapping to settings boundaries. +3. Fires the `POST` request out asynchronously to the web REST Endpoint returning results. + +--- + +## Public API + +### `DeepgramSTTService` (`service.py`) + +A fully typed async service wrapping the REST endpoint. + +#### `transcribe(audio_bytes, language, sample_rate, encoding)` +Sends a block of data to Deepgram to fetch an interpretation. +* **Args:** + * `audio_bytes` *(bytes)*: Standard PCM binary string or OPUS stream bytes. + * `language` *(str)*: A localized ISO 639-1 code hint (e.g., `"en"`). + * `sample_rate` *(int)*: Standard `16000` (Hz). + * `encoding` *(str)*: Tells Deepgram the format (`"linear16"` or `"opus"`). 
+* **Returns:** + Returns a unified `dict` payload structure standard against multiple engines: + ```python + { + "text": "Hello world", + "confidence": 0.99, + "detected_language": "en", + "latency_ms": 32.5 + } + ``` +* **Exception Behavior:** Raises `httpx.HTTPStatusError` aggressively when anything other than an HTTP 2xx code is returned to enforce fallback failure and Dead-Letter-Queue routing in the caller blocks. + +--- + +## Configuration + +### `get_deepgram_headers()` (`config.py`) + +Ensures the authentication mechanisms are mapped securely from environment definitions. + +* Builds the dict mapping `Authorization: Token ` +* Fails fast natively issuing `RuntimeError` on startup if `DEEPGRAM_API_KEY` is completely missing from `.env` or Server Environment. diff --git a/app/external_services/deepgram/config.py b/app/external_services/deepgram/config.py index 82b3e5c..10d56a6 100644 --- a/app/external_services/deepgram/config.py +++ b/app/external_services/deepgram/config.py @@ -1,10 +1,22 @@ -"""Deepgram provider configuration.""" +"""Deepgram provider configuration module. + +Handles the secure retrieval of authentication headers required for the +Deepgram Speech-to-Text API. +""" from app.core.config import settings def get_deepgram_headers() -> dict[str, str]: - """Return authorization headers for the Deepgram REST API.""" + """Return authorization headers for the Deepgram REST API. + + Returns: + dict[str, str]: A dictionary containing the Authorization and + Content-Type headers mapping to the environment API key. + + Raises: + RuntimeError: If DEEPGRAM_API_KEY is not configured in the environment. 
+ """ if not settings.DEEPGRAM_API_KEY: raise RuntimeError("DEEPGRAM_API_KEY is not configured.") return { diff --git a/app/external_services/deepgram/service.py b/app/external_services/deepgram/service.py index 8ba6a82..bbcca18 100644 --- a/app/external_services/deepgram/service.py +++ b/app/external_services/deepgram/service.py @@ -1,4 +1,4 @@ -"""Deepgram Speech-to-Text service. +"""Deepgram Speech-to-Text service module. Wraps the Deepgram REST API (/v1/listen) for pre-recorded audio transcription. Each call sends a single audio chunk and returns @@ -17,7 +17,15 @@ class DeepgramSTTService: - """Stateless service for converting audio bytes to text via Deepgram.""" + """Stateless service for converting audio bytes to text via Deepgram. + + Provides a centralized client to execute audio transcription calls against + the Deepgram API using httpx async clients. It isolates the HTTP retry + timeout and API authentication headers injection from the rest of the app. + + Attributes: + _timeout (float): Max timeout for HTTP requests mapping to Deepgram. + """ def __init__(self, timeout: float = 10.0) -> None: self._timeout = timeout diff --git a/app/external_services/deepl/api-docs.md b/app/external_services/deepl/api-docs.md new file mode 100644 index 0000000..813c63a --- /dev/null +++ b/app/external_services/deepl/api-docs.md @@ -0,0 +1,71 @@ +# FluentMeet DeepL & LLM Translation Documentation + +> **Package Location:** `/app/external_services/deepl` +> **Purpose:** Handles external asynchronous integrations with the DeepL `/v2/translate` API and provides OpenAI LLM algorithmic fallbacks. + +--- + +## Table of Contents + +- [Overview](#overview) +- [Architecture](#architecture) +- [Public API (`service.py`)](#public-api-servicepy) +- [Fallback Mechanisms](#fallback-mechanisms) +- [Language Code Mapping](#language-code-mapping) + +--- + +## Overview + +The `app/external_services/deepl` package acts as the active backend for stage 3 of the real-time audio pipeline. 
It intercepts STT transcriptions and converts them dynamically into alternate target languages required by individual users in the meeting lobby. + +--- + +## Architecture + +To remain fully stateless and incredibly lightweight without depending on strict external third-party SDKs, the translation engines fire purely via `httpx.AsyncClient` objects wrapping the provider APIs. + +### Services Exposed +1. **`DeepLTranslationService`**: The primary translation engine pointing at `api-free.deepl.com`. +2. **`OpenAITranslationFallback`**: A secondary translation engine pivoting to `gpt-4o-mini` capable of interpreting unsupported dialects or surviving a DeepL service outage. + +These are injected globally by the `TranslationWorker` daemon in the `app/services` directory. + +--- + +## Public API (`service.py`) + +Both Translation services export an identical `translate()` asynchronous signature allowing polymorphic swapping on error conditions. + +#### `translate(text, source_language, target_language)` +* **Args:** + * `text` *(str)*: The text buffer requiring translation. + * `source_language` *(str)*: ISO 639-1 Hint language tag (e.g., `"fr"`). + * `target_language` *(str)*: Target localized ISO 639-1 tag constraint. +* **Returns:** + Returns a unified `dict` payload structure standard against multiple engines: + ```python + { + "translated_text": "Bonjour le monde", + "latency_ms": 115.5 + } + ``` +* **Exception Behavior:** Both primary engines explicitly raise `httpx.HTTPStatusError` aggressively so the `TranslationWorker` pipeline code can manage failures explicitly or execute immediate fallbacks. + +--- + +## Fallback Mechanisms + +DeepL is phenomenally fast, but supports a relatively narrow funnel of active language mappings. + +Inside the logic, before spinning up an HTTP context, the DeepL mapping is checked via `supports_language()`. 
If this yields `False`, or if a 500 API exception cascades back from DeepL, the system instantly catches the logic and bounces the payload securely to `OpenAITranslationFallback`. + +The fallback prompts OpenAI using a zero-shot strictly confined chat string: `"You are a professional translator. Translate the following text from {source} to {target}. Return ONLY the translated text, nothing else."` + +--- + +## Language Code Mapping + +DeepL requires esoteric capitalization modifications (e.g. `EN-US` instead of `en`, `PT-BR` instead of `pt`) which breaks pipeline standards. + +The service defines a private internal mapping table `_DEEPL_LANG_MAP` that captures the front-end user `en`, `de`, `fr` lowercase configurations and dynamically adapts them to DeepL formatting on ingress, reverting answers gracefully back natively before the function returns them up into Kafka topics. diff --git a/app/external_services/deepl/config.py b/app/external_services/deepl/config.py index 16b4067..3a22d24 100644 --- a/app/external_services/deepl/config.py +++ b/app/external_services/deepl/config.py @@ -1,10 +1,22 @@ -"""DeepL provider configuration.""" +"""DeepL provider configuration module. + +Handles the secure retrieval of authentication headers required for the +DeepL mapping algorithms via the REST API. +""" from app.core.config import settings def get_deepl_headers() -> dict[str, str]: - """Return authorization headers for the DeepL REST API.""" + """Return authorization headers for the DeepL REST API. + + Returns: + dict[str, str]: A dictionary containing the standard Authorization + and Content-Type parameters required by the DeepL endpoint. + + Raises: + RuntimeError: If DEEPL_API_KEY is missing from the environment. 
+ """ if not settings.DEEPL_API_KEY: raise RuntimeError("DEEPL_API_KEY is not configured.") return { diff --git a/app/external_services/deepl/service.py b/app/external_services/deepl/service.py index 997eb98..22466b9 100644 --- a/app/external_services/deepl/service.py +++ b/app/external_services/deepl/service.py @@ -1,4 +1,4 @@ -"""DeepL Translation service. +"""DeepL Translation service module. Wraps the DeepL REST API (/v2/translate) for text translation. Falls back to OpenAI GPT-4o-mini when DeepL is unavailable or @@ -44,7 +44,14 @@ class DeepLTranslationService: - """Stateless service for translating text via DeepL.""" + """Stateless service for translating text via DeepL. + + Provides an asynchronous native wrapper mapping to the REST API, + translating strings to strict target formats. + + Attributes: + _timeout (float): Max timeout for HTTP requests mapping to DeepL. + """ def __init__(self, timeout: float = 10.0) -> None: self._timeout = timeout @@ -103,12 +110,24 @@ async def translate( } def supports_language(self, language_code: str) -> bool: - """Check if DeepL supports a given target language.""" + """Check if DeepL supports a given target language. + + Args: + language_code (str): The ISO 639-1 language string to verify. + + Returns: + bool: True if the language is supported natively by DeepL, + otherwise False. + """ return language_code.lower() in _DEEPL_LANG_MAP class OpenAITranslationFallback: - """Fallback translation via OpenAI GPT-4o-mini for unsupported DeepL pairs.""" + """Fallback translation via OpenAI GPT-4o-mini for unsupported DeepL pairs. + + Attributes: + _timeout (float): Max HTTP timeout allowance for the fallback connection. 
+ """ def __init__(self, timeout: float = 15.0) -> None: self._timeout = timeout diff --git a/app/external_services/mailgun/__init__.py b/app/external_services/mailgun/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/app/external_services/openai_tts/api-docs.md b/app/external_services/openai_tts/api-docs.md new file mode 100644 index 0000000..a976f6c --- /dev/null +++ b/app/external_services/openai_tts/api-docs.md @@ -0,0 +1,73 @@ +# FluentMeet OpenAI TTS Documentation + +> **Package Location:** `/app/external_services/openai_tts` +> **Purpose:** Handles external asynchronous integrations with the OpenAI Text-to-Speech API. + +--- + +## Table of Contents + +- [Overview](#overview) +- [Architecture](#architecture) +- [Public API (`service.py`)](#public-api-servicepy) +- [Audio Formats](#audio-formats) +- [Configuration](#configuration) + +--- + +## Overview + +The `app/external_services/openai_tts` package acts as the active backend for stage 4 of the real-time audio pipeline. It intercepts translated text streams and synthesizes them into dynamic real-time human voices using OpenAI's `tts-1` model via the `/v1/audio/speech` endpoints. + +--- + +## Architecture + +To minimize dependencies and footprint, avoiding heavy pip installments, the `OpenAITTSService` entirely abstracts OpenAI SDK endpoints via raw `httpx.AsyncClient` objects natively. + +It is designed as a pure stateless singleton and gets injected dynamically into the `TTSWorker` Daemon inside `app/services` based on the `.env` file configuration setting dictating whether Voice.ai or OpenAI drives speech synthesis. + +--- + +## Public API (`service.py`) + +### `OpenAITTSService` + +The fully asynchronous service layer encapsulating synthesis logic. + +#### `synthesize(text, voice, encoding)` +Executes the API request to retrieve the generated Audio chunk. +* **Args:** + * `text` *(str)*: Target string text block to convert to voice. 
+ * `voice` *(str, optional)*: OpenAI voice profile (`alloy`, `echo`, `fable`, `onyx`, `nova`, `shimmer`). Overrides environmental mapping defaults. + * `encoding` *(str)*: Required chunk encoding mapping (`"linear16"` or `"opus"`). +* **Returns:** + Returns a strict `dict` containing the binary footprint needed to transmit over Kafka natively. + ```python + { + "audio_bytes": b"\\x01\\x00\\xFF...", + "sample_rate": 24000, + "latency_ms": 284.1 + } + ``` +* **Exception Behavior:** Immediately raises `httpx.HTTPStatusError` on non-200 responses to enforce the system-wide Dead Letter Queue routing schema via the `Exceptions` trapping mechanism. + +--- + +## Audio Formats + +Native AI API endpoints refer to raw data by highly specific designations natively (e.g. `pcm` instead of `linear16`). The underlying module natively provides the dictionary `_FORMAT_MAP` routing internal definitions like `"linear16"` directly to `"pcm"` in the OpenAPI REST schemas. + +*Note: OpenAI inherently resolves standard `pcm` packets natively to a 24kHZ mono output footprint, distinct from STT endpoints expecting 16kHz standard configurations.* + +--- + +## Configuration + +### `get_openai_tts_headers()` (`config.py`) + +Generates strict formatting API headers. + +* Builds the JSON dict mapping: `Authorization: Bearer ` natively. +* Enforces `Content-Type: application/json`. +* Acts as an architecture boundary: automatically throws a `RuntimeError` failure natively on instantiation if the application failed to boot with `OPENAI_API_KEY` defined. diff --git a/app/external_services/openai_tts/config.py b/app/external_services/openai_tts/config.py index 490932b..7135d82 100644 --- a/app/external_services/openai_tts/config.py +++ b/app/external_services/openai_tts/config.py @@ -1,10 +1,22 @@ -"""OpenAI TTS provider configuration.""" +"""OpenAI TTS provider configuration module. + +Handles the secure retrieval of authentication headers required for the +OpenAI Text-to-Speech API. 
+""" from app.core.config import settings def get_openai_tts_headers() -> dict[str, str]: - """Return authorization headers for the OpenAI TTS API.""" + """Return authorization headers for the OpenAI TTS API. + + Returns: + dict[str, str]: A dictionary containing the standard Authorization + and Content-Type parameters mapping to the environment API key. + + Raises: + RuntimeError: If OPENAI_API_KEY is not configured in the environment. + """ if not settings.OPENAI_API_KEY: raise RuntimeError("OPENAI_API_KEY is not configured.") return { diff --git a/app/external_services/openai_tts/service.py b/app/external_services/openai_tts/service.py index 3897cf3..9360dc3 100644 --- a/app/external_services/openai_tts/service.py +++ b/app/external_services/openai_tts/service.py @@ -1,4 +1,4 @@ -"""OpenAI Text-to-Speech service. +"""OpenAI Text-to-Speech service module. Wraps the OpenAI TTS API (/v1/audio/speech) to convert translated text into synthesized audio bytes. Returns raw audio in the configured format. @@ -22,7 +22,14 @@ class OpenAITTSService: - """Stateless service for converting text to speech via OpenAI.""" + """Stateless service for converting text to speech via OpenAI. + + Provides an asynchronous native wrapper mapping to the REST API, + translating localized strings into binary audio representations. + + Attributes: + _timeout (float): Max timeout for HTTP requests mapping to OpenAI. + """ def __init__(self, timeout: float = 15.0) -> None: self._timeout = timeout @@ -37,12 +44,13 @@ async def synthesize( """Convert text to audio bytes via OpenAI TTS. Args: - text: The text to synthesize. - voice: OpenAI voice ID (alloy, echo, fable, onyx, nova, shimmer). - encoding: Output encoding (``linear16`` or ``opus``). + text (str): The text to synthesize. + voice (str | None): OpenAI voice ID (alloy, echo, fable, onyx, nova, shimmer). Defaults to None. + encoding (str): Output encoding (``linear16`` or ``opus``). Defaults to "linear16". 
Returns: - A dict with ``audio_bytes``, ``sample_rate``, ``latency_ms``. + dict: A dictionary containing ``audio_bytes``, ``sample_rate``, + and ``latency_ms``. Raises: httpx.HTTPStatusError: On non-2xx responses from OpenAI. diff --git a/app/external_services/voiceai/api-docs.md b/app/external_services/voiceai/api-docs.md new file mode 100644 index 0000000..d23a9b9 --- /dev/null +++ b/app/external_services/voiceai/api-docs.md @@ -0,0 +1,74 @@ +# FluentMeet Voice.ai Integration Documentation + +> **Package Location:** `/app/external_services/voiceai` +> **Purpose:** Handles external asynchronous integrations with the Voice.ai Text-to-Speech Generation API. + +--- + +## Table of Contents + +- [Overview](#overview) +- [Architecture](#architecture) +- [Public API (`service.py`)](#public-api-servicepy) +- [Format & Model Targeting](#format--model-targeting) +- [Configuration](#configuration) + +--- + +## Overview + +The `app/external_services/voiceai` package acts as the active backend for stage 4 of the real-time audio pipeline. It intercepts translated text streams and synthesizes them into dynamic real-time human voices using the Voice.ai `/api/v1/tts/speech` endpoints. Note that this package runs dynamically as an alternative to OpenAI depending on standard environment configurations (`ACTIVE_TTS_PROVIDER="voiceai"`). + +--- + +## Architecture + +This service acts identically to the OpenAI SDK. To maintain tight coupling with core architectures, ignoring bulk Python packages, it resolves all remote calls using `httpx.AsyncClient` blocks statelessly. + +The configuration relies on environment variables, pulling `VOICEAI_TTS_MODEL` and configuring payload definitions instantly per-request. + +--- + +## Public API (`service.py`) + +### `VoiceAITTSService` + +The fully asynchronous service layer encapsulated via Singleton pattern mapping logic to `/tts/speech`. 
+ +#### `synthesize(text, language, voice_id, encoding)` +Initiates asynchronous remote calls to stream speech endpoints. +* **Args:** + * `text` *(str)*: Target string text block mapped to conversion. + * `language` *(str)*: Native mapping used specifically by Voice.ai context engines (e.g., swapping to multilingual vs english default models automatically). + * `voice_id` *(str, optional)*: An explicit ID tag generated via Voice.ai console for custom cloned models. Defaults to default models if None. + * `encoding` *(str)*: Encoding request (`"linear16"` or `"opus"`). +* **Returns:** + Returns a unified `dict` format identical to OpenAI payload structures, guaranteeing seamless swapping inside caller DAEMONS without syntax rewrites. + ```python + { + "audio_bytes": b"\\x01\\x00\\xFF...", + "sample_rate": 16000, + "latency_ms": 352.1 + } + ``` +* **Exception Behavior:** Immediately traps non-200 configurations routing `httpx.HTTPStatusError` directly to Kafka Retry protocols. + +--- + +## Format & Model Targeting + +Voice.ai resolves API properties inherently different from standard TTS parameters: + +* **Format Resolutions (`_FORMAT_MAP`):** Internal definitions `"linear16"` correctly route towards `"pcm_16000"` parameter arrays. Internal definitions `"opus"` target `"opus_48000_64"`. This directly influences returned `sample_rate` logic dynamically (switching from 16kHz to 48kHz automatically). +* **Model Adjustments:** Voice.ai tracks multiple models explicitly. If `VOICEAI_TTS_MODEL` is set to `"multilingual-something"`, but the detected/passed `language` is purely `"en"`, the `_synthesize` module inherently edits the parameter dictionary replacing `.replace("multilingual-", "")` resolving natively to a faster specialized english model automatically. + +--- + +## Configuration + +### `get_voiceai_headers()` (`config.py`) + +Generates strict formatting API headers. + +* Builds the JSON dict mapping: `Authorization: Bearer ` natively. 
+* Acts as an architecture boundary triggering explicit `RuntimeError` failure on initialization if `VOICE_AI_API_KEY` isn't accessible in server scope. diff --git a/app/external_services/voiceai/config.py b/app/external_services/voiceai/config.py index 4ccd079..2fa9d64 100644 --- a/app/external_services/voiceai/config.py +++ b/app/external_services/voiceai/config.py @@ -1,10 +1,22 @@ -"""Voice.ai TTS provider configuration.""" +"""Voice.ai TTS provider configuration module. + +Handles the secure retrieval of authentication headers required for the +Voice.ai Text-to-Speech API. +""" from app.core.config import settings def get_voiceai_headers() -> dict[str, str]: - """Return authorization headers for the Voice.ai TTS API.""" + """Return authorization headers for the Voice.ai TTS API. + + Returns: + dict[str, str]: A dictionary containing the standard Authorization + and Content-Type parameters mapping to the environment API key. + + Raises: + RuntimeError: If VOICE_AI_API_KEY is not configured in the environment. + """ if not settings.VOICE_AI_API_KEY: raise RuntimeError("VOICE_AI_API_KEY is not configured.") return { diff --git a/app/external_services/voiceai/service.py b/app/external_services/voiceai/service.py index cf51452..6b2492e 100644 --- a/app/external_services/voiceai/service.py +++ b/app/external_services/voiceai/service.py @@ -1,4 +1,4 @@ -"""Voice.ai Text-to-Speech service. +"""Voice.ai Text-to-Speech service module. Wraps the Voice.ai TTS API (POST /api/v1/tts/speech) to convert translated text into synthesized audio. Supports multilingual voices, PCM/Opus output, @@ -25,7 +25,14 @@ class VoiceAITTSService: - """Stateless service for converting text to speech via Voice.ai.""" + """Stateless service for converting text to speech via Voice.ai. + + Provides an asynchronous native wrapper mapping to the REST API, + translating localized strings into binary audio representations. 
+ + Attributes: + _timeout (float): Max timeout for HTTP requests mapping to Voice.ai. + """ def __init__(self, timeout: float = 60.0) -> None: self._timeout = timeout @@ -41,13 +48,14 @@ async def synthesize( """Convert text to audio bytes via Voice.ai TTS. Args: - text: The text to synthesize. - language: ISO 639-1 language code for voice selection. - voice_id: Optional Voice.ai voice ID. Uses default if omitted. - encoding: Output encoding (``linear16`` or ``opus``). + text (str): The text to synthesize. + language (str): ISO 639-1 language code for voice selection. Defaults to "en". + voice_id (str | None): Optional Voice.ai voice ID. Uses default if omitted. + encoding (str): Output encoding (``linear16`` or ``opus``). Defaults to "linear16". Returns: - A dict with ``audio_bytes``, ``sample_rate``, ``latency_ms``. + dict: A dictionary containing ``audio_bytes``, ``sample_rate``, + and ``latency_ms``. Raises: httpx.HTTPStatusError: On non-2xx responses from Voice.ai. diff --git a/app/kafka/api-docs.md b/app/kafka/api-docs.md new file mode 100644 index 0000000..6d5919f --- /dev/null +++ b/app/kafka/api-docs.md @@ -0,0 +1,122 @@ +# FluentMeet Kafka Architecture Documentation + +> **Package Location:** `/app/kafka` +> **Purpose:** Event-driven architecture infrastructure, abstracting AIOKafka underlying intricacies. + +--- + +## Table of Contents + +- [Overview](#overview) +- [Architecture & Lifecycle](#architecture--lifecycle) +- [Topic Registry](#topic-registry) +- [Producers & Consumers](#producers--consumers) + - [KafkaProducer (`producer.py`)](#kafkaproducer-producerpy) + - [BaseConsumer (`consumer.py`)](#baseconsumer-consumerpy) +- [Dead Letter Queues (DLQ) & Retries](#dead-letter-queues-dlq--retries) +- [Event Schemas (`schemas.py`)](#event-schemas-schemaspy) +- [Error Handling (`exceptions.py`)](#error-handling-exceptionspy) + +--- + +## Overview + +The `app/kafka` package provides a high-level, strongly-typed asynchronous wrapper over `aiokafka`. 
+It entirely hides the serialization mechanisms from feature-level developers and implements hardened stability patterns out-of-the-box including Singleton Lifecycle Management, Automatic Topic Provisioning, Manual Offset Commits, Linear Retry Backoffs, and automatic Dead Letter Queue (DLQ) routing. + +--- + +## Architecture & Lifecycle + +The package revolves around the `KafkaManager` (`manager.py`). +This is a strictly controlled Singleton bound to the FastAPI application lifespan (typically started inside `app/main.py @asynccontextmanager`). + +**Lifecycle sequence:** +1. **Instantiate:** `get_kafka_manager()` creates the `KafkaProducer` and registers instances of `BaseConsumer` (e.g., `EmailConsumerWorker`, `STTWorker`). +2. **Start (`manager.start()`):** + - Automatically provisions missing Kafka topics defined in `topics.py` using `AIOKafkaAdminClient`. + - Starts the global `KafkaProducer`. + - Starts background `asyncio.Task` loops for each registered `BaseConsumer`. +3. **Run:** The application accepts requests, firing items into the Producer, and Consumers eagerly rip items from the broker. +4. **Shutdown (`manager.stop()`):** + - Gently cancels and awaits all consumer `asyncio.Task` loops. + - Cleans up and stops the producer. + +--- + +## Topic Registry + +Defined in `topics.py`. All standard strings are prefixed or namespaced by domain. The manager auto-creates these alongside their mirror `dlq.` prefixes. + +| Topic Constant | String | Purpose | +|---|---|---| +| `NOTIFICATIONS_EMAIL` | `notifications.email` | Dispatch queue for Jinja2 rendered SMTP emails via Mailgun. | +| `AUDIO_RAW` | `audio.raw` | Stage 1 WebSocket base64 binary PCM streams. | +| `TEXT_ORIGINAL` | `text.original` | Stage 2 original STT transcription strings. | +| `TEXT_TRANSLATED` | `text.translated` | Stage 3 Multi-casted translation blocks. | +| `AUDIO_SYNTHESIZED` | `audio.synthesized` | Stage 4 TTS returning audio binary blocks for egress. 
| + +*(Media upload topics like `media.upload` are registered but currently inactive awaiting feature expansions).* + +--- + +## Producers & Consumers + +### KafkaProducer (`producer.py`) + +A clean abstraction over `AIOKafkaProducer`. +- **Serialization:** Forces all payloads through `json.dumps` natively. Requires developers to pass Pydantic `BaseEvent` models. +- **Methods:** `send(topic, event, key)` and a `.ping()` health-check tool to verify broker connectivity via forcing metadata refreshes. + +### BaseConsumer (`consumer.py`) + +An `abc.ABC` parent class that all worker daemons (like `STTWorker`) must inherit from. +Subclasses implement a single asynchronous method: `async def handle(self, event: BaseEvent) -> None`. + +**Built-In Resiliency Features:** +1. **Manual Commits:** By default, disables `auto_commit`. An offset block is *only* marked as processed on the broker if the `.handle()` function exits flawlessly. A pod crash mid-process guarantees message re-delivery. +2. **Typed Context:** Automatically intercepts incoming `bytes`, unpacks the JSON, and leverages the subclass's declared `event_schema` to build and validate a Pydantic object before passing it inside `.handle()`. + +--- + +## Dead Letter Queues (DLQ) & Retries + +If `.handle()` throws an Exception, the BaseConsumer automatically traps it and triggers the **Retry Matrix**. + +1. **Linear Backoff:** Uses `settings.KAFKA_MAX_RETRIES` (default 3) and `settings.KAFKA_RETRY_BACKOFF_MS`. A failed event sleeps its asynchronous task scaling linearly (e.g., attempt 1 sleeps 1s, attempt 2 sleeps 2s). +2. **DLQ Routing:** If the max retries are exhausted, the event is permanently considered unrecoverable (poison pill). +3. Instead of stalling the Kafka partition, the Consumer packages the original failed payload + integer retry counters + text exception names into a rigid **`DLQEvent`** schema. +4. 
It commands the *Producer* to fling this DLQ object into `dlq.{original_topic}` (e.g., `dlq.notifications.email`). +5. The offset is *then* committed, allowing the partition to move forward. + +--- + +## Event Schemas (`schemas.py`) + +All objects traversing the Kafka broker must inherit from `BaseEvent[T]`. + +**`BaseEvent` Wrapper:** +Every payload gets an automatic unique UUID `event_id` and an ISO UTC `timestamp`. This is crucial for tracking events across distributed tracing platforms. + +**`DLQEvent`:** +```json +{ + "original_event_id": "uuid", + "original_topic": "notifications.email", + "original_event": { ... payload blob ... }, + "error_message": "TransientEmailDeliveryError: Mailgun 500", + "failed_at": "datetime", + "retry_count": 3 +} +``` + +*(Note: The high-speed pipeline payloads are located centrally in `/app/schemas/pipeline.py` rather than here, separating abstract infrastructure schemas from heavy feature logic).* + +--- + +## Error Handling (`exceptions.py`) + +Extends the core `FluentMeetException` allowing HTTP frameworks or health checks to parse standard Error Codes. +- `KafkaConnectionError` +- `KafkaPublishError` +- `KafkaConsumeError` diff --git a/app/kafka/consumer.py b/app/kafka/consumer.py index 7efd328..d0eac21 100644 --- a/app/kafka/consumer.py +++ b/app/kafka/consumer.py @@ -1,3 +1,10 @@ +"""Kafka Consumer module. + +Provides the `BaseConsumer` abstract class containing the core logic for +safely consuming messages from Kafka broker topics, executing linear +backoff retries, and forwarding poison pills to dead-letter queues. +""" + import abc import asyncio import contextlib @@ -16,19 +23,17 @@ class BaseConsumer(abc.ABC): - """ - Abstract base class for all Kafka consumers. 
- - Subclasses must declare class-level attributes: - topic: str — the Kafka topic to subscribe to - group_id: str — the consumer group identifier - event_schema: Type — the Pydantic BaseEvent subclass for deserialization - - Features: - - Manual offset commits (offsets only committed after successful handle()) - - Configurable linear backoff retry with KAFKA_MAX_RETRIES - - Dead-letter queue (DLQ) forwarding via a proper DLQEvent wrapper - - Graceful shutdown via asyncio.Task cancellation + """Abstract base class for all Kafka consumers. + + This class enforces a standard structure for all Kafka consumer + workers. It handles the underlying asynchronous consumer loop, + manual offset committing, linear retry backoffs, and dead-letter + queue (DLQ) propagation. + + Attributes: + topic: The Kafka topic to subscribe to. + group_id: The consumer group identifier. + event_schema: The Pydantic BaseEvent subclass for deserialization. """ topic: str diff --git a/app/kafka/exceptions.py b/app/kafka/exceptions.py index 23a9ee5..57fdca6 100644 --- a/app/kafka/exceptions.py +++ b/app/kafka/exceptions.py @@ -1,3 +1,9 @@ +"""Kafka specific exceptions. + +Defines custom exceptions thrown by the Kafka wrapper components for +connection, publishing, and consumption errors. +""" + from app.core.exceptions import FluentMeetException diff --git a/app/kafka/manager.py b/app/kafka/manager.py index 67c0f6d..bad7fec 100644 --- a/app/kafka/manager.py +++ b/app/kafka/manager.py @@ -1,3 +1,10 @@ +"""Kafka Manager module. + +This module provides the central `KafkaManager` singleton responsible +for orchestrating the lifecycles of all producers, consumers, and topics +during the FastAPI framework startup and shutdown events. +""" + import logging from typing import Optional @@ -11,10 +18,13 @@ class KafkaManager: - """ - Singleton manager responsible for Kafka producer and consumer lifecycles. + """Singleton manager responsible for Kafka lifecycle. 
+ + This manager provisions required topics, initializes the global + Kafka producer, and starts the asynchronous tasks for all registered + consumers. - Usage: + Example: manager = get_kafka_manager() manager.register_consumer(MyEmailConsumer()) await manager.start() # called from FastAPI lifespan diff --git a/app/kafka/producer.py b/app/kafka/producer.py index 9b49543..746daab 100644 --- a/app/kafka/producer.py +++ b/app/kafka/producer.py @@ -1,3 +1,9 @@ +"""Kafka Producer module. + +This module provides a wrapper around AIOKafkaProducer to handle robust +asynchronous message publishing and automatic schema serialization. +""" + import json import logging from typing import Any @@ -13,8 +19,10 @@ class KafkaProducer: - """ - Wrapper around AIOKafkaProducer with Pydantic serialization. + """Wrapper around AIOKafkaProducer with Pydantic serialization. + + Provides high-level methods to serialize and publish `BaseEvent` + payloads directly into Kafka topics. """ def __init__(self, bootstrap_servers: str): diff --git a/app/kafka/schemas.py b/app/kafka/schemas.py index 52f9c28..12e19fe 100644 --- a/app/kafka/schemas.py +++ b/app/kafka/schemas.py @@ -1,3 +1,9 @@ +"""Kafka event schemas. + +Defines the Pydantic data transfer objects used for serializing and +deserializing event payloads over the Kafka broker. +""" + import uuid from datetime import UTC, datetime from typing import Any, Generic, TypeVar diff --git a/app/kafka/topics.py b/app/kafka/topics.py index 137ee83..528657f 100644 --- a/app/kafka/topics.py +++ b/app/kafka/topics.py @@ -1,3 +1,9 @@ +"""Kafka topic constants. + +This module defines the topic strings used across the application for +producers and consumers. 
+""" + from typing import Final # Email notification topics diff --git a/app/main.py b/app/main.py index 4a43916..d8c3110 100644 --- a/app/main.py +++ b/app/main.py @@ -12,7 +12,7 @@ from app.core.init_admin import init_admin from app.core.rate_limiter import limiter, rate_limit_exception_handler from app.core.sanitize import sanitize_for_log -from app.db.session import SessionLocal +from app.db.session import SessionLocal, get_engine from app.kafka.manager import get_kafka_manager from app.routers import api_router @@ -33,6 +33,8 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]: # Initialize Admin try: + # Ensure the engine is initialized and SessionLocal is bound + get_engine() with SessionLocal() as db_session: init_admin(db_session) except Exception as exc: diff --git a/app/models/api-docs.md b/app/models/api-docs.md new file mode 100644 index 0000000..a340ab7 --- /dev/null +++ b/app/models/api-docs.md @@ -0,0 +1,84 @@ +# FluentMeet Models Documentation + +> **Package Location:** `/app/models` +> **Purpose:** Centralized SQLAlchemy Declarative Base and Alembic Schema Aggregation. + +--- + +## Table of Contents + +- [Overview](#overview) +- [Architecture](#architecture) +- [Components](#components) + - [The Declarative Base (`base.py`)](#the-declarative-base-basepy) + - [Model Aggregation (`__init__.py`)](#model-aggregation-__init__py) + +--- + +## Overview + +The `app/models` package serves as the foundational data layer configuration for the FluentMeet application. Unlike older monolithic architectures that place all SQLAlchemy models in a single `models.py` file, FluentMeet uses a domain-driven structure where models live inside their respective feature modules (e.g., `app/modules/meeting/models.py`). + +To satisfy SQLAlchemy and Alembic strict requirements for database schema generation, this package acts as the central initialization and aggregation point for the ORM. 
+ +--- + +## Architecture + +This package solves the classic ORM bootstrap problem by centralizing the `Base` class, which every module imports, and then importing those completed models back into an `__init__.py` so Alembic's `env.py` has a single, complete metadata registry object to inspect during migrations. + +``` +┌─────────────────────────┐ ┌─────────────────────────┐ +│ app/modules/auth/ │ │ app/modules/meeting/ │ +│ models.py │ │ models.py │ +│ (User, Tokens, etc.) │ │ (Room, Participant...) │ +└────────────┬────────────┘ └────────────┬────────────┘ + │ │ + ▼ ▼ +┌─────────────────────────────────────────────────────────────┐ +│ app/models/__init__.py │ +│ │ +│ from app.models.base import Base │ +│ from app.modules.meeting.models import Room, ... │ +│ │ +│ __all__ = ["Base", "Room", ...] │ +└────────────────────────────┬────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ alembic/env.py │ +│ │ +│ from app.models import Base │ +│ target_metadata = Base.metadata │ +└─────────────────────────────────────────────────────────────┘ +``` + +--- + +## Components + +### The Declarative Base (`base.py`) + +This file contains the absolute minimum required to establish the SQLAlchemy 2.0 ORM base using `DeclarativeBase`. + +```python +from sqlalchemy.orm import DeclarativeBase + +class Base(DeclarativeBase): + pass +``` + +**Why it's isolated:** +Decoupling the `Base` metadata from the actual Model files prevents Circular Import errors between the central registry and the feature modules that need to inherit from it. + +### Model Aggregation (`__init__.py`) + +This file aggregates the distributed ORM models, making them easily accessible. 
+ +It currently exports: +- `Base` (The core declarative metadata class) +- `Room` (from `app.modules.meeting.models`) +- `Participant` (from `app.modules.meeting.models`) +- `MeetingInvitation` (from `app.modules.meeting.models`) + +*(Note: It is recommended that as new modules are created—such as the `auth` module—their respective ORM entities e.g., `User`, are also imported into this initialization file to ensure complete coverage by the Alembic auto-migration tool.)* diff --git a/app/models/base.py b/app/models/base.py index fa2b68a..952a409 100644 --- a/app/models/base.py +++ b/app/models/base.py @@ -1,5 +1,20 @@ +"""SQLAlchemy foundational declarative base. + +This module provides the core `Base` metadata class used by all domain +models within the FluentMeet application. By centralizing the +DeclarativeBase here, we prevent circular import issues when different +module packages need to define relational schemas. +""" + from sqlalchemy.orm import DeclarativeBase class Base(DeclarativeBase): + """The central SQLAlchemy 2.0 declarative base registry. + + All ORM models across the application must inherit from this class + to ensure their metadata is properly registered for Alembic + auto-migrations and query building. + """ + pass diff --git a/app/modules/auth/account_lockout.py b/app/modules/auth/account_lockout.py index c9065f3..242cb21 100644 --- a/app/modules/auth/account_lockout.py +++ b/app/modules/auth/account_lockout.py @@ -47,6 +47,11 @@ class AccountLockoutService: LOCKED_PREFIX = "account_locked" def __init__(self, redis_client: aioredis.Redis | None = None) -> None: + """Initialize the AccountLockoutService. + + Args: + redis_client (aioredis.Redis | None): Optional overriding injected Redis Async client. Defaults to None. 
+ """ self._redis = redis_client or _get_redis_client() self._max_attempts = settings.MAX_FAILED_LOGIN_ATTEMPTS self._lockout_ttl = settings.ACCOUNT_LOCKOUT_DAYS * 86400 @@ -66,11 +71,22 @@ def _locked_key(self, email: str) -> str: # ------------------------------------------------------------------ async def is_locked(self, email: str) -> bool: - """Return ``True`` if the account for *email* is currently locked.""" + """Return ``True`` if the account for *email* is currently locked. + + Args: + email (str): Target user email identifier. + + Returns: + bool: True if account is locked, False otherwise. + """ return bool(await self._redis.exists(self._locked_key(email))) async def record_failed_attempt(self, email: str) -> None: - """Increment the failure counter and lock the account if threshold reached.""" + """Increment the failure counter and lock the account if threshold reached. + + Args: + email (str): Target user email identifier mapping tracking block. + """ attempts_key = self._attempts_key(email) count = await self._redis.incr(attempts_key) @@ -89,9 +105,66 @@ async def record_failed_attempt(self, email: str) -> None: ) async def reset_attempts(self, email: str) -> None: - """Clear the failure counter (called on successful login).""" + """Clear the failure counter (called on successful login). + + Args: + email (str): Target user email explicitly tracking lockouts locally. + """ await self._redis.delete(self._attempts_key(email)) + async def get_lockout_info(self, email: str) -> dict: + """Fetch precise lockout metadata indicating limits and remaining time. + + Args: + email (str): Target user email identifier. + + Returns: + dict: Lockout status containing is_locked, lock_time_left, and attempts_remaining. 
+ """ + is_locked = bool(await self._redis.exists(self._locked_key(email))) + lock_time_left = None + if is_locked: + ttl_secs = await self._redis.ttl(self._locked_key(email)) + if ttl_secs > 0: + lock_time_left = self._format_duration(ttl_secs) + + attempts_bytes = await self._redis.get(self._attempts_key(email)) + attempts = int(attempts_bytes) if attempts_bytes else 0 + attempts_remaining = max(0, self._max_attempts - attempts) + + return { + "is_locked": is_locked, + "lock_time_left": lock_time_left, + "attempts_remaining": attempts_remaining, + } + + def _format_duration(self, seconds: int) -> str: + """Format an integer TTL into a precise human-readable duration.""" + if seconds <= 0: + return "0 seconds" + + days, remainder = divmod(seconds, 86400) + hours, remainder = divmod(remainder, 3600) + minutes, seconds_remaining = divmod(remainder, 60) + + parts = [] + if days: + parts.append(f"{days} day{'s' if days > 1 else ''}") + if hours: + parts.append(f"{hours} hour{'s' if hours > 1 else ''}") + if minutes: + parts.append(f"{minutes} minute{'s' if minutes > 1 else ''}") + + if not parts: + parts.append( + f"{seconds_remaining} second{'s' if seconds_remaining > 1 else ''}" + ) + + if len(parts) == 1: + return parts[0] + else: + return f"{', '.join(parts[:-1])} and {parts[-1]}" + # Module-level singleton ----------------------------------------------- account_lockout_service = AccountLockoutService() diff --git a/app/modules/auth/api-docs.md b/app/modules/auth/api-docs.md new file mode 100644 index 0000000..bfde768 --- /dev/null +++ b/app/modules/auth/api-docs.md @@ -0,0 +1,1053 @@ +# FluentMeet Authentication API Documentation + +> **Base URL:** `/api/v1/auth` +> **Version:** 1.0 · **Protocol:** REST over HTTPS · **Content-Type:** `application/json` + +--- + +## Table of Contents + +- [Overview](#overview) +- [Architecture](#architecture) +- [Authentication Flow](#authentication-flow) +- [Security Mechanisms](#security-mechanisms) +- [Endpoints](#endpoints) + 
- [POST /signup](#post-signup) + - [POST /login](#post-login) + - [GET /verify-email](#get-verify-email) + - [POST /resend-verification](#post-resend-verification) + - [POST /forgot-password](#post-forgot-password) + - [POST /reset-password](#post-reset-password) + - [POST /change-password](#post-change-password) + - [POST /logout](#post-logout) + - [POST /refresh-token](#post-refresh-token) + - [GET /google/login](#get-googlelogin) + - [GET /google/callback](#get-googlecallback) +- [Data Models](#data-models) +- [Request / Response Schemas](#request--response-schemas) +- [Error Codes Reference](#error-codes-reference) +- [Configuration Reference](#configuration-reference) +- [Internal Services](#internal-services) + +--- + +## Overview + +The FluentMeet authentication module provides a complete identity and access management system built on **FastAPI**. It supports: + +- **Email/password registration** with mandatory email verification. +- **Google OAuth 2.0** social login with automatic account linking. +- **JWT-based session management** using short-lived access tokens and long-lived refresh tokens. +- **Refresh Token Rotation** with automatic reuse detection and full session invalidation. +- **Account lockout** after repeated failed login attempts. +- **Password recovery** via secure one-time email tokens. +- **Rate limiting** on all sensitive endpoints via SlowAPI. + +All tokens are signed with **HS256** and a server-side secret key. Refresh tokens are delivered exclusively via **HttpOnly, Secure, SameSite=Strict** cookies and are never exposed in response bodies. 
+ +--- + +## Architecture + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ FastAPI Router │ +│ (app/modules/auth/router.py) │ +├──────────┬──────────┬──────────────┬───────────┬────────────────┤ +│ │ │ │ │ │ +│ AuthService AuthVerification GoogleOAuth AccountLockout │ +│ (service.py) Service Service Service │ +│ │ (verification.py) (oauth_google.py) (account_ │ +│ │ │ │ │ lockout.py) │ +│ ▼ ▼ │ ▼ │ +│ ┌──────────┐ ┌──────────┐ │ ┌────────────┐ │ +│ │ Security │ │ Email │ │ │ Redis │ │ +│ │ Service │ │ Producer │ │ │ (lockout + │ │ +│ │(core/ │ │ Service │ │ │ tokens) │ │ +│ │security) │ │ │ │ └────────────┘ │ +│ └──────────┘ └──────────┘ │ │ +│ │ │ │ +│ ▼ │ │ +│ ┌──────────┐ ┌──────────┐ ┌────────────┐ │ +│ │PostgreSQL│ │ Google │ │ TokenStore │ │ +│ │ (Users, │ │ OAuth2 │ │ Service │ │ +│ │ Tokens) │ │ Provider │ │(token_ │ │ +│ └──────────┘ └──────────┘ │ store.py) │ │ +│ └────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +### Module Files + +| File | Purpose | +|---|---| +| `router.py` | FastAPI route definitions and HTTP-layer logic | +| `service.py` | Core business logic — signup, login, password reset/change, OAuth user resolution | +| `schemas.py` | Pydantic request/response models and validators | +| `models.py` | SQLAlchemy ORM models (`User`, `VerificationToken`, `PasswordResetToken`) | +| `dependencies.py` | FastAPI dependency injection factories | +| `verification.py` | Email verification token lifecycle | +| `token_store.py` | Redis-backed refresh token storage and access token blacklisting | +| `account_lockout.py` | Redis-backed brute-force protection | +| `oauth_google.py` | Google OAuth 2.0 authorization code flow | +| `constants.py` | Enums — `UserRole`, `SupportedLanguage` | + +--- + +## Authentication Flow + +### Email/Password Registration Flow + +``` +Client Server Email Service + │ │ │ + │ POST /signup │ │ + │ {email, password, ...} ──► │ │ + │ │── Create user 
(unverified) │ + │ │── Generate verification token │ + │ │── Enqueue verification email ──► + │ ◄── 201 {user_id, ...} │ │ + │ │ │ + │ User clicks email link │ │ + │ GET /verify-email?token=...──►│ │ + │ │── Validate token │ + │ │── Set is_verified = True │ + │ ◄── 200 {message} │ │ + │ │ │ + │ POST /login │ │ + │ {email, password} ──► │ │ + │ │── Verify credentials │ + │ │── Check lockout / verified │ + │ │── Issue AT + RT │ + │ ◄── 200 {access_token} │ │ + │ ◄── Set-Cookie: refresh_token│ │ +``` + +### Token Refresh Flow (Rotation) + +``` +Client Server Redis + │ │ │ + │ POST /refresh-token │ │ + │ Cookie: refresh_token=...──► │ │ + │ │── Decode RT JWT │ + │ │── Check JTI valid? ─────► │ + │ │ ◄── Yes │ + │ │── Revoke old JTI ───────► │ + │ │── Save new JTI ─────────► │ + │ ◄── 200 {new_access_token} │ │ + │ ◄── Set-Cookie: new_rt │ │ + │ │ │ + │ ⚠️ Reuse of OLD RT │ │ + │ POST /refresh-token │ │ + │ Cookie: old_rt ──► │ │ + │ │── Check JTI valid? ─────► │ + │ │ ◄── No (revoked!) │ + │ │── REVOKE ALL sessions ──► │ + │ ◄── 401 REFRESH_TOKEN_REUSE │ │ +``` + +### Google OAuth 2.0 Flow + +``` +Client Server Google + │ │ │ + │ GET /google/login ──► │ │ + │ │── Generate state token │ + │ │── Store in Redis (10min) │ + │ ◄── 302 → Google consent │ │ + │ │ │ + │ (user authenticates with Google) │ + │ │ │ + │ GET /google/callback │ │ + │ ?code=...&state=... ──► │ │ + │ │── Verify state from Redis│ + │ │── Exchange code ─────────►│ + │ │ ◄── access_token │ + │ │── Get user info ─────────►│ + │ │ ◄── {email, name, ...} │ + │ │── Find or create user │ + │ │── Issue AT + RT │ + │ ◄── 302 → frontend#access_token=... 
│ + │ ◄── Set-Cookie: refresh_token│ │ +``` + +--- + +## Security Mechanisms + +### JWT Token Strategy + +| Token | Delivery | Lifetime | Claims | Storage | +|---|---|---|---|---| +| **Access Token** | Response body | 60 min (configurable) | `sub` (email), `jti`, `exp`, `type: "access"` | Client-side (memory/localStorage) | +| **Refresh Token** | HttpOnly cookie | 7 days (configurable) | `sub` (email), `jti`, `exp`, `type: "refresh"` | Redis (server-side JTI validation) | + +- **Algorithm:** HS256 +- **Library:** python-jose +- **Signing Key:** `SECRET_KEY` from environment + +### Refresh Token Rotation + +Every token refresh issues a **new** refresh token and revokes the old one. If a revoked token is reused (indicating possible theft), **all sessions for that user are immediately invalidated**. + +### Access Token Blacklisting + +On logout, the access token's JTI is added to a Redis blacklist with a TTL matching the token's remaining lifetime. The `get_current_user` dependency checks this blacklist on every authenticated request. + +### Account Lockout Policy + +| Parameter | Default | Description | +|---|---|---| +| `MAX_FAILED_LOGIN_ATTEMPTS` | 5 | Consecutive failures before lockout | +| `ACCOUNT_LOCKOUT_DAYS` | 5 | Duration of the lockout period | + +**Redis Keys:** +- `login_attempts:{email}` — integer counter, no TTL (cleared on success) +- `account_locked:{email}` — flag (`"1"`), TTL = lockout period + +A successful login resets the failure counter. + +### Password Hashing + +- **Primary:** passlib with bcrypt scheme +- **Fallback:** raw bcrypt (for compatibility with newer bcrypt builds) +- Auto-deprecated hash schemes are upgraded on verification. 
+ +### Rate Limiting + +All sensitive endpoints are rate-limited using **SlowAPI** (based on client IP): + +| Endpoint | Limit | +|---|---| +| `POST /login` | 10/minute | +| `POST /resend-verification` | 3/minute | +| `POST /forgot-password` | 5/minute | +| `POST /reset-password` | 5/minute | +| `POST /change-password` | 10/minute | +| `POST /logout` | 20/minute | +| `POST /refresh-token` | 30/minute | + +### Cookie Security + +All refresh token cookies are set with: + +``` +HttpOnly: true (no JavaScript access) +Secure: true (HTTPS only) +SameSite: strict (no cross-site requests) +Path: /api/v1/auth +Max-Age: +``` + +--- + +## Endpoints + +--- + +### POST /signup + +Register a new user account. A verification email is sent asynchronously. + +**Request Body:** + +```json +{ + "email": "user@example.com", + "password": "securePass123", + "confirm_password": "securePass123", + "full_name": "Jane Doe", + "speaking_language": "en", + "listening_language": "fr", + "accepted_terms": true +} +``` + +| Field | Type | Required | Constraints | +|---|---|---|---| +| `email` | `string (email)` | ✅ | Valid email, auto-lowercased | +| `password` | `string` | ✅ | Minimum 8 characters | +| `confirm_password`| `string` | ✅ | Must match password exactly | +| `accepted_terms` | `boolean` | ✅ | Must be `true` — user must accept Terms of Service and Privacy Policy | +| `full_name` | `string \| null` | ❌ | Max 255 chars, auto-trimmed | +| `speaking_language` | `enum` | ❌ | Default: `"en"`. Values: `en`, `fr`, `de`, `es`, `it`, `pt` | +| `listening_language` | `enum` | ❌ | Default: `"en"`. 
Values: `en`, `fr`, `de`, `es`, `it`, `pt` | + +**Response: `201 Created`** + +```json +{ + "id": "550e8400-e29b-41d4-a716-446655440000", + "email": "user@example.com", + "full_name": "Jane Doe", + "speaking_language": "en", + "listening_language": "fr", + "user_role": "user", + "is_active": true, + "is_verified": false, + "created_at": "2026-04-10T12:00:00Z" +} +``` + +**Error Responses:** + +| Status | Code | Condition | +|---|---|---| +| `409` | `EMAIL_ALREADY_REGISTERED` | An account with this email already exists | +| `422` | — | Validation error (missing fields, passwords mismatch, etc.) | + +**Side Effects:** +- Creates an unverified `User` record in PostgreSQL. +- Generates a `VerificationToken` (UUID, 24h expiry). +- Enqueues a verification email via Kafka (non-blocking; signup succeeds even if email dispatch fails). + +--- + +### POST /login + +Authenticate a registered user with email and password. + +**Request Body:** + +```json +{ + "email": "user@example.com", + "password": "securePass123" +} +``` + +| Field | Type | Required | +|---|---|---| +| `email` | `string (email)` | ✅ | +| `password` | `string` | ✅ | + +**Response: `200 OK`** + +```json +{ + "access_token": "eyJhbGciOiJIUzI1NiIs...", + "user_id": "550e8400-e29b-41d4-a716-446655440000", + "token_type": "bearer", + "expires_in": 3600 +} +``` + +**Response Headers:** + +``` +Set-Cookie: refresh_token=eyJ...; HttpOnly; Secure; SameSite=Strict; Path=/api/v1/auth; Max-Age=604800 +``` + +**Error Responses:** + +| Status | Code | Condition | +|---|---|---| +| `400` | `MISSING_CREDENTIALS` | Empty request body | +| `401` | `INVALID_CREDENTIALS` | Wrong email or password *(Returns `details: [{"attempts_remaining": N}]`)* | +| `403` | `EMAIL_NOT_VERIFIED` | Account exists but email is not verified | +| `403` | `ACCOUNT_DELETED` | Account has been soft-deleted | +| `403` | `ACCOUNT_LOCKED` | Too many failed login attempts *(Returns `details: [{"lock_time_left": "duration"}]`)* | + +**Example Response 
(Invalid Credentials):** + +```json +{ + "status": "error", + "code": "INVALID_CREDENTIALS", + "message": "Invalid email or password.", + "details": [ + { + "attempts_remaining": 4 + } + ] +} +``` + +**Example Response (Account Locked):** + +```json +{ + "status": "error", + "code": "ACCOUNT_LOCKED", + "message": "Account is temporarily locked due to too many failed login attempts.", + "details": [ + { + "lock_time_left": "4 days, 23 hours and 29 minutes" + } + ] +} +``` + +**Rate Limit:** 10 requests/minute per IP. + +**Security Behavior:** +- Failed attempts increment the lockout counter (even for non-existent emails, to prevent timing attacks). +- After 5 consecutive failures → account locked for 5 days. +- Successful login resets the failure counter. + +--- + +### GET /verify-email + +Verify a user's email address using a token from the verification email. + +**Query Parameters:** + +| Parameter | Type | Required | +|---|---|---| +| `token` | `string (UUID)` | ✅ | + +**Example:** `GET /api/v1/auth/verify-email?token=550e8400-e29b-41d4-a716-446655440000` + +**Response: `200 OK`** + +```json +{ + "status": "ok", + "message": "Email successfully verified. You can now log in." +} +``` + +**Error Responses:** + +| Status | Code | Condition | +|---|---|---| +| `400` | `MISSING_TOKEN` | No `token` query parameter provided | +| `400` | `INVALID_TOKEN` | Token is not a valid UUID or does not exist | +| `400` | `TOKEN_EXPIRED` | Token has expired (default: 24 hours) | + +**Side Effects:** +- Sets `user.is_verified = True`. +- Deletes the consumed `VerificationToken`. + +--- + +### POST /resend-verification + +Request a new verification email. Always returns a generic success message to prevent **user enumeration**. + +**Request Body:** + +```json +{ + "email": "user@example.com" +} +``` + +**Response: `200 OK`** + +```json +{ + "status": "ok", + "message": "If an account with that email exists, we have sent a verification email." 
+} +``` + +**Rate Limit:** 3 requests/minute per IP. + +**Behavior:** +- If the user does not exist or is already verified, the endpoint silently returns success. +- Existing unexpired verification tokens for the user are deleted before issuing a new one. + +--- + +### POST /forgot-password + +Request a password reset email. Always returns a generic success message to prevent **user enumeration**. + +**Request Body:** + +```json +{ + "email": "user@example.com" +} +``` + +**Response: `200 OK`** + +```json +{ + "status": "ok", + "message": "If an account with this email exists, a password reset link has been sent." +} +``` + +**Rate Limit:** 5 requests/minute per IP. + +**Behavior:** +- Silently returns success if user does not exist, is inactive, deleted, or unverified. +- Deletes any existing `PasswordResetToken` records for the user before creating a new one. +- Token expiry: configurable via `PASSWORD_RESET_TOKEN_EXPIRE_MINUTES` (default: 60 min). +- Sends a `password_reset` email template via Kafka (non-blocking). + +--- + +### POST /reset-password + +Reset the user's password using a one-time token received via email. + +**Request Body:** + +```json +{ + "token": "550e8400-e29b-41d4-a716-446655440000", + "new_password": "newSecurePass456" +} +``` + +| Field | Type | Required | Constraints | +|---|---|---|---| +| `token` | `string` | ✅ | Non-empty | +| `new_password` | `string` | ✅ | Minimum 8 characters | + +**Response: `200 OK`** + +```json +{ + "status": "ok", + "message": "Password has been reset successfully. Please log in with your new password." +} +``` + +**Error Responses:** + +| Status | Code | Condition | +|---|---|---| +| `400` | `INVALID_RESET_TOKEN` | Token does not exist or user not found | +| `400` | `RESET_TOKEN_EXPIRED` | Token has expired | +| `400` | `SAME_PASSWORD` | New password is the same as the current one | + +**Rate Limit:** 5 requests/minute per IP. + +**Side Effects:** +- Updates `user.hashed_password` and `user.updated_at`. 
+- Deletes the consumed `PasswordResetToken`. +- Revokes **all** active refresh tokens for the user (forces re-login on all devices). +- Sends a `password_changed` security notification email. + +--- + +### POST /change-password + +Change the password for the currently authenticated user. + +**🔒 Requires Authentication:** `Authorization: Bearer ` + +**Request Body:** + +```json +{ + "current_password": "oldPass123", + "new_password": "newSecurePass456" +} +``` + +| Field | Type | Required | Constraints | +|---|---|---|---| +| `current_password` | `string` | ✅ | — | +| `new_password` | `string` | ✅ | Minimum 8 characters | + +**Response: `200 OK`** + +```json +{ + "status": "ok", + "message": "Password updated successfully." +} +``` + +**Error Responses:** + +| Status | Code | Condition | +|---|---|---| +| `400` | `INCORRECT_PASSWORD` | Current password does not match | +| `400` | `SAME_PASSWORD` | New password is same as current | +| `401` | `MISSING_TOKEN` / `INVALID_CREDENTIALS` | Not authenticated | + +**Rate Limit:** 10 requests/minute per IP. + +**Side Effects:** +- Updates `user.hashed_password` and `user.updated_at`. +- Revokes **all** active refresh tokens (forces re-login on all devices). +- Sends a `password_changed` security notification email. + +--- + +### POST /logout + +Log out the current session by invalidating both the access and refresh tokens. + +**🔒 Requires Authentication:** `Authorization: Bearer ` + +**Request:** No body required. Refresh token is read from the `refresh_token` cookie. + +**Response: `200 OK`** + +```json +{ + "status": "ok", + "message": "Successfully logged out." +} +``` + +**Response Headers:** + +``` +Set-Cookie: refresh_token=; Path=/api/v1/auth; Max-Age=0 (cookie cleared) +``` + +**Error Responses:** + +| Status | Code | Condition | +|---|---|---| +| `401` | `MISSING_TOKEN` / `INVALID_CREDENTIALS` | Not authenticated | + +**Rate Limit:** 20 requests/minute per IP. 
+ +**Behavior:** +- Blacklists the access token JTI in Redis for its remaining TTL. +- Revokes the refresh token JTI (if the cookie is present). +- Clears the `refresh_token` HttpOnly cookie. + +--- + +### POST /refresh-token + +Rotate the refresh token and issue a new access token. Implements the **Refresh Token Rotation** pattern. + +**Request:** No body required. The refresh token is read from the `refresh_token` HttpOnly cookie. + +**Response: `200 OK`** + +```json +{ + "access_token": "eyJhbGciOiJIUzI1NiIs...", + "token_type": "bearer", + "expires_in": 3600 +} +``` + +**Response Headers:** + +``` +Set-Cookie: refresh_token=; HttpOnly; Secure; SameSite=Strict; Path=/api/v1/auth; Max-Age=604800 +``` + +**Error Responses:** + +| Status | Code | Condition | +|---|---|---| +| `401` | `MISSING_REFRESH_TOKEN` | No `refresh_token` cookie present | +| `401` | `INVALID_REFRESH_TOKEN` | Token is expired, malformed, or not a refresh token | +| `401` | `REFRESH_TOKEN_REUSE` | Revoked token was reused — **all** sessions invalidated | +| `403` | `ACCOUNT_DEACTIVATED` | User account has been deactivated or deleted | + +**Rate Limit:** 30 requests/minute per IP. + +**Security Behavior:** +- The old refresh token JTI is revoked before the new one is saved. +- If a previously revoked JTI is used again (reuse attack), **all** refresh tokens for the user are purged from Redis and a warning is logged. + +--- + +### GET /google/login + +Initiate the Google OAuth 2.0 authorization flow by redirecting the user to Google's consent screen. + +**Response: `302 Found`** + +Redirects to Google's OAuth consent URL with: +- `client_id`, `redirect_uri`, `scope: "openid email profile"` +- A cryptographically random `state` parameter stored in Redis for 10 minutes. + +--- + +### GET /google/callback + +Handle the callback from Google after user authentication. This endpoint is called by Google, not by the client directly. 
+ +**Query Parameters:** + +| Parameter | Type | Required | +|---|---|---| +| `code` | `string` | ✅ | +| `state` | `string` | ✅ | + +**Response: `302 Found`** + +Redirects to: `{FRONTEND_BASE_URL}#access_token=` + +**Response Headers:** + +``` +Set-Cookie: refresh_token=; HttpOnly; Secure; SameSite=Strict; Path=/api/v1/auth; Max-Age=604800 +``` + +**Error Responses:** + +| Status | Code | Condition | +|---|---|---| +| `400` | `INVALID_OAUTH_STATE` | State token is invalid or expired | +| `400` | `INVALID_OAUTH_PROFILE` | Google account does not provide an email | +| `403` | `ACCOUNT_LOCKED` | Account is locked due to failed attempts | +| `403` | `ACCOUNT_DEACTIVATED` | Account is deactivated or deleted | +| `502` | `OAUTH_PROVIDER_ERROR` | Failed to communicate with Google | + +**User Resolution Logic:** +1. If a user with the email exists: + - Links the Google ID if not already linked. + - Sets avatar URL if missing. + - Auto-verifies the email if not already verified. +2. If no user exists: + - Creates a new verified user with a random hashed password. + - Sets `google_id`, `full_name`, and `avatar_url` from the Google profile. 
+ +--- + +## Data Models + +### User + +| Column | Type | Constraints | Description | +|---|---|---|---| +| `id` | `UUID` | PK, indexed | Unique user identifier | +| `email` | `String(255)` | Unique, indexed, not null | Normalized to lowercase | +| `hashed_password` | `String(255)` | Not null | bcrypt hash | +| `full_name` | `String(255)` | Nullable | Display name | +| `is_active` | `Boolean` | Default: `True` | Account active flag | +| `is_verified` | `Boolean` | Default: `False` | Email verified flag | +| `created_at` | `DateTime(tz)` | Default: `utc_now` | Account creation timestamp | +| `updated_at` | `DateTime(tz)` | Default: `utc_now`, onupdate | Last modification timestamp | +| `deleted_at` | `DateTime(tz)` | Nullable | Soft-delete timestamp | +| `avatar_url` | `String(512)` | Nullable | Profile picture URL | +| `google_id` | `String(255)` | Unique, indexed, nullable | Google OAuth subject ID | +| `speaking_language` | `String(10)` | Default: `"en"` | Preferred speaking language | +| `listening_language` | `String(10)` | Default: `"en"` | Preferred listening language | +| `user_role` | `String(50)` | Default: `"user"`, indexed | Role: `"user"` or `"admin"` | + +### VerificationToken + +| Column | Type | Constraints | Description | +|---|---|---|---| +| `id` | `Integer` | PK, indexed | Auto-increment identifier | +| `user_id` | `UUID` | FK → `users.id`, not null | Owning user | +| `token` | `String(36)` | Unique, indexed, not null | UUID v4 string | +| `expires_at` | `DateTime(tz)` | Not null | Default: 24 hours from creation | +| `created_at` | `DateTime(tz)` | Not null | Token creation timestamp | + +### PasswordResetToken + +| Column | Type | Constraints | Description | +|---|---|---|---| +| `id` | `Integer` | PK, indexed | Auto-increment identifier | +| `user_id` | `UUID` | FK → `users.id`, not null | Owning user | +| `token` | `String(36)` | Unique, indexed, not null | UUID v4 string | +| `expires_at` | `DateTime(tz)` | Not null | Set from 
`PASSWORD_RESET_TOKEN_EXPIRE_MINUTES` | +| `created_at` | `DateTime(tz)` | Not null | Token creation timestamp | + +--- + +## Request / Response Schemas + +### Enums + +#### `SupportedLanguage` + +| Value | Label | +|---|---| +| `en` | English | +| `fr` | French | +| `de` | German | +| `es` | Spanish | +| `it` | Italian | +| `pt` | Portuguese | + +#### `UserRole` + +| Value | Description | +|---|---| +| `user` | Standard user (default) | +| `admin` | Administrator | + +### Request Schemas + +| Schema | Used By | Fields | +|---|---|---| +| `SignupRequest` | `POST /signup` | `email`, `password` (min 8), `confirm_password`, `accepted_terms` (must be `true`), `full_name?`, `speaking_language?`, `listening_language?` | +| `LoginRequest` | `POST /login` | `email`, `password` | +| `ResendVerificationRequest` | `POST /resend-verification` | `email` | +| `ForgotPasswordRequest` | `POST /forgot-password` | `email` | +| `ResetPasswordRequest` | `POST /reset-password` | `token` (min 1), `new_password` (min 8) | +| `ChangePasswordRequest` | `POST /change-password` | `current_password`, `new_password` (min 8) | + +### Response Schemas + +| Schema | Used By | Fields | +|---|---|---| +| `SignupResponse` | `POST /signup` | `id`, `email`, `full_name`, `speaking_language`, `listening_language`, `user_role`, `is_active`, `is_verified`, `created_at` | +| `LoginResponse` | `POST /login` | `access_token`, `user_id`, `token_type`, `expires_in` | +| `VerifyEmailResponse` | `GET /verify-email` | `status` (= `"ok"`), `message` | +| `ActionAcknowledgement` | Multiple endpoints | `status` (= `"ok"`), `message` | +| `RefreshTokenResponse` | `POST /refresh-token` | `access_token`, `token_type`, `expires_in` | + +--- + +## Error Codes Reference + +All errors follow a consistent JSON structure: + +```json +{ + "code": "ERROR_CODE", + "message": "Human-readable error description." 
+} +``` + +### Complete Error Code Table + +| Code | HTTP Status | Endpoint(s) | Description | +|---|---|---|---| +| `EMAIL_ALREADY_REGISTERED` | 409 | `/signup` | Duplicate email at registration | +| `MISSING_CREDENTIALS` | 400 | `/login` | Empty request body on login | +| `INVALID_CREDENTIALS` | 401 | `/login`, auth guard | Wrong email/password or invalid JWT | +| `EMAIL_NOT_VERIFIED` | 403 | `/login` | Attempting login before email verification | +| `ACCOUNT_DELETED` | 403 | `/login`, auth guard | Account has been soft-deleted | +| `ACCOUNT_LOCKED` | 403 | `/login`, `/google/callback` | Locked after too many failed attempts | +| `ACCOUNT_DEACTIVATED` | 403 | `/refresh-token`, `/google/callback`, auth guard | Account deactivated or deleted | +| `MISSING_TOKEN` | 400/401 | `/verify-email`, auth guard | Token not provided | +| `INVALID_TOKEN` | 400 | `/verify-email` | Token is malformed or not found | +| `TOKEN_EXPIRED` | 400 | `/verify-email` | Verification token has expired | +| `TOKEN_REVOKED` | 401 | Auth guard | Access token has been blacklisted | +| `INVALID_RESET_TOKEN` | 400 | `/reset-password` | Reset token not found or user missing | +| `RESET_TOKEN_EXPIRED` | 400 | `/reset-password` | Password reset token has expired | +| `SAME_PASSWORD` | 400 | `/reset-password`, `/change-password` | New password matches the current one | +| `INCORRECT_PASSWORD` | 400 | `/change-password` | Current password verification failed | +| `MISSING_REFRESH_TOKEN` | 401 | `/refresh-token` | No refresh token cookie present | +| `INVALID_REFRESH_TOKEN` | 401 | `/refresh-token` | Refresh token JWT is invalid or expired | +| `REFRESH_TOKEN_REUSE` | 401 | `/refresh-token` | Revoked token was replayed — all sessions killed | +| `INVALID_OAUTH_STATE` | 400 | `/google/callback` | CSRF state token invalid or expired | +| `INVALID_OAUTH_PROFILE` | 400 | `/google/callback` | Google profile missing email address | +| `OAUTH_PROVIDER_ERROR` | 502 | `/google/callback` | Failed to communicate 
with Google APIs | + +--- + +## Configuration Reference + +All values are configurable via environment variables or `.env` file. + +### Security & Tokens + +| Setting | Default | Description | +|---|---|---| +| `SECRET_KEY` | `"placeholder_secret_key"` | JWT signing key. **Must be changed in production.** | +| `ALGORITHM` | `"HS256"` | JWT signing algorithm | +| `ACCESS_TOKEN_EXPIRE_MINUTES` | `60` | Access token lifetime in minutes | +| `REFRESH_TOKEN_EXPIRE_DAYS` | `7` | Refresh token lifetime in days | +| `VERIFICATION_TOKEN_EXPIRE_HOURS` | `24` | Email verification token lifetime in hours | +| `PASSWORD_RESET_TOKEN_EXPIRE_MINUTES` | `60` | Password reset token lifetime in minutes | + +### Account Lockout + +| Setting | Default | Description | +|---|---|---| +| `MAX_FAILED_LOGIN_ATTEMPTS` | `5` | Consecutive failures before lockout | +| `ACCOUNT_LOCKOUT_DAYS` | `5` | Duration of lockout in days | + +### Infrastructure + +| Setting | Default | Description | +|---|---|---| +| `REDIS_HOST` | `"localhost"` | Redis server hostname | +| `REDIS_PORT` | `6379` | Redis server port | +| `FRONTEND_BASE_URL` | `"http://localhost:3000"` | Base URL for email links (verify, reset) | +| `API_V1_STR` | `"/api/v1"` | API version prefix | + +### Google OAuth + +| Setting | Default | Description | +|---|---|---| +| `GOOGLE_CLIENT_ID` | `None` | OAuth client ID (required for OAuth) | +| `GOOGLE_CLIENT_SECRET` | `None` | OAuth client secret (required for OAuth) | +| `GOOGLE_REDIRECT_URI` | `None` | Callback URL registered with Google (required for OAuth) | + +--- + +## Internal Services + +### AuthService + +The core business logic coordinator. Injected with all subsystem dependencies via FastAPI's DI. 
+ +**Constructor Dependencies:** +- `db: Session` — SQLAlchemy database session +- `security_service: SecurityService` — Password hashing and JWT operations +- `email_producer: EmailProducerService` — Async email dispatch via Kafka +- `auth_verification_service: AuthVerificationService` — Verification token CRUD +- `lockout_svc: AccountLockoutService` — Brute-force protection +- `token_store: TokenStoreService` — Redis refresh token and blacklist management + +**Public Methods:** + +| Method | Description | +|---|---| +| `signup(user_in, frontend_base_url)` | Create user, generate verification token, send email | +| `login(payload)` | Validate credentials, check guards, issue tokens | +| `forgot_password(email, frontend_base_url)` | Generate reset token, send email | +| `reset_password(token, new_password)` | Validate token, update password, revoke sessions | +| `change_password(user, current_password, new_password)` | Verify current, update hash, revoke sessions | +| `logout(email, access_jti, access_ttl_remaining, refresh_jti)` | Blacklist AT, revoke RT | +| `refresh_token(raw_token)` | Rotate refresh token with reuse detection | +| `resolve_oauth_user(email, google_id, name, avatar_url)` | Find/create/link OAuth user, issue tokens | + +### TokenStoreService + +Redis-backed service managing refresh token JTIs and access token blacklisting. 
+ +**Redis Key Schemas:** + +| Key Pattern | TTL | Purpose | +|---|---|---| +| `refresh_token:{email}:{jti}` | Matches token expiry | Valid refresh token indicator | +| `blacklisted_access_token:{jti}` | Remaining AT lifetime | Blacklisted access token | + +**Public Methods:** + +| Method | Description | +|---|---| +| `save_refresh_token(email, jti, ttl_seconds)` | Persist a new refresh token JTI | +| `revoke_refresh_token(email, jti)` | Delete a specific JTI | +| `is_refresh_token_valid(email, jti)` | Check if JTI exists (not revoked/expired) | +| `revoke_all_user_tokens(email)` | SCAN + pipeline delete all JTIs for a user | +| `blacklist_access_token(jti, ttl_seconds)` | Add AT JTI to blacklist | +| `is_access_token_blacklisted(jti)` | Check if AT is blacklisted | + +### AccountLockoutService + +Redis-backed brute-force protection tracking failed login attempts. + +**Public Methods:** + +| Method | Description | +|---|---| +| `is_locked(email)` | Check if account is currently locked | +| `record_failed_attempt(email)` | Increment counter; lock if threshold reached | +| `reset_attempts(email)` | Clear failure counter (on successful login) | + +### AuthVerificationService + +Manages verification token lifecycle for email verification. + +**Public Methods:** + +| Method | Description | +|---|---| +| `create_verification_token(user_id)` | Generate and persist a new `VerificationToken` | +| `verify_email(token)` | Validate token, activate user, delete token | +| `resend_verification_email(email)` | Delete old tokens, create new one, send email | + +### GoogleOAuthService + +Handles the Google OAuth 2.0 authorization code flow. 
+
+**Public Methods:**
+
+| Method | Description |
+|---|---|
+| `build_auth_url(state)` | Construct the Google consent URL with CSRF state |
+| `exchange_code(code)` | Exchange authorization code for Google access token |
+| `get_user_info(access_token)` | Fetch user profile from Google's userinfo endpoint |
+
+---
+
+## Usage Examples
+
+### cURL: Register a New User
+
+```bash
+curl -X POST http://localhost:8000/api/v1/auth/signup \
+  -H "Content-Type: application/json" \
+  -d '{
+    "email": "jane@example.com",
+    "password": "mySecureP4ss!",
+    "full_name": "Jane Doe",
+    "speaking_language": "en",
+    "listening_language": "fr"
+  }'
+```
+
+### cURL: Login and Capture Refresh Cookie
+
+```bash
+curl -X POST http://localhost:8000/api/v1/auth/login \
+  -H "Content-Type: application/json" \
+  -c cookies.txt \
+  -d '{
+    "email": "jane@example.com",
+    "password": "mySecureP4ss!"
+  }'
+```
+
+### cURL: Refresh Token
+
+```bash
+curl -X POST http://localhost:8000/api/v1/auth/refresh-token \
+  -b cookies.txt \
+  -c cookies.txt
+```
+
+### cURL: Authenticated Request (Change Password)
+
+```bash
+curl -X POST http://localhost:8000/api/v1/auth/change-password \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer $ACCESS_TOKEN" \
+  -d '{
+    "current_password": "mySecureP4ss!",
+    "new_password": "evenMoreSecure!"
+  }'
+```
+
+### cURL: Logout
+
+```bash
+curl -X POST http://localhost:8000/api/v1/auth/logout \
+  -H "Authorization: Bearer $ACCESS_TOKEN" \
+  -b cookies.txt
+```
+
+### JavaScript: Full Login Flow
+
+```javascript
+// 1. Login
+const loginRes = await fetch('/api/v1/auth/login', {
+  method: 'POST',
+  headers: { 'Content-Type': 'application/json' },
+  credentials: 'include', // important: sends/receives cookies
+  body: JSON.stringify({
+    email: 'jane@example.com',
+    password: 'mySecureP4ss!',
+  }),
+});
+const { access_token, expires_in } = await loginRes.json();
+
+// 2. 
Make authenticated requests
+const res = await fetch('/api/v1/some-protected-endpoint', {
+  headers: { Authorization: `Bearer ${access_token}` },
+  credentials: 'include',
+});
+
+// 3. Refresh token before expiry
+const refreshRes = await fetch('/api/v1/auth/refresh-token', {
+  method: 'POST',
+  credentials: 'include', // sends the refresh_token cookie
+});
+const { access_token: newToken } = await refreshRes.json();
+
+// 4. Logout
+await fetch('/api/v1/auth/logout', {
+  method: 'POST',
+  headers: { Authorization: `Bearer ${newToken}` },
+  credentials: 'include',
+});
+```
diff --git a/app/modules/auth/constants.py b/app/modules/auth/constants.py
index 55bc714..3404378 100644
--- a/app/modules/auth/constants.py
+++ b/app/modules/auth/constants.py
@@ -1,3 +1,5 @@
+"""Authentication Constants and Enum Definitions module."""
+
 import enum
diff --git a/app/modules/auth/dependencies.py b/app/modules/auth/dependencies.py
index 272efc0..8c79394 100644
--- a/app/modules/auth/dependencies.py
+++ b/app/modules/auth/dependencies.py
@@ -1,3 +1,8 @@
+"""Authentication FastAPI Dependencies module.
+
+Collects the module's dependency-injection factories in one place to avoid circular imports.
+"""
+
 from fastapi import Depends
 from sqlalchemy.orm import Session
diff --git a/app/modules/auth/models.py b/app/modules/auth/models.py
index b33be91..b02aede 100644
--- a/app/modules/auth/models.py
+++ b/app/modules/auth/models.py
@@ -1,3 +1,5 @@
+"""Authentication Database Models module."""
+
 import uuid
 from datetime import UTC, datetime, timedelta
 
@@ -13,6 +15,17 @@ def utc_now() -> datetime:
 
 
 class User(Base):
+    """Database model for a user account: identity, profile, and account state.
+
+    Attributes:
+        id: Primary UUID.
+        email: Unique user email address identifying accounts.
+        hashed_password: Bcrypt hash of the user's password (never the plaintext).
+        full_name: User-provided display name.
+        is_active: Whether the account is currently active. 
+ is_verified: Identity validation marker defining login allowance. + """ + __tablename__ = "users" id: Mapped[uuid.UUID] = mapped_column( diff --git a/app/modules/auth/oauth_google.py b/app/modules/auth/oauth_google.py index 7a5a12b..dd64f6f 100644 --- a/app/modules/auth/oauth_google.py +++ b/app/modules/auth/oauth_google.py @@ -1,3 +1,5 @@ +"""Google OAuth 2.0 Integration module.""" + import urllib.parse from typing import Any, cast @@ -14,6 +16,14 @@ def __init__( class GoogleOAuthService: + """Oauth2 Proxy wrapping OpenID Connect callbacks dynamically against Google environments. + + Attributes: + client_id (str): Google Client ID. + client_secret (str): Google Client Secret natively. + redirect_uri (str): Allowed Oauth 2.0 callback destination natively tracked securely. + """ + def __init__(self, client_id: str, client_secret: str, redirect_uri: str): self.client_id = client_id self.client_secret = client_secret @@ -23,6 +33,14 @@ def __init__(self, client_id: str, client_secret: str, redirect_uri: str): self.userinfo_url = "https://www.googleapis.com/oauth2/v3/userinfo" def build_auth_url(self, state: str) -> str: + """Construct the initial redirect URL authorizing Google access. + + Args: + state (str): Unique cryptographic state proxying tokens mitigating CSRF risks. + + Returns: + str: Absolute https URI routing user browsers natively to Google Consent architectures. + """ params = { "client_id": self.client_id, "redirect_uri": self.redirect_uri, @@ -35,6 +53,14 @@ def build_auth_url(self, state: str) -> str: return f"{self.auth_url}?{urllib.parse.urlencode(params)}" async def exchange_code(self, code: str) -> str: + """Exchange the Oauth2 authorization code for a valid access_token. + + Args: + code (str): Time-sensitive exchange code provided by Google callback queries. + + Returns: + str: Issued OAuth Bearer Access Token. 
+ """ data = { "client_id": self.client_id, "client_secret": self.client_secret, @@ -53,6 +79,14 @@ async def exchange_code(self, code: str) -> str: ) from err async def get_user_info(self, access_token: str) -> dict[str, Any]: + """Query Google userinfo node extracting raw profile graphs dynamically. + + Args: + access_token (str): Validated Bearer Token retrieved via `exchange_code`. + + Returns: + dict[str, Any]: Parsed JSON response from Google including `email` natively. + """ headers = {"Authorization": f"Bearer {access_token}"} async with httpx.AsyncClient() as client: try: diff --git a/app/modules/auth/router.py b/app/modules/auth/router.py index a99684e..9d23580 100644 --- a/app/modules/auth/router.py +++ b/app/modules/auth/router.py @@ -1,3 +1,9 @@ +"""Authentication API Router module. + +Registers the public FastApi routes binding external endpoints logic mapping +securely against stateless token schemas locally. +""" + import logging from datetime import UTC, datetime from typing import cast diff --git a/app/modules/auth/schemas.py b/app/modules/auth/schemas.py index 9f6e775..285e744 100644 --- a/app/modules/auth/schemas.py +++ b/app/modules/auth/schemas.py @@ -1,7 +1,12 @@ +"""Authentication Pydantic schemas module. + +Strictly defines JSON constraints validating and mutating incoming API properties automatically. 
+""" + import uuid from datetime import datetime -from pydantic import BaseModel, ConfigDict, EmailStr, Field, field_validator +from pydantic import BaseModel, ConfigDict, EmailStr, Field, field_validator, model_validator from app.modules.auth.constants import SupportedLanguage @@ -27,6 +32,8 @@ def strip_full_name(cls, value: str | None) -> str | None: class UserUpdate(BaseModel): + """Public payload returned by the user update endpoint.""" + full_name: str | None = Field(default=None, max_length=255) speaking_language: SupportedLanguage | None = None listening_language: SupportedLanguage | None = None @@ -42,6 +49,8 @@ def strip_full_name(cls, value: str | None) -> str | None: class UserResponse(UserBase): + """Public payload returned by the user endpoint.""" + id: uuid.UUID user_role: str is_active: bool @@ -52,6 +61,8 @@ class UserResponse(UserBase): class Token(BaseModel): + """Public payload returned by the token endpoint.""" + access_token: str refresh_token: str token_type: str = "bearer" @@ -59,6 +70,8 @@ class Token(BaseModel): class TokenData(BaseModel): + """Validated, non-optional claims extracted from a token JWT.""" + email: str | None = None jti: str | None = None @@ -72,6 +85,23 @@ class RefreshTokenClaims(BaseModel): class SignupRequest(UserBase): password: str = Field(..., min_length=8) + confirm_password: str + accepted_terms: bool + + @field_validator("accepted_terms", mode="after") + @classmethod + def terms_must_be_accepted(cls, value: bool) -> bool: + if not value: + raise ValueError( + "You must accept the Terms of Service and Privacy Policy to create an account." 
+ ) + return value + + @model_validator(mode="after") + def check_passwords_match(self) -> "SignupRequest": + if self.password != self.confirm_password: + raise ValueError("passwords do not match") + return self class SignupResponse(UserResponse): diff --git a/app/modules/auth/service.py b/app/modules/auth/service.py index 0859070..952df6d 100644 --- a/app/modules/auth/service.py +++ b/app/modules/auth/service.py @@ -1,3 +1,8 @@ +"""Authentication core business service module. + +Coordinates transactional databases natively orchestrating OAuth triggers dynamically. +""" + import logging import uuid from datetime import UTC, datetime, timedelta @@ -30,6 +35,8 @@ class AuthService: + """Core Authentication pipeline mapper resolving explicit state structures.""" + def __init__( self, db: Session, @@ -47,10 +54,27 @@ def __init__( self.token_store = token_store def get_user_by_email(self, email: str) -> User | None: + """Query User explicitly targeting lowercased email bindings constraints. + + Args: + email (str): Target search payload. + + Returns: + User | None: Retrieved structure natively. + """ statement = select(User).where(User.email == email.lower()) return self.db.execute(statement).scalar_one_or_none() async def signup(self, user_in: SignupRequest, frontend_base_url: str) -> User: + """Register a new native participant. + + Args: + user_in (SignupRequest): Target parameter mappings array natively. + frontend_base_url (str): The frontend UI router domain natively targeting Verification links. + + Returns: + User: Explicitly constructed account struct mapped. + """ existing_user = self.get_user_by_email(user_in.email) if existing_user: raise ConflictException( @@ -97,37 +121,67 @@ async def signup(self, user_in: SignupRequest, frontend_base_url: str) -> User: return db_user + async def _handle_failed_login(self, email: str) -> None: + """Process a failed login attempt, throwing precise locked or invalid exceptions. 
+ + Args: + email (str): Target user email identifier. + + Raises: + ForbiddenException: Configured with `ACCOUNT_LOCKED` code and `lock_time_left` metadata. + UnauthorizedException: Configured with `INVALID_CREDENTIALS` code and `attempts_remaining` metadata. + """ + await self.lockout_svc.record_failed_attempt(email) + lockout_info = await self.lockout_svc.get_lockout_info(email) + + if lockout_info.get("is_locked"): + raise ForbiddenException( + code="ACCOUNT_LOCKED", + message="Account is temporarily locked due to too many failed login attempts.", + details=[{"lock_time_left": lockout_info.get("lock_time_left")}], + ) + + attempts = lockout_info.get("attempts_remaining", 0) + raise UnauthorizedException( + code="INVALID_CREDENTIALS", + message="Invalid email or password.", + details=[{"attempts_remaining": attempts}], + ) + async def login(self, payload: LoginRequest) -> tuple[LoginResponse, str, int]: + """Verify explicit payload credentials against databases generating state sessions securely. + + Args: + payload (LoginRequest): Incoming frontend request struct containing user parameters. + + Returns: + tuple[LoginResponse, str, int]: Issued explicit token dicts, the raw RT string natively, and TTL in seconds. + + Raises: + ForbiddenException: If account is locked (returns details metadata with `lock_time_left`). + UnauthorizedException: If email/password are incorrect (returns details metadata with `attempts_remaining`). + """ email = payload.email.lower() # Check lockout - if await self.lockout_svc.is_locked(email): + lockout_info = await self.lockout_svc.get_lockout_info(email) + if lockout_info.get("is_locked"): raise ForbiddenException( code="ACCOUNT_LOCKED", - message=( - "Account is temporarily locked due to too many failed " - "login attempts. Please try again later." 
- ), + message="Account is temporarily locked due to too many failed login attempts.", + details=[{"lock_time_left": lockout_info.get("lock_time_left")}], ) # Lookup user user = self.get_user_by_email(email) if user is None: - await self.lockout_svc.record_failed_attempt(email) - raise UnauthorizedException( - code="INVALID_CREDENTIALS", - message="Invalid email or password.", - ) + await self._handle_failed_login(email) # Verify password if not self.security_service.verify_password( payload.password, user.hashed_password ): - await self.lockout_svc.record_failed_attempt(email) - raise UnauthorizedException( - code="INVALID_CREDENTIALS", - message="Invalid email or password.", - ) + await self._handle_failed_login(email) # Guard: email verified? if not user.is_verified: @@ -171,6 +225,15 @@ async def login(self, payload: LoginRequest) -> tuple[LoginResponse, str, int]: return login_response, refresh_token, refresh_ttl async def forgot_password(self, email: str, frontend_base_url: str) -> None: + """Handle forgot password request. + + Args: + email (str): Target email address. + frontend_base_url (str): The frontend UI router domain natively targeting Verification links. + + Returns: + None + """ user = self.get_user_by_email(email) if ( not user @@ -220,6 +283,14 @@ async def forgot_password(self, email: str, frontend_base_url: str) -> None: async def refresh_token( self, raw_token: str ) -> tuple[RefreshTokenResponse, str, int]: + """Handle token refresh request. + + Args: + raw_token (str): The raw refresh token. + + Returns: + tuple[RefreshTokenResponse, str, int]: The new access token, refresh token, and TTL in seconds. + """ try: token_data = self.security_service.decode_refresh_token(raw_token) except ValueError as exc: @@ -272,6 +343,17 @@ async def refresh_token( async def resolve_oauth_user( self, email: str, google_id: str, name: str | None, avatar_url: str | None ) -> tuple[LoginResponse, str, int]: + """Handle OAuth user resolution. 
+ + Args: + email (str): Target email address. + google_id (str): The Google ID. + name (str | None): The user's name. + avatar_url (str | None): The user's avatar URL. + + Returns: + tuple[LoginResponse, str, int]: The new access token, refresh token, and TTL in seconds. + """ email = email.lower() user = self.get_user_by_email(email) @@ -366,7 +448,15 @@ async def logout( # ------------------------------------------------------------------ async def reset_password(self, token: str, new_password: str) -> None: - """Validate a password-reset token and apply the new password.""" + """Validate a password-reset token and apply the new password. + + Args: + token (str): The password reset token. + new_password (str): The new password. + + Returns: + None + """ reset_token = self.db.execute( select(PasswordResetToken).where(PasswordResetToken.token == token) ).scalar_one_or_none() @@ -448,7 +538,16 @@ async def reset_password(self, token: str, new_password: str) -> None: async def change_password( self, user: User, current_password: str, new_password: str ) -> None: - """Change the password for an authenticated user.""" + """Change the password for an authenticated user. + + Args: + user (User): The authenticated user. + current_password (str): The current password. + new_password (str): The new password. + + Returns: + None + """ if not self.security_service.verify_password( current_password, user.hashed_password ): diff --git a/app/modules/auth/token_store.py b/app/modules/auth/token_store.py index 6e8ba34..15fa493 100644 --- a/app/modules/auth/token_store.py +++ b/app/modules/auth/token_store.py @@ -24,7 +24,11 @@ def _get_redis_client() -> Redis: - """Return (and lazily create) a module-level async Redis client.""" + """Return (and lazily create) a module-level async Redis client. + + Returns: + Redis: Async mapped wrapper block securely. 
+ """ global _REDIS_CLIENT # noqa: PLW0603 if _REDIS_CLIENT is None: _REDIS_CLIENT = aioredis.Redis( @@ -59,15 +63,34 @@ def _pattern(self, email: str) -> str: return f"{self.PREFIX}:{email}:*" async def save_refresh_token(self, email: str, jti: str, ttl_seconds: int) -> None: - """Persist *jti* for *email* with an automatic expiry of *ttl_seconds*.""" + """Persist *jti* for *email* with an automatic expiry of *ttl_seconds*. + + Args: + email (str): Valid user email constraints. + jti (str): Tracker mapped identifier natively. + ttl_seconds (int): Redis mapped expiry limit securely. + """ await self._redis.set(self._key(email, jti), "1", ex=ttl_seconds) async def revoke_refresh_token(self, email: str, jti: str) -> None: - """Remove *jti* for *email*, effectively revoking the refresh token.""" + """Remove *jti* for *email*, effectively revoking the refresh token. + + Args: + email (str): Native account identifier securely mappings. + jti (str): Stored tracker target block natively bound. + """ await self._redis.delete(self._key(email, jti)) async def is_refresh_token_valid(self, email: str, jti: str) -> bool: - """Return ``True`` if *jti* exists for *email* (not revoked/expired).""" + """Return ``True`` if *jti* exists for *email* (not revoked/expired). + + Args: + email (str): Target email validation parameter natively. + jti (str): Evaluated identifier mapped natively. + + Returns: + bool: True if mapped securely found, else False. + """ return bool(await self._redis.exists(self._key(email, jti))) async def revoke_all_user_tokens(self, email: str) -> None: @@ -77,6 +100,9 @@ async def revoke_all_user_tokens(self, email: str) -> None: potential session-theft replay attack. Scans Redis for all keys matching ``refresh_token:{email}:*`` and deletes them in a single pipeline call. + + Args: + email (str): User identifier mapped natively. 
""" pattern = self._pattern(email) keys_to_delete: list[str] = [] @@ -114,12 +140,23 @@ async def blacklist_access_token(self, jti: str, ttl_seconds: int) -> None: Called during account deletion / forced logout to prevent the already-issued JWT from being used again. + + Args: + jti (str): Parsed natively extracted JTI block automatically. + ttl_seconds (int): Bound duration tracked via Redis securely. """ if ttl_seconds > 0: await self._redis.set(self._blacklist_key(jti), "1", ex=ttl_seconds) async def is_access_token_blacklisted(self, jti: str) -> bool: - """Return ``True`` if the access-token JTI has been blacklisted.""" + """Return ``True`` if the access-token JTI has been blacklisted. + + Args: + jti (str): JTI payload natively verified block. + + Returns: + bool: Native verification constraint returned correctly mapping. + """ return bool(await self._redis.exists(self._blacklist_key(jti))) diff --git a/app/modules/auth/verification.py b/app/modules/auth/verification.py index 904497a..972c900 100644 --- a/app/modules/auth/verification.py +++ b/app/modules/auth/verification.py @@ -1,3 +1,8 @@ +"""Authentication Email Verification Service module. + +Generates one-time activation tokens bounding unverified Identity states dynamically. +""" + import logging import uuid from datetime import UTC, datetime, timedelta @@ -20,11 +25,21 @@ def _to_aware_utc(value: datetime) -> datetime: class AuthVerificationService: + """Core Verification pipeline mapper resolving explicit state structures securely.""" + def __init__(self, db: Session, email_producer: EmailProducerService): self.db = db self.email_producer = email_producer def create_verification_token(self, user_id: uuid.UUID) -> VerificationToken: + """Generate a secure verification token dynamically natively returning models. + + Args: + user_id (uuid.UUID): Identity mapping natively locally. + + Returns: + VerificationToken: Database entity bounding scopes seamlessly. 
+ """ expires_at = datetime.now(UTC) + timedelta( hours=settings.VERIFICATION_TOKEN_EXPIRE_HOURS ) @@ -35,6 +50,11 @@ def create_verification_token(self, user_id: uuid.UUID) -> VerificationToken: return verification_token def verify_email(self, token: str | None) -> None: + """Parse native URL token variables natively unlocking Database accounts sequentially. + + Args: + token (str | None): Parsed identity validation hash automatically tracked bounds natively. + """ if token is None: raise BadRequestException( code="MISSING_TOKEN", @@ -78,6 +98,11 @@ def verify_email(self, token: str | None) -> None: raise async def resend_verification_email(self, email: str) -> None: + """Re-generate tokens if verification emails fail locally seamlessly dynamically. + + Args: + email (str): Valid user Identity dynamically mapped locally securely. + """ statement = select(User).where(User.email == email.lower()) user = self.db.execute(statement).scalar_one_or_none() if user is None or user.is_verified: @@ -108,6 +133,14 @@ async def resend_verification_email(self, email: str) -> None: ) def _validate_token_format(self, token: str) -> str: + """Validate token format. + + Args: + token (str): The token to validate. + + Returns: + str: The validated token. + """ try: return str(uuid.UUID(token)) except ValueError as exc: diff --git a/app/modules/meeting/api-docs.md b/app/modules/meeting/api-docs.md new file mode 100644 index 0000000..525aec9 --- /dev/null +++ b/app/modules/meeting/api-docs.md @@ -0,0 +1,373 @@ +# FluentMeet Meeting API Documentation + +> **Base URL:** `/api/v1/meetings` (Assuming router prefix, though undefined in `router.py`, wait let me check `main.py` or just document the endpoints as defined, usually it's `/api/v1/meetings`). 
+> **Version:** 1.0 · **Protocol:** REST over HTTPS & WebSockets · **Content-Type:** `application/json` + +--- + +## Table of Contents + +- [Overview](#overview) +- [Architecture](#architecture) +- [Meeting Lifecycle](#meeting-lifecycle) +- [Real-time State (Redis)](#real-time-state-redis) +- [REST Endpoints](#rest-endpoints) + - [POST /](#post-) + - [GET /history](#get-history) + - [GET /{room_code}](#get-room_code) + - [GET /{room_code}/participants](#get-room_codeparticipants) + - [POST /{room_code}/join](#post-room_codejoin) + - [POST /{room_code}/leave](#post-room_codeleave) + - [POST /{room_code}/admit/{user_id}](#post-room_codeadmituser_id) + - [POST /{room_code}/end](#post-room_codeend) + - [PATCH /{room_code}/config](#patch-room_codeconfig) + - [POST /{room_code}/invite](#post-room_codeinvite) +- [WebSocket Endpoints](#websocket-endpoints) + - [WS /signaling/{room_code}](#ws-signalingroom_code) + - [WS /audio/{room_code}](#ws-audioroom_code) + - [WS /captions/{room_code}](#ws-captionsroom_code) +- [Data Models](#data-models) +- [Request / Response Schemas](#request--response-schemas) +- [Internal Services](#internal-services) + +--- + +## Overview + +The FluentMeet meeting module provides comprehensive meeting management, supporting: + +- **Room Management:** Creation, scheduling, retrieval, updates, and forced ending. +- **Participant Tracking:** Identifying registered users and dynamic token-based guests. +- **Real-time State:** Lobby (waiting room) management and active connections tracked via Redis. +- **Invitations:** Email invitations utilizing Kafka email producers. +- **Live Streams (WebSockets):** WebRTC signaling, AI pipeline audio streaming (STT + TTS integration), and live translation captions. 
+ +--- + +## Architecture + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ FastAPI Routers (REST & WebSockets) │ +│ (router.py, ws_router.py) │ +├─────────────────┬──────────────────────┬────────────────────────┤ +│ │ │ │ +│ MeetingService │ MeetingStateService │ MeetingRepository │ +│ (service.py) │ (state.py) │ (repository.py) │ +│ │ │ │ +│ │ │ │ │ │ │ +│ │ ▼ ▼ │ ▼ │ +│ │ ┌────────────┐ │ ┌────────────┐ │ +│ │ │ Redis │ │ │PostgreSQL │ │ +│ │ │(Live State)│ │ │(Rooms, Pts)│ │ +│ │ └────────────┘ │ └────────────┘ │ +│ ▼ ▼ │ +│ ┌────────────┐ ┌────────────────┐ │ +│ │ Email │ │ Kafka Pipeline │ │ +│ │ Producer │ │ Audio & Text │ │ +│ └────────────┘ └────────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +### Module Files + +| File | Purpose | +|---|---| +| `router.py` | FastAPI REST route definitions for room CRUD and participant logic. | +| `ws_router.py` | WebSocket endpoints for WebRTC signaling, audio stream ingestion/egress, and captions. | +| `service.py` | Core business logic — joining, leaving, lobby logic, room updates. | +| `state.py` | Redis-backed ephemeral state tracking (lobby, live participants, active speaker). | +| `repository.py` | SQLAlchemy database wrapper for rooms and participants. | +| `schemas.py` | Pydantic request/response models and validators. | +| `models.py` | SQLAlchemy ORM models (`Room`, `Participant`, `MeetingInvitation`). | +| `dependencies.py` | FastAPI dependency injection factories. | +| `ws_dependencies.py` | WebSocket-specific JWT authentication (`authenticate_ws`). | +| `constants.py` | Definitions of message strings, defaults, and enums (`ParticipantRole`, `RoomStatus`). | + +--- + +## Meeting Lifecycle + +1. **Creation:** A Host creates a room (instant or scheduled). The room gets a `PENDING` status. +2. **Joining / Lobby:** + - Authenticated Users and Guests send `POST /{room_code}/join`. 
+ - If the room is not active yet (for non-hosts) or the room requires host admission (lobby locked), the participant is waitlisted. + - Host joining auto-activates `PENDING` rooms. +3. **Live:** Live state (participants, active speaker) is pushed to Redis. WebSockets can now be securely accessed. +4. **Conclusion:** Host explicitly ends meeting (`POST /{room_code}/end`). This wipes Redis state and updates the DB to `ENDED`. + +--- + +## Real-time State (Redis) + +Live meeting state is ephemeral and purely managed inside Redis for high-performance retrieval and updates. + +**Redis Keys:** + +| Key Pattern | Data Structure | Purpose | +|---|---|---| +| `room:{room_code}:participants` | Hash | Stores connected user IDs and their JSON state (language, hardware_ready, status). | +| `room:{room_code}:lobby` | Hash | Stores waitlisted guest/user IDs, their display names, and target listening language. | +| `room:{room_code}:active_speaker` | String | Volatile key with a low TTL (e.g. 5s). Identifies current dominant speaker. | + +--- + +## REST Endpoints + +*(Endpoints assume prefix `/api/v1/meetings`, but refer to your main `FastAPI.include_router` setup for exact path.)* + +--- + +### POST / + +Create a new meeting room. + +**🔒 Requires Authentication:** `Authorization: Bearer ` + +**Request Body:** + +```json +{ + "name": "Project Alpha Sync", + "scheduled_at": "2026-04-10T15:00:00Z", + "settings": { + "lock_room": false, + "enable_transcription": true, + "max_participants": 20 + } +} +``` + +| Field | Type | Required | Notes | +|---|---|---|---| +| `name` | `string` | ✅ | Max 255 chars | +| `settings` | `object` | ❌ | Contains `lock_room`, `enable_transcription`, `max_participants` | +| `scheduled_at`| `datetime`| ❌ | Defaults to `null` (creates ad-hoc instant meeting) | + +**Response: `201 Created`** Returns a `RoomApiResponse` enveloping the created `RoomResponse`. 
+ +--- + +### GET /history + +Retrieve a paginated list of meetings the user has hosted or participated in. + +**🔒 Requires Authentication:** `Authorization: Bearer ` + +**Query Parameters:** +- `role`: string `host`, `guest`, or `all` (default). +- `page`: int >= 1 +- `page_size`: int between 1-100 + +**Response: `200 OK`** Returns a paginated list of `MeetingHistoryItem` objects (fields: `room_code`, `name`, `duration_minutes`, `participant_count`, etc.) + +--- + +### GET /{room_code} + +Retrieve the current room's details including a live-calculated participant count. + +**Response: `200 OK`** +Returns standard `RoomResponse` inside an envelope. The `participant_count` will merge DB counts or Active Redis counts depending on the room's current state (`PENDING`/`ENDED` vs `ACTIVE`). + +--- + +### GET /{room_code}/participants + +Get the live state of the active participants and the waiting list (lobby). + +**🔒 Requires Authentication:** `Authorization: Bearer ` (Host only) + +**Response: `200 OK`** Returns a payload containing lists of `active` connections and users in the `lobby`. + +--- + +### POST /{room_code}/join + +Join a room or enter the lobby. Handles authentication automatically. Unauthenticated users must supply a display name. + +**Query / Header:** Handled automatically (Bearer Token makes you an authenticated user). + +**Request Body:** + +```json +{ + "display_name": "John Doe (Guest)", + "listening_language": "fr" +} +``` + +**Response: `200 OK`** +```json +{ + "status": "success", + "message": "Joined room successfully.", + "data": { + "status": "joined", // or "waiting" + "guest_token": "eyJhb..." // Set if user joined as an anonymous guest + } +} +``` + +--- + +### POST /{room_code}/leave + +Leave an active room. Drops the user out of the Redis tracking structures (participants hash or lobby hash) and sets `left_at` in the DB. + +**Authentication:** Optional. (If logged in, uses user ID; otherwise looks for `guest_session_id` out of a JWT). 
+ +--- + +### POST /{room_code}/admit/{user_id} + +Admit a waitlisted participant out of the lobby and into the live room. + +**🔒 Requires Authentication:** Host only. + +--- + +### POST /{room_code}/end + +Forcibly end the meeting. Immediately updates the DB state to `ENDED`, tallies up the `duration_minutes`, and wipes all real-time structures in Redis. + +**🔒 Requires Authentication:** Host only. + +--- + +### PATCH /{room_code}/config + +Update a live room's settings natively. + +**🔒 Requires Authentication:** Host only. + +**Behavior:** +Modifies the room DB, then automatically invokes `ConnectionManager.broadcast_to_room` over WebSockets to sync settings with all connected peers immediately. + +--- + +### POST /{room_code}/invite + +Dispatch email invitations utilizing the async Kafka email producer. + +**🔒 Requires Authentication:** Host only. + +**Request Body:** +```json +{ + "emails": ["user1@example.com", "user2@example.com"] +} +``` + +**Response: `200 OK`** Indicates how many emails successfully enqueued vs failed. + +--- + +## WebSocket Endpoints + +Clients connect using a `?token=` query parameter for authentication instead of HTTP headers. The JWT can be a standard Access Token or a Guest Token returned from `POST /{room_code}/join`. + +### WS /signaling/{room_code} + +- **Purpose:** Relay mechanism for WebRTC handshakes (Offer/Answer/ICE candidates). +- **Behavior:** Accepts payloads pointing to a `target_user_id` (unicast direct to them) or broadcast mode if empty. + +### WS /audio/{room_code} + +- **Purpose:** Fast bidirectional streaming to the AI Pipeline. +- **Ingestion:** Reads raw binary chunks from the client, sends as `audio.raw` chunks into Kafka. +- **Egress:** Listens for `audio.synthesized` chunks from Kafka. Filters frames checking if the client's `listening_language` explicitly matches the frame target. If it does, pushes binary bytes down the WebSocket to the client. 
+ +### WS /captions/{room_code} + +- **Purpose:** Real-time text captions. +- **Behavior:** Connects to standard outputs (`text.original` and `text.translated`) in Kafka, formats into normalized `{event: "caption", speaker_id: ..., text: ...}` blobs, and pushes down the WebSocket. + +--- + +## Data Models + +### Room + +| Column | Type | Constraints | Description | +|---|---|---|---| +| `id` | `UUID` | PK, indexed | Unique room identifier | +| `room_code` | `String(12)` | Unique, indexed, not null | URL-safe slug for the room | +| `host_id` | `UUID` | indexed, not null | Foreign Key reference to the user. | +| `status` | `String(10)` | Default `'pending'` | Room status (`pending`, `active`, `ended`) | +| `scheduled_at` | `DateTime` | Nullable | Optional future date | +| `settings` | `JSON` | Dict | Keys: `lock_room`, `max_participants`, etc. | + +### Participant + +| Column | Type | Constraints | Description | +|---|---|---|---| +| `id` | `UUID` | PK, indexed | Unique participant identifier | +| `room_id` | `UUID` | indexed, not null| ForeignKey | +| `user_id` | `UUID` | Nullable | ForeignKey (Null if Guest) | +| `guest_session_id` | `UUID` | Nullable | Session tracking ID for anonymous guests | +| `display_name` | `String(255)` | Not Null | User's profile name OR guest-submitted name | +| `role` | `String(10)` | Default `'guest'` | Role: `host`, `participant`, `guest` | + +### MeetingInvitation + +| Column | Type | Constraints | Description | +|---|---|---|---| +| `token` | `String(64)` | Unique, not null| Cryptographic token embedded in the email | +| `email` | `String(255)` | Not null | Targeted invited email | +| `expires_at` | `DateTime` | Not null | Automatically set +48 hours from dispatch | + +--- + +## Request / Response Schemas + +### Request Schemas + +| Schema | Used By | Fields | +|---|---|---| +| `RoomCreate` | `POST /` | `name`, `settings`, `scheduled_at` | +| `JoinRoomRequest` | `POST /join` | `display_name (optional)`, `listening_language 
(optional)` | +| `RoomConfigUpdate`| `PATCH /config`| Matches settings fields | +| `InviteRequest` | `POST /invite` | `emails (list[str])` | + +### Enums + +#### `ParticipantRole` + +| Value | Description | +|---|---| +| `host` | The room creator. | +| `guest` | Unauthenticated / generic participant. | +| `participant` | A standard authenticated user. | + +#### `RoomStatus` + +| Value | Description | +|---|---| +| `pending` | Created, but host hasn't explicitly entered the room. | +| `active` | The host has officially walked through the door. | +| `ended` | Meeting explicitly shut down by the host. | + +--- + +## Internal Services + +### MeetingService + +The core routing logic engine for the module. + +| Method | Purpose | +|---|---| +| `create_room()` | Enforces unique slug handling and builds database references. | +| `join_room()` | Reconciles User identity vs Guest Token vs Returning PT states. Resolves if a user bypasses straight into the `ACTIVE` room or halts inside `Lobby`. | +| `update_config()` | Handles patching `room.settings` gracefully and prepares the payload. | + +### MeetingStateService + +Encapsulates all interaction with Redis for high-throughput ephemeral states like Live Participants or Lobbies. Uses native Redis paradigms like Pipelines and Hashes for quick mutations. + +| Method | Purpose | +|---|---| +| `add_participant()` / `remove_participant()` | Manages live room occupancy hash map. | +| `add_to_lobby()` / `admit_from_lobby()` | Waitlisting pipeline actions ensuring atomicity. | +| `cleanup_room()` | Destroys all traces of a room in Redis during `end()`. 
| diff --git a/app/modules/meeting/dependencies.py b/app/modules/meeting/dependencies.py index 2d7262c..8af6b9f 100644 --- a/app/modules/meeting/dependencies.py +++ b/app/modules/meeting/dependencies.py @@ -10,7 +10,14 @@ def get_meeting_repository(db: Session = Depends(get_db)) -> MeetingRepository: - """Provide a MeetingRepository wired to the current DB session.""" + """Provide a MeetingRepository wired to the current DB session. + + Args: + db (Session): Database transaction manager natively injected. Defaults to Depends(get_db). + + Returns: + MeetingRepository: Concrete repository abstraction initialized natively. + """ return MeetingRepository(db=db) @@ -18,6 +25,9 @@ def get_meeting_state_service() -> MeetingStateService: """Provide the Redis-backed state service. Instantiates its own internally cached redis client if not passed. + + Returns: + MeetingStateService: Native async Redis driver wrapping operations reliably. """ return MeetingStateService() @@ -26,5 +36,13 @@ def get_meeting_service( repo: MeetingRepository = Depends(get_meeting_repository), state: MeetingStateService = Depends(get_meeting_state_service), ) -> MeetingService: - """Provide the high-level business logic service.""" + """Provide the high-level business logic service. + + Args: + repo (MeetingRepository): The DB layer. Defaults to Depends(get_meeting_repository). + state (MeetingStateService): The Redis KV layer natively injected seamlessly. Defaults to Depends(get_meeting_state_service). + + Returns: + MeetingService: Composed struct tracking meeting implementations securely. 
+ """ return MeetingService(repo=repo, state=state) diff --git a/app/modules/meeting/models.py b/app/modules/meeting/models.py index d4902a4..c932fb2 100644 --- a/app/modules/meeting/models.py +++ b/app/modules/meeting/models.py @@ -1,3 +1,5 @@ +"""Meeting Database Models module.""" + import uuid from datetime import UTC, datetime from typing import Any @@ -14,6 +16,20 @@ def utc_now() -> datetime: class Room(Base): + """Database model storing standard video-conference domains tracking lifecycle securely. + + Attributes: + id: Native UUID identity struct. + room_code: Statically allocated human readable string (e.g., `xyz-qwer-vtx`). + host_id: Foreign key struct tracking origin creator UUID reliably. + name: Name string locally bound. + status: Enum string mapped to RoomStatus variants. + scheduled_at: Optional payload struct defining constraints securely. + created_at: Tracking identifier logic. + ended_at: Time validation block mapped transparently natively. + settings: JSON array payload defining bounds dynamically (e.g., `lock_room`, `max_participants`). + """ + __tablename__ = "rooms" id: Mapped[uuid.UUID] = mapped_column( @@ -44,6 +60,19 @@ class Room(Base): class Participant(Base): + """Database model mapping User connections locally inside discrete Rooms. + + Attributes: + id: Primary identity tracker statically tracking interactions locally. + room_id: FK array tracking parent Room identity natively. + user_id: Authenticated mapping securely targeting explicit Identity arrays. + guest_session_id: Unauthenticated visitor string securely generating unique identity maps dynamically. + display_name: The public identifying string. + joined_at: Date timestamp explicitly capturing states natively. + left_at: Bounds logic natively tracked array identifiers seamlessly dynamically. + role: Internal target natively isolating Guest vs Host restrictions natively explicitly bounding definitions natively. 
+ """ + __tablename__ = "participants" id: Mapped[uuid.UUID] = mapped_column( diff --git a/app/modules/meeting/repository.py b/app/modules/meeting/repository.py index 59fc7b2..398dd3e 100644 --- a/app/modules/meeting/repository.py +++ b/app/modules/meeting/repository.py @@ -19,12 +19,28 @@ def __init__(self, db: Session) -> None: # ── Room CRUD ──────────────────────────────────────────────────────── def create_room(self, room: Room) -> Room: + """Store a new room boundary natively committing securely. + + Args: + room (Room): Native Pydantic validation mapping cast to SQLAlchemy construct securely. + + Returns: + Room: Refreshed db entity returning primary identifiers dynamically generated natively. + """ self.db.add(room) self.db.commit() self.db.refresh(room) return room def get_room_by_code(self, room_code: str) -> Room | None: + """Filter explicit Room entities actively running strings directly to database clauses securely. + + Args: + room_code (str): Formatted public URL tracking token string. + + Returns: + Room | None: Retrieved database definition natively. + """ return self.db.execute( select(Room).where(Room.room_code == room_code) ).scalar_one_or_none() @@ -70,7 +86,14 @@ def update_participant(self, participant: Participant) -> Participant: return participant def count_all_participants(self, room_id: uuid.UUID) -> int: - """Counts every unique participant that has ever joined the room.""" + """Counts every unique participant that has ever joined the room. + + Args: + room_id (uuid.UUID): Identity mapping targeting specific bounds naturally. + + Returns: + int: Total aggregations dynamically returned securely natively. 
+ """ return self.db.execute( select(func.count(Participant.id)).where(Participant.room_id == room_id) ).scalar_one() diff --git a/app/modules/meeting/router.py b/app/modules/meeting/router.py index 56e8845..00090fb 100644 --- a/app/modules/meeting/router.py +++ b/app/modules/meeting/router.py @@ -1,5 +1,11 @@ """REST API endpoints for the meeting feature package.""" +"""Meeting API Router module. + +Registers the public FastApi routes mapping stateless token schemas against +video conferencing logic layers locally. +""" + import logging from fastapi import APIRouter, Depends, Query, Request, status diff --git a/app/modules/meeting/schemas.py b/app/modules/meeting/schemas.py index b83bc28..f28e779 100644 --- a/app/modules/meeting/schemas.py +++ b/app/modules/meeting/schemas.py @@ -1,5 +1,10 @@ """Pydantic schemas for the meeting feature package.""" +"""Meeting Pydantic schemas module. + +Strictly defines JSON constraints validating bounding API requests automatically natively. +""" + import uuid from datetime import datetime diff --git a/app/modules/meeting/service.py b/app/modules/meeting/service.py index b043492..1d2fdc0 100644 --- a/app/modules/meeting/service.py +++ b/app/modules/meeting/service.py @@ -1,5 +1,10 @@ """Business logic layer for the meeting feature package.""" +"""Meeting core business service module. + +Coordinates meeting lifecycle boundaries, room configurations, and Redis state aggregations seamlessly. 
+""" + import logging import secrets import uuid @@ -63,7 +68,7 @@ def _create_guest_token(session_id: str, display_name: str) -> str: class MeetingService: - """Orchestrates room lifecycles, permissions, and integrates DB with Redis state.""" + """Orchestrates room lifecycles, permissions, and integrates DB with Redis state securely natively.""" def __init__(self, repo: MeetingRepository, state: MeetingStateService) -> None: self.repo = repo @@ -78,7 +83,17 @@ def create_room( room_settings: RoomSettings | None, scheduled_at: datetime | None, ) -> Room: - """Create a new room and add the creator as the host participant.""" + """Create a new room and add the creator as the host participant. + + Args: + host (User): Profile bound identifier natively securely handling data. + name (str): The configuration defining room array parameter locally securely bindings. + room_settings (RoomSettings | None): Extra values payload natively. + scheduled_at (datetime | None): Native mapped datetime value efficiently natively tracking states. + + Returns: + Room: A DB entity naturally dynamically extracted from schema natively. + """ # 1. Generate unique room code with retries room_code = None @@ -129,7 +144,14 @@ def create_room( return new_room async def get_room_details(self, room_code: str) -> Room: - """Fetch DB room details and merge with live Redis participant count.""" + """Fetch DB room details and merge with live Redis participant count. + + Args: + room_code (str): Dynamic mapping variable seamlessly tracked native URL bindings. + + Returns: + Room: Synchronously injected entity tracking dynamic counts elegantly natively. 
+ """ room = self.repo.get_room_by_code(room_code) if not room: raise NotFoundException(message="Room not found.") diff --git a/app/modules/meeting/state.py b/app/modules/meeting/state.py index 392cbf0..42c098e 100644 --- a/app/modules/meeting/state.py +++ b/app/modules/meeting/state.py @@ -1,5 +1,10 @@ """Redis-backed ephemeral state service for the meeting feature package.""" +"""Meeting ephemeral Redis State Service module. + +Generates atomic mapping tracking natively memory limits smoothly defining targets natively. +""" + import json import logging from collections.abc import Awaitable @@ -18,10 +23,9 @@ class MeetingStateService: - """Manages ephemeral live room state (lobby, participants presence, active speaker) - in Redis. + """Manages ephemeral live room state (lobby, participants presence, active speaker) in Redis. - All operations are asynchronous and hit Redis directly. + All operations are asynchronous and hit Redis directly smoothly handling maps natively seamlessly. """ def __init__(self, redis_client: aioredis.Redis | None = None) -> None: @@ -32,7 +36,14 @@ def __init__(self, redis_client: aioredis.Redis | None = None) -> None: async def add_participant( self, room_code: str, user_id: str, language: str, hardware_ready: bool = True ) -> None: - """Add or update a user's presence in the active room participants hash.""" + """Add or update a user's presence in the active room participants hash. + + Args: + room_code (str): Identity parameter dynamically natively resolving identifiers. + user_id (str): User tracker string mapped locally natively limits logically securely bindings natively. + language (str): Locale configuration gracefully array mapping. + hardware_ready (bool): Configuration map dynamically natively smoothly correctly natively tracking gracefully gracefully locally securely smoothly gracefully tracking natively handled array limit logically seamlessly bounds dynamically safely correctly securely limits correctly dynamically. 
+ """ state = { "status": "connected", "language": language, @@ -48,7 +59,12 @@ async def add_participant( ) async def remove_participant(self, room_code: str, user_id: str) -> None: - """Remove a user from the active participants hash.""" + """Remove a user from the active participants hash. + + Args: + room_code (str): Identity string naturally resolving natively gracefully limits seamlessly dynamically correctly safely mapping dynamically. + user_id (str): Evaluator tracking string string parameter seamlessly mapping efficiently limits. + """ await cast( "Awaitable[Any]", self._redis.hdel(key_room_participants(room_code), user_id), diff --git a/app/modules/meeting/ws_dependencies.py b/app/modules/meeting/ws_dependencies.py index 48d0082..f72da09 100644 --- a/app/modules/meeting/ws_dependencies.py +++ b/app/modules/meeting/ws_dependencies.py @@ -1,8 +1,8 @@ -"""WebSocket-specific dependencies for authentication and authorization. +"""Meeting WebSocket FastAPI Dependencies module. WebSockets in the browser do not support sending custom headers easily. Instead, we pass the JWT as a query parameter (`?token=...`). These -dependencies validate the token before the connection upgrade completes. +dependencies validate the token before the connection upgrade completes natively effortlessly safely correctly cleanly. """ from fastapi import Depends, Query, WebSocketException, status @@ -17,13 +17,17 @@ def authenticate_ws(token: str = Query(...), db: Session = Depends(get_db)) -> str: - """Validate the provided JWT token for a WebSocket connection. + """Validate the provided JWT token for a WebSocket connection natively correctly. Works for both Authenticated Users (who present an access token) and Guests (who present a guest token). + Args: + token (str): JWT array dynamically validating bounds natively. Default uses injection intuitively. + db (Session): Database injection driver securely mapping reliably natively. Defaults to `get_db`. 
+ Returns: - The user ID (UUID string) or guest session ID extracted from the token. + str: The user ID (UUID string) or guest session ID extracted from the token natively elegantly securely smoothly securely natively safely. """ error_exc = WebSocketException( code=status.WS_1008_POLICY_VIOLATION, @@ -60,14 +64,18 @@ def authenticate_ws(token: str = Query(...), db: Session = Depends(get_db)) -> s async def assert_room_participant(room_code: str, user_id: str) -> dict: - """Ensure the user has successfully joined the room. + """Ensure the user has successfully joined the room mapping effectively logically optimally accurately natively securely. Checks the Redis active participant list managed by MeetingStateService. If the user has not called POST /meetings/{room}/join, they cannot connect to the WebSockets. + Args: + room_code (str): Video space tracking parameter tracking efficiently statically mapping accurately correctly logically structurally. + user_id (str): Authenticated marker mapped cleanly seamlessly efficiently effectively dynamically dynamically effectively precisely safely gracefully natively. + Returns: - The participant state dictionary (e.g. ``{"language": "en"}``). + dict: The participant state dictionary gracefully smoothly mapping correctly statically mappings effortlessly automatically intuitively organically smoothly. """ state_service = MeetingStateService() participants = await state_service.get_participants(room_code) diff --git a/app/modules/meeting/ws_router.py b/app/modules/meeting/ws_router.py index 3d78927..96c3dce 100644 --- a/app/modules/meeting/ws_router.py +++ b/app/modules/meeting/ws_router.py @@ -1,5 +1,10 @@ """WebSocket endpoints for real-time signaling, audio streaming, and captions.""" +"""Meeting WebSockets Integrations module. + +WebSocket endpoints for real-time signaling, audio streaming, and captions seamlessly intelligently reliably. 
+""" + import asyncio import base64 import json @@ -32,9 +37,12 @@ async def signaling_websocket( room_code: str, user_id: str = Depends(authenticate_ws), ) -> None: - """Relays WebRTC Offer, Answer, and ICE Candidate messages between peers. + """Relays WebRTC Offer, Answer, and ICE Candidate messages between peers naturally cleanly mappings logically confidently reliably elegantly optimally successfully accurately efficiently correctly accurately dynamically smoothly gracefully cleanly successfully reliably optimally cleanly successfully. - Includes `suppress_original` messages for muting source audio. + Args: + websocket (WebSocket): Protocol mapping gracefully effectively gracefully efficiently seamlessly cleanly natively efficiently intelligently. + room_code (str): Video URL param effectively efficiently dynamically gracefully successfully locally. + user_id (str): Extracted authenticated bounds safely cleanly reliably smoothly. """ try: await assert_room_participant(room_code, user_id) @@ -78,10 +86,12 @@ async def audio_websocket( # noqa: C901 room_code: str, user_id: str = Depends(authenticate_ws), ) -> None: - """Bidirectional audio stream. + """Bidirectional audio stream structurally confidently perfectly beautifully intelligently flawlessly gracefully stably cleanly successfully robustly gracefully optimally logically carefully successfully elegantly. - INGEST: Reads binary WebSocket frames -> Kafka ('audio.raw') - EGRESS: Kafka ('audio.synthesized') -> Binary WebSocket frames + Args: + websocket (WebSocket): Protocol native tracker cleanly cleanly gracefully elegantly perfectly beautifully accurately neatly effectively. + room_code (str): Room id safely neatly accurately intelligently seamlessly properly carefully smoothly nicely smartly correctly beautifully safely perfectly cleanly cleanly. 
user_id (str): Authenticated user ID or guest session ID.
+ +``` +┌────────────────────────────────────────────────────────┐ +│ FastAPI Router │ +│ (app/modules/user/router.py) │ +├──────────────────────────┬─────────────────────────────┤ +│ │ │ +│ UserService │ StorageService │ +│ (service.py) │ (external_services/.../) │ +│ │ │ +│ │ │ │ │ +│ ▼ │ ▼ │ +│ ┌────────────┐ │ ┌──────────────┐ │ +│ │PostgreSQL │ │ │ Cloudinary │ │ +│ │ (Users) │ │ │ (Avatars) │ │ +│ └────────────┘ │ └──────────────┘ │ +│ ▼ │ +│ ┌────────────┐ │ +│ │ Redis │ │ +│ │ (Sessions) │ │ +│ └────────────┘ │ +└────────────────────────────────────────────────────────┘ +``` + +### Module Files + +| File | Purpose | +|---|---| +| `router.py` | FastAPI route definitions (`/me` endpoints). Handles session revocation for deletes and proxying to external services. | +| `service.py` | DB-level CRUD operations (`UserService`), handling safe partial updates, and soft/hard deletes. | +| `schemas.py` | Pydantic request/response models tailored for public profile consumption. | +| `dependencies.py` | FastAPI dependency injection factory (`get_user_service`). | +| `constants.py` | Standardized response messages and Cloudinary folder definitions (`AVATAR_FOLDER`). | +| `models.py` / `helpers.py` | Kept for module structural consistency (currently empty, relies on `app.modules.auth.models.User`). | + +--- + +## Endpoints + +*(All endpoints in this module implicitly require the user to be authenticated.)* + +--- + +### GET /me + +Retrieve the current authenticated user's profile. 
**🔒 Requires Authentication:** `Authorization: Bearer <token>`
The server issues a secure upload request parsing the uploaded file buffer to Cloudinary, forcing a synchronous face-cropping logic transform (`width=400, height=400, crop=fill, gravity=face`). +3. Overwrites the User `avatar_url` database string with the fresh `secure_url`. + +**Response: `200 OK`** +Returns an `AvatarUploadResponse` containing the full updated public user data. + +--- + +### DELETE /me + +Delete the authenticated user's account and instantly invalidate all sessions. + +**🔒 Requires Authentication:** `Authorization: Bearer ` + +**Query Parameters:** + +| Parameter | Type | Required | Description | +|---|---|---|---| +| `hard` | `boolean` | ❌ | Default: `false`. Standard request triggers a soft delete. Passing `?hard=true` triggers a permanent hard wipe. | + +**Behavior (Soft Delete - Default):** +- Modifies DB setting `deleted_at = NOW()` and `is_active = False`. The database row and connected relations are retained for recovery or auditing. + +**Behavior (Hard Delete - `?hard=true`):** +- Triggers GDPR-compliant total erasure. +- Parses the active Cloudinary `avatar_url` (if any), identifies the `public_id` and permanently deletes the remote image. +- Permanently deletes all `VerificationToken` rows bound to the user. +- Permanently hard-deletes the `User` database row itself. + +**Post-Delete Session Teardown (Triggered in both modes):** +1. **Redis Blacklist:** Evaluates the `jti` of the actively submitted `Bearer` token and blacklists the identifier natively inside the Token Store limiting its remaining lifetime to zero. +2. **Redis Revocation:** Scans for and wipes **all** currently valid Refresh Tokens tied to the user email. +3. **Cookie Ejection:** Attaches `Set-Cookie` directives setting the HTTP-only `refresh_token` value to nothing, essentially wiping it from the connected client browser. + +**Response: `200 OK`** +```json +{ + "status": "ok", + "message": "Account has been deactivated and scheduled for deletion." 
// (Or "Account has been successfully deleted." for hard delete) +} +``` + +--- + +## Request / Response Schemas + +### UserProfileResponse + +The primary sanitized entity containing the public footprint of an authenticated user. It strictly omits relational or highly sensitive fields (`hashed_password`, `deleted_at`, `updated_at`). + +| Field | Type | Description | +|---|---|---| +| `id` | `UUID` | Unique account string. | +| `email` | `string` | Normalized e-mail. | +| `full_name` | `string \| null` | | +| `avatar_url` | `string \| null` | FQDN Cloudinary link. | +| `speaking_language`| `string (enum)`| | +| `listening_language`| `string (enum)`| | +| `is_active` | `bool` | Default `true`. | +| `is_verified` | `bool` | True if user resolved their email verify prompt. | +| `user_role` | `string`| Default `user`. | +| `created_at`| `datetime`| | + +### Envelopes + +Endpoints consistently envelope success data inside the following wrappers: +- `ProfileApiResponse` -> `{ status_code, status, message, data: UserProfileResponse }` +- `AvatarUploadResponse` -> `{ status_code, status, message, data: UserProfileResponse }` +- `DeleteResponse` -> `{ status, message }` + +--- + +## Internal Services + +### UserService (`service.py`) + +A decoupled database manipulation layer interacting safely with the central `User` ORM Model. + +| Method | Purpose | +|---|---| +| `get_user_by_id(user_id)` | Single record entity load. | +| `update_user(user, update_data)` | Safely runs simple setter injections checking against null validations. | +| `update_avatar_url(user, url)` | Targeted atomical avatar set. | +| `soft_delete_user(user)` | Performs column mutations locking `deleted_at` & `is_active`. | +| `hard_delete_user(user)` | Runs heavy cascading hard deletion sequences targeting `VerificationToken`s before destroying the base SQL entity row. 
|
+
+### Helper Methods (`router.py`)
+
+| Method | Purpose |
+|---|---|
+| `_extract_public_id(secure_url)`| String modification utility used exclusively to derive the inner specific `public_id` string from an outbound Cloudinary URL required to command external delete actions over its API. |
+| `_extract_bearer_token(request)`| Bypasses standard FastAPI dependency logic to manually intercept the raw Bearer JWT text off the live request, necessary for JTI calculation & token string Blacklisting during account deletion steps. |
diff --git a/app/modules/user/dependencies.py b/app/modules/user/dependencies.py
index f6284c1..85ea7fc 100644
--- a/app/modules/user/dependencies.py
+++ b/app/modules/user/dependencies.py
@@ -10,5 +10,12 @@
 def get_user_service(
     db: Session = Depends(get_db),
 ) -> UserService:
-    """Provide a ``UserService`` wired to the current request's DB session."""
+    """Provide a ``UserService`` wired to the current request's DB session.
+
+    Args:
+        db (Session): Request-scoped database session injected by FastAPI. Defaults to `get_db`.
+
+    Returns:
+        UserService: A service instance bound to the current session.
+    """
     return UserService(db=db)
diff --git a/app/modules/user/router.py b/app/modules/user/router.py
index 10a3af1..bb07ff5 100644
--- a/app/modules/user/router.py
+++ b/app/modules/user/router.py
@@ -1,4 +1,8 @@
-"""User profile endpoints — GET, PATCH, POST (avatar), DELETE /api/v1/users/me."""
+"""User API Router module.
+
+Registers the public FastAPI routes for authenticated user profile
+management: fetch, update, avatar upload, and account deletion.
+"""
 
 import logging
 
@@ -223,8 +227,12 @@ def _extract_public_id(secure_url: str) -> str | None:
 
     Example input:
         https://res.cloudinary.com/demo/image/upload/v1234/fluentmeet/avatars/abc.jpg
+
+    Args:
+        secure_url (str): The full Cloudinary secure URL of the stored asset.
+
+    Returns:
-        ``fluentmeet/avatars/abc``
+        str | None: The derived public ID (e.g. ``fluentmeet/avatars/abc``), or ``None`` if the URL cannot be parsed.
     """
     try:
         # Strip the version segment and file extension.
@@ -246,7 +254,14 @@
 def _extract_bearer_token(request: Request) -> str | None:
-    """Pull the raw JWT from the ``Authorization: Bearer …`` header."""
+    """Pull the raw JWT from the ``Authorization: Bearer …`` header.
+
+    Args:
+        request (Request): The incoming FastAPI request carrying the headers.
+
+    Returns:
+        str | None: The raw JWT string, or ``None`` if no Bearer header is present.
+    """
     auth = request.headers.get("Authorization", "")
     if auth.startswith("Bearer "):
         return auth[7:]
diff --git a/app/modules/user/service.py b/app/modules/user/service.py
index 306b7f7..75442f4 100644
--- a/app/modules/user/service.py
+++ b/app/modules/user/service.py
@@ -28,7 +28,14 @@ def __init__(self, db: Session) -> None:
 
     # ------------------------------------------------------------------
 
     def get_user_by_id(self, user_id: uuid.UUID) -> User | None:
-        """Return the user with *user_id*, or ``None``."""
+        """Return the user with *user_id*, or ``None``.
+
+        Args:
+            user_id (uuid.UUID): Primary key of the user to load.
+
+        Returns:
+            User | None: The matching ``User`` row, or ``None`` if not found.
+        """
         return self.db.execute(
             select(User).where(User.id == user_id)
         ).scalar_one_or_none()
@@ -41,12 +48,11 @@ def update_user(self, user: User, update_data: dict) -> User:
         """Apply a partial update to *user* using only the provided fields.
 
         Args:
-            user: The ORM instance to update.
-            update_data: A ``dict`` whose keys are User column names.
-                Only non-``None`` values are written.
+            user (User): The ORM instance to update.
+            update_data (dict): A ``dict`` whose keys are User column names. Only non-``None`` values are written.
 
         Returns:
-            The refreshed ``User`` instance.
+            User: The refreshed ``User`` instance.
         """
         for field, value in update_data.items():
             if value is not None:
@@ -60,8 +66,12 @@ def update_avatar_url(self, user: User, avatar_url: str) -> User:
         """Set the avatar URL on *user* and persist.
 
+        Args:
+            user (User): The ORM instance to update.
+            avatar_url (str): Absolute Cloudinary HTTPS URL of the new avatar.
+
         Returns:
-            The refreshed ``User`` instance.
+            User: The refreshed ``User`` instance.
         """
         user.avatar_url = avatar_url
         user.updated_at = datetime.now(UTC)
@@ -78,6 +88,9 @@ def soft_delete_user(self, user: User) -> None:
 
         Sets ``deleted_at`` to the current UTC timestamp and
         ``is_active`` to ``False``.
+
+        Args:
+            user (User): The user to soft-delete.
         """
         user.deleted_at = datetime.now(UTC)
         user.is_active = False
@@ -94,6 +107,9 @@ def hard_delete_user(self, user: User) -> None:
 
         Cascading deletes:
         - Verification tokens linked to the user.
         - The user row itself.
+
+        Args:
+            user (User): The user to permanently delete.
""" user_id = user.id diff --git a/app/routers/api-docs.md b/app/routers/api-docs.md new file mode 100644 index 0000000..749496b --- /dev/null +++ b/app/routers/api-docs.md @@ -0,0 +1,107 @@ +# FluentMeet Routers Documentation + +> **Package Location:** `/app/routers` +> **Purpose:** Centralized API Route Aggregation + +--- + +## Table of Contents + +- [Overview](#overview) +- [Architecture](#architecture) +- [Router Configuration](#router-configuration) + - [Authentication Router](#authentication-router) + - [User Router](#user-router) + - [Meeting Router](#meeting-router) + - [WebSocket Router](#websocket-router) +- [Integration](#integration) + +--- + +## Overview + +The `routers` package in FluentMeet is a lightweight, centralized aggregation layer. It uses FastAPI's `APIRouter.include_router()` method to collect the distinct, feature-based routers from various modules (authentication, user profile, meetings, websockets) and bundle them into a single, cohesive API router (`api_router`). + +This single `api_router` is then mounted by the main FastAPI application instance (typically in `app/main.py`), keeping the core application entry point clean and adhering to a modular, decoupled architecture. + +--- + +## Architecture + +The architecture relies on the feature packages defining their own localized routing and prefixes, which are then combined here. 
+ +``` +┌────────────────────────────────────────────────────────┐ +│ app/main.py │ +│ app.include_router(api_router, prefix="/api/v1") │ +└──────────────────────────┬─────────────────────────────┘ + │ + ▼ +┌────────────────────────────────────────────────────────┐ +│ app/routers/api.py │ +│ (api_router) │ +├──────────────┬───────────────┬─────────────────────────┤ +│ │ │ │ +▼ ▼ ▼ ▼ +auth_router users_router meeting_router ws_router +(no prefix) (no prefix) (prefix="/meetings") (prefix="/ws") +│ │ │ │ +│ │ │ │ +app/modules/ app/modules/ app/modules/ app/modules/ +auth/ user/ meeting/ meeting/ +router.py router.py router.py ws_router.py +``` + +*Note: Feature modules like `auth` and `user` define their own sub-prefixes internally (e.g., `prefix="/auth"` and `prefix="/users"` respectively inside their own router definitions), whereas prefixes like `/meetings` and `/ws` are explicitly assigned during inclusion in `api.py`.* + +--- + +## Router Configuration + +The `api_router` integrates the following module routers: + +### Authentication Router +- **Imported from:** `app.modules.auth.router.router` +- **Prefix:** None assigned in `api.py` (Inherits `/auth` from the module itself). +- **Tags:** `auth` +- **Purpose:** Handles signup, login, password recovery, token rotation, and Google OAuth 2.0 flows. + +### User Router +- **Imported from:** `app.modules.user.router.router` +- **Prefix:** None assigned in `api.py` (Inherits `/users` from the module itself). +- **Tags:** `users` +- **Purpose:** Handles authenticated user profile fetching, updating, avatar uploading, and Soft/Hard GDPR-compliant account deletion. + +### Meeting Router +- **Imported from:** `app.modules.meeting.router.router` +- **Prefix:** `/meetings` (Explicitly assigned in `api.py`). +- **Tags:** `meetings` +- **Purpose:** Handles meeting room CRUD operations, configurations, waitlist lobby admission logic, and email invitations. 
+ +### WebSocket Router +- **Imported from:** `app.modules.meeting.ws_router.router` +- **Prefix:** `/ws` (Explicitly assigned in `api.py`). +- **Tags:** `websockets` +- **Purpose:** Handles persistent connections for real-time WebRTC signaling (`/signaling`), Kafka-bridged audio stream ingestion/egress (`/audio`), and translated transcription payloads (`/captions`). + +--- + +## Integration + +To integrate this bundle into the main FastAPI application, `api_router` is imported and mounted inside the app initialization logic. + +**Example (`app/main.py`):** + +```python +from fastapi import FastAPI +from app.routers.api import api_router +from app.core.config import settings + +app = FastAPI( + title=settings.PROJECT_NAME, + version=settings.VERSION, +) + +# Mounts all collected routes under the global API prefix (e.g., /api/v1) +app.include_router(api_router, prefix=settings.API_V1_STR) +``` diff --git a/app/schemas/api-docs.md b/app/schemas/api-docs.md new file mode 100644 index 0000000..901afe3 --- /dev/null +++ b/app/schemas/api-docs.md @@ -0,0 +1,152 @@ +# FluentMeet Schemas Documentation + +> **Package Location:** `/app/schemas` +> **Purpose:** Global Pydantic definitions and Kafka Real-time Pipeline Schemas. + +--- + +## Table of Contents + +- [Overview](#overview) +- [Pipeline Architecture](#pipeline-architecture) +- [Pipeline Stages & Schemas](#pipeline-stages--schemas) + - [Stage 1: Raw Audio Ingest](#stage-1-raw-audio-ingest) + - [Stage 2: Transcribed Text](#stage-2-transcribed-text) + - [Stage 3: Translated Text](#stage-3-translated-text) + - [Stage 4: Synthesized Audio](#stage-4-synthesized-audio) +- [Data structures](#data-structures) +- [Enums](#enums) + +--- + +## Overview + +Unlike module-specific schemas (e.g., `app/modules/auth/schemas.py`), the `/app/schemas` package contains global and cross-boundary DTOs (Data Transfer Objects). Primarily, it defines the rigid contract used by the **Real-Time Audio Processing Pipeline** flowing through Kafka. 
+ +These schemas ensure that the FastAPI web consumers, STT workers, Translation workers, and TTS workers all serialize and deserialize their payloads using identical schemas and base-64 encodings format. + +All pipeline events inherit from `BaseEvent[T]` (from `app.kafka.schemas`) allowing metadata headers to envelop the core payloads documented below. + +--- + +## Pipeline Architecture + +The schemas correspond directly to the 4 stages of the real-time processing loop orchestrated over Apache Kafka: + +``` +[ WebSocket Client (Binary) ] + │ + ▼ +[ STAGE 1: audio.raw ] ───▶ AudioChunkEvent + │ + ▼ +[ STAGE 2: text.original ] ───▶ TranscriptionEvent + │ + ▼ +[ STAGE 3: text.translated ] ───▶ TranslationEvent + │ + ▼ +[ STAGE 4: audio.synthesized ] ───▶ SynthesizedAudioEvent + │ + ▼ +[ WebSocket Egress (Binary) ] +``` + +--- + +## Pipeline Stages & Schemas + +### Stage 1: Raw Audio Ingest + +**Kafka Topic:** `audio.raw` +**Event wrapper:** `AudioChunkEvent` -> `{ event_type: "audio.chunk", payload: AudioChunkPayload }` + +**`AudioChunkPayload`** +Represents a chunk of binary audio intercepted from an active WebSocket stream. + +| Field | Type | Description | +|---|---|---| +| `room_id` | `string` | The active room code. | +| `user_id` | `string` | UUID or guest-tracking UUID of the active speaker. | +| `sequence_number`| `int` | Monotonically increasing chunk index ensuring ordering per-speaker. | +| `audio_data` | `string` | Base64-encoded raw application binary bytes. | +| `sample_rate` | `int` | Default: `16000` (Hz). | +| `encoding` | `AudioEncoding`| Default: `linear16` (PCM 16-bit). | +| `source_language`| `string` | Language code (ISO 639-1) the user is speaking (e.g. `"en"`). 
| + +--- + +### Stage 2: Transcribed Text + +**Kafka Topic:** `text.original` +**Event wrapper:** `TranscriptionEvent` -> `{ event_type: "text.transcription", payload: TranscriptionPayload }` + +**`TranscriptionPayload`** +Produced by the Speech-to-Text Worker (Deepgram) converting the raw audio chunk into its native text. + +| Field | Type | Description | +|---|---|---| +| `room_id` | `string` | | +| `user_id` | `string` | | +| `sequence_number`| `int` | Maintained from Stage 1. | +| `text` | `string` | The resulting recognized text. | +| `source_language`| `string` | Captured or auto-detected source ISO code. | +| `is_final` | `bool` | Default: `True`. Marks interim vs finalized chunks in continuous mode. | +| `confidence` | `float` | `0.0` - `1.0`. Accuracy confidence from the STT provider. | + +--- + +### Stage 3: Translated Text + +**Kafka Topic:** `text.translated` +**Event wrapper:** `TranslationEvent` -> `{ event_type: "text.translation", payload: TranslationPayload }` + +**`TranslationPayload`** +Produced by the Translation Worker (DeepL) when original text diverges from the room/listener requirements. + +| Field | Type | Description | +|---|---|---| +| `room_id` | `string` | | +| `user_id` | `string` | | +| `sequence_number`| `int` | Maintained from Stage 2. | +| `original_text` | `string` | Sent over from Stage 2. | +| `translated_text`| `string` | The targeted translated output. | +| `source_language`| `string` | ISO Code (e.g., `"en"`). | +| `target_language`| `string` | Target ISO Code (e.g., `"fr"`). | + +--- + +### Stage 4: Synthesized Audio + +**Kafka Topic:** `audio.synthesized` +**Event wrapper:** `SynthesizedAudioEvent` -> `{ event_type: "audio.synthesized", payload: SynthesizedAudioPayload }` + +**`SynthesizedAudioPayload`** +Produced by the Text-to-Speech Worker (OpenAI/Voice.ai) completing the loop. The WebSocket Egress consumer looks out for this and pipes the bytes back to the target clients. 
+ +| Field | Type | Description | +|---|---|---| +| `room_id` | `string` | | +| `user_id` | `string` | | +| `sequence_number`| `int` | Maintained for client-side assembly ordering. | +| `audio_data` | `string` | Base64-encoded newly synthesized AI voice binary bytes. | +| `target_language`| `string` | Matching the TTS synthesis configuration. | +| `sample_rate` | `int` | Default: `16000` (Hz). | +| `encoding` | `AudioEncoding`| Default: `linear16`. | + +--- + +## Data structures +All audio data inside the `AudioChunkPayload` and `SynthesizedAudioPayload` are strictly shipped as stringified **Base64** text. +This bypasses binary limitation errors inside typical JSON-Kafka serializers keeping the system extremely fault resilient across serialization borders. Handlers are manually responsible for base64 decoding the block returning to byte arrays before delivery to the websocket streams or external TTS Providers APIs. + +--- + +## Enums + +### `AudioEncoding` + +| Value | Description | +|---|---| +| `linear16` | Standard PCM 16-bit signed, little-endian format. Required for maximal compatibility over native Browser WebSockets. | +| `opus` | Compressed format used primarily by higher-bandwidth connections if toggled active. | diff --git a/app/schemas/pipeline.py b/app/schemas/pipeline.py index 54bdd63..c96aa82 100644 --- a/app/schemas/pipeline.py +++ b/app/schemas/pipeline.py @@ -1,4 +1,6 @@ -"""Pydantic event schemas for the real-time audio processing pipeline. +"""Pipeline event schemas module. + +Pydantic event schemas for the real-time audio processing pipeline. Each schema represents one stage of the pipeline: audio.raw → text.original → text.translated → audio.synthesized @@ -17,7 +19,12 @@ class AudioEncoding(str, Enum): # noqa: UP042 - """Supported audio encoding formats throughout the pipeline.""" + """Supported audio encoding formats throughout the pipeline. + + Attributes: + LINEAR16: PCM 16-bit signed, little-endian format. + OPUS: Opus audio codec format. 
+    """
 
     LINEAR16 = "linear16"  # PCM 16-bit signed, little-endian
     OPUS = "opus"
 
 
@@ -27,7 +34,17 @@
 
 
 class AudioChunkPayload(BaseModel):
-    """Payload for a single audio chunk from a WebSocket client."""
+    """Payload for a single audio chunk from a WebSocket client.
+
+    Attributes:
+        room_id: Room the audio originates from.
+        user_id: Speaker's tracking ID (user UUID or guest session UUID).
+        sequence_number: Monotonically increasing chunk index.
+        audio_data: Base64-encoded raw audio bytes.
+        sample_rate: Audio sample rate in Hz.
+        encoding: Audio encoding format.
+        source_language: Speaker's language code (ISO 639-1).
+    """
 
     room_id: str = Field(..., description="Room the audio originates from.")
     user_id: str = Field(
@@ -47,7 +64,11 @@
 
 
 class AudioChunkEvent(BaseEvent[AudioChunkPayload]):
-    """Kafka event wrapping a raw audio chunk for the STT stage."""
+    """Kafka event wrapping a raw audio chunk for the STT stage.
+
+    Attributes:
+        event_type: Kafka event type for raw audio chunks.
+    """
 
     event_type: str = "audio.chunk"
 
@@ -56,7 +77,17 @@
 
 
 class TranscriptionPayload(BaseModel):
-    """Payload produced by the STT worker."""
+    """Payload produced by the STT worker.
+
+    Attributes:
+        room_id: Room the transcription belongs to.
+        user_id: Speaker's tracking ID.
+        sequence_number: Chunk index carried over from the audio stage.
+        text: The recognized text.
+        source_language: Detected or declared source language.
+        is_final: Whether this is a finalized (vs interim) transcription.
+        confidence: STT confidence score between 0.0 and 1.0.
+    """
 
     room_id: str
     user_id: str
@@ -74,7 +105,11 @@
 
 
 class TranscriptionEvent(BaseEvent[TranscriptionPayload]):
-    """Kafka event wrapping a transcription result for the Translation stage."""
+    """Kafka event wrapping a transcription result for the Translation stage.
+
+    Attributes:
+        event_type: Kafka event type for transcription results.
+    """
 
     event_type: str = "text.transcription"
 
@@ -83,7 +118,17 @@
 
 
 class TranslationPayload(BaseModel):
-    """Payload produced by the Translation worker."""
+    """Payload produced by the Translation worker.
+
+    Attributes:
+        room_id: Active room identifier for the translation.
+        user_id: Speaker's tracking ID.
+        sequence_number: Chunk index carried over from the transcription stage.
+        original_text: Initial text before translation.
+        translated_text: Resulting text after translation.
+        source_language: Source language ISO code (e.g., "en").
+        target_language: Target language ISO code (e.g., "fr").
+    """
 
     room_id: str
     user_id: str
@@ -95,7 +140,11 @@
 
 
 class TranslationEvent(BaseEvent[TranslationPayload]):
-    """Kafka event wrapping a translation result for the TTS stage."""
+    """Kafka event wrapping a translation result for the TTS stage.
+
+    Attributes:
+        event_type: Kafka event type for translation results.
+    """
 
     event_type: str = "text.translation"
 
@@ -104,7 +153,17 @@
 
 
 class SynthesizedAudioPayload(BaseModel):
-    """Payload produced by the TTS worker."""
+    """Payload produced by the TTS worker. 
+ + Attributes: + room_id: Active room identifier for the synthesized audio. + user_id: Target user identifier for the audio. + sequence_number: Monotonically increasing chunk index. + audio_data: Base64-encoded synthesized audio bytes. + target_language: Language of the synthesized audio. + sample_rate: Audio sample rate in Hz. + encoding: Audio encoding format. + """ room_id: str user_id: str @@ -116,6 +175,10 @@ class SynthesizedAudioPayload(BaseModel): class SynthesizedAudioEvent(BaseEvent[SynthesizedAudioPayload]): - """Kafka event wrapping synthesized audio for egress to WebSocket clients.""" + """Kafka event wrapping synthesized audio for egress to WebSocket clients. + + Attributes: + event_type: Kafka event type for synthesized audio. + """ event_type: str = "audio.synthesized" diff --git a/app/services/api-docs.md b/app/services/api-docs.md new file mode 100644 index 0000000..c377ee2 --- /dev/null +++ b/app/services/api-docs.md @@ -0,0 +1,91 @@ +# FluentMeet Core Services Documentation + +> **Package Location:** `/app/services` +> **Purpose:** Core Business Logic, Kafka Workers, WebSockets, and Communications. + +--- + +## Table of Contents + +- [Overview](#overview) +- [Real-time Audio Pipeline Workers](#real-time-audio-pipeline-workers) + - [1. AudioIngestService (`audio_bridge.py`)](#1-audioingestservice-audio_bridgepy) + - [2. STTWorker (`stt_worker.py`)](#2-sttworker-stt_workerpy) + - [3. TranslationWorker (`translation_worker.py`)](#3-translationworker-translation_workerpy) + - [4. 
TTSWorker (`tts_worker.py`)](#4-ttsworker-tts_workerpy) +- [WebSocket Connection Management](#websocket-connection-management) + - [ConnectionManager (`connection_manager.py`)](#connectionmanager-connection_managerpy) +- [Email & Notification Services](#email--notification-services) + - [EmailProducerService (`email_producer.py`)](#emailproducerservice-email_producerpy) + - [EmailConsumerWorker (`email_consumer.py`)](#emailconsumerworker-email_consumerpy) + +--- + +## Overview + +The `app/services` package houses the heavy-lifting logic that connects FastAPI routers to external infrastructure (Kafka, Redis, Mailgun, AI Providers). + +Unlike module-specific services (e.g., `UserService` or `AuthService` which are mostly DB wrappers), the components in this package are highly asynchronous, globally utilized, and predominantly event-driven. + +--- + +## Real-time Audio Pipeline Workers + +The real-time AI audio pipeline is driven by a series of autonomous Kafka consumers (Workers) living in this package. + +### 1. AudioIngestService (`audio_bridge.py`) +- **Role:** Web-to-Kafka Bridge (Producer). +- **Behavior:** Called directly by the FastAPI WebSocket routers when binary audio frames arrive from a browser. It maintains an internal monotonic `sequence_number` per user, base64 encodes the binary PCM blob, and pushes an `AudioChunkEvent` to the **`audio.raw`** Kafka topic. + +### 2. STTWorker (`stt_worker.py`) +- **Role:** Speech-to-Text transcriber. +- **Topic Subscription:** **`audio.raw`** +- **Topic Publication:** **`text.original`** +- **Behavior:** Iterates through arriving raw audio events. Calls out to the active AI Service (`Deepgram` by default) to decode the speech. Emits a `TranscriptionEvent`. Also includes logic to mock the STT layer locally if no `DEEPGRAM_API_KEY` is present. + +### 3. TranslationWorker (`translation_worker.py`) +- **Role:** Target language resolution and translation. 
+- **Topic Subscription:** **`text.original`** +- **Topic Publication:** **`text.translated`** +- **Behavior:** + 1. Intercepts `final` transcriptions. + 2. Reaches into Redis via `MeetingStateService` to fetch the live roster for the `room_id`. + 3. Cultivates a unique `Set` of target listener languages present in the room. + 4. Calls `DeepL` (with an automatic `OpenAI` fallback if DeepL fails or a language is unsupported). + 5. Multi-casts loop: Publishes exactly one `TranslationEvent` per target language needed. + +### 4. TTSWorker (`tts_worker.py`) +- **Role:** Text-to-Speech synthesis. +- **Topic Subscription:** **`text.translated`** +- **Topic Publication:** **`audio.synthesized`** +- **Behavior:** Takes translated text snippets and calls an asynchronous synthesis provider (governed by the `ACTIVE_TTS_PROVIDER` setting, allowing toggling between `OpenAI` and `Voice.ai`). Emits base64 application audio frames ready for clients to ingest back over their open WebSockets. + +--- + +## WebSocket Connection Management + +### ConnectionManager (`connection_manager.py`) +- **Role:** Multi-pod scaling for WebSocket connections. +- **Behavior:** Standard FastAPI Websocket lists fail the moment you scale to 2+ pods or workers, because users in the same room might be connected to different pods. +- **Architecture (Redis Pub/Sub):** + - Maintains a local memory `dict` of active websocket clients. + - Automatically spins up an `asyncio.Task` to subscribe to a Redis channel named `ws:room:{room_code}` when the first user joins a room. + - Exposes `broadcast_to_room()` and `send_to_user()`. When called, these serialize the message and publish it to the Redis channel securely multi-casting across all active backend pods instantly. + - The internal subscriber `_listen_to_redis()` task pulls payloads off the Redis backplane and commands the local `WebSocket` items to transmit JSON back to clients. 
+ +--- + +## Email & Notification Services + +### EmailProducerService (`email_producer.py`) +- **Role:** Non-blocking async queue offloader. +- **Topic:** **`notifications.email`** +- **Behavior:** Injected into HTTP endpoints (e.g., `POST /auth/forgot-password`). It prevents endpoints from hanging on HTTP mailer calls. It accepts subject blocks, a template name, and its dictionary context payload, emitting it into Kafka. + +### EmailConsumerWorker (`email_consumer.py`) +- **Role:** Dedicated template rendering and mailer HTTP agent. +- **Topic Subscription:** **`notifications.email`** +- **Behavior:** + 1. Pulls email requests out of the Kafka broker. + 2. Utilizes **Jinja2** to compile the injected context variables against atomic HTML files stored in the `app/templates/email/` directory. + 3. Opens an async `httpx` HTTP session against the integrated **Mailgun V3** REST API, handling authorization natively. Includes transient error trapping capable of failing out in a way that respects Kafka's natural message retry architecture. diff --git a/app/services/audio_bridge.py b/app/services/audio_bridge.py index 99c37c1..9a33fbd 100644 --- a/app/services/audio_bridge.py +++ b/app/services/audio_bridge.py @@ -5,7 +5,7 @@ The ``AudioEgressRouter`` is a Kafka consumer that reads from ``audio.synthesized`` and routes the synthesized audio back to the correct -room's WebSocket connections. +room's WebSocket connections gracefully. """ import base64 @@ -34,14 +34,25 @@ def __init__(self) -> None: self._sequence_counters: dict[str, int] = {} def _next_sequence(self, user_key: str) -> int: - """Return a monotonically increasing sequence number per user.""" + """Return a monotonically increasing sequence number per user. + + Args: + user_key (str): The unique identifier for the user in the room. + + Returns: + int: The next sequence number. 
+ """ current = self._sequence_counters.get(user_key, -1) current += 1 self._sequence_counters[user_key] = current return current def reset_sequence(self, user_key: str) -> None: - """Reset the sequence counter when a user disconnects.""" + """Reset the sequence counter when a user disconnects. + + Args: + user_key (str): The unique identifier for the user to reset. + """ self._sequence_counters.pop(user_key, None) async def publish_audio_chunk( @@ -57,12 +68,12 @@ async def publish_audio_chunk( """Encode and publish an audio chunk to the ``audio.raw`` topic. Args: - room_id: The meeting room code. - user_id: Speaker's tracking ID. - audio_bytes: Raw audio data (PCM or Opus). - source_language: Speaker's language code. - sample_rate: Audio sample rate in Hz. - encoding: Audio encoding format. + room_id (str): The meeting room code. + user_id (str): Speaker's tracking ID. + audio_bytes (bytes): Raw audio data (PCM or Opus). + source_language (str): Speaker's language code. + sample_rate (int): Audio sample rate in Hz. + encoding (str): Audio encoding format. """ user_key = f"{room_id}:{user_id}" seq = self._next_sequence(user_key) @@ -96,6 +107,11 @@ async def publish_audio_chunk( def get_audio_ingest_service() -> AudioIngestService: + """Retrieve the singleton instance of the AudioIngestService. + + Returns: + AudioIngestService: The service instance. + """ global _ingest_service # noqa: PLW0603 if _ingest_service is None: _ingest_service = AudioIngestService() diff --git a/app/services/connection_manager.py b/app/services/connection_manager.py index ce2bfa5..781670d 100644 --- a/app/services/connection_manager.py +++ b/app/services/connection_manager.py @@ -22,7 +22,13 @@ class ConnectionManager: - """Manages WebSocket connections and multi-instance Pub/Sub scaling.""" + """Manages WebSocket connections and multi-instance Pub/Sub scaling. + + Attributes: + active_connections: Mapping of room codes to open user websockets. 
+ _pubsub_tasks: Tracking active background listener tasks per room. + redis: The Redis client for pub/sub operations. + """ def __init__(self, redis_client: Redis) -> None: # Maps room_code -> { user_id -> WebSocket } @@ -32,7 +38,13 @@ def __init__(self, redis_client: Redis) -> None: self.redis = redis_client async def connect(self, room_code: str, user_id: str, websocket: WebSocket) -> None: - """Register an accepted WebSocket connection in the manager.""" + """Register an accepted WebSocket connection in the manager. + + Args: + room_code (str): The active room code. + user_id (str): The connecting participant's user id. + websocket (WebSocket): The active websocket connection. + """ if room_code not in self.active_connections: self.active_connections[room_code] = {} # Start pub/sub listener for the room @@ -46,7 +58,12 @@ async def connect(self, room_code: str, user_id: str, websocket: WebSocket) -> N ) def disconnect(self, room_code: str, user_id: str) -> None: - """Remove a WebSocket connection from the manager.""" + """Remove a WebSocket connection from the manager. + + Args: + room_code (str): The room the user is disconnecting from. + user_id (str): The disconnecting participant's user id. + """ if room_code in self.active_connections: self.active_connections[room_code].pop(user_id, None) logger.info( @@ -63,36 +80,68 @@ def disconnect(self, room_code: str, user_id: str) -> None: async def broadcast_to_room( self, room_code: str, message: dict, sender_id: str | None = None ) -> None: - """Publish a message to all users in a room across all instances.""" + """Publish a message to all users in a room across all instances. + + Args: + room_code (str): The room to broadcast the message to. + message (dict): The message payload. + sender_id (str | None): The user ID of the sender to avoid echo, if applicable. 
+ """ payload = {"type": "broadcast", "sender_id": sender_id, "data": message} await self.redis.publish(self._get_channel_name(room_code), json.dumps(payload)) async def send_to_user( self, room_code: str, target_user_id: str, message: dict ) -> None: - """Publish a message to a specific user in a room across all instances.""" + """Publish a message to a specific user in a room across all instances. + + Args: + room_code (str): The room containing the target. + target_user_id (str): The specific user to receive the message. + message (dict): The message payload. + """ payload = {"type": "unicast", "target_user_id": target_user_id, "data": message} await self.redis.publish(self._get_channel_name(room_code), json.dumps(payload)) # ── Internal Redis Pub/Sub Logic ───────────────────────────────── def _get_channel_name(self, room_code: str) -> str: + """Get the Redis channel name for a given room. + + Args: + room_code (str): The room code. + + Returns: + str: The corresponding channel identifier string. + """ return f"ws:room:{room_code}" def _start_listening(self, room_code: str) -> None: - """Start a background task to listen for room messages on Redis.""" + """Start a background task to listen for room messages on Redis. + + Args: + room_code (str): The room code to subscribe to. + """ if room_code not in self._pubsub_tasks: task = asyncio.create_task(self._listen_to_redis(room_code)) self._pubsub_tasks[room_code] = task def _stop_listening(self, room_code: str) -> None: - """Cancel the background task listening for room messages.""" + """Cancel the background task listening for room messages. + + Args: + room_code (str): The room code to unsubscribe from. + """ task = self._pubsub_tasks.pop(room_code, None) if task and not task.done(): task.cancel() async def _listen_to_redis(self, room_code: str) -> None: # noqa: C901 - """Listen to a Redis channel and dispatch to local websockets.""" + """Listen to a Redis channel and dispatch to local websockets. 
+ + Args: + room_code (str): The room code being monitored. + """ pubsub = self.redis.pubsub() channel = self._get_channel_name(room_code) await pubsub.subscribe(channel) @@ -149,6 +198,11 @@ async def _listen_to_redis(self, room_code: str) -> None: # noqa: C901 def get_connection_manager() -> ConnectionManager: + """Retrieve the singleton instance of the ConnectionManager. + + Returns: + ConnectionManager: The global manager instance. + """ global _connection_manager # noqa: PLW0603 if _connection_manager is None: # Create it synchronously but pass the global Redis client diff --git a/app/services/email_consumer.py b/app/services/email_consumer.py index b0d0471..42c6f4e 100644 --- a/app/services/email_consumer.py +++ b/app/services/email_consumer.py @@ -19,14 +19,31 @@ class TransientEmailDeliveryError(Exception): class EmailTemplateRenderer: + """Compiles Jinja2 templates into HTML. + + Attributes: + _environment: The configured Jinja2 template environment. + """ + def __init__(self) -> None: - templates_root = Path(__file__).resolve().parent.parent / "templates" / "email" + templates_root = ( + Path(__file__).resolve().parent.parent.parent / "templates" / "email" + ) self._environment = Environment( loader=FileSystemLoader(str(templates_root)), autoescape=True, ) def render(self, template_name: str, data: dict[str, object]) -> str: + """Render a Jinja2 template with the given data. + + Args: + template_name (str): The name of the HTML template file (without extension). + data (dict[str, object]): The context variables to inject. + + Returns: + str: The rendered HTML content. + """ try: template = self._environment.get_template(f"{template_name}.html") except TemplateNotFound: @@ -40,7 +57,11 @@ def render(self, template_name: str, data: dict[str, object]) -> str: class MailgunEmailSender: - """Sends emails via Mailgun's /messages endpoint.""" + """Sends emails via Mailgun's /messages endpoint. 
+ + Attributes: + _timeout_seconds: HTTP client timeout for Mailgun API requests. + """ def __init__( self, timeout_seconds: float = settings.MAILGUN_TIMEOUT_SECONDS @@ -48,6 +69,13 @@ def __init__( self._timeout_seconds = timeout_seconds async def send(self, to: str, subject: str, html_body: str) -> None: + """Dispatch an email payload to the Mailgun API. + + Args: + to (str): The recipient's email address. + subject (str): The subject line of the email. + html_body (str): The rendered HTML body content. + """ if not settings.MAILGUN_API_KEY or not settings.MAILGUN_DOMAIN: logger.warning("Mailgun credentials not configured; skipping dispatch") return @@ -84,6 +112,16 @@ async def send(self, to: str, subject: str, html_body: str) -> None: class EmailConsumerWorker(BaseConsumer): + """Kafka consumer worker for email dispatch. + + Attributes: + topic: The Kafka topic being consumed. + group_id: Consumer group identifier. + event_schema: Pydantic schema used to validate incoming events. + _sender: Service instance handling Mailgun dispatch. + _renderer: Service instance handling HTML templating. + """ + topic = NOTIFICATIONS_EMAIL group_id = settings.KAFKA_EMAIL_CONSUMER_GROUP_ID event_schema = EmailEvent @@ -94,6 +132,11 @@ def __init__(self, producer: object) -> None: self._renderer = EmailTemplateRenderer() async def handle(self, event: BaseEvent[Any]) -> None: + """Process an email event, render the template, and dispatch. + + Args: + event (BaseEvent[Any]): The deserialized Kafka message payload. + """ email_event = EmailEvent.model_validate(event.model_dump()) html_body = email_event.payload.html_body if not html_body: diff --git a/app/services/email_producer.py b/app/services/email_producer.py index 3436a77..76c9332 100644 --- a/app/services/email_producer.py +++ b/app/services/email_producer.py @@ -10,7 +10,11 @@ class EmailProducerService: - """Publishes email dispatch events to Kafka.""" + """Publishes email dispatch events to Kafka. 
+ + Attributes: + _topic: The target Kafka topic for email notifications. + """ def __init__(self, topic: str = NOTIFICATIONS_EMAIL) -> None: self._topic = topic @@ -23,6 +27,15 @@ async def send_email( template_data: dict[str, Any], template: str, ) -> None: + """Schedule an email for dispatch by publishing it to Kafka. + + Args: + to (str): Recipient email address. + subject (str): Email subject. + html_body (str | None): Raw HTML content, if pre-rendered. + template_data (dict[str, Any]): Context variables for Jinja templating. + template (str): The name of the template to be used if html_body is missing. + """ payload = EmailPayload( to=to, subject=subject, @@ -42,4 +55,9 @@ async def send_email( def get_email_producer_service() -> EmailProducerService: + """Retrieve the singleton instance of EmailProducerService. + + Returns: + EmailProducerService: The static service instance. + """ return _email_producer_service diff --git a/app/services/stt_worker.py b/app/services/stt_worker.py index 513762a..49e2f36 100644 --- a/app/services/stt_worker.py +++ b/app/services/stt_worker.py @@ -27,6 +27,11 @@ class STTWorker(BaseConsumer): Subscribes to ``audio.raw`` and publishes ``TranscriptionEvent`` messages to ``text.original``. + + Attributes: + topic: The Kafka topic for incoming raw audio chunks. + group_id: Consumer group identifier for STT processing. + event_schema: Pydantic schema used to validate incoming chunks. """ topic = AUDIO_RAW @@ -34,7 +39,11 @@ class STTWorker(BaseConsumer): event_schema = AudioChunkEvent async def handle(self, event: BaseEvent[Any]) -> None: - """Process a single audio chunk: decode → STT → publish transcript.""" + """Process a single audio chunk: decode → STT → publish transcript. + + Args: + event (BaseEvent[Any]): The deserialized wrapper containing the AudioChunkPayload. 
+ """ chunk_event = AudioChunkEvent.model_validate(event.model_dump()) payload = chunk_event.payload diff --git a/app/services/translation_worker.py b/app/services/translation_worker.py index 9b20907..70785ba 100644 --- a/app/services/translation_worker.py +++ b/app/services/translation_worker.py @@ -33,6 +33,11 @@ class TranslationWorker(BaseConsumer): Subscribes to ``text.original`` and publishes ``TranslationEvent`` messages to ``text.translated`` — one per unique target language needed in the room. + + Attributes: + topic: The Kafka topic for incoming transcription events. + group_id: Consumer group identifier for translation. + event_schema: Pydantic schema used to validate transcription events. """ topic = TEXT_ORIGINAL @@ -44,7 +49,11 @@ def __init__(self, producer: object) -> None: self._state = MeetingStateService() async def handle(self, event: BaseEvent[Any]) -> None: - """Process a transcription: resolve target languages → translate → publish.""" + """Process a transcription: resolve target languages → translate → publish. + + Args: + event (BaseEvent[Any]): The deserialized wrapper containing the TranscriptionPayload. + """ tx_event = TranscriptionEvent.model_validate(event.model_dump()) payload = tx_event.payload @@ -137,7 +146,13 @@ async def _translate_text( ) -> str: """Dispatch translation to DeepL, OpenAI fallback, or mock. - Returns the translated text string, or empty string on failure. + Args: + text (str): The original text string to be translated. + source_language (str): The source language code (e.g., 'en', 'es'). + target_language (str): The destination language code. + + Returns: + str: The translated text string, or an empty string on failure. 
""" from app.core.config import settings diff --git a/app/services/tts_worker.py b/app/services/tts_worker.py index 19d81d6..3983bdc 100644 --- a/app/services/tts_worker.py +++ b/app/services/tts_worker.py @@ -37,6 +37,11 @@ class TTSWorker(BaseConsumer): Supports two providers (switchable via ``ACTIVE_TTS_PROVIDER``): - ``"openai"`` — OpenAI TTS (tts-1) - ``"voiceai"`` — Voice.ai TTS (voiceai-tts-multilingual-v1-latest) + + Attributes: + topic: The Kafka topic for incoming translated text events. + group_id: Consumer group identifier for TTS generation. + event_schema: Pydantic schema used to validate incoming translation events. """ topic = TEXT_TRANSLATED @@ -44,7 +49,11 @@ class TTSWorker(BaseConsumer): event_schema = TranslationEvent async def handle(self, event: BaseEvent[Any]) -> None: - """Process a translation: synthesize audio → publish.""" + """Process a translation: synthesize audio → publish. + + Args: + event (BaseEvent[Any]): The deserialized wrapper containing the TranslationPayload. + """ tl_event = TranslationEvent.model_validate(event.model_dump()) payload = tl_event.payload @@ -101,8 +110,13 @@ async def handle(self, event: BaseEvent[Any]) -> None: async def _synthesize(self, *, text: str, language: str, encoding: str) -> dict: """Dispatch to the active TTS provider. + Args: + text (str): The translated native text to synthesize. + language (str): The language code of the text. + encoding (str): The desired output audio format encoding. + Returns: - A dict with ``audio_bytes`` and ``sample_rate``. + dict: A dictionary containing 'audio_bytes' and the 'sample_rate' metadata. 
""" provider = settings.ACTIVE_TTS_PROVIDER.lower() diff --git a/app/utils/__init__.py b/app/utils/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/app/utils/authentication.py b/app/utils/authentication.py deleted file mode 100644 index e69de29..0000000 diff --git a/app/utils/validation.py b/app/utils/validation.py deleted file mode 100644 index e69de29..0000000 diff --git a/issues/change-password.md b/issues/change-password.md deleted file mode 100644 index 3f972f8..0000000 --- a/issues/change-password.md +++ /dev/null @@ -1,68 +0,0 @@ -### Feature: Implement POST /api/v1/auth/change-password — Authenticated Password Update Endpoint - -**Problem** -Authenticated users have no way to update their password voluntarily (e.g., routine credential rotation or after suspecting their password was exposed). The `/reset-password` flow exists for forgotten passwords but requires a reset email — it is not suitable for users who know their current password and simply want to change it. A dedicated change-password endpoint should require proof of the current password before accepting a new one. - -**Proposed Solution** -Implement `POST /api/v1/auth/change-password` as a protected endpoint that requires a valid access token. It verifies the user's current password before accepting the new one, updates the hash in the database, and revokes all other active refresh tokens to force re-login on other devices — giving the user confidence that only their current session remains active. - -**User Stories** -* **As a logged-in user,** I want to change my password by providing my current password and a new one, so I can rotate my credentials without going through a full reset flow. -* **As a security-conscious user,** I want all my other active sessions to be terminated when I change my password, so I know no stale sessions remain after the update. 
-* **As a security engineer,** I want the current password to be verified before any update, so an attacker who gains brief access to an authenticated session cannot silently change the password. - -**Acceptance Criteria** -1. `POST /api/v1/auth/change-password` requires a valid `Authorization: Bearer ` header. -2. Accepts the following JSON body: - ```json - { - "current_password": "OldP@ssw0rd!", - "new_password": "NewP@ssw0rd!" - } - ``` -3. **Validation**: - * `new_password` — minimum 8 characters. - * If `current_password` does not match the stored hash, return `400 Bad Request`: - ```json - { "status": "error", "code": "INCORRECT_PASSWORD", "message": "Current password is incorrect.", "details": [] } - ``` - * If `new_password` is identical to `current_password`, return `400 Bad Request`: - ```json - { "status": "error", "code": "SAME_PASSWORD", "message": "New password must be different from the current password.", "details": [] } - ``` -4. On valid input, in a single atomic transaction: - * Hash `new_password` with bcrypt. - * Update `user.hashed_password` and `user.updated_at = now()`. -5. After the DB commit, call `revoke_all_user_tokens(email)` to invalidate all active refresh tokens across all devices. -6. On success, return `200 OK`: - ```json - { "status": "ok", "message": "Password updated successfully." } - ``` -7. The current session's access token remains valid until its natural expiry (`ACCESS_TOKEN_EXPIRE_MINUTES`). Only refresh tokens are revoked — so the user's current request continues to work, but they will need to log in again on next refresh. -8. The endpoint is rate-limited to **10 requests/minute** per authenticated user. -9. Unit and integration tests cover: successful change, wrong current password, same password rejection, and session revocation. - -**Proposed Technical Details** -* **Router**: `app/api/v1/endpoints/auth.py` — new `POST /change-password` route. 
-* **Auth**: Uses `get_current_user` dependency; the route receives the currently authenticated `User` object. -* **Schema**: New `ChangePasswordRequest(current_password: str, new_password: str = Field(..., min_length=8))` in `app/schemas/auth.py`. -* **Reuses**: `verify_password` from `app/core/security.py`, `get_password_hash` from `app/core/security.py`, and `revoke_all_user_tokens` from `app/services/token_store.py`. -* **New/Modified Files**: - * `app/api/v1/endpoints/auth.py` — add `POST /change-password` [MODIFY] - * `app/schemas/auth.py` — add `ChangePasswordRequest` [MODIFY] - -**Tasks** -- [ ] Add `ChangePasswordRequest` schema to `app/schemas/auth.py`. -- [ ] Implement `POST /api/v1/auth/change-password` in `app/api/v1/endpoints/auth.py`. -- [ ] Verify current password using `verify_password` before accepting the new one. -- [ ] Reject new password if identical to current password. -- [ ] Hash and persist the new password within an atomic DB transaction. -- [ ] Call `revoke_all_user_tokens(email)` after DB commit. -- [ ] Apply `@limiter.limit("10/minute")` to the route. -- [ ] Write unit tests: wrong current password, same password, successful change. -- [ ] Write integration tests: full flow including session revocation check. - -**Open Questions/Considerations** -* Should a confirmation email be sent to the user notifying them that their password was changed (security notification)? -* Should we also blacklist the current access token `jti` on password change to force re-login on the current device immediately, or keep it valid until natural expiry for a smoother UX? -* Should we enforce a password history policy (e.g., cannot reuse the last 3 passwords)? This requires storing previous hashed passwords. 
diff --git a/issues/logout.md b/issues/logout.md deleted file mode 100644 index 51544b3..0000000 --- a/issues/logout.md +++ /dev/null @@ -1,67 +0,0 @@ -### Feature: Implement POST /api/v1/auth/logout — Session Termination Endpoint - -**Problem** -Without a logout endpoint, a user's session can only be terminated by waiting for both the access token and refresh token to expire naturally. This is unacceptable for a secure application — users must be able to explicitly end their session, especially on shared devices. Additionally, simply deleting the cookie client-side is insufficient: the refresh token `jti` remains valid in Redis and the access token remains usable until its expiry. - -**Proposed Solution** -Implement `POST /api/v1/auth/logout` which performs a **two-step server-side invalidation**: -1. **Blacklist the Access Token**: The AT's `jti` is written to a Redis blacklist with a TTL equal to its remaining lifetime. The `get_current_user` dependency checks this blacklist on every authenticated request. -2. **Revoke the Refresh Token**: The RT's `jti` is deleted from `refresh_token_store` in Redis, making further token rotations impossible. - -Finally, the server clears the `HttpOnly` refresh token cookie by overwriting it with an expired one. - -**User Stories** -* **As a user,** I want to log out and have my session immediately invalidated on the server, so that even if someone intercepts my access token it cannot be used after I log out. -* **As a user on a shared device,** I want to log out and be confident that no one can resume my session using the refresh token cookie, even before it expires. -* **As a developer,** I want logout to succeed even if the client sends an expired or missing refresh token, so the user is never stuck in a state where they cannot log out. - -**Acceptance Criteria** -1. `POST /api/v1/auth/logout` requires a valid access token in the `Authorization: Bearer ` header. -2. 
**Access Token Blacklisting**: - * The AT's `jti` is extracted from the token payload. - * It is written to Redis as `blacklist:{jti}` with a TTL equal to the token's remaining lifetime in seconds. - * From this point, the `get_current_user` dependency rejects this `jti` with `401 Unauthorized` on any subsequent request. -3. **Refresh Token Revocation**: - * The RT `jti` is read from the `HttpOnly` cookie (if present) and deleted from Redis (`refresh_token:{jti}`). - * If the cookie is absent or already revoked, logout still succeeds — this case is not treated as an error. -4. **Cookie Clearance**: The server overwrites the `refresh_token` cookie with an empty value and `Max-Age=0` to instruct the browser to delete it immediately: - ``` - Set-Cookie: refresh_token=; HttpOnly; Secure; SameSite=Strict; Path=/api/v1/auth; Max-Age=0 - ``` -5. On success, the response is `200 OK`: - ```json - { "status": "ok", "message": "Successfully logged out." } - ``` -6. If the access token is expired or invalid, return `401 Unauthorized` — the client should redirect to login. -7. The endpoint is rate-limited to **20 requests/minute** per IP. -8. Unit and integration tests cover: successful logout (both tokens revoked), logout with missing RT cookie (AT still blacklisted), and subsequent request with blacklisted AT jti returning `401`. - -**Proposed Technical Details** -* **Router**: `app/api/v1/endpoints/auth.py` — new `POST /logout` route. -* **Authentication**: The route uses the standard `get_current_user` dependency to validate and decode the AT. The decoded `TokenData` (carrying `jti` and remaining TTL) is passed to the logout logic. -* **AT Blacklist** in `app/services/token_store.py`: - * `blacklist_access_token(jti: str, ttl_seconds: int)` — sets `blacklist:{jti}` with TTL. - * `is_access_token_blacklisted(jti: str) -> bool` — checks key existence. 
-* **`get_current_user` update** in `app/core/deps.py`: - * After decoding a valid JWT, call `is_access_token_blacklisted(jti)`. If `True`, raise `UnauthorizedException(code="TOKEN_REVOKED")`. -* **Cookie Clear**: `response.delete_cookie("refresh_token", path="/api/v1/auth")` in the route handler. -* **AT Remaining TTL**: Computed as `token_exp - int(datetime.utcnow().timestamp())` to set the exact Redis TTL, so the blacklist entry self-cleans when the token would have expired anyway. -* **New/Modified Files**: - * `app/api/v1/endpoints/auth.py` — add `POST /logout` [MODIFY] - * `app/services/token_store.py` — add `blacklist_access_token`, `is_access_token_blacklisted` [MODIFY] - * `app/core/deps.py` — add blacklist check in `get_current_user` [MODIFY] - -**Tasks** -- [ ] Implement `blacklist_access_token` and `is_access_token_blacklisted` in `app/services/token_store.py`. -- [ ] Update `get_current_user` in `app/core/deps.py` to check the AT blacklist on every authenticated request. -- [ ] Implement `POST /api/v1/auth/logout` in `app/api/v1/endpoints/auth.py`. -- [ ] Revoke the refresh token `jti` from Redis during logout (gracefully handle missing cookie). -- [ ] Clear the `HttpOnly` cookie by setting `Max-Age=0` in the logout response. -- [ ] Apply `@limiter.limit("20/minute")` rate limit to the logout route. -- [ ] Write unit tests for `blacklist_access_token`, `is_access_token_blacklisted`, and the updated `get_current_user`. -- [ ] Write integration tests: successful logout, logout with no RT cookie, subsequent request with blacklisted AT returning `401`. - -**Open Questions/Considerations** -* Should we support a **"logout from all devices"** variant (e.g., `POST /logout?all=true`) that calls `revoke_all_user_tokens(email)` and blacklists all known ATs for the user? -* The AT blacklist only covers the remaining `exp` window. If `ACCESS_TOKEN_EXPIRE_MINUTES` is very long (e.g., 60 min), the Redis blacklist entry lives for that full duration. 
Is this an acceptable trade-off, or should we shorten the AT lifetime? -* Should the logout endpoint be exposed to unauthenticated clients (no AT required) so that a client with only an expired AT can still clear its refresh token cookie server-side? diff --git a/issues/password-reset.md b/issues/password-reset.md deleted file mode 100644 index 96ea3cb..0000000 --- a/issues/password-reset.md +++ /dev/null @@ -1,70 +0,0 @@ -### Feature: Implement POST /api/v1/auth/reset-password — Password Reset Endpoint - -**Problem** -After a user requests a password reset and receives the reset link via email, there is no endpoint to process the new password. Without this, the forgot-password flow is incomplete — users can receive the link but have no way to use it. Additionally, resetting the password must invalidate all existing sessions to ensure that whoever triggered the account compromise can no longer access it. - -**Proposed Solution** -Implement `POST /api/v1/auth/reset-password` which accepts a reset token (from the email link) and a new password, validates the token, hashes and persists the new password, deletes the token to prevent reuse, and revokes all active refresh tokens for the user. This ensures a clean slate after a password reset regardless of how many devices were previously logged in. - -**User Stories** -* **As a user who received a reset link,** I want to submit a new password and have it take effect immediately, so I can log back in and regain full access to my account. -* **As a user,** I want all my other active sessions to be terminated when I reset my password, so that whoever may have had unauthorised access is immediately locked out. -* **As a security engineer,** I want the reset token to be deleted immediately after use, so that the same reset link cannot be used again if intercepted. - -**Acceptance Criteria** -1. 
`POST /api/v1/auth/reset-password` accepts the following JSON body: - ```json - { - "token": "", - "new_password": "MyNewStr0ng@Pass!" - } - ``` -2. **Input Validation**: - * `token` — required, non-empty string. - * `new_password` — required, minimum 8 characters (same rules as `/signup`). -3. **Token Validation**: - * If no matching `PasswordResetToken` is found, return `400 Bad Request`: - ```json - { "status": "error", "code": "INVALID_RESET_TOKEN", "message": "Password reset token is invalid.", "details": [] } - ``` - * If the token exists but `expires_at < now()`, return `400 Bad Request`: - ```json - { "status": "error", "code": "RESET_TOKEN_EXPIRED", "message": "Password reset token has expired. Please request a new one.", "details": [] } - ``` -4. **On Valid Token** (executed as a single atomic transaction): - * Hash the `new_password` using bcrypt. - * Update `user.hashed_password` with the new hash and `user.updated_at = now()`. - * Delete the `PasswordResetToken` record from the database. - * Call `revoke_all_user_tokens(email)` from `app/services/token_store.py` to delete all refresh token `jti` entries from Redis, invalidating all active sessions. -5. On success, return `200 OK`: - ```json - { "status": "ok", "message": "Password has been reset successfully. Please log in with your new password." } - ``` -6. The endpoint does **not** automatically issue new tokens or log the user in — they must go through `/login` with the new password. -7. The endpoint is rate-limited to **5 requests/minute** per IP. -8. Unit and integration tests cover: valid reset, invalid token, expired token, and full session revocation after reset. - -**Proposed Technical Details** -* **Router**: `app/api/v1/endpoints/auth.py` — new `POST /reset-password` route. -* **Schema**: New `ResetPasswordRequest(token: str, new_password: str = Field(..., min_length=8))` in `app/schemas/auth.py`. 
-* **CRUD**: Reuses `get_token` and `delete_token` from `app/crud/password_reset_token.py` (created in the [Forgot Password issue](./auth_forgot_password.md)). -* **Session Revocation**: `revoke_all_user_tokens(email)` from `app/services/token_store.py` (created in the [Refresh Token issue](./auth_refresh_token.md)). -* **Atomic Transaction**: `user.hashed_password` update, token deletion, and Redis revocation are sequenced such that the DB transaction commits first, then Redis keys are removed. If the DB commit fails, nothing changes. -* **New/Modified Files**: - * `app/api/v1/endpoints/auth.py` — add `POST /reset-password` [MODIFY] - * `app/schemas/auth.py` — add `ResetPasswordRequest` [MODIFY] - -**Tasks** -- [ ] Add `ResetPasswordRequest` Pydantic schema to `app/schemas/auth.py`. -- [ ] Implement `POST /api/v1/auth/reset-password` in `app/api/v1/endpoints/auth.py`. -- [ ] Reuse `get_token` and `delete_token` from `app/crud/password_reset_token.py`. -- [ ] Hash the new password and update `user.hashed_password` within a DB transaction. -- [ ] Call `revoke_all_user_tokens(email)` after successful DB commit to invalidate all sessions. -- [ ] Apply `@limiter.limit("5/minute")` to the route. -- [ ] Write unit tests for token validation (invalid, expired) and password update logic. -- [ ] Write integration tests: valid reset (password updated + sessions revoked), invalid token, expired token. - -**Open Questions/Considerations** -* Should we send a confirmation email to the user after a successful password reset (e.g., "Your password was changed — if this wasn't you, contact support")? This is a standard security notification. -* Should the new password be rejected if it is the same as the current password? This requires comparing the new hash against `user.hashed_password`, which requires a `verify_password` check before updating. 
-* If `revoke_all_user_tokens` fails (Redis is temporarily unavailable), should the password reset succeed anyway (prioritising account recovery) or roll back (prioritising session security)?
diff --git a/pyproject.toml b/pyproject.toml
index f47f621..994bce4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -46,7 +46,13 @@ target-version = "py311"
 # Unlike Flake8, Ruff doesn't enable all `E` and `F` codes by default.
 # See: https://docs.astral.sh/ruff/rules/
 select = ["B", "E", "F", "I", "W", "C90", "UP", "ASYNC", "PT", "ARG", "PTH", "SIM", "PLE", "PLW", "RUF"]
-ignore = ["B008"]
+ignore = [
+    "B008",  # Do not perform function call in argument defaults (FastAPI's `Depends()` pattern).
+    "D100",  # Missing docstring in public module
+    "D101",  # Missing docstring in public class
+    "D104",  # Allow missing __init__.py docstring initially
+    "S101",  # Use of assert detected. The enclosed code will be removed when compiling to optimised bytecode.
+]
 
 # Allow fix for all enabled rules (when `--fix`) is provided.
fixable = ["ALL"] @@ -55,6 +61,9 @@ unfixable = [] [tool.ruff.lint.mccabe] max-complexity = 10 +[tool.ruff.lint.pydocstyle] +convention = "google" + [tool.ruff.format] quote-style = "double" indent-style = "space" diff --git a/tests/test_auth/test_auth_login.py b/tests/test_auth/test_auth_login.py index 9b893dc..60b41ca 100644 --- a/tests/test_auth/test_auth_login.py +++ b/tests/test_auth/test_auth_login.py @@ -61,6 +61,9 @@ async def incr(self, key: str) -> int: self._store[key] = str(current) return current + async def ttl(self, key: str) -> int: + return 86400 if key in self._store else -2 + async def scan( self, cursor: int, # noqa: ARG002 @@ -294,7 +297,7 @@ def test_wrong_password_returns_401( "status": "error", "code": "INVALID_CREDENTIALS", "message": "Invalid email or password.", - "details": [], + "details": [{"attempts_remaining": 4}], } def test_nonexistent_email_returns_401(self, client: TestClient) -> None: @@ -411,6 +414,11 @@ def test_five_failures_triggers_lockout( assert response.status_code == 403 assert response.json()["code"] == "ACCOUNT_LOCKED" + assert ( + response.json()["message"] + == "Account is temporarily locked due to too many failed login attempts." 
+    )
+    assert response.json()["details"] == [{"lock_time_left": "1 day"}]
 
     def test_successful_login_resets_counter(
         self,
diff --git a/tests/test_auth/test_auth_signup.py b/tests/test_auth/test_auth_signup.py
index 701e900..385b241 100644
--- a/tests/test_auth/test_auth_signup.py
+++ b/tests/test_auth/test_auth_signup.py
@@ -72,9 +72,11 @@ def test_signup_success_creates_user_and_returns_public_profile(
     payload = {
         "email": " USER@example.com ",
         "password": "MyStr0ngP@ss!",
+        "confirm_password": "MyStr0ngP@ss!",
         "full_name": " Ada Lovelace ",
         "speaking_language": "en",
         "listening_language": "fr",
+        "accepted_terms": True,
     }
 
     response = client.post("/api/v1/auth/signup", json=payload)
@@ -104,7 +106,9 @@ def test_signup_duplicate_email_returns_conflict(client: TestClient) -> None:
     payload = {
         "email": "duplicate@example.com",
         "password": "MyStr0ngP@ss!",
+        "confirm_password": "MyStr0ngP@ss!",
         "full_name": "Duplicate User",
+        "accepted_terms": True,
     }
 
     first = client.post("/api/v1/auth/signup", json=payload)
@@ -126,7 +130,9 @@ def test_signup_invalid_language_uses_standard_validation_shape(
     payload = {
         "email": "user2@example.com",
         "password": "MyStr0ngP@ss!",
+        "confirm_password": "MyStr0ngP@ss!",
         "speaking_language": "zz",
+        "accepted_terms": True,
     }
 
     response = client.post("/api/v1/auth/signup", json=payload)
@@ -136,9 +142,29 @@
     assert body["status"] == "error"
     assert body["code"] == "VALIDATION_ERROR"
     fields = [detail["field"] for detail in body["details"]]
     assert "body.speaking_language" in fields
 
 
+def test_signup_password_mismatch_returns_validation_error(
+    client: TestClient,
+) -> None:
+    payload = {
+        "email": "mismatch@example.com",
+        "password": "MyStr0ngP@ss!",
+        "confirm_password": "WrongPassword!",
+        "accepted_terms": True,
+    }
+
+    response = client.post("/api/v1/auth/signup", json=payload)
+
+    assert response.status_code == 400
+    
body = response.json() + assert body["status"] == "error" + assert body["code"] == "VALIDATION_ERROR" + fields = [detail["message"] for detail in body["details"]] + assert "Value error, passwords do not match" in fields + + def test_forgot_password_returns_generic_accepted_response( client: TestClient, email_producer_mock: AsyncMock ) -> None: @@ -166,7 +193,9 @@ def test_forgot_password_enqueues_reset_email_for_existing_user( signup_payload = { "email": "pwreset@example.com", "password": "MyStr0ngP@ss!", + "confirm_password": "MyStr0ngP@ss!", "full_name": "Reset User", + "accepted_terms": True, } signup_response = client.post("/api/v1/auth/signup", json=signup_payload) assert signup_response.status_code == 201 @@ -187,3 +216,42 @@ def test_forgot_password_enqueues_reset_email_for_existing_user( assert response.status_code == 200 email_producer_mock.send_email.assert_awaited_once() + + +def test_signup_rejected_when_terms_not_accepted( + client: TestClient, +) -> None: + payload = { + "email": "noterms@example.com", + "password": "MyStr0ngP@ss!", + "confirm_password": "MyStr0ngP@ss!", + "accepted_terms": False, + } + + response = client.post("/api/v1/auth/signup", json=payload) + + assert response.status_code == 400 + body = response.json() + assert body["status"] == "error" + assert body["code"] == "VALIDATION_ERROR" + messages = [detail["message"] for detail in body["details"]] + assert any("Terms of Service" in msg for msg in messages) + + +def test_signup_rejected_when_terms_field_missing( + client: TestClient, +) -> None: + payload = { + "email": "nofield@example.com", + "password": "MyStr0ngP@ss!", + "confirm_password": "MyStr0ngP@ss!", + } + + response = client.post("/api/v1/auth/signup", json=payload) + + assert response.status_code == 400 + body = response.json() + assert body["status"] == "error" + assert body["code"] == "VALIDATION_ERROR" + fields = [detail["field"] for detail in body["details"]] + assert "body.accepted_terms" in fields