perf: optimize usage queries

x0sina · x0sina · commit 56cb06e628e8 · 2026-02-14T14:37:51.000+03:30
diff --git a/app/db/crud/admin.py b/app/db/crud/admin.py
@@ -1,15 +1,13 @@
 from datetime import datetime, timezone
 from enum import Enum
 
-from sqlalchemy import and_, case, func, literal_column, select
+from sqlalchemy import and_, case, func, select
 from sqlalchemy.ext.asyncio import AsyncSession
 
 from app.db.crud.general import (
-    MYSQL_FORMATS,
-    SQLITE_FORMATS,
     _build_trunc_expression,
-    _get_next_period_boundary,
     attach_timezone_to_period_start,
+    get_complete_period_start_for_filter,
     to_utc_for_filter,
 )
 from app.db.models import Admin, AdminUsageLogs, NodeUserUsage, User
@@ -445,8 +443,8 @@ async def get_admin_usages(
     # Build truncation expression with timezone support
     trunc_expr = _build_trunc_expression(db, period, NodeUserUsage.created_at, start=start)
 
-    # Filter using UTC timestamps (DB stores naive UTC)
-    start_utc = to_utc_for_filter(start)
+    # Filter using UTC timestamps (DB stores naive UTC) from first complete bucket
+    start_utc = get_complete_period_start_for_filter(start, period)
     end_utc = to_utc_for_filter(end)
     conditions = [
         NodeUserUsage.created_at >= start_utc,
@@ -489,26 +487,6 @@ async def get_admin_usages(
             .order_by(trunc_expr)
         )
 
-    # HAVING clause to exclude partial first bucket
-    # Only needed if start has timezone (which means we did timezone-aware grouping)
-    if start.tzinfo:
-        # Get the first COMPLETE bucket boundary
-        # Example: if start is 14:02:37, first_complete_bucket is 15:00:00
-        first_complete_bucket = _get_next_period_boundary(start, period)
-
-        # Convert to naive for comparison (represents wall-clock time in target timezone)
-        boundary_value = first_complete_bucket.replace(tzinfo=None)
-
-        # Add HAVING clause with appropriate comparison based on dialect
-        if dialect == "postgresql":
-            # PostgreSQL: trunc_expr returns timestamp, compare to timestamp
-            stmt = stmt.having(trunc_expr >= boundary_value)
-        elif dialect in ("mysql", "sqlite"):
-            # MySQL/SQLite: Use the alias 'period_start' in HAVING
-            format_str = MYSQL_FORMATS[period] if dialect == "mysql" else SQLITE_FORMATS[period]
-            boundary_str = boundary_value.strftime(format_str.replace("%i", "%M"))
-            stmt = stmt.having(literal_column("period_start") >= boundary_str)
-
     result = await db.execute(stmt)
     stats = {}
     for row in result.mappings():
diff --git a/app/db/crud/general.py b/app/db/crud/general.py
@@ -159,6 +159,22 @@ def _get_next_period_boundary(dt: datetime, period: Period) -> datetime:
     return dt
 
 
+def get_complete_period_start_for_filter(start: Optional[datetime], period: Period) -> Optional[datetime]:
+    """
+    Return the lower-bound timestamp for usage filters, starting at the first complete period.
+
+    `None` is returned as-is. A timezone-aware `start` is advanced with `_get_next_period_boundary`
+    before `to_utc_for_filter` is applied; a naive `start` goes straight to `to_utc_for_filter`.
+    """
+    if start is None:
+        return None
+
+    if start.tzinfo:
+        return to_utc_for_filter(_get_next_period_boundary(start, period))
+
+    return to_utc_for_filter(start)
+
+
 def attach_timezone_to_period_start(row_dict: dict, target_tz, dialect: str = None) -> None:
     """
     Attach timezone info to period_start in the row dictionary.
diff --git a/app/db/crud/user.py b/app/db/crud/user.py
@@ -3,7 +3,7 @@
 from enum import Enum
 from typing import List, Optional, Sequence
 
-from sqlalchemy import and_, case, delete, desc, func, literal, literal_column, not_, or_, select, update
+from sqlalchemy import and_, case, delete, desc, func, literal, not_, or_, select, update
 from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.orm import joinedload, selectinload
 from sqlalchemy.sql.functions import coalesce
@@ -30,12 +30,10 @@
 from config import USERS_AUTODELETE_DAYS
 
 from .general import (
-    MYSQL_FORMATS,
-    SQLITE_FORMATS,
     _build_trunc_expression,
-    _get_next_period_boundary,
     attach_timezone_to_period_start,
     build_json_proxy_settings_search_condition,
+    get_complete_period_start_for_filter,
     to_utc_for_filter,
 )
 from .group import get_groups_by_ids
@@ -453,8 +451,8 @@ async def get_user_usages(
     # Build the appropriate truncation expression
     trunc_expr = _build_trunc_expression(db, period, NodeUserUsage.created_at, start)
 
-    # Filter using UTC timestamps (DB stores naive UTC)
-    start_utc = to_utc_for_filter(start)
+    # Filter using UTC timestamps (DB stores naive UTC) from first complete bucket
+    start_utc = get_complete_period_start_for_filter(start, period)
     end_utc = to_utc_for_filter(end)
     conditions = [
         NodeUserUsage.created_at >= start_utc,
@@ -488,26 +486,6 @@ async def get_user_usages(
             .order_by(trunc_expr)
         )
 
-    # HAVING clause to exclude partial first bucket
-    # Only needed if start has timezone (which means we did timezone-aware grouping)
-    if start.tzinfo:
-        # Get the first COMPLETE bucket boundary
-        # Example: if start is 14:02:37, first_complete_bucket is 15:00:00
-        first_complete_bucket = _get_next_period_boundary(start, period)
-
-        # Convert to naive for comparison (represents wall-clock time in target timezone)
-        boundary_value = first_complete_bucket.replace(tzinfo=None)
-
-        # Add HAVING clause with appropriate comparison based on dialect
-        if dialect == "postgresql":
-            # PostgreSQL: trunc_expr returns timestamp, compare to timestamp
-            stmt = stmt.having(trunc_expr >= boundary_value)
-        elif dialect in ("mysql", "sqlite"):
-            # MySQL/SQLite: Use the alias 'period_start' in HAVING
-            format_str = MYSQL_FORMATS[period] if dialect == "mysql" else SQLITE_FORMATS[period]
-            boundary_str = boundary_value.strftime(format_str.replace("%i", "%M"))
-            stmt = stmt.having(literal_column("period_start") >= boundary_str)
-
     result = await db.execute(stmt)
 
     stats = {}
@@ -1085,68 +1063,50 @@ async def get_all_users_usages(
     """
     admins_filter = admins or None
 
-    users_subquery = select(User.id)
-    if admins_filter:
-        users_subquery = users_subquery.join(Admin).where(Admin.username.in_(admins_filter))
-    users_subquery = users_subquery.subquery()
-
     # Build the appropriate truncation expression
     trunc_expr = _build_trunc_expression(db, period, NodeUserUsage.created_at, start)
 
-    # Filter using UTC timestamps (DB stores naive UTC)
-    start_utc = to_utc_for_filter(start)
+    # Filter using UTC timestamps (DB stores naive UTC) from first complete bucket
+    start_utc = get_complete_period_start_for_filter(start, period)
     end_utc = to_utc_for_filter(end)
     conditions = [
         NodeUserUsage.created_at >= start_utc,
         NodeUserUsage.created_at < end_utc,
-        NodeUserUsage.user_id.in_(select(users_subquery.c.id)),
     ]
+    if admins_filter:
+        conditions.append(Admin.username.in_(admins_filter))
 
     if node_id is not None:
         conditions.append(NodeUserUsage.node_id == node_id)
     else:
         node_id = -1
 
     dialect = db.bind.dialect.name
+    from_clause = NodeUserUsage.__table__.join(User, User.id == NodeUserUsage.user_id)
+    if admins_filter:
+        from_clause = from_clause.join(Admin, Admin.id == User.admin_id)
+
     if group_by_node:
         stmt = (
             select(
                 trunc_expr.label("period_start"),
                 func.coalesce(NodeUserUsage.node_id, 0).label("node_id"),
                 func.sum(NodeUserUsage.used_traffic).label("total_traffic"),
             )
+            .select_from(from_clause)
             .where(and_(*conditions))
             .group_by(trunc_expr, NodeUserUsage.node_id)
             .order_by(trunc_expr)
         )
     else:
         stmt = (
             select(trunc_expr.label("period_start"), func.sum(NodeUserUsage.used_traffic).label("total_traffic"))
+            .select_from(from_clause)
             .where(and_(*conditions))
             .group_by(trunc_expr)
             .order_by(trunc_expr)
         )
 
-    # HAVING clause to exclude partial first bucket
-    # Only needed if start has timezone (which means we did timezone-aware grouping)
-    if start.tzinfo:
-        # Get the first COMPLETE bucket boundary
-        # Example: if start is 14:02:37, first_complete_bucket is 15:00:00
-        first_complete_bucket = _get_next_period_boundary(start, period)
-
-        # Convert to naive for comparison (represents wall-clock time in target timezone)
-        boundary_value = first_complete_bucket.replace(tzinfo=None)
-
-        # Add HAVING clause with appropriate comparison based on dialect
-        if dialect == "postgresql":
-            # PostgreSQL: trunc_expr returns timestamp, compare to timestamp
-            stmt = stmt.having(trunc_expr >= boundary_value)
-        elif dialect in ("mysql", "sqlite"):
-            # MySQL/SQLite: Use the alias 'period_start' in HAVING
-            format_str = MYSQL_FORMATS[period] if dialect == "mysql" else SQLITE_FORMATS[period]
-            boundary_str = boundary_value.strftime(format_str.replace("%i", "%M"))
-            stmt = stmt.having(literal_column("period_start") >= boundary_str)
-
     result = await db.execute(stmt)
 
     stats = {}