Skip to content

Commit 56cb06e

Browse files
committed
perf: optimize usage queries
1 parent f7e75ed commit 56cb06e

File tree

3 files changed

+34
-80
lines changed

3 files changed

+34
-80
lines changed

app/db/crud/admin.py

Lines changed: 4 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,13 @@
11
from datetime import datetime, timezone
22
from enum import Enum
33

4-
from sqlalchemy import and_, case, func, literal_column, select
4+
from sqlalchemy import and_, case, func, select
55
from sqlalchemy.ext.asyncio import AsyncSession
66

77
from app.db.crud.general import (
8-
MYSQL_FORMATS,
9-
SQLITE_FORMATS,
108
_build_trunc_expression,
11-
_get_next_period_boundary,
129
attach_timezone_to_period_start,
10+
get_complete_period_start_for_filter,
1311
to_utc_for_filter,
1412
)
1513
from app.db.models import Admin, AdminUsageLogs, NodeUserUsage, User
@@ -445,8 +443,8 @@ async def get_admin_usages(
445443
# Build truncation expression with timezone support
446444
trunc_expr = _build_trunc_expression(db, period, NodeUserUsage.created_at, start=start)
447445

448-
# Filter using UTC timestamps (DB stores naive UTC)
449-
start_utc = to_utc_for_filter(start)
446+
# Filter using UTC timestamps (DB stores naive UTC) from first complete bucket
447+
start_utc = get_complete_period_start_for_filter(start, period)
450448
end_utc = to_utc_for_filter(end)
451449
conditions = [
452450
NodeUserUsage.created_at >= start_utc,
@@ -489,26 +487,6 @@ async def get_admin_usages(
489487
.order_by(trunc_expr)
490488
)
491489

492-
# HAVING clause to exclude partial first bucket
493-
# Only needed if start has timezone (which means we did timezone-aware grouping)
494-
if start.tzinfo:
495-
# Get the first COMPLETE bucket boundary
496-
# Example: if start is 14:02:37, first_complete_bucket is 15:00:00
497-
first_complete_bucket = _get_next_period_boundary(start, period)
498-
499-
# Convert to naive for comparison (represents wall-clock time in target timezone)
500-
boundary_value = first_complete_bucket.replace(tzinfo=None)
501-
502-
# Add HAVING clause with appropriate comparison based on dialect
503-
if dialect == "postgresql":
504-
# PostgreSQL: trunc_expr returns timestamp, compare to timestamp
505-
stmt = stmt.having(trunc_expr >= boundary_value)
506-
elif dialect in ("mysql", "sqlite"):
507-
# MySQL/SQLite: Use the alias 'period_start' in HAVING
508-
format_str = MYSQL_FORMATS[period] if dialect == "mysql" else SQLITE_FORMATS[period]
509-
boundary_str = boundary_value.strftime(format_str.replace("%i", "%M"))
510-
stmt = stmt.having(literal_column("period_start") >= boundary_str)
511-
512490
result = await db.execute(stmt)
513491
stats = {}
514492
for row in result.mappings():

app/db/crud/general.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,22 @@ def _get_next_period_boundary(dt: datetime, period: Period) -> datetime:
159159
return dt
160160

161161

162+
def get_complete_period_start_for_filter(start: Optional[datetime], period: Period) -> Optional[datetime]:
    """
    Compute the lower-bound timestamp to use when filtering usage rows by `start`.

    Args:
        start: Requested start of the range, or None when no lower bound was given.
        period: Bucket granularity, forwarded to `_get_next_period_boundary`.

    Returns:
        None when `start` is None. Otherwise the result of `to_utc_for_filter`
        applied to either the next period boundary after `start` (when `start`
        carries tzinfo, so a partial first bucket is excluded) or to `start`
        itself (when `start` is naive and is therefore treated as UTC).
    """
    if start is None:
        return None

    # Only timezone-aware inputs are rounded up to a bucket boundary; naive
    # inputs are passed straight through to the UTC conversion helper.
    filter_anchor = _get_next_period_boundary(start, period) if start.tzinfo else start
    return to_utc_for_filter(filter_anchor)
176+
177+
162178
def attach_timezone_to_period_start(row_dict: dict, target_tz, dialect: str = None) -> None:
163179
"""
164180
Attach timezone info to period_start in the row dictionary.

app/db/crud/user.py

Lines changed: 14 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from enum import Enum
44
from typing import List, Optional, Sequence
55

6-
from sqlalchemy import and_, case, delete, desc, func, literal, literal_column, not_, or_, select, update
6+
from sqlalchemy import and_, case, delete, desc, func, literal, not_, or_, select, update
77
from sqlalchemy.ext.asyncio import AsyncSession
88
from sqlalchemy.orm import joinedload, selectinload
99
from sqlalchemy.sql.functions import coalesce
@@ -30,12 +30,10 @@
3030
from config import USERS_AUTODELETE_DAYS
3131

3232
from .general import (
33-
MYSQL_FORMATS,
34-
SQLITE_FORMATS,
3533
_build_trunc_expression,
36-
_get_next_period_boundary,
3734
attach_timezone_to_period_start,
3835
build_json_proxy_settings_search_condition,
36+
get_complete_period_start_for_filter,
3937
to_utc_for_filter,
4038
)
4139
from .group import get_groups_by_ids
@@ -453,8 +451,8 @@ async def get_user_usages(
453451
# Build the appropriate truncation expression
454452
trunc_expr = _build_trunc_expression(db, period, NodeUserUsage.created_at, start)
455453

456-
# Filter using UTC timestamps (DB stores naive UTC)
457-
start_utc = to_utc_for_filter(start)
454+
# Filter using UTC timestamps (DB stores naive UTC) from first complete bucket
455+
start_utc = get_complete_period_start_for_filter(start, period)
458456
end_utc = to_utc_for_filter(end)
459457
conditions = [
460458
NodeUserUsage.created_at >= start_utc,
@@ -488,26 +486,6 @@ async def get_user_usages(
488486
.order_by(trunc_expr)
489487
)
490488

491-
# HAVING clause to exclude partial first bucket
492-
# Only needed if start has timezone (which means we did timezone-aware grouping)
493-
if start.tzinfo:
494-
# Get the first COMPLETE bucket boundary
495-
# Example: if start is 14:02:37, first_complete_bucket is 15:00:00
496-
first_complete_bucket = _get_next_period_boundary(start, period)
497-
498-
# Convert to naive for comparison (represents wall-clock time in target timezone)
499-
boundary_value = first_complete_bucket.replace(tzinfo=None)
500-
501-
# Add HAVING clause with appropriate comparison based on dialect
502-
if dialect == "postgresql":
503-
# PostgreSQL: trunc_expr returns timestamp, compare to timestamp
504-
stmt = stmt.having(trunc_expr >= boundary_value)
505-
elif dialect in ("mysql", "sqlite"):
506-
# MySQL/SQLite: Use the alias 'period_start' in HAVING
507-
format_str = MYSQL_FORMATS[period] if dialect == "mysql" else SQLITE_FORMATS[period]
508-
boundary_str = boundary_value.strftime(format_str.replace("%i", "%M"))
509-
stmt = stmt.having(literal_column("period_start") >= boundary_str)
510-
511489
result = await db.execute(stmt)
512490

513491
stats = {}
@@ -1085,68 +1063,50 @@ async def get_all_users_usages(
10851063
"""
10861064
admins_filter = admins or None
10871065

1088-
users_subquery = select(User.id)
1089-
if admins_filter:
1090-
users_subquery = users_subquery.join(Admin).where(Admin.username.in_(admins_filter))
1091-
users_subquery = users_subquery.subquery()
1092-
10931066
# Build the appropriate truncation expression
10941067
trunc_expr = _build_trunc_expression(db, period, NodeUserUsage.created_at, start)
10951068

1096-
# Filter using UTC timestamps (DB stores naive UTC)
1097-
start_utc = to_utc_for_filter(start)
1069+
# Filter using UTC timestamps (DB stores naive UTC) from first complete bucket
1070+
start_utc = get_complete_period_start_for_filter(start, period)
10981071
end_utc = to_utc_for_filter(end)
10991072
conditions = [
11001073
NodeUserUsage.created_at >= start_utc,
11011074
NodeUserUsage.created_at < end_utc,
1102-
NodeUserUsage.user_id.in_(select(users_subquery.c.id)),
11031075
]
1076+
if admins_filter:
1077+
conditions.append(Admin.username.in_(admins_filter))
11041078

11051079
if node_id is not None:
11061080
conditions.append(NodeUserUsage.node_id == node_id)
11071081
else:
11081082
node_id = -1
11091083

11101084
dialect = db.bind.dialect.name
1085+
from_clause = NodeUserUsage.__table__.join(User, User.id == NodeUserUsage.user_id)
1086+
if admins_filter:
1087+
from_clause = from_clause.join(Admin, Admin.id == User.admin_id)
1088+
11111089
if group_by_node:
11121090
stmt = (
11131091
select(
11141092
trunc_expr.label("period_start"),
11151093
func.coalesce(NodeUserUsage.node_id, 0).label("node_id"),
11161094
func.sum(NodeUserUsage.used_traffic).label("total_traffic"),
11171095
)
1096+
.select_from(from_clause)
11181097
.where(and_(*conditions))
11191098
.group_by(trunc_expr, NodeUserUsage.node_id)
11201099
.order_by(trunc_expr)
11211100
)
11221101
else:
11231102
stmt = (
11241103
select(trunc_expr.label("period_start"), func.sum(NodeUserUsage.used_traffic).label("total_traffic"))
1104+
.select_from(from_clause)
11251105
.where(and_(*conditions))
11261106
.group_by(trunc_expr)
11271107
.order_by(trunc_expr)
11281108
)
11291109

1130-
# HAVING clause to exclude partial first bucket
1131-
# Only needed if start has timezone (which means we did timezone-aware grouping)
1132-
if start.tzinfo:
1133-
# Get the first COMPLETE bucket boundary
1134-
# Example: if start is 14:02:37, first_complete_bucket is 15:00:00
1135-
first_complete_bucket = _get_next_period_boundary(start, period)
1136-
1137-
# Convert to naive for comparison (represents wall-clock time in target timezone)
1138-
boundary_value = first_complete_bucket.replace(tzinfo=None)
1139-
1140-
# Add HAVING clause with appropriate comparison based on dialect
1141-
if dialect == "postgresql":
1142-
# PostgreSQL: trunc_expr returns timestamp, compare to timestamp
1143-
stmt = stmt.having(trunc_expr >= boundary_value)
1144-
elif dialect in ("mysql", "sqlite"):
1145-
# MySQL/SQLite: Use the alias 'period_start' in HAVING
1146-
format_str = MYSQL_FORMATS[period] if dialect == "mysql" else SQLITE_FORMATS[period]
1147-
boundary_str = boundary_value.strftime(format_str.replace("%i", "%M"))
1148-
stmt = stmt.having(literal_column("period_start") >= boundary_str)
1149-
11501110
result = await db.execute(stmt)
11511111

11521112
stats = {}

0 commit comments

Comments
 (0)