Some adjustments and workarounds for new db performance and settings.
GUI committed Feb 15, 2024
commit 034b71a (1 parent: 488f347)
Showing 3 changed files with 45 additions and 8 deletions.
config/schema.cue (1 addition, 0 deletions)

@@ -509,6 +509,7 @@ import "path"
       sync_interval: string | *"10s"
     }
   }
+  max_buckets: uint | *10000
 }
 
 #analytics_output_name: "opensearch"
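The new 10000 default replaces the hard-coded 100000000 bucket sizes in the Lua changes below, capping how many terms buckets any one aggregation can request. A minimal sketch of how the setting is consumed (the require path and loader here are assumptions for illustration, not part of this commit; the app wires `config` up elsewhere):

-- Minimal sketch, not from the commit: the require path below is a
-- hypothetical stand-in for however the app actually loads its config.
local config = require("api-umbrella.utils.load_config")()

-- max_buckets defaults to 10000 via the schema.cue change above.
local max_buckets = config["opensearch"]["max_buckets"]
local agg = {
  terms = {
    field = "user_id",
    size = max_buckets,
    shard_size = max_buckets * 4, -- same 4x shard headroom used in the diffs below
  },
}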
src/api-umbrella/web-app/actions/admin/stats.lua (14 additions, 1 deletion)

@@ -286,6 +286,19 @@ function _M.search(self)
   search:aggregate_by_response_time_average()
 
   local results = search:fetch_results()
+
+  -- Optimization: Every request should have an IP, so we don't need to perform
+  -- extra aggregations to look for total counts and missing values, since we
+  -- can assume the total count matches the overall hit count, and the missing
+  -- IPs are zero. But we'll fake the structure needed for `aggregation_result`
+  -- below.
+  results["aggregations"]["value_count_request_ip"] = {
+    value = results["hits"]["_total_value"],
+  }
+  results["aggregations"]["missing_request_ip"] = {
+    doc_count = 0,
+  }
+
   local response = {
     stats = {
       total_hits = results["hits"]["_total_value"],
@@ -302,7 +315,7 @@

if results["aggregations"] then
response["stats"]["total_users"] = results["aggregations"]["unique_user_email"]["value"]
response["stats"]["total_ips"] = results["aggregations"]["unique_request_ip"]["value"]
response["stats"]["total_ips"] = results["aggregations"]["sampled_ips"]["unique_request_ip"]["value"]
response["stats"]["average_response_time"] = results["aggregations"]["response_time_average"]["value"]
end

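The two faked entries above mimic what real value_count and missing aggregations would return ({ value = N } and { doc_count = M } respectively), so downstream code can stay unchanged. A hedged sketch of a consumer in the shape the code comment's `aggregation_result` helper suggests (its actual signature is not shown in this commit and is assumed here):

-- Illustrative only: an assumed reader of the faked structures.
local function aggregation_result(aggregations, field)
  local total = aggregations["value_count_" .. field]["value"]   -- overall hit count
  local missing = aggregations["missing_" .. field]["doc_count"] -- 0 for request_ip
  return {
    total = total,
    missing = missing,
    present = total - missing, -- every request carries an IP, so present == total
  }
end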
src/api-umbrella/web-app/models/analytics_search_opensearch.lua (30 additions, 7 deletions)

@@ -402,8 +402,8 @@ function _M:aggregate_by_interval_for_summary()
     unique_user_ids = {
       terms = {
         field = "user_id",
-        size = 100000000,
-        shard_size = 100000000 * 4,
+        size = config["opensearch"]["max_buckets"],
+        shard_size = config["opensearch"]["max_buckets"] * 4,
       },
     },
     response_time_average = {
@@ -440,7 +440,7 @@ function _M:aggregate_by_cardinality(field)
self.body["aggregations"]["unique_" .. field] = {
cardinality = {
field = field,
precision_threshold = 100,
precision_threshold = 3000,
},
}
end
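For context on the jump from 100 to 3000: cardinality aggregations are HyperLogLog++-based estimates, and counts stay near-exact up to precision_threshold at a memory cost the OpenSearch docs describe as roughly 8 bytes per unit of threshold. A quick back-of-envelope (the 8-byte figure is the documented rule of thumb, not measured here):

-- Rough memory cost per cardinality aggregation at each threshold.
local bytes_per_unit = 8
print(100 * bytes_per_unit)  -- ~800 bytes: near-exact only up to 100 uniques
print(3000 * bytes_per_unit) -- ~24 KB: near-exact up to ~3000 uniques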
@@ -451,8 +451,31 @@ function _M:aggregate_by_users(size)
 end
 
 function _M:aggregate_by_request_ip(size)
-  self:aggregate_by_term("request_ip", size)
-  self:aggregate_by_cardinality("request_ip")
+  self.body["aggregations"]["top_request_ip"] = {
+    terms = {
+      field = "request_ip",
+      size = size,
+      shard_size = size * 4,
+    },
+  }
+
+  -- TODO: Getting unique IP counts currently not performing well and causing
+  -- timeouts. Might need to look into mapper-murmur3 field for this. See
+  -- https://github.com/opensearch-project/OpenSearch/issues/2820
+  -- In the meantime, perform sampling to at least return something.
+  self.body["aggregations"]["sampled_ips"] = {
+    sampler = {
+      shard_size = 1000000,
+    },
+    aggregations = {
+      unique_request_ip = {
+        cardinality = {
+          field = "request_ip",
+          precision_threshold = 3000,
+        },
+      },
+    },
+  }
 end
 
 function _M:aggregate_by_response_time_average()
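The sampler wrapper above bounds the work of the unique-IP count: each shard feeds at most shard_size matching documents into the nested cardinality aggregation instead of all of them. A rough illustration of the ceiling that puts on the sample (the shard count is a made-up example; only shard_size = 1000000 comes from the diff):

-- Hypothetical shard count, for illustration only.
local shards = 5
local sampler_shard_size = 1000000 -- from the aggregation above
-- At most shards * shard_size documents inform the unique-IP estimate,
-- no matter how large the index grows.
print(shards * sampler_shard_size) --> 5000000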
@@ -506,7 +529,7 @@ function _M:aggregate_by_drilldown(prefix, size)
   end
 
   if not size then
-    size = 100000000
+    size = config["opensearch"]["max_buckets"]
   end
 
   self.body["aggregations"]["drilldown"] = {
@@ -592,7 +615,7 @@ function _M:aggregate_by_user_stats(order)
self.body["aggregations"]["user_stats"] = {
terms = {
field = "user_id",
size = 100000000,
size = config["opensearch"]["max_buckets"],
},
aggregations = {
last_request_at = {
