diff --git a/.github/workflows/logstash.yml b/.github/workflows/logstash.yml index 8de2f97d..16d28da3 100644 --- a/.github/workflows/logstash.yml +++ b/.github/workflows/logstash.yml @@ -44,7 +44,7 @@ jobs: # echo Using API_BUILDER_URL: $API_BUILDER_URL # echo Using MEMCACHED: $MEMCACHED # echo Using GEOIP_ENABLED: $GEOIP_ENABLED, GEOIP_CUSTOM_ATTRIBUTE: ${GEOIP_CUSTOM_ATTRIBUTE}, GEOIP_CACHE_SIZE: ${GEOIP_CACHE_SIZE} - # ./logstash-filter-verifier --diff-command="diff -y" --logstash-output --keep-env=API_BUILDER_SSL_CERT --keep-env=API_BUILDER_URL --keep-env=MEMCACHED --keep-env=GEOIP_ENABLED --keep-env=GEOIP_CUSTOM_ATTRIBUTE --keep-env=GEOIP_CACHE_SIZE --keep-env=EVENTLOG_CUSTOM_ATTR ./logstash/test/http/test-events.json ./logstash/pipelines/EventsPipeline.conf + # ./logstash-filter-verifier --diff-command="diff -y" --logstash-output --keep-env=API_BUILDER_SSL_CERT --keep-env=API_BUILDER_URL --keep-env=MEMCACHED --keep-env=CACHE_API_PATHS ./logstash/test/http/test-opentrafficlog.json ./logstash/pipelines/OpenTrafficPipeline.conf run: | echo Using API_BUILDER_URL: $API_BUILDER_URL @@ -54,7 +54,7 @@ jobs: ./logstash-filter-verifier --diff-command="diff -y" --keep-env=API_BUILDER_SSL_CERT --keep-env=API_BUILDER_URL --keep-env=MEMCACHED --keep-env=DROP_TRACE_MESSAGE_LEVELS ./logstash/test/http/test-tracemessages.json ./logstash/pipelines/TraceMessagesPipeline.conf ./logstash-filter-verifier --diff-command="diff -y" --keep-env=API_BUILDER_SSL_CERT --keep-env=API_BUILDER_URL --keep-env=MEMCACHED ./logstash/test/http/test-tracemessages-gmt-4.json ./logstash/pipelines/TraceMessagesPipeline.conf ./logstash-filter-verifier --diff-command="diff -y" --keep-env=API_BUILDER_SSL_CERT --keep-env=API_BUILDER_URL --keep-env=MEMCACHED --keep-env=GEOIP_ENABLED --keep-env=GEOIP_CUSTOM_ATTRIBUTE --keep-env=GEOIP_CACHE_SIZE --keep-env=EVENTLOG_CUSTOM_ATTR ./logstash/test/http/test-events.json ./logstash/pipelines/EventsPipeline.conf - ./logstash-filter-verifier --diff-command="diff -y" 
--keep-env=API_BUILDER_SSL_CERT --keep-env=API_BUILDER_URL --keep-env=MEMCACHED ./logstash/test/http/test-opentrafficlog.json ./logstash/pipelines/OpenTrafficPipeline.conf + ./logstash-filter-verifier --diff-command="diff -y" --keep-env=API_BUILDER_SSL_CERT --keep-env=API_BUILDER_URL --keep-env=MEMCACHED --keep-env=CACHE_API_PATHS ./logstash/test/http/test-opentrafficlog.json ./logstash/pipelines/OpenTrafficPipeline.conf ./logstash-filter-verifier --diff-command="diff -y" --keep-env=API_BUILDER_SSL_CERT --keep-env=API_BUILDER_URL --keep-env=MEMCACHED ./logstash/test/filetransfer/test-opentrafficlog-filetransfer.json ./logstash/pipelines/OpenTrafficPipeline.conf ./logstash-filter-verifier --diff-command="diff -y" --keep-env=API_BUILDER_SSL_CERT --keep-env=API_BUILDER_URL --keep-env=MEMCACHED ./logstash/test/domainAudit/test-domain-audit-events.json ./logstash/pipelines/DomainAuditPipeline.conf env: @@ -70,6 +70,7 @@ jobs: # This is not relevant as we test with HTTP # But it's required by Logstash to be a valid certificate API_BUILDER_SSL_CERT: "./config/certificates/apibuilder4elastic.crt" + CACHE_API_PATHS: "/petstore/v2, /api/v1/banana, /api/v2/banana" - name: API-Builder Logs if: ${{ always() }} run: | diff --git a/CHANGELOG.md b/CHANGELOG.md index 1879ae83..dc0fba00 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/). - Wait 5 instead of 2 seconds, before trying to attach ILM-Policy to Index-Template - It may take a bit longer to obtain Custom-Properties from API-Manager for the Index-Template +### Added +- Added support to cache APIs with Path-Parameter more efficiently to reduce document ingest latency + - See the new parameter: `CACHE_API_PATHS` for more details. 
+ ## [4.3.0] 2022-03-11 ### Added - Added Geo-Location map of API-Requests diff --git a/UPDATE.md b/UPDATE.md index 9e7957b6..a4aac747 100644 --- a/UPDATE.md +++ b/UPDATE.md @@ -37,6 +37,7 @@ On the other hand, the API builder Docker image, as a central component of the s | Ver | API-Builder | Logstash | Memcached | Filebeat | ANM-Config | Dashboards | Params |Elastic-Config | ELK-Ver. | Notes | | :--- | :---: | :---: | :---: | :---: | :---: | :---: | :---: |:---: | :---: | :--- | +| 4.4.0 | [X](#api-builderlogstashmemcached) | [X](#api-builderlogstashmemcached) | - | - | - | - | [X](#parameters)|- | [7.17.1](#update-elastic-stack-version) | | | 4.3.0 | [X](#api-builderlogstashmemcached) | [X](#api-builderlogstashmemcached) | - | - | - | [X](#dashboards)| [X](#parameters)|[X](#elastic-config)| [7.17.1](#update-elastic-stack-version) | | | 4.2.0 | [X](#api-builderlogstashmemcached) | - | - | - | - | [X](#dashboards)| [X](#parameters)|- | [7.17.0](#update-elastic-stack-version) | | | 4.1.0 | [X](#api-builderlogstashmemcached) | - | - | - | - | - | [X](#parameters)|[X](#elastic-config)| [7.16.3](#update-elastic-stack-version) | | diff --git a/docker-compose.yml b/docker-compose.yml index 9fac38c5..c46e6358 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -30,6 +30,7 @@ services: - GEOIP_CUSTOM_ATTRIBUTE=${GEOIP_CUSTOM_ATTRIBUTE:-xForwardedFor} - xpack.geoip.download.endpoint=${GEOIP_DOWNLOAD_ENDPOINT} - EVENTLOG_CUSTOM_ATTR=${EVENTLOG_CUSTOM_ATTR} + - CACHE_API_PATHS=${CACHE_API_PATHS} ports: - 5044:5044 volumes: diff --git a/env-sample b/env-sample index 5c2a61b7..5c63b461 100644 --- a/env-sample +++ b/env-sample @@ -302,6 +302,24 @@ MEMCACHED=memcached:11211 # Used-By: Logstash, API-Builder # LOOKUP_CACHE_TTL=1200 +# ---------------------------------------------------------------------------------------------- +# In case an API contains path parameters (e.g. /api/v2/pet/123456789), it may be advisable to +# configure them here to optimize caching. 
This is especially useful if the path parameter is highly variable +# (e.g. customer IDs): because the API request path is then different for almost every request, +# Logstash cannot cache the looked-up API details efficiently. +# Therefore, you can configure a comma-separated list of API paths here. The Logstash pipeline +# checks whether the received API request path starts with one of the configured paths; if so, the +# API details are cached with the configured path (/api/v2/pet) instead of the received +# API path (/api/v2/pet/123456789). +# It is recommended to configure the most frequently called paths first in the list to avoid +# unnecessary iterations. +# You can also configure a shorter path. Keep in mind that the API details are cached based on the +# configured API path, so a configured path must not match two APIs that are +# actually different. +# Defaults to null +# Used-By: Logstash +# CACHE_API_PATHS=/api/v2/petstore, /api/v1/user, ... + # ---------------------------------------------------------------------------------------------- # The maximum heap memory for ES. Xmx should be configured to 50% of the available memory, when # running ES on a dedicated node. Both values should be same. 
diff --git a/helm/templates/elasticApimLogstash/logstash-config.yaml b/helm/templates/elasticApimLogstash/logstash-config.yaml index adb120b7..319f2f08 100644 --- a/helm/templates/elasticApimLogstash/logstash-config.yaml +++ b/helm/templates/elasticApimLogstash/logstash-config.yaml @@ -37,6 +37,7 @@ data: {{- if .Values.logstash.lookupCacheTTL }} LOOKUP_CACHE_TTL: {{ default "" .Values.logstash.lookupCacheTTL | quote }} {{- end }} + CACHE_API_PATHS: {{ default "" .Values.logstash.cacheAPIPaths | quote }} GEOIP_ENABLED: {{ default "true" .Values.logstash.geoip.enabled | quote }} GEOIP_CACHE_SIZE: {{ default "1000" .Values.logstash.geoip.cacheSize | quote }} GEOIP_CUSTOM_ATTRIBUTE: {{ default "true" .Values.logstash.geoip.customAttribute | quote }} diff --git a/helm/values.yaml b/helm/values.yaml index 12f2b8f1..28d0eca7 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -270,6 +270,10 @@ logstash: # https://www.elastic.co/guide/en/logstash/current/plugins-filters-geoip.html#plugins-filters-geoip-manage_update # downloadEndpoint: "http://some-custom-endpoint" + # Configures a list of API-Paths that should be used for caching instead of the complete request path. + # Check the env-sample parameter: CACHE_API_PATHS for more details. + # cacheAPIPaths: "/api/v2/petstore, /api/v1/user, ..." + # Injects the environment variables from the ConfigMaps and Secrets into the # Logstash container. Specify your own ConfigMaps or Secrets if you don't # provide Configuration and Secrets as part of this values.yaml. 
diff --git a/logstash/pipelines/OpenTrafficPipeline.conf b/logstash/pipelines/OpenTrafficPipeline.conf
index d5ea0533..a8a25a66 100644
--- a/logstash/pipelines/OpenTrafficPipeline.conf
+++ b/logstash/pipelines/OpenTrafficPipeline.conf
@@ -139,14 +139,40 @@ filter {
   if([transactionSummary] and [transactionSummary][protocol] in ["http", "https"]) {
     # Check, if an apiName is given (not the case for native APIs such as /healthcheck) to be used for the lookup
     if([transactionSummary][serviceContext][service]) {
-      mutate { add_field => { "[@metadata][apiName]" => "%{[transactionSummary][serviceContext][service]}" } }
+      mutate {
+        add_field => {
+          "[@metadata][apiName]" => "%{[transactionSummary][serviceContext][service]}"
+          "[@metadata][cacheAPIPaths]" => "${CACHE_API_PATHS:''}"
+        }
+      }
     } else {
       # Without an API name, only API path is used for the API lookup.
       mutate { add_field => { "[@metadata][apiName]" => "" } }
     }
-    # Create a key for the API
+    # Create a cache key for the API, based either on the received request path or on a configured path.
+    # For example: with CACHE_API_PATHS=/v1/get/pet the request /v1/get/pet/687687678 is cached under /v1/get/pet.
+    # CACHE_API_PATHS is a comma separated list; whitespace around the individual entries is ignored.
+    ruby {
+      code => '
+        apiRequestPath = event.get("[transactionSummary][path]");
+        # Default: the received request path is used as the cache key prefix.
+        event.set("[@metadata][apiCacheKeyPrefix]", apiRequestPath);
+        cacheAPIPaths = event.get("[@metadata][cacheAPIPaths]");
+        # Nothing to match when no paths are configured or the event carries no request path.
+        return if cacheAPIPaths.nil? || cacheAPIPaths.empty? || apiRequestPath.nil?;
+        logger.debug("Configured paths and current API-Request path: ", { "cacheAPIPaths" => cacheAPIPaths, "apiRequestPath" => apiRequestPath } );
+        # Trim entries (documented format is "a, b") and drop empty ones, as start_with?("") always matches.
+        matchedPath = cacheAPIPaths.split(",").map(&:strip).reject(&:empty?).find { |configuredPath| apiRequestPath.start_with?(configuredPath) };
+        unless matchedPath.nil?
+          logger.debug("Using configured path as primary cache key as it matches to request path. ", { "configuredPath" => matchedPath, "apiRequestPath" => apiRequestPath });
+          event.set("[@metadata][apiCacheKeyPrefix]", matchedPath);
+        end
+      '
+    }
     mutate {
-      add_field => { "apiCacheKey" => "%{[transactionSummary][path]}###%{[processInfo][groupId]}###%{[processInfo][gatewayRegion]}" }
+      add_field => {
+        "apiCacheKey" => "%{[@metadata][apiCacheKeyPrefix]}###%{[processInfo][groupId]}###%{[processInfo][gatewayRegion]}"
+      }
     }
     # Lookup the cache with the created API-Key (API-Name---API-Path)
     memcached {