Skip to content

Commit

Permalink
Cosmos Metrics: allow enabling only metrics of certain categories (#3…
Browse files Browse the repository at this point in the history
…3436)

* Cosmos Metrics: allow enabling only metrics of certain categories

* Updating changelogs

* Small refactoring removing unnecessary metricTagNames/metricCategories fields in AsyncDoclmplementations

* Linting fixes

* Fixing test regressions

* Adding tests

* Fixing test failure

* Aligning MetricNames with the new categories

* Update CosmosClientTelemetryConfig.java

* Update RntbdServiceEndpoint.java

* Test changes

* Fixing test issues

* Public API refactoring

* Update ci.yml

* Trying to fix build issue

* Trying to fix build issue

* Adding missing javadoc comments

* Refactoring to allow more granular option definition for metrics

* Fixing linting violations

* Fixing tests

* Making system meters configurable

* NITs

* Update CosmosMeterOptions.java

* Test fix

* Adding few more tests and fixing javadoc violations

* Reacting to Code review feedback

* Reacting to API review feedback

* Refactoring based on API review

* Update pom.xml

* Update CosmosMeterOptions.java

* Update CosmosMeterOptions.java

* Update CosmosMeterOptions.java

* Update CosmosMeterOptions.java

* Update pom.xml

* Update pom.xml

* Update ClientMetricsTest.java

* Refactoring to avoid dependency on ExtendableStringEnum

* Fixing regression from removing ExpandableStringENum

* Update CosmosMetricName.java

* Update ClientMetricsTest.java
  • Loading branch information
FabianMeiswinkel committed Feb 17, 2023
1 parent f9eadbc commit 0c0625d
Show file tree
Hide file tree
Showing 32 changed files with 2,872 additions and 571 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import com.azure.core.management.AzureEnvironment
import com.azure.core.management.profile.AzureProfile
import com.azure.cosmos.implementation.clienttelemetry.TagName
import com.azure.cosmos.implementation.{CosmosClientMetadataCachesSnapshot, CosmosDaemonThreadFactory, SparkBridgeImplementationInternal, Strings}
import com.azure.cosmos.models.{CosmosClientTelemetryConfig, CosmosMicrometerMetricsOptions}
import com.azure.cosmos.models.{CosmosClientTelemetryConfig, CosmosMetricCategory, CosmosMetricTagName, CosmosMicrometerMetricsOptions}
import com.azure.cosmos.spark.CosmosPredicates.isOnSparkDriver
import com.azure.cosmos.spark.catalog.{CosmosCatalogClient, CosmosCatalogCosmosSDKClient, CosmosCatalogManagementSDKClient}
import com.azure.cosmos.spark.diagnostics.BasicLoggingTrait
Expand Down Expand Up @@ -212,20 +212,32 @@ private[spark] object CosmosClientCache extends BasicLoggingTrait {
case None => customApplicationNameSuffix
}

val metricsOptions = new CosmosMicrometerMetricsOptions()
.meterRegistry(CosmosClientMetrics.meterRegistry.get)
.defaultTagNames(
CosmosMetricTagName.CONTAINER,
CosmosMetricTagName.CLIENT_CORRELATION_ID,
CosmosMetricTagName.OPERATION,
CosmosMetricTagName.OPERATION_STATUS_CODE,
CosmosMetricTagName.PARTITION_KEY_RANGE_ID,
CosmosMetricTagName.SERVICE_ADDRESS,
CosmosMetricTagName.ADDRESS_RESOLUTION_COLLECTION_MAP_REFRESH,
CosmosMetricTagName.ADDRESS_RESOLUTION_FORCED_REFRESH,
CosmosMetricTagName.REQUEST_STATUS_CODE,
CosmosMetricTagName.REQUEST_OPERATION_TYPE
)
.setMetricCategories(
CosmosMetricCategory.SYSTEM,
CosmosMetricCategory.OPERATION_SUMMARY,
CosmosMetricCategory.REQUEST_SUMMARY,
CosmosMetricCategory.DIRECT_ADDRESS_RESOLUTIONS,
CosmosMetricCategory.DIRECT_REQUESTS,
CosmosMetricCategory.DIRECT_CHANNELS
)

val telemetryConfig = new CosmosClientTelemetryConfig()
.metricsOptions(
new CosmosMicrometerMetricsOptions().meterRegistry(CosmosClientMetrics.meterRegistry.get)
)
.metricsOptions(metricsOptions)
.clientCorrelationId(clientCorrelationId)
.metricTagNames(
TagName.Container.toString,
TagName.ClientCorrelationId.toString,
TagName.Operation.toString,
TagName.OperationStatusCode.toString,
TagName.PartitionKeyRangeId.toString,
TagName.ServiceEndpoint.toString,
TagName.ServiceAddress.toString
)

builder.clientTelemetryConfig(telemetryConfig)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import com.azure.cosmos.implementation.TracerProvider;
import com.azure.cosmos.implementation.clienttelemetry.ClientTelemetry;
import com.azure.cosmos.implementation.clienttelemetry.ClientTelemetryMetrics;
import com.azure.cosmos.implementation.clienttelemetry.CosmosMeterOptions;
import com.azure.cosmos.implementation.clienttelemetry.MetricCategory;
import com.azure.cosmos.implementation.clienttelemetry.TagName;
import com.azure.cosmos.implementation.directconnectivity.rntbd.RntbdMetrics;
import com.azure.cosmos.implementation.throughputControl.config.ThroughputControlGroupInternal;
Expand All @@ -27,6 +29,8 @@
import com.azure.cosmos.models.CosmosDatabaseProperties;
import com.azure.cosmos.models.CosmosDatabaseRequestOptions;
import com.azure.cosmos.models.CosmosDatabaseResponse;
import com.azure.cosmos.models.CosmosMetricName;
import com.azure.cosmos.models.CosmosMicrometerMeterOptions;
import com.azure.cosmos.models.CosmosPermissionProperties;
import com.azure.cosmos.models.CosmosQueryRequestOptions;
import com.azure.cosmos.models.ModelBridgeInternal;
Expand Down Expand Up @@ -62,6 +66,11 @@
isAsync = true)
public final class CosmosAsyncClient implements Closeable {

private static final ImplementationBridgeHelpers.CosmosClientTelemetryConfigHelper.CosmosClientTelemetryConfigAccessor
telemetryConfigAccessor = ImplementationBridgeHelpers
.CosmosClientTelemetryConfigHelper
.getCosmosClientTelemetryConfigAccessor();

// Async Cosmos client wrapper
private final Configs configs;
private final AsyncDocumentClient asyncDocumentClient;
Expand All @@ -83,7 +92,6 @@ public final class CosmosAsyncClient implements Closeable {
private final String clientCorrelationId;
private final Tag clientCorrelationTag;
private final String accountTagValue;
private final EnumSet<TagName> metricTagNames;
private final boolean clientMetricsEnabled;
private final boolean isSendClientTelemetryToServiceEnabled;
private final MeterRegistry clientMetricRegistrySnapshot;
Expand Down Expand Up @@ -114,10 +122,6 @@ public final class CosmosAsyncClient implements Closeable {
this.sessionCapturingOverride = builder.isSessionCapturingOverrideEnabled();
this.enableTransportClientSharing = builder.isConnectionSharingAcrossClientsEnabled();
this.proactiveContainerInitConfig = builder.getProactiveContainerInitConfig();
ImplementationBridgeHelpers.CosmosClientTelemetryConfigHelper.CosmosClientTelemetryConfigAccessor
telemetryConfigAccessor = ImplementationBridgeHelpers
.CosmosClientTelemetryConfigHelper
.getCosmosClientTelemetryConfigAccessor();

CosmosClientTelemetryConfig effectiveTelemetryConfig = telemetryConfigAccessor
.createSnapshot(
Expand All @@ -137,8 +141,6 @@ public final class CosmosAsyncClient implements Closeable {
this.apiType = builder.apiType();
this.clientCorrelationId = telemetryConfigAccessor
.getClientCorrelationId(effectiveTelemetryConfig);
this.metricTagNames = telemetryConfigAccessor
.getMetricTagNames(effectiveTelemetryConfig);

List<Permission> permissionList = new ArrayList<>();
if (this.permissions != null) {
Expand Down Expand Up @@ -167,7 +169,6 @@ public final class CosmosAsyncClient implements Closeable {
.withApiType(this.apiType)
.withClientTelemetryConfig(this.clientTelemetryConfig)
.withClientCorrelationId(this.clientCorrelationId)
.withMetricTagNames(this.metricTagNames)
.build();

String effectiveClientCorrelationId = this.asyncDocumentClient.getClientCorrelationId();
Expand All @@ -190,8 +191,15 @@ public final class CosmosAsyncClient implements Closeable {
this.clientMetricRegistrySnapshot = telemetryConfigAccessor
.getClientMetricRegistry(effectiveTelemetryConfig);
this.clientMetricsEnabled = clientMetricRegistrySnapshot != null;

CosmosMeterOptions cpuMeterOptions = telemetryConfigAccessor
.getMeterOptions(effectiveTelemetryConfig, CosmosMetricName.SYSTEM_CPU);
CosmosMeterOptions memoryMeterOptions = telemetryConfigAccessor
.getMeterOptions(effectiveTelemetryConfig, CosmosMetricName.SYSTEM_MEMORY_FREE);


if (clientMetricRegistrySnapshot != null) {
ClientTelemetryMetrics.add(clientMetricRegistrySnapshot);
ClientTelemetryMetrics.add(clientMetricRegistrySnapshot, cpuMeterOptions, memoryMeterOptions);
}
this.accountTagValue = URI.create(this.serviceEndpoint).getHost().replace(
".documents.azure.com", ""
Expand Down Expand Up @@ -725,7 +733,14 @@ public String getAccountTagValue(CosmosAsyncClient client) {

@Override
public EnumSet<TagName> getMetricTagNames(CosmosAsyncClient client) {
return client.metricTagNames;
return telemetryConfigAccessor
.getMetricTagNames(client.clientTelemetryConfig);
}

@Override
public EnumSet<MetricCategory> getMetricCategories(CosmosAsyncClient client) {
return telemetryConfigAccessor
.getMetricCategories(client.clientTelemetryConfig);
}

@Override
Expand All @@ -747,6 +762,12 @@ public List<String> getPreferredRegions(CosmosAsyncClient client) {
public boolean isEndpointDiscoveryEnabled(CosmosAsyncClient client) {
return client.connectionPolicy.isEndpointDiscoveryEnabled();
}

@Override
public CosmosMeterOptions getMeterOptions(CosmosAsyncClient client, CosmosMetricName name) {
return telemetryConfigAccessor
.getMeterOptions(client.clientTelemetryConfig, name);
}
}
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -658,20 +658,6 @@ public CosmosClientBuilder multipleWriteRegionsEnabled(boolean multipleWriteRegi
* @return current CosmosClientBuilder
*/
public CosmosClientBuilder clientTelemetryEnabled(boolean clientTelemetryEnabled) {
ImplementationBridgeHelpers.CosmosClientTelemetryConfigHelper.CosmosClientTelemetryConfigAccessor accessor =
ImplementationBridgeHelpers
.CosmosClientTelemetryConfigHelper
.getCosmosClientTelemetryConfigAccessor();

Boolean explicitlySetInConfig = accessor.isSendClientTelemetryToServiceEnabled(this.clientTelemetryConfig);

if (explicitlySetInConfig != null) {
CosmosClientTelemetryConfig newTelemetryConfig = accessor
.createSnapshot(this.clientTelemetryConfig, clientTelemetryEnabled);
accessor.resetIsSendClientTelemetryToServiceEnabled(newTelemetryConfig);
this.clientTelemetryConfig = newTelemetryConfig;
}

this.clientTelemetryEnabledOverride = clientTelemetryEnabled;
return this;
}
Expand Down Expand Up @@ -796,8 +782,6 @@ boolean isClientTelemetryEnabled() {
.getCosmosClientTelemetryConfigAccessor()
.isSendClientTelemetryToServiceEnabled(this.clientTelemetryConfig);

assert(this.clientTelemetryEnabledOverride == null || explicitlySetInConfig == null);

if (this.clientTelemetryEnabledOverride != null) {
return this.clientTelemetryEnabledOverride;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
import com.azure.cosmos.implementation.caches.RxClientCollectionCache;
import com.azure.cosmos.implementation.caches.RxPartitionKeyRangeCache;
import com.azure.cosmos.implementation.clienttelemetry.ClientTelemetry;
import com.azure.cosmos.implementation.clienttelemetry.TagName;
import com.azure.cosmos.implementation.query.PartitionedQueryExecutionInfo;
import com.azure.cosmos.implementation.throughputControl.config.ThroughputControlGroupInternal;
import com.azure.cosmos.models.CosmosClientTelemetryConfig;
Expand All @@ -30,7 +29,6 @@

import java.net.URI;
import java.net.URISyntaxException;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;

Expand Down Expand Up @@ -98,7 +96,6 @@ class Builder {
private ApiType apiType;
CosmosClientTelemetryConfig clientTelemetryConfig;
private String clientCorrelationId = null;
private EnumSet<TagName> metricTagNames = EnumSet.allOf(TagName.class);

public Builder withServiceEndpoint(String serviceEndpoint) {
try {
Expand All @@ -125,12 +122,6 @@ public Builder withClientCorrelationId(String clientCorrelationId) {
return this;
}

public Builder withMetricTagNames(EnumSet<TagName> tagNames) {
this.metricTagNames = tagNames;

return this;
}

/**
* New method withMasterKeyOrResourceToken will take either master key or resource token
* and perform authentication for accessing resource.
Expand Down Expand Up @@ -273,8 +264,7 @@ public AsyncDocumentClient build() {
state,
apiType,
clientTelemetryConfig,
clientCorrelationId,
metricTagNames);
clientCorrelationId);

client.init(state, null);
return client;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import com.azure.cosmos.ThroughputControlGroupConfig;
import com.azure.cosmos.implementation.batch.ItemBatchOperation;
import com.azure.cosmos.implementation.batch.PartitionScopeThresholds;
import com.azure.cosmos.implementation.clienttelemetry.CosmosMeterOptions;
import com.azure.cosmos.implementation.clienttelemetry.MetricCategory;
import com.azure.cosmos.implementation.clienttelemetry.TagName;
import com.azure.cosmos.implementation.directconnectivity.rntbd.RntbdChannelStatistics;
import com.azure.cosmos.implementation.patch.PatchOperation;
Expand All @@ -38,6 +40,8 @@
import com.azure.cosmos.models.CosmosContainerProperties;
import com.azure.cosmos.models.CosmosItemRequestOptions;
import com.azure.cosmos.models.CosmosItemResponse;
import com.azure.cosmos.models.CosmosMetricName;
import com.azure.cosmos.models.CosmosMicrometerMeterOptions;
import com.azure.cosmos.models.CosmosPatchOperations;
import com.azure.cosmos.models.CosmosQueryRequestOptions;
import com.azure.cosmos.models.FeedResponse;
Expand Down Expand Up @@ -1044,10 +1048,12 @@ public interface CosmosAsyncClientAccessor {
Tag getClientCorrelationTag(CosmosAsyncClient client);
String getAccountTagValue(CosmosAsyncClient client);
EnumSet<TagName> getMetricTagNames(CosmosAsyncClient client);
EnumSet<MetricCategory> getMetricCategories(CosmosAsyncClient client);
boolean isClientTelemetryMetricsEnabled(CosmosAsyncClient client);
boolean isSendClientTelemetryToServiceEnabled(CosmosAsyncClient client);
List<String> getPreferredRegions(CosmosAsyncClient client);
boolean isEndpointDiscoveryEnabled(CosmosAsyncClient client);
CosmosMeterOptions getMeterOptions(CosmosAsyncClient client, CosmosMetricName name);
}
}

Expand Down Expand Up @@ -1130,12 +1136,15 @@ public interface CosmosClientTelemetryConfigAccessor {
int getMaxConnectionPoolSize(CosmosClientTelemetryConfig config);
Duration getIdleHttpConnectionTimeout(CosmosClientTelemetryConfig config);
ProxyOptions getProxy(CosmosClientTelemetryConfig config);
EnumSet<MetricCategory> getMetricCategories(CosmosClientTelemetryConfig config);
EnumSet<TagName> getMetricTagNames(CosmosClientTelemetryConfig config);
String getClientCorrelationId(CosmosClientTelemetryConfig config);
MeterRegistry getClientMetricRegistry(CosmosClientTelemetryConfig config);
Boolean isSendClientTelemetryToServiceEnabled(CosmosClientTelemetryConfig config);
boolean isClientMetricsEnabled(CosmosClientTelemetryConfig config);
void resetIsSendClientTelemetryToServiceEnabled(CosmosClientTelemetryConfig config);
CosmosMeterOptions getMeterOptions(CosmosClientTelemetryConfig config, CosmosMetricName name);
CosmosMeterOptions createDisabledMeterOptions(CosmosMetricName name);
CosmosClientTelemetryConfig createSnapshot(
CosmosClientTelemetryConfig config,
boolean effectiveIsClientTelemetryEnabled);
Expand Down
Loading

0 comments on commit 0c0625d

Please sign in to comment.