diff --git a/bin/kafka-run-class.sh b/bin/kafka-run-class.sh
index 2ec19b605b..81b67e2d1c 100755
--- a/bin/kafka-run-class.sh
+++ b/bin/kafka-run-class.sh
@@ -276,13 +276,13 @@ fi
 
 # Memory options
 if [ -z "$KAFKA_HEAP_OPTS" ]; then
-  KAFKA_HEAP_OPTS="-Xmx256M"
+  KAFKA_HEAP_OPTS="-Xmx6g -XX:MaxDirectMemorySize=6g -XX:MetaspaceSize=96m"
 fi
 
 # JVM performance options
 # MaxInlineLevel=15 is the default since JDK 14 and can be removed once older JDKs are no longer supported
 if [ -z "$KAFKA_JVM_PERFORMANCE_OPTS" ]; then
-  KAFKA_JVM_PERFORMANCE_OPTS="-server -XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 -XX:+ExplicitGCInvokesConcurrent -XX:MaxInlineLevel=15 -Djava.awt.headless=true"
+  KAFKA_JVM_PERFORMANCE_OPTS="-server -XX:+UseZGC -XX:ZCollectionInterval=5 -XX:MaxInlineLevel=15 -Djava.awt.headless=true"
 fi
 
 while [ $# -gt 0 ]; do
diff --git a/config/kraft/broker.properties b/config/kraft/broker.properties
index 3e2b2020c4..bb9cfcec15 100644
--- a/config/kraft/broker.properties
+++ b/config/kraft/broker.properties
@@ -152,77 +152,39 @@ s3.bucket=ko3
 # The file path of delta WAL in block device
 s3.wal.path=/tmp/kraft-broker-logs/s3wal
 
-# The maximum size of delta WAL in block device, default 1GB
-s3.wal.capacity=1073741824
+# The maximum size of delta WAL in block device, default 2GB
+# s3.wal.capacity=2147483648
 
-# The maximum size of memory cache delta WAL can use, default 200MB
-s3.wal.cache.size=209715200
+# The maximum size the WAL cache can use, default 2GB
+# s3.wal.cache.size=2147483648
 
-# The batched size of delta WAL before being uploaded to S3, default 100MB
-s3.wal.upload.threshold=104857600
+# The batched size of delta WAL before being uploaded to S3, default 500MB
+# s3.wal.upload.threshold=524288000
 
-# The maximum size of block cache the broker can use to cache data read from S3, default 100MB
-s3.block.cache.size=104857600
-
-# The execution interval for stream object compaction, default 60 minutes
-s3.stream.object.compaction.interval.minutes=60
-
-# The maximum size of stream object allowed to be generated in stream compaction, default 10GB
-s3.stream.object.compaction.max.size.bytes=10737418240
-
-# The execution interval for stream set object compaction, default 20 minutes
-s3.stream.set.object.compaction.interval.minutes=20
-
-# The maximum allowed memory consumption for stream set object compaction, default 200MB
-s3.stream.set.object.compaction.cache.size=209715200
-
-# The minimum time before a stream set object to be force split into multiple stream object, default 120 minutes
-s3.stream.set.object.compaction.force.split.minutes=120
-
-# The maximum stream set objects allowed to be compacted in one execution, default 500
-s3.stream.set.object.compaction.max.num=500
+# The maximum size of block cache the broker can use to cache data read from S3, default 1GB
+# s3.block.cache.size=1073741824
 
 # The baseline network bandwidth of the broker in bytes/s, default 100MB/s. This is used to throttle the network usage during compaction
 # and catch up read
-s3.network.baseline.bandwidth=104857600
-
-# Set to true to enable metrics collection
-s3.telemetry.metrics.enable=false
-
-# The metrics level to record, supported values are INFO, DEBUG
-s3.telemetry.metrics.level=INFO
+# s3.network.baseline.bandwidth=104857600
 
+############################# Settings for telemetry #############################
 # The metrics exporter type, supported values are otlp, prometheus, log. Use comma to separate multiple exporters.
-s3.telemetry.metrics.exporter.type=otlp
+# s3.telemetry.metrics.exporter.type=otlp
 
 # The Prometheus HTTP server host and port, if exporter type is set to prometheus
 # s3.metrics.exporter.prom.host=127.0.0.1
 # s3.metrics.exporter.prom.port=9090
 
-# Set to true to enable exporting tracing data to OTel Collector
-s3.telemetry.tracer.enable=false
-
 # The OTel Collector endpoint, if exporter type is set to otlp or tracing is enabled
 # s3.telemetry.exporter.otlp.endpoint=http://${your_host_name}:4317
 
-# Set following configurations for batching
-s3.telemetry.tracer.span.scheduled.delay.ms=1000
-s3.telemetry.tracer.span.max.queue.size=5120
-s3.telemetry.tracer.span.max.batch.size=1024
-
-# Metrics report interval
-s3.telemetry.exporter.report.interval.ms=5000
-
 ############################# Settings for Auto Balancer #############################
 # The metric reporter to collect and report metrics for Auto Balancer
 metric.reporters=kafka.autobalancer.metricsreporter.AutoBalancerMetricsReporter
 
 # The network inbound bandwidth in bytes/s, default 100MB/s. Used in NetworkInCapacityGoal and calculation of inbound bandwidth utilization
-autobalancer.reporter.network.in.capacity=104857600
+# autobalancer.reporter.network.in.capacity=104857600
 
 # The network outbound bandwidth in bytes/s, default 100MB/s. Used in NetworkOutCapacityGoal and calculation of outbound bandwidth utilization
-autobalancer.reporter.network.out.capacity=104857600
-
-# The reporter interval of Auto Balancer metric reporter in milliseconds, default 10s
-autobalancer.reporter.metrics.reporting.interval.ms=10000
-
+# autobalancer.reporter.network.out.capacity=104857600
diff --git a/config/kraft/controller.properties b/config/kraft/controller.properties
index be007a0523..b4df521e60 100644
--- a/config/kraft/controller.properties
+++ b/config/kraft/controller.properties
@@ -145,18 +145,9 @@ s3.bucket=ko3
 # If you are using minio for storage, you have to set this to true.
 #s3.path.style=true
 
-############################# Settings for Auto Balancer #############################
-# Whether to enabled Auto Balancer in controller, default true
-autobalancer.controller.enable=true
-
-# The detect interval of AnomalyDetector, default 1m
-autobalancer.controller.anomaly.detect.interval.ms=60000
-
-# The maximum tolerated delay of metrics, default 20s
-autobalancer.controller.metrics.delay.ms=20000
-
-# The maximum partition reassignment number allowed in single detect period, default 60
-autobalancer.controller.execution.steps=60
+############################# Settings of Controller for Auto Balancer #############################
+# Whether to enable Auto Balancer in controller, default false
+# autobalancer.controller.enable=false
 
 # The topics to be excluded from balancing
 #autobalancer.controller.exclude.topics=topic-a,topic-b,topic-c
diff --git a/config/kraft/server.properties b/config/kraft/server.properties
index 96fa61cadc..efe1ad0a82 100644
--- a/config/kraft/server.properties
+++ b/config/kraft/server.properties
@@ -158,91 +158,46 @@ s3.bucket=ko3
 # The file path of delta WAL in block device
 s3.wal.path=/tmp/kraft-combined-logs/s3wal
 
-# The maximum size of delta WAL in block device, default 1GB
-s3.wal.capacity=1073741824
+# The maximum size of delta WAL in block device, default 2GB
+# s3.wal.capacity=2147483648
 
-# The maximum size of memory cache delta WAL can use, default 200MB
-s3.wal.cache.size=209715200
+# The maximum size the WAL cache can use, default 2GB
+# s3.wal.cache.size=2147483648
 
-# The batched size of delta WAL before being uploaded to S3, default 100MB
-s3.wal.upload.threshold=104857600
+# The batched size of delta WAL before being uploaded to S3, default 500MB
+# s3.wal.upload.threshold=524288000
 
-# The maximum size of block cache the broker can use to cache data read from S3, default 100MB
-s3.block.cache.size=104857600
-
-# The execution interval for stream object compaction, default 60 minutes
-s3.stream.object.compaction.interval.minutes=60
-
-# The maximum size of stream object allowed to be generated in stream compaction, default 10GB
-s3.stream.object.compaction.max.size.bytes=10737418240
-
-# The execution interval for stream set object compaction, default 20 minutes
-s3.stream.set.object.compaction.interval.minutes=20
-
-# The maximum allowed memory consumption for stream set object compaction, default 200MB
-s3.stream.set.object.compaction.cache.size=209715200
-
-# The minimum time before a stream set object to be force split into multiple stream object, default 120 minutes
-s3.stream.set.object.compaction.force.split.minutes=120
-
-# The maximum stream set objects allowed to be compacted in one execution, default 500
-s3.stream.set.object.compaction.max.num=500
+# The maximum size of block cache the broker can use to cache data read from S3, default 1GB
+# s3.block.cache.size=1073741824
 
 # The baseline network bandwidth of the broker in bytes/s, default 100MB/s. This is used to throttle the network usage during compaction
 # and catch up read
-s3.network.baseline.bandwidth=104857600
-
-# Set to true to enable metrics collection
-s3.telemetry.metrics.enable=false
-
-# The metrics level to record, supported values are INFO, DEBUG
-s3.telemetry.metrics.level=INFO
+# s3.network.baseline.bandwidth=104857600
 
+############################# Settings for telemetry #############################
 # The metrics exporter type, supported values are otlp, prometheus, log. Use comma to separate multiple exporters.
-s3.telemetry.metrics.exporter.type=otlp
+# s3.telemetry.metrics.exporter.type=otlp
 
 # The Prometheus HTTP server host and port, if exporter type is set to prometheus
 # s3.metrics.exporter.prom.host=127.0.0.1
 # s3.metrics.exporter.prom.port=9090
 
-# Set to true to enable exporting tracing data to OTel Collector
-s3.telemetry.tracer.enable=false
-
 # The OTel Collector endpoint, if exporter type is set to otlp or tracing is enabled
 # s3.telemetry.exporter.otlp.endpoint=http://${your_host_name}:4317
 
-# Set following configurations for batching
-s3.telemetry.tracer.span.scheduled.delay.ms=1000
-s3.telemetry.tracer.span.max.queue.size=5120
-s3.telemetry.tracer.span.max.batch.size=1024
-
-# Metrics report interval
-s3.telemetry.exporter.report.interval.ms=5000
-
 ############################# Settings for Auto Balancer #############################
 # The metric reporter to collect and report metrics for Auto Balancer
 metric.reporters=kafka.autobalancer.metricsreporter.AutoBalancerMetricsReporter
 
 # The network inbound bandwidth in bytes/s, default 100MB/s. Used in NetworkInCapacityGoal and calculation of inbound bandwidth utilization
-autobalancer.reporter.network.in.capacity=104857600
+# autobalancer.reporter.network.in.capacity=104857600
 
 # The network outbound bandwidth in bytes/s, default 100MB/s. Used in NetworkOutCapacityGoal and calculation of outbound bandwidth utilization
-autobalancer.reporter.network.out.capacity=104857600
-
-# The reporter interval of Auto Balancer metric reporter in milliseconds, default 10s
-autobalancer.reporter.metrics.reporting.interval.ms=10000
-
-# Whether to enabled Auto Balancer in controller, default true
-autobalancer.controller.enable=true
-
-# The detect interval of AnomalyDetector, default 1m
-autobalancer.controller.anomaly.detect.interval.ms=60000
-
-# The maximum tolerated delay of metrics, default 20s
-autobalancer.controller.metrics.delay.ms=20000
+# autobalancer.reporter.network.out.capacity=104857600
 
-# The maximum partition reassignment number allowed in single detect period, default 60
-autobalancer.controller.execution.steps=60
+############################# Settings of Controller for Auto Balancer #############################
+# Whether to enable Auto Balancer in controller, default false
+# autobalancer.controller.enable=false
 
 # The topics to be excluded from balancing
 #autobalancer.controller.exclude.topics=topic-a,topic-b,topic-c
diff --git a/core/src/main/java/kafka/autobalancer/config/AutoBalancerConfig.java b/core/src/main/java/kafka/autobalancer/config/AutoBalancerConfig.java
index a266c6d4ed..fa59de29b1 100644
--- a/core/src/main/java/kafka/autobalancer/config/AutoBalancerConfig.java
+++ b/core/src/main/java/kafka/autobalancer/config/AutoBalancerConfig.java
@@ -35,7 +35,7 @@ public class AutoBalancerConfig extends AbstractConfig {
     public static final String AUTO_BALANCER_METRICS_TOPIC_CLEANUP_POLICY = PREFIX + "topic.cleanup.policy";
     /* Default values */
     public static final String DEFAULT_AUTO_BALANCER_TOPIC = "__auto_balancer_metrics";
-    public static final Integer DEFAULT_AUTO_BALANCER_METRICS_TOPIC_NUM_PARTITIONS = -1;
+    public static final Integer DEFAULT_AUTO_BALANCER_METRICS_TOPIC_NUM_PARTITIONS = 1;
     public static final long DEFAULT_AUTO_BALANCER_METRICS_TOPIC_RETENTION_MS = TimeUnit.MINUTES.toMillis(30);
     public static final String DEFAULT_AUTO_BALANCER_METRICS_TOPIC_CLEANUP_POLICY = String.join(",", TopicConfig.CLEANUP_POLICY_DELETE);
     /* Documents */
diff --git a/core/src/main/scala/kafka/server/KafkaConfig.scala b/core/src/main/scala/kafka/server/KafkaConfig.scala
index 3cc8748170..3ee5d18497 100755
--- a/core/src/main/scala/kafka/server/KafkaConfig.scala
+++ b/core/src/main/scala/kafka/server/KafkaConfig.scala
@@ -313,15 +313,15 @@
   /** ********* Kafka on S3 Configuration *********/
   val S3StreamSetObjectCompactionInterval: Int = 20 // 20min
   val S3StreamSetObjectCompactionCacheSize: Long = 200 * 1024 * 1024 // 200MB
-  val S3StreamSetObjectCompactionStreamSplitSize: Long = 16 * 1024 * 1024 // 16MB
+  val S3StreamSetObjectCompactionStreamSplitSize: Long = 8 * 1024 * 1024 // 8MB
   val S3StreamSetObjectCompactionForceSplitMinutes: Int = 120 // 120min
   val S3StreamSetObjectCompactionMaxObjectNum: Int = 500
   val S3MaxStreamNumPerStreamSetObject: Int = 10000
   val S3MaxStreamObjectNumPerCommit: Int = 10000
-  val S3ObjectRetentionMinutes: Long = 10 // 10min
+  val S3ObjectRetentionMinutes: Long = 30 // 30min
   val S3NetworkBaselineBandwidth: Long = 100 * 1024 * 1024 // 100MB/s
   val S3RefillPeriodMs: Int = 1000 // 1s
-  val S3MetricsExporterReportIntervalMs = 60000 // 1min
+  val S3MetricsExporterReportIntervalMs = 30000 // 30s
   val S3SpanScheduledDelayMs = 1000 // 1s
   val S3SpanMaxQueueSize = 5120
   val S3SpanMaxBatchSize = 1024
@@ -1589,20 +1589,20 @@
       .define(S3PathStyleProp, BOOLEAN, false, HIGH, S3PathStyleDoc)
       .define(S3BucketProp, STRING, null, HIGH, S3BucketDoc)
       .define(S3WALPathProp, STRING, null, HIGH, S3WALPathDoc)
-      .define(S3WALCacheSizeProp, LONG, 209715200L, MEDIUM, S3WALCacheSizeDoc)
-      .define(S3WALCapacityProp, LONG, 1073741824L, MEDIUM, S3WALCapacityDoc)
+      .define(S3WALCacheSizeProp, LONG, 2147483648L, MEDIUM, S3WALCacheSizeDoc)
+      .define(S3WALCapacityProp, LONG, 2147483648L, MEDIUM, S3WALCapacityDoc)
       .define(S3WALHeaderFlushIntervalSecondsProp, INT, 10, MEDIUM, S3WALHeaderFlushIntervalSecondsDoc)
       .define(S3WALThreadProp, INT, 8, MEDIUM, S3WALThreadDoc)
       .define(S3WALQueueProp, INT, 10000, MEDIUM, S3WALQueueDoc)
       .define(S3WALWindowInitialProp, LONG, 1048576L, MEDIUM, S3WALWindowInitialDoc)
       .define(S3WALWindowIncrementProp, LONG, 4194304L, MEDIUM, S3WALWindowIncrementDoc)
       .define(S3WALWindowMaxProp, LONG, 536870912L, MEDIUM, S3WALWindowMaxDoc)
-      .define(S3WALUploadThresholdProp, LONG, 104857600L, MEDIUM, S3WALUploadThresholdDoc)
-      .define(S3StreamSplitSizeProp, INT, 16777216, MEDIUM, S3StreamSplitSizeDoc)
+      .define(S3WALUploadThresholdProp, LONG, 524288000L, MEDIUM, S3WALUploadThresholdDoc)
+      .define(S3StreamSplitSizeProp, INT, 8388608, MEDIUM, S3StreamSplitSizeDoc)
       .define(S3ObjectBlockSizeProp, INT, 1048576, MEDIUM, S3ObjectBlockSizeDoc)
       .define(S3ObjectPartSizeProp, INT, 16777216, MEDIUM, S3ObjectPartSizeDoc)
       .define(S3BlockCacheSizeProp, LONG, 104857600L, MEDIUM, S3BlockCacheSizeDoc)
-      .define(S3StreamObjectCompactionIntervalMinutesProp, INT, 60, MEDIUM, S3StreamObjectCompactionIntervalMinutesDoc)
+      .define(S3StreamObjectCompactionIntervalMinutesProp, INT, 30, MEDIUM, S3StreamObjectCompactionIntervalMinutesDoc)
       .define(S3StreamObjectCompactionMaxSizeBytesProp, LONG, 10737418240L, MEDIUM, S3StreamObjectCompactionMaxSizeBytesDoc)
       .define(S3ControllerRequestRetryMaxCountProp, INT, Integer.MAX_VALUE, MEDIUM, S3ControllerRequestRetryMaxCountDoc)
      .define(S3ControllerRequestRetryBaseDelayMsProp, LONG, 500, MEDIUM, S3ControllerRequestRetryBaseDelayMsDoc)
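
Notes on the new defaults:

The heap and GC flags in kafka-run-class.sh are wrapped in "-z" guards, so they only apply when KAFKA_HEAP_OPTS and KAFKA_JVM_PERFORMANCE_OPTS are unset; anything already exported wins. A minimal sketch of opting out at launch time without editing the script (the 4g figures are illustrative, not taken from this patch):

  # Preset values take precedence; kafka-run-class.sh only fills in missing ones.
  export KAFKA_HEAP_OPTS="-Xmx4g -XX:MaxDirectMemorySize=4g -XX:MetaspaceSize=96m"
  export KAFKA_JVM_PERFORMANCE_OPTS="-server -XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:MaxInlineLevel=15 -Djava.awt.headless=true"
  bin/kafka-server-start.sh config/kraft/server.properties

One caveat on the new GC default: -XX:+UseZGC is a production flag only on JDK 15 and later (earlier JDKs need -XX:+UnlockExperimentalVMOptions), and -XX:ZCollectionInterval=5 forces ZGC to start a cycle at least every 5 seconds.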
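The raw byte values introduced in the properties files and in KafkaConfig.scala decode to the sizes named in the adjacent comments; a throwaway check in a POSIX shell with 64-bit arithmetic:

  echo $((2 * 1024 * 1024 * 1024))   # 2147483648 -> s3.wal.capacity / s3.wal.cache.size, 2GB
  echo $((500 * 1024 * 1024))        # 524288000  -> s3.wal.upload.threshold, 500MB
  echo $((1024 * 1024 * 1024))       # 1073741824 -> s3.block.cache.size, 1GB
  echo $((8 * 1024 * 1024))          # 8388608    -> S3StreamSplitSizeProp, matching 8 * 1024 * 1024 in Defaults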
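Finally, the controller-side Auto Balancer default flips from true to false, so clusters that relied on the old default must now opt in explicitly. A sketch, assuming appending to the shipped config is acceptable in your deployment:

  # Re-enable controller-side balancing; the key is shown commented out above.
  echo "autobalancer.controller.enable=true" >> config/kraft/controller.properties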