
Promotes gp3 to be GA and properly does IO calcs
Now that Cassandra can provision with gp3, let's start recommending it
from models.
jolynch committed Jun 23, 2023
1 parent 5337617 commit ebc7697
Showing 6 changed files with 110 additions and 8 deletions.
7 changes: 5 additions & 2 deletions service_capacity_modeling/hardware/profiles/shapes/aws.json
@@ -797,21 +797,24 @@
"name": "gp2",
"read_io_latency_ms": {"low": 0.8, "mid": 1.05, "high": 1.8, "maximum_value": 10, "confidence": 0.90},
"write_io_latency_ms": {"low": 1.2, "mid": 2, "high": 4, "maximum_value": 20, "confidence": 0.90},
"max_scale_size_gib": 16384, "block_size_kib": 16
"max_scale_size_gib": 16384, "block_size_kib": 16,
"max_scale_io_per_s": 16000
},
"io2": {
"name": "io2",
"read_io_latency_ms": {"low": 0.5, "mid": 0.8, "high": 1.2, "maximum_value": 2, "confidence": 0.90},
"write_io_latency_ms": {"low": 0.9, "mid": 1.2, "high": 2, "maximum_value": 4, "confidence": 0.90},
"max_scale_size_gib": 16384, "block_size_kib": 16,
"max_scale_io_per_s": 64000,
"lifecycle": "alpha"
},
"gp3": {
"name": "gp3",
"read_io_latency_ms": {"low": 0.8, "mid": 1.05, "high": 1.8, "maximum_value": 10, "confidence": 0.90},
"write_io_latency_ms": {"low": 1.2, "mid": 2, "high": 4, "maximum_value": 20, "confidence": 0.90},
"max_scale_size_gib": 16384, "block_size_kib": 16,
"lifecycle": "alpha"
"max_scale_io_per_s": 16000,
"lifecycle": "stable"
},
"aurora": {
"name": "aurora",
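The shape change above is what lets models recommend gp3: its lifecycle moves from "alpha" to "stable", and gp2/gp3 now carry max_scale_io_per_s of 16,000 (AWS's documented per-volume IOPS ceiling for those volume types) while io2 carries 64,000. As a rough, hypothetical illustration of how a planner could consume these fields (the helper below is not part of the library):

```python
# Minimal sketch (not library code): pick an EBS drive shape that is GA
# ("stable") and whose per-volume IOPS ceiling covers the required IO.
from typing import Optional

DRIVE_SHAPES = {
    "gp2": {"lifecycle": "stable", "max_scale_io_per_s": 16000},
    "gp3": {"lifecycle": "stable", "max_scale_io_per_s": 16000},
    "io2": {"lifecycle": "alpha", "max_scale_io_per_s": 64000},
}

def pick_ebs_drive(required_io_per_s: int) -> Optional[str]:
    """Return the first GA drive whose IOPS ceiling covers the need."""
    for name, shape in DRIVE_SHAPES.items():
        if shape["lifecycle"] != "stable":
            continue  # non-GA drives are not recommended
        if shape["max_scale_io_per_s"] >= required_io_per_s:
            return name
    return None  # no single volume suffices; the caller must scale out

print(pick_ebs_drive(12_000))  # -> "gp2" (gp3 would also qualify at 16k)
print(pick_ebs_drive(40_000))  # -> None: above the 16k gp2/gp3 per-volume ceiling
```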
10 changes: 10 additions & 0 deletions service_capacity_modeling/interface.py
@@ -1,6 +1,7 @@
from __future__ import annotations

import json
import sys
from decimal import Decimal
from enum import Enum
from functools import lru_cache
@@ -199,6 +200,8 @@ class Drive(ExcludeUnsetModel):
single_tenant: bool = True
# If this drive can scale, how large can it scale to
max_scale_size_gib: int = 0
# If this drive can scale IO, how large can it scale to
max_scale_io_per_s: int = 0

lifecycle: Lifecycle = Lifecycle.stable
compatible_families: List[str] = []
@@ -225,6 +228,13 @@ def max_size_gib(self):
else:
return self.size_gib

@property
def max_io_per_s(self):
if self.max_scale_io_per_s != 0:
return self.max_scale_io_per_s
else:
return sys.maxsize

@property
def annual_cost(self):
size = self.size_gib or 0
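The new max_scale_io_per_s field defaults to 0, and the max_io_per_s property treats 0 as "no published ceiling" by returning sys.maxsize, so an IO-cap check never fires for drives that do not declare a limit. A minimal sketch of that behavior, using a plain dataclass as a stand-in for the real pydantic Drive model:

```python
import sys
from dataclasses import dataclass

@dataclass
class DriveSketch:
    """Simplified stand-in for the Drive model's new IO-ceiling behavior."""
    max_scale_io_per_s: int = 0  # 0 means the shape declares no ceiling

    @property
    def max_io_per_s(self) -> int:
        # A declared ceiling (e.g. 16000 for gp3) is returned as-is; otherwise
        # sys.maxsize makes any "needed IO > ceiling" comparison a no-op.
        if self.max_scale_io_per_s != 0:
            return self.max_scale_io_per_s
        return sys.maxsize

assert DriveSketch(max_scale_io_per_s=16000).max_io_per_s == 16000
assert DriveSketch().max_io_per_s == sys.maxsize  # uncapped drives never trip the check
```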
11 changes: 9 additions & 2 deletions service_capacity_modeling/models/common.py
@@ -291,6 +291,13 @@ def compute_stateful_zone(
utils.next_n(read_io, n=200),
utils.next_n(write_io, n=200),
)
if (read_io + write_io) > drive.max_io_per_s:
ratio = (read_io + write_io) / drive.max_io_per_s
count = max(cluster_size(math.ceil(count * ratio)), min_count)
cost = count * instance.annual_cost
read_io = utils.next_n(read_io / ratio, n=200)
write_io = utils.next_n(write_io / ratio, n=200)

attached_drive = drive.copy()
attached_drive.size_gib = ebs_gib
attached_drive.read_io_per_s = int(round(read_io, 2))
@@ -326,9 +333,9 @@ def gp2_gib_for_io(read_ios) -> int:
return int(max(1, read_ios // 3))


- def cloud_gib_for_io(drive, read_ios, space_gib) -> int:
+ def cloud_gib_for_io(drive, total_ios, space_gib) -> int:
if drive.name == "gp2":
- return gp2_gib_for_io(read_ios)
+ return gp2_gib_for_io(total_ios)
else:
return space_gib

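The new branch in compute_stateful_zone handles drives (such as gp3) whose per-volume IO is capped: when a node's read plus write IO would exceed drive.max_io_per_s, the zone is scaled out by the overshoot ratio and the per-node IO shrinks by the same ratio, keeping each volume at roughly its ceiling while preserving the aggregate IO. A standalone sketch of that arithmetic, with next_n simplified to "round up to a multiple of 200":

```python
import math

def next_n(x: float, n: int = 200) -> int:
    """Round up to the next multiple of n (stand-in for utils.next_n)."""
    return int(math.ceil(x / n) * n)

def cap_io_by_scaling_out(count: int, read_io: float, write_io: float,
                          max_io_per_s: int):
    """If per-node IO exceeds the volume ceiling, add nodes and split the IO."""
    if (read_io + write_io) > max_io_per_s:
        ratio = (read_io + write_io) / max_io_per_s
        count = math.ceil(count * ratio)   # more nodes ...
        read_io = next_n(read_io / ratio)  # ... each doing proportionally less IO
        write_io = next_n(write_io / ratio)
    return count, read_io, write_io

# 8 nodes needing 14k read + 10k write IO each overshoot a 16k gp3 volume,
# so the zone grows to 12 nodes at roughly the 16k ceiling per volume.
print(cap_io_by_scaling_out(8, 14_000, 10_000, 16_000))  # -> (12, 9400, 6800)
```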
22 changes: 19 additions & 3 deletions service_capacity_modeling/models/org/netflix/cassandra.py
@@ -165,6 +165,7 @@ def _upsert_params(cluster, params):


# pylint: disable=too-many-locals
# pylint: disable=too-many-return-statements
# flake8: noqa: C901
def _estimate_cassandra_cluster_zonal(
instance: Instance,
@@ -174,6 +175,7 @@ def _estimate_cassandra_cluster_zonal(
zones_per_region: int = 3,
copies_per_region: int = 3,
require_local_disks: bool = False,
require_attached_disks: bool = False,
required_cluster_size: Optional[int] = None,
max_rps_to_disk: int = 500,
max_local_disk_gib: int = 2048,
@@ -190,8 +192,12 @@
if instance.drive is None and require_local_disks:
return None

- # Cassandra only deploys on gp2 drives right now
- if drive.name != "gp2":
+ # if we're not allowed to use local disks, skip ephems
+ if instance.drive is not None and require_attached_disks:
+ return None
+
+ # Cassandra only deploys on gp2 and gp3 drives right now
+ if drive.name not in ("gp2", "gp3"):
return None

rps = desires.query_pattern.estimated_read_per_second.mid // zones_per_region
@@ -201,11 +207,13 @@
write_bytes_per_sec = (
write_per_sec * desires.query_pattern.estimated_mean_write_size_bytes.mid
)
read_bytes_per_sec = rps * desires.query_pattern.estimated_mean_read_size_bytes.mid
# Write IO will be 1 to commitlog + 2 writes (plus 2 reads) in the first
# hour during compaction.
# https://aws.amazon.com/ebs/volume-types/ says IOPS are 16k for io2/gp2
# so for now we're just hardcoding.
write_io_per_sec = (1 + 4) * max(1, write_bytes_per_sec // 16384)
read_io_per_sec = max(rps, read_bytes_per_sec // 16384)

# Based on the disk latency and the read latency SLOs we adjust our
# working set to keep more or less data in RAM. Faster drives need
@@ -262,7 +270,7 @@ def _estimate_cassandra_cluster_zonal(
# Take into account the reads per read
# from the per node dataset using leveled compaction
required_disk_ios=lambda size, count: (
- _cass_io_per_read(size) * math.ceil(rps / count),
+ _cass_io_per_read(size) * math.ceil(read_io_per_sec / count),
write_io_per_sec / count,
),
# C* requires ephemeral disks to be 25% full because compaction
@@ -418,6 +426,10 @@ class NflxCassandraArguments(BaseModel):
default=False,
description="If local (ephemeral) drives are required",
)
require_attached_disks: bool = Field(
default=False,
description="If attached (ebs) drives are required",
)
required_cluster_size: Optional[int] = Field(
default=None,
description="Require zonal clusters to be this size (force vertical scaling)",
@@ -464,6 +476,9 @@ def capacity_plan(
require_local_disks: bool = extra_model_arguments.get(
"require_local_disks", False
)
require_attached_disks: bool = extra_model_arguments.get(
"require_attached_disks", False
)
required_cluster_size: Optional[int] = extra_model_arguments.get(
"required_cluster_size", None
)
@@ -493,6 +508,7 @@
zones_per_region=context.zones_in_region,
copies_per_region=copies_per_region,
require_local_disks=require_local_disks,
require_attached_disks=require_attached_disks,
required_cluster_size=required_cluster_size,
max_rps_to_disk=max_rps_to_disk,
max_regional_size=max_regional_size,
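The Cassandra model now derives disk IO from byte rates instead of raw request rates: write bytes are converted into 16 KiB IOs and multiplied by 5 (one commitlog write plus roughly four compaction IOs), and reads use the larger of the request rate and the 16 KiB byte rate, which later gets scaled by _cass_io_per_read per LSM level. A hedged back-of-envelope helper restating that math (illustrative only, not the model's API):

```python
def cassandra_disk_io_per_zone(write_per_sec: int, write_bytes: int,
                               read_per_sec: int, read_bytes: int):
    """Back-of-envelope IO estimate mirroring the model's 16 KiB block math."""
    block = 16 * 1024  # 16 KiB block size the model hardcodes for EBS
    write_bytes_per_sec = write_per_sec * write_bytes
    read_bytes_per_sec = read_per_sec * read_bytes
    # 1 commitlog write + ~4 compaction IOs per 16 KiB of flushed writes
    write_io_per_sec = (1 + 4) * max(1, write_bytes_per_sec // block)
    # Small reads are bounded below by the request rate itself
    read_io_per_sec = max(read_per_sec, read_bytes_per_sec // block)
    return read_io_per_sec, write_io_per_sec

# Roughly the high-write test below: ~33k writes/s per zone at 8 KiB each
print(cassandra_disk_io_per_zone(33_000, 8 * 1024, 3_300, 1024))
# -> (3300, 82500): ~82.5k write IO/s/zone, far above one gp3 volume's 16k
```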
66 changes: 66 additions & 0 deletions tests/netflix/test_cassandra.py
@@ -75,6 +75,72 @@ def test_capacity_small_fast():
assert small_result.cluster_params["cassandra.heap.table.percent"] == 0.11


def test_ebs_high_reads():
cap_plan = planner.plan_certain(
model_name="org.netflix.cassandra",
region="us-east-1",
desires=CapacityDesires(
service_tier=1,
query_pattern=QueryPattern(
estimated_read_per_second=certain_int(100_000),
estimated_write_per_second=certain_int(1_000),
),
data_shape=DataShape(
estimated_state_size_gib=certain_int(1_000),
),
),
extra_model_arguments={"require_attached_disks": True},
)[0]
result = cap_plan.candidate_clusters.zonal[0]

cores = result.count * result.instance.cpu
assert 64 <= cores <= 128
# Should get gp3
assert result.attached_drives[0].name == "gp3"
# 1TiB / ~32 nodes
assert result.attached_drives[0].read_io_per_s is not None
ios = result.attached_drives[0].read_io_per_s * result.count
# Each zone is handling ~33k reads per second, so total disk ios should be < 3x that
# 3 from each level
assert 100_000 < ios < 400_000


def test_ebs_high_writes():
cap_plan = planner.plan_certain(
model_name="org.netflix.cassandra",
region="us-east-1",
desires=CapacityDesires(
service_tier=1,
query_pattern=QueryPattern(
estimated_read_per_second=certain_int(10_000),
estimated_write_per_second=certain_int(100_000),
estimated_mean_write_size_bytes=certain_int(1024 * 8),
),
data_shape=DataShape(
estimated_state_size_gib=certain_int(10_000),
),
),
extra_model_arguments={"require_attached_disks": True},
)[0]
result = cap_plan.candidate_clusters.zonal[0]

cores = result.count * result.instance.cpu
assert 128 <= cores <= 512
# Should get gp3
assert result.attached_drives[0].name == "gp3"
# 1TiB / ~32 nodes
assert result.attached_drives[0].read_io_per_s is not None
assert result.attached_drives[0].write_io_per_s is not None

read_ios = result.attached_drives[0].read_io_per_s * result.count
write_ios = result.attached_drives[0].write_io_per_s * result.count

# 10TiB ~= 4 IO/read -> 3.3k r/zone/s -> 12k /s
assert 20_000 < read_ios < 60_000
# 33k wps * 8KiB / 16KiB write IO size = 16.5k / s * 4 for compaction = 64k
assert 60_000 < write_ios < 100_000


def test_capacity_high_writes():
cap_plan = planner.plan_certain(
model_name="org.netflix.cassandra",
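The new tests pin attached-disk plans to gp3 and then bound the aggregate provisioned IO per zone; the expected windows follow from the same 16 KiB math. For the high-write case, the model's factor of 5 gives roughly 83k write IO/s per zone and the test comment's rougher factor of 4 gives about 66k, and both land inside the asserted 60k–100k range. A quick arithmetic check under those assumptions:

```python
# ~100k writes/s per region spread over 3 zones, 8 KiB per write:
writes_per_zone = 100_000 // 3                 # ~33k writes/s/zone
write_bytes = writes_per_zone * 8 * 1024       # ~273 MB/s/zone
write_ios = 5 * (write_bytes // (16 * 1024))   # commitlog + compaction factor
print(write_ios)  # ~83k write IO/s/zone, inside the asserted 60k..100k window
```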
2 changes: 1 addition & 1 deletion tests/netflix/test_cassandra_uncertain.py
@@ -162,7 +162,7 @@ def test_worn_dataset():
assert lr_cluster.instance.name.startswith(
"m5."
) or lr_cluster.instance.name.startswith("r5.")
- assert lr_cluster.attached_drives[0].name == "gp2"
+ assert lr_cluster.attached_drives[0].name == "gp3"
# gp2 should not provision massive drives, prefer to upcolor
assert lr_cluster.attached_drives[0].size_gib < 9000
assert lr_cluster.attached_drives[0].size_gib * lr_cluster.count * 3 > 204800
