
CRDB working set tests
Just adds a unit test of the current SLOs we are offering for CRDB and the
necessary working set.
jolynch committed Jul 7, 2023
1 parent ebc7697 commit cc86658
Showing 4 changed files with 57 additions and 22 deletions.
4 changes: 2 additions & 2 deletions service_capacity_modeling/hardware/profiles/shapes/aws.json
@@ -254,7 +254,7 @@
       "ram_gib": 15.48,
       "net_mbps": 781,
       "drive": {
-        "name": "ephem", "size_gib": 436.5,
+        "name": "ephem", "size_gib": 436,
         "read_io_latency_ms": {
           "minimum_value":0.05,
           "low":0.10, "mid":0.125, "high":0.17,
@@ -271,7 +271,7 @@
       "ram_gib": 30.955,
       "net_mbps": 1875,
       "drive": {
-        "name": "ephem", "size_gib": 873.0,
+        "name": "ephem", "size_gib": 873,
         "read_io_latency_ms": {
           "minimum_value": 0.05,
           "low": 0.10, "mid": 0.125, "high": 0.17,
4 changes: 2 additions & 2 deletions service_capacity_modeling/interface.py
@@ -555,11 +555,11 @@ class DataShape(ExcludeUnsetModel):
 
     # How much fixed memory must be provisioned per instance for the
     # application (e.g. for process heap memory)
-    reserved_instance_app_mem_gib: int = 2
+    reserved_instance_app_mem_gib: float = 2
 
     # How much fixed memory must be provisioned per instance for the
     # system (e.g. for kernel and other system processes)
-    reserved_instance_system_mem_gib: int = 1
+    reserved_instance_system_mem_gib: float = 1
 
     # How durable does this dataset need to be. We want to provision
     # sufficient replication and backups of data to achieve the target
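A note on the int to float change above: pydantic 1.x coerces values assigned to int-typed fields, so a fractional reservation such as 2.5 GiB would previously be coerced down to 2. A minimal sketch of what the float typing now permits (the values below are illustrative, not model defaults):

from service_capacity_modeling.interface import DataShape
from service_capacity_modeling.interface import Interval

# Fractional per-instance memory reservations validate and round-trip
# instead of being coerced to whole GiB (illustrative values only).
shape = DataShape(
    estimated_state_size_gib=Interval(low=10, mid=100, high=1000, confidence=0.98),
    reserved_instance_app_mem_gib=2.5,  # e.g. a 2.5 GiB process heap
    reserved_instance_system_mem_gib=0.5,
)
assert shape.reserved_instance_app_mem_gib == 2.5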
2 changes: 1 addition & 1 deletion setup.py
@@ -9,7 +9,7 @@
     description="Contains utilities for modeling database capacity on a cloud",
     packages=setuptools.find_packages(exclude=("tests*", "notebooks*")),
     install_requires=[
-        "pydantic",
+        "pydantic>=1.0,<2.0",
         "scipy",
         "numpy",
         'importlib_resources; python_version < "3.7"',
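The pin presumably guards against pydantic 2.x breaking changes; for example, the new test below copies a drive model with .copy(deep=True), a pydantic 1.x API that 2.x deprecates in favor of model_copy(). A hypothetical guard illustrating the constraint (not part of this commit):

import pydantic

# The package's models are written against the pydantic 1.x API surface.
assert pydantic.VERSION.startswith("1."), "expected pydantic 1.x (<2.0)"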
69 changes: 52 additions & 17 deletions tests/netflix/test_crdb.py
@@ -1,32 +1,35 @@
 from service_capacity_modeling.capacity_planner import planner
+from service_capacity_modeling.hardware import shapes
 from service_capacity_modeling.interface import CapacityDesires
 from service_capacity_modeling.interface import DataShape
+from service_capacity_modeling.interface import FixedInterval
 from service_capacity_modeling.interface import Interval
 from service_capacity_modeling.interface import QueryPattern
+from service_capacity_modeling.models.common import working_set_from_drive_and_slo
 from service_capacity_modeling.models.org.netflix import nflx_cockroachdb_capacity_model
+from service_capacity_modeling.stats import dist_for_interval
 
 
-def test_crdb_basic():
-    basic = CapacityDesires(
-        service_tier=1,
-        query_pattern=QueryPattern(
-            estimated_read_per_second=Interval(
-                low=100, mid=1000, high=10000, confidence=0.98
-            ),
-            estimated_write_per_second=Interval(
-                low=100, mid=1000, high=10000, confidence=0.98
-            ),
-        ),
-        data_shape=DataShape(
-            estimated_state_size_gib=Interval(
-                low=10, mid=100, high=1000, confidence=0.98
-            ),
-        ),
-    )
+simple_desire = CapacityDesires(
+    service_tier=1,
+    query_pattern=QueryPattern(
+        estimated_read_per_second=Interval(
+            low=100, mid=1000, high=10000, confidence=0.98
+        ),
+        estimated_write_per_second=Interval(
+            low=100, mid=1000, high=10000, confidence=0.98
+        ),
+    ),
+    data_shape=DataShape(
+        estimated_state_size_gib=Interval(low=10, mid=100, high=1000, confidence=0.98),
+    ),
+)
+
+
+def test_crdb_simple():
     plan = planner.plan(
         model_name="org.netflix.cockroachdb",
         region="us-east-1",
-        desires=basic,
+        desires=simple_desire,
     )
 
     lr = plan.least_regret[0]
@@ -44,6 +47,38 @@ def test_crdb_basic():
     assert lr_cluster.count * lr_cluster.instance.cpu >= 4
 
 
+def test_crdb_working_set():
+    ephem = shapes.region("us-east-1").instances["i4i.xlarge"].drive
+    ebs = shapes.region("us-east-1").drives["gp3"]
+    super_slow_drive = ebs.copy(deep=True)
+    # Simulate a very slow drive
+    super_slow_drive.name = "slow"
+    super_slow_drive.read_io_latency_ms = FixedInterval(
+        low=5, mid=8, high=20, confidence=0.9
+    )
+
+    latency_sensitive = nflx_cockroachdb_capacity_model.default_desires(
+        simple_desire, {}
+    )
+    results = {}
+    for drive in (ephem, ebs, super_slow_drive):
+        working_set = working_set_from_drive_and_slo(
+            drive_read_latency_dist=dist_for_interval(drive.read_io_latency_ms),
+            read_slo_latency_dist=dist_for_interval(
+                latency_sensitive.query_pattern.read_latency_slo_ms
+            ),
+            estimated_working_set=None,
+            # CRDB has looser latency SLOs but we still want a lot of the data
+            # hot in cache. Target the 95th percentile of disk latency to
+            # keep in RAM.
+            target_percentile=0.95,
+        ).mid
+        results[drive.name] = working_set
+    assert results["ephem"] < 0.05
+    assert results["gp3"] < 0.05
+    assert results["slow"] > 0.5
+
+
 def test_crdb_footprint():
     space = CapacityDesires(
         service_tier=1,
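Roughly, the idea the new working set test encodes (sketched below with illustrative lognormal fits, not necessarily the library's exact math): take the drive's read latency at the target percentile, then ask what fraction of the read-SLO distribution demands a response faster than that; those reads have to come from memory, so that fraction of the data is the working set to keep in RAM. A fast local SSD leaves almost nothing to cache, while the simulated slow drive pushes most of the data into memory.

from scipy import stats

# Hypothetical latency distributions in ms; the real test derives them
# from the Interval specs above via dist_for_interval.
slow_drive_read_ms = stats.lognorm(s=0.6, scale=8.0)  # slow drive, median ~8ms
read_slo_ms = stats.lognorm(s=1.0, scale=4.0)         # read SLO, median ~4ms

drive_p95_ms = slow_drive_read_ms.ppf(0.95)  # target_percentile=0.95
working_set = read_slo_ms.cdf(drive_p95_ms)  # reads the drive cannot satisfy
print(f"p95 drive latency {drive_p95_ms:.1f}ms -> keep ~{working_set:.0%} of data in RAM")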
