
CRDB working set tests
Just adds a unit test of the current SLOs we are offering for CRDB and the
necessary working set.
jolynch committed Jul 7, 2023
1 parent ebc7697 commit cc86658
Showing 4 changed files with 57 additions and 22 deletions.
4 changes: 2 additions & 2 deletions service_capacity_modeling/hardware/profiles/shapes/aws.json
@@ -254,7 +254,7 @@
       "ram_gib": 15.48,
       "net_mbps": 781,
       "drive": {
-        "name": "ephem", "size_gib": 436.5,
+        "name": "ephem", "size_gib": 436,
         "read_io_latency_ms": {
           "minimum_value":0.05,
           "low":0.10, "mid":0.125, "high":0.17,
@@ -271,7 +271,7 @@
       "ram_gib": 30.955,
       "net_mbps": 1875,
       "drive": {
-        "name": "ephem", "size_gib": 873.0,
+        "name": "ephem", "size_gib": 873,
         "read_io_latency_ms": {
           "minimum_value": 0.05,
           "low": 0.10, "mid": 0.125, "high": 0.17,
4 changes: 2 additions & 2 deletions service_capacity_modeling/interface.py
@@ -555,11 +555,11 @@ class DataShape(ExcludeUnsetModel):
 
     # How much fixed memory must be provisioned per instance for the
     # application (e.g. for process heap memory)
-    reserved_instance_app_mem_gib: int = 2
+    reserved_instance_app_mem_gib: float = 2
 
     # How much fixed memory must be provisioned per instance for the
     # system (e.g. for kernel and other system processes)
-    reserved_instance_system_mem_gib: int = 1
+    reserved_instance_system_mem_gib: float = 1
 
     # How durable does this dataset need to be. We want to provision
     # sufficient replication and backups of data to achieve the target
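A note on the int to float change above: pydantic 1.x coerces values assigned to int-typed fields, so a fractional reservation such as 2.5 GiB would previously be coerced down to 2. A minimal sketch of what the float typing now permits (the values below are illustrative, not model defaults):

from service_capacity_modeling.interface import DataShape
from service_capacity_modeling.interface import Interval

# Fractional per-instance memory reservations validate and round-trip
# instead of being coerced to whole GiB (illustrative values only).
shape = DataShape(
    estimated_state_size_gib=Interval(low=10, mid=100, high=1000, confidence=0.98),
    reserved_instance_app_mem_gib=2.5,  # e.g. a 2.5 GiB process heap
    reserved_instance_system_mem_gib=0.5,
)
assert shape.reserved_instance_app_mem_gib == 2.5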
2 changes: 1 addition & 1 deletion setup.py
@@ -9,7 +9,7 @@
     description="Contains utilities for modeling database capacity on a cloud",
     packages=setuptools.find_packages(exclude=("tests*", "notebooks*")),
     install_requires=[
-        "pydantic",
+        "pydantic>=1.0,<2.0",
         "scipy",
         "numpy",
         'importlib_resources; python_version < "3.7"',
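The pin presumably guards against pydantic 2.x breaking changes; for example, the new test below copies a drive model with .copy(deep=True), a pydantic 1.x API that 2.x deprecates in favor of model_copy(). A hypothetical guard illustrating the constraint (not part of this commit):

import pydantic

# The package's models are written against the pydantic 1.x API surface.
assert pydantic.VERSION.startswith("1."), "expected pydantic 1.x (<2.0)"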
69 changes: 52 additions & 17 deletions tests/netflix/test_crdb.py
@@ -1,32 +1,35 @@
 from service_capacity_modeling.capacity_planner import planner
+from service_capacity_modeling.hardware import shapes
 from service_capacity_modeling.interface import CapacityDesires
 from service_capacity_modeling.interface import DataShape
+from service_capacity_modeling.interface import FixedInterval
 from service_capacity_modeling.interface import Interval
 from service_capacity_modeling.interface import QueryPattern
+from service_capacity_modeling.models.common import working_set_from_drive_and_slo
 from service_capacity_modeling.models.org.netflix import nflx_cockroachdb_capacity_model
+from service_capacity_modeling.stats import dist_for_interval
 
 
-def test_crdb_basic():
-    basic = CapacityDesires(
-        service_tier=1,
-        query_pattern=QueryPattern(
-            estimated_read_per_second=Interval(
-                low=100, mid=1000, high=10000, confidence=0.98
-            ),
-            estimated_write_per_second=Interval(
-                low=100, mid=1000, high=10000, confidence=0.98
-            ),
-        ),
-        data_shape=DataShape(
-            estimated_state_size_gib=Interval(
-                low=10, mid=100, high=1000, confidence=0.98
-            ),
-        ),
-    )
+simple_desire = CapacityDesires(
+    service_tier=1,
+    query_pattern=QueryPattern(
+        estimated_read_per_second=Interval(
+            low=100, mid=1000, high=10000, confidence=0.98
+        ),
+        estimated_write_per_second=Interval(
+            low=100, mid=1000, high=10000, confidence=0.98
+        ),
+    ),
+    data_shape=DataShape(
+        estimated_state_size_gib=Interval(low=10, mid=100, high=1000, confidence=0.98),
+    ),
+)
+
+
+def test_crdb_simple():
     plan = planner.plan(
         model_name="org.netflix.cockroachdb",
         region="us-east-1",
-        desires=basic,
+        desires=simple_desire,
     )
 
     lr = plan.least_regret[0]
@@ -44,6 +47,38 @@ def test_crdb_basic():
     assert lr_cluster.count * lr_cluster.instance.cpu >= 4
 
 
+def test_crdb_working_set():
+    ephem = shapes.region("us-east-1").instances["i4i.xlarge"].drive
+    ebs = shapes.region("us-east-1").drives["gp3"]
+    super_slow_drive = ebs.copy(deep=True)
+    # Simulate a very slow drive
+    super_slow_drive.name = "slow"
+    super_slow_drive.read_io_latency_ms = FixedInterval(
+        low=5, mid=8, high=20, confidence=0.9
+    )
+
+    latency_sensitive = nflx_cockroachdb_capacity_model.default_desires(
+        simple_desire, {}
+    )
+    results = {}
+    for drive in (ephem, ebs, super_slow_drive):
+        working_set = working_set_from_drive_and_slo(
+            drive_read_latency_dist=dist_for_interval(drive.read_io_latency_ms),
+            read_slo_latency_dist=dist_for_interval(
+                latency_sensitive.query_pattern.read_latency_slo_ms
+            ),
+            estimated_working_set=None,
+            # CRDB has looser latency SLOs but we still want a lot of the data
+            # hot in cache. Target the 95th percentile of disk latency to
+            # keep in RAM.
+            target_percentile=0.95,
+        ).mid
+        results[drive.name] = working_set
+    assert results["ephem"] < 0.05
+    assert results["gp3"] < 0.05
+    assert results["slow"] > 0.5
+
+
 def test_crdb_footprint():
     space = CapacityDesires(
         service_tier=1,
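Roughly, the idea the new working set test encodes (sketched below with illustrative lognormal fits, not necessarily the library's exact math): take the drive's read latency at the target percentile, then ask what fraction of the read-SLO distribution demands a response faster than that; those reads have to come from memory, so that fraction of the data is the working set to keep in RAM. A fast local SSD leaves almost nothing to cache, while the simulated slow drive pushes most of the data into memory.

from scipy import stats

# Hypothetical latency distributions in ms; the real test derives them
# from the Interval specs above via dist_for_interval.
slow_drive_read_ms = stats.lognorm(s=0.6, scale=8.0)  # slow drive, median ~8ms
read_slo_ms = stats.lognorm(s=1.0, scale=4.0)         # read SLO, median ~4ms

drive_p95_ms = slow_drive_read_ms.ppf(0.95)  # target_percentile=0.95
working_set = read_slo_ms.cdf(drive_p95_ms)  # reads the drive cannot satisfy
print(f"p95 drive latency {drive_p95_ms:.1f}ms -> keep ~{working_set:.0%} of data in RAM")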
