update the entity model to provision crdb+kv #76

Merged 1 commit on Dec 29, 2023
30 changes: 29 additions & 1 deletion service_capacity_modeling/models/org/netflix/entity.py
@@ -59,6 +59,33 @@ def extra_model_arguments_schema() -> Dict[str, Any]:
def compose_with(
user_desires: CapacityDesires, extra_model_arguments: Dict[str, Any]
) -> Tuple[Tuple[str, Callable[[CapacityDesires], CapacityDesires]], ...]:
def _modify_crdb_desires(
user_desires: CapacityDesires,
) -> CapacityDesires:
relaxed = user_desires.copy(deep=True)
item_count = relaxed.data_shape.estimated_state_item_count
# based on the nts cluster where the version store is 10x the prime store
if item_count is None:
# assume 10 KB items
if (
user_desires.query_pattern.estimated_mean_write_size_bytes
is not None
):
item_size_gib = (
user_desires.query_pattern.estimated_mean_write_size_bytes.mid
/ 1024**3
)
else:
item_size_gib = 10 / 1024**2
item_count = user_desires.data_shape.estimated_state_size_gib.scale(
1 / item_size_gib
)
# assume 512 B to track the id/version of each item
relaxed.data_shape.estimated_state_size_gib = item_count.scale(
512 / 1024**3
)
return relaxed

def _modify_elasticsearch_desires(
user_desires: CapacityDesires,
) -> CapacityDesires:
@@ -69,7 +96,8 @@ def _modify_elasticsearch_desires(
return relaxed

return (
("org.netflix.cassandra", lambda x: x),
("org.netflix.cockroachdb", _modify_crdb_desires),
("org.netflix.key-value", lambda x: x),
("org.netflix.elasticsearch", _modify_elasticsearch_desires),
)
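
For context on the `_modify_crdb_desires` helper above: it swaps the entity's full state size for a small per-item bookkeeping footprint before handing the desires to the CockroachDB model. The sketch below reproduces that arithmetic with plain floats purely for illustration; the function name and signature here are made up, and the real model operates on `Interval` values inside `CapacityDesires`.

```python
from typing import Optional


def sketch_crdb_state_gib(
    state_size_gib: float,
    mean_write_size_bytes: float = 10 * 1024,  # the model's 10 KiB default
    item_count: Optional[float] = None,
) -> float:
    """Approximate CRDB disk used to track the id/version of each item."""
    if item_count is None:
        # No explicit item count: infer it from total state size and the
        # mean item size.
        item_size_gib = mean_write_size_bytes / 1024**3
        item_count = state_size_gib / item_size_gib
    # 512 bytes of id/version bookkeeping per item, converted to GiB.
    return item_count * 512 / 1024**3


# 100 GiB of 10 KiB items -> ~10.5M items -> ~5 GiB in CockroachDB
print(sketch_crdb_state_gib(state_size_gib=100.0))
```

In other words, CockroachDB is sized for the id/version bookkeeping only, while the unmodified desires still go to the Cassandra and key-value models (the `lambda x: x` entries above).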

97 changes: 96 additions & 1 deletion tests/netflix/test_entity.py
@@ -18,10 +18,84 @@ def test_entity_increasing_qps_simple():
estimated_write_per_second=Interval(
low=qps // 10, mid=qps, high=qps * 10, confidence=0.98
),
estimated_mean_write_size_bytes=Interval(
low=1024, mid=1024 * 10, high=1024 * 100, confidence=0.98
),
),
data_shape=DataShape(
estimated_state_item_count=Interval(
low=1000000, mid=10000000, high=100000000, confidence=0.98
),
),
)

cap_plan = planner.plan(
model_name="org.netflix.entity",
region="us-east-1",
desires=simple,
simulations=256,
)

# the set of cluster types the planner chose
types = {
c.cluster_type
for c in list(cap_plan.least_regret[0].candidate_clusters.regional)
+ list(cap_plan.least_regret[0].candidate_clusters.zonal)
}
assert sorted(types) == [
"cassandra",
"cockroachdb",
"dgwentity",
"dgwkv",
"elasticsearch-data",
"elasticsearch-master",
"elasticsearch-search",
]

# Check the Java cluster
entity_plan = next(
filter(
lambda c: c.cluster_type == "dgwentity",
cap_plan.least_regret[0].candidate_clusters.regional,
)
)
entity_results_trend.append((entity_plan.count * entity_plan.instance.cpu,))
# We just want ram and cpus for a java app
assert entity_plan.instance.family[0] in ("m", "r")
# We should never be paying for ephemeral drives
assert entity_plan.instance.drive is None
# CRDB disk usage should be num items * 512 bytes per item ~= 6 GiB (rounded up)

for c in cap_plan.least_regret[0].requirements.zonal:
if c.requirement_type == "crdb-zonal":
assert c.disk_gib.mid == 6.0
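# Sanity check on the arithmetic above: at the mid, 10_000_000 items
# * 512 B / 1024**3 is roughly 4.8 GiB; the 6.0 asserted here is the mid
# of the planner's simulated requirement, so it need not equal that
# single point estimate.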

# Should have more capacity as requirement increases
x = [r[0] for r in entity_results_trend]
assert x[0] < x[-1]
assert sorted(x) == x


def test_entity_increasing_qps_item_count_unset():
qps_values = (100, 1000, 10_000, 100_000)
entity_results_trend = []
for qps in qps_values:
simple = CapacityDesires(
service_tier=1,
query_pattern=QueryPattern(
estimated_read_per_second=Interval(
low=qps // 10, mid=qps, high=qps * 10, confidence=0.98
),
estimated_write_per_second=Interval(
low=qps // 10, mid=qps, high=qps * 10, confidence=0.98
),
estimated_mean_write_size_bytes=Interval(
low=1024, mid=1024 * 10, high=1024 * 100, confidence=0.98
),
),
data_shape=DataShape(
estimated_state_size_gib=Interval(
low=10, mid=100, high=1000, confidence=0.98
),
),
)
@@ -33,6 +107,22 @@ def test_entity_increasing_qps_simple():
simulations=256,
)

# the set of cluster types the planner chose
types = {
c.cluster_type
for c in list(cap_plan.least_regret[0].candidate_clusters.regional)
+ list(cap_plan.least_regret[0].candidate_clusters.zonal)
}
assert sorted(types) == [
"cassandra",
"cockroachdb",
"dgwentity",
"dgwkv",
"elasticsearch-data",
"elasticsearch-master",
"elasticsearch-search",
]

# Check the Java cluster
entity_plan = next(
filter(
@@ -45,6 +135,11 @@
assert entity_plan.instance.family[0] in ("m", "r")
# We should never be paying for ephemeral drives
assert entity_plan.instance.drive is None
# CRDB disk usage should be num items * 512 bytes per item ~= 7 GiB (rounded up)

for c in cap_plan.least_regret[0].requirements.zonal:
if c.requirement_type == "crdb-zonal":
assert c.disk_gib.mid == 7.0
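# Sanity check: 100 GiB of state at ~10 KiB per item is about 10.5M items,
# and 10.5M * 512 B / 1024**3 is roughly 5 GiB as a point estimate; the
# asserted 7.0 is the mid of the simulated requirement.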

# Should have more capacity as requirement increases
x = [r[0] for r in entity_results_trend]