Skip to content

Commit

Permalink
consolidate current instance type (str), count (interval), cpu% (inte…
Browse files Browse the repository at this point in the history
…rval) into a new model object
  • Loading branch information
ramsrivatsak committed Oct 19, 2023
1 parent 82dd4c5 commit 592def6
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 20 deletions.
6 changes: 3 additions & 3 deletions service_capacity_modeling/capacity_planner.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,10 +504,10 @@ def _plan_certain(
allowed_drives.update(hardware.drives.keys())

# Get current instance object if exists
if desires.current_instance_type is not "":
if desires.current_capacity.current_instance_type is not "":
for instance in hardware.instances.values():
if instance.name == desires.current_instance_type:
desires.current_instance = instance
if instance.name == desires.current_capacity.current_instance_type:
desires.current_capacity.current_instance = instance

plans = []
if model.run_hardware_simulation():
Expand Down
13 changes: 10 additions & 3 deletions service_capacity_modeling/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,13 @@ class DataShape(ExcludeUnsetModel):
)


class CurrentCapacity(ExcludeUnsetModel):
    """Snapshot of a cluster's currently deployed capacity.

    Consolidates the previously separate ``CapacityDesires`` fields
    (instance type, cluster size, resolved instance, CPU utilization)
    into one model object.
    """

    # Cloud instance type name (e.g. "i4i.8xlarge"); "" means not provided.
    current_instance_type: str = ""
    # Number of nodes currently deployed; 0 means not provided.
    current_cluster_size: int = 0
    # Hardware Instance resolved from current_instance_type by the planner
    # when the name matches a known instance; None until resolved.
    # NOTE(review): annotation says Instance but default is None — consider
    # Optional[Instance] if typing.Optional is imported in this module.
    current_instance: Instance = None  # type: ignore
    # Observed CPU utilization as an Interval — presumably a percentage
    # (commit message says "cpu%"); defaults to a certain 0.0.
    cpu_utilization: Interval = certain_float(0.0)


class CapacityDesires(ExcludeUnsetModel):
# How critical is this cluster, impacts how much "extra" we provision
# 0 = Critical to the product (Product does not function)
Expand All @@ -633,14 +640,14 @@ class CapacityDesires(ExcludeUnsetModel):
# What will the state look like
data_shape: DataShape = DataShape()

# What is the current microarchitectural/system configuration of the system
current_capacity: CurrentCapacity = CurrentCapacity()

# When users are providing latency estimates, what is the typical
# instance core frequency we are comparing to. Databases use i3s a lot
# hence this default
core_reference_ghz: float = 2.3

current_instance_type: str = ""
current_instance: Instance = None # type: ignore

def merge_with(self, defaults: "CapacityDesires") -> "CapacityDesires":
# Now merge with the models default
desires_dict = self.dict(exclude_unset=True)
Expand Down
18 changes: 7 additions & 11 deletions service_capacity_modeling/models/org/netflix/cassandra.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,11 @@ def _write_buffer_gib_zone(

def _estimate_cassandra_requirement(
instance: Instance,
max_cpu_utilization: Optional[float],
required_cluster_size: Optional[int],
current_instance: Optional[Instance],
desires: CapacityDesires,
working_set: float,
reads_per_second: float,
max_rps_to_disk: int,
required_cluster_size: Optional[int] = None,
zones_per_region: int = 3,
copies_per_region: int = 3,
) -> CapacityRequirement:
Expand All @@ -80,8 +78,11 @@ def _estimate_cassandra_requirement(
return the zonal capacity requirement
"""
# Keep half of the cores free for background work (compaction, backup, repair)
if max_cpu_utilization is not None and current_instance is not None and required_cluster_size is not None:
needed_cores = (current_instance.cpu * required_cluster_size * zones_per_region) * (max_cpu_utilization / 20)
if desires.current_capacity.cpu_utilization.high is not None \
and desires.current_capacity.current_instance is not None \
and required_cluster_size is not None:
needed_cores = (desires.current_capacity.current_instance.cpu * required_cluster_size *
zones_per_region) * (desires.current_capacity.cpu_utilization.high / 20)
else:
needed_cores = sqrt_staffed_cores(desires) * 2
# Keep half of the bandwidth available for backup
Expand Down Expand Up @@ -175,7 +176,6 @@ def _upsert_params(cluster, params):
# flake8: noqa: C901
def _estimate_cassandra_cluster_zonal(
instance: Instance,
max_cpu_utilization: Optional[float],
drive: Drive,
context: RegionContext,
desires: CapacityDesires,
Expand Down Expand Up @@ -240,13 +240,11 @@ def _estimate_cassandra_cluster_zonal(

requirement = _estimate_cassandra_requirement(
instance=instance,
max_cpu_utilization=max_cpu_utilization,
required_cluster_size=required_cluster_size,
current_instance=desires.current_instance,
desires=desires,
working_set=working_set,
reads_per_second=rps,
max_rps_to_disk=max_rps_to_disk,
required_cluster_size=required_cluster_size,
zones_per_region=zones_per_region,
copies_per_region=copies_per_region,
)
Expand Down Expand Up @@ -502,7 +500,6 @@ def capacity_plan(
max_table_buffer_percent: float = min(
0.5, extra_model_arguments.get("max_table_buffer_percent", 0.11)
)
max_cpu_utilization: Optional[float] = extra_model_arguments.get("max_cpu_utilization", None)

# Adjust heap defaults for high write clusters
if (
Expand All @@ -514,7 +511,6 @@ def capacity_plan(

return _estimate_cassandra_cluster_zonal(
instance=instance,
max_cpu_utilization=max_cpu_utilization,
drive=drive,
context=context,
desires=desires,
Expand Down
12 changes: 9 additions & 3 deletions tests/netflix/test_cassandra.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from service_capacity_modeling.capacity_planner import planner
from service_capacity_modeling.interface import AccessConsistency
from service_capacity_modeling.interface import AccessConsistency, CurrentCapacity
from service_capacity_modeling.interface import CapacityDesires
from service_capacity_modeling.interface import certain_float
from service_capacity_modeling.interface import certain_int
Expand Down Expand Up @@ -310,7 +310,13 @@ def test_plan_certain():
"""
worn_desire = CapacityDesires(
service_tier=1,
current_instance_type="i4i.8xlarge",
current_capacity=CurrentCapacity(
current_instance_type="i4i.8xlarge",
current_cluster_size=8,
cpu_utilization=Interval(
low=10.12, mid=13.2, high=14.194801291058118, confidence=1
),
),
query_pattern=QueryPattern(
access_pattern=AccessPattern(
AccessPattern.latency
Expand Down Expand Up @@ -340,9 +346,9 @@ def test_plan_certain():
desires=worn_desire,
extra_model_arguments={
"required_cluster_size": 8,
"max_cpu_utilization": 14.194801291058118,
},
)
print(cap_plan)

lr_clusters = cap_plan[0].candidate_clusters.zonal[0]
assert lr_clusters.count == 8
Expand Down

0 comments on commit 592def6

Please sign in to comment.