Merge pull request #53 from Netflix-Skunkworks/feature/shengweiw/aurora-hardware

Feature/shengweiw/aurora hardware
ShengweiWang committed May 22, 2023
2 parents 5853c76 + a6dc62a commit 46eb1f5
Showing 5 changed files with 108 additions and 15 deletions.
@@ -60,7 +60,15 @@
     "r5dn.xlarge": {"annual_cost": 1037},
     "r5dn.2xlarge": {"annual_cost": 2073.6},
     "r5dn.4xlarge": {"annual_cost": 4147.3},
-    "r5dn.8xlarge": {"annual_cost": 8295}
+    "r5dn.8xlarge": {"annual_cost": 8295},
+    "db.r5.large": {"annual_cost": 885.7},
+    "db.r5.xlarge": {"annual_cost": 1771.3},
+    "db.r5.2xlarge": {"annual_cost": 3543},
+    "db.r5.4xlarge": {"annual_cost": 7086},
+    "db.r5.8xlarge": {"annual_cost": 14171.7},
+    "db.r5.12xlarge": {"annual_cost": 21258},
+    "db.r5.16xlarge": {"annual_cost": 28343.3},
+    "db.r5.24xlarge": {"annual_cost": 42516}
   },
   "drives": {
     "gp2": {"annual_cost_per_gib": 1.2},
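Note (editorial, not part of the commit): the new db.r5 prices scale essentially linearly with instance size from the db.r5.large anchor, which is a quick way to sanity-check the table:

# Sanity check: db.r5 annual costs above are ~linear in size,
# anchored at db.r5.large (885.7). Small deviations are rounding.
base = 885.7
for size, factor in [("xlarge", 2), ("2xlarge", 4), ("4xlarge", 8),
                     ("8xlarge", 16), ("12xlarge", 24),
                     ("16xlarge", 32), ("24xlarge", 48)]:
    print(f"db.r5.{size}: ~{base * factor:.1f}")  # matches the table to within rounding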
64 changes: 64 additions & 0 deletions service_capacity_modeling/hardware/profiles/shapes/aws.json
@@ -718,6 +718,70 @@
       "read_io_per_s": 466666, "write_io_per_s": 233333,
       "block_size_kib": 4, "single_tenant": true
     }
+  },
+  "db.r5.large": {
+    "name": "db.r5.large",
+    "cpu": 2,
+    "cpu_ghz": 3.1,
+    "ram_gib": 15.71,
+    "net_mbps": 500,
+    "drive": null
+  },
+  "db.r5.xlarge": {
+    "name": "db.r5.xlarge",
+    "cpu": 4,
+    "cpu_ghz": 3.1,
+    "ram_gib": 31.65,
+    "net_mbps": 1000,
+    "drive": null
+  },
+  "db.r5.2xlarge": {
+    "name": "db.r5.2xlarge",
+    "cpu": 8,
+    "cpu_ghz": 3.1,
+    "ram_gib": 63.62,
+    "net_mbps": 2000,
+    "drive": null
+  },
+  "db.r5.4xlarge": {
+    "name": "db.r5.4xlarge",
+    "cpu": 16,
+    "cpu_ghz": 3.1,
+    "ram_gib": 128,
+    "net_mbps": 4000,
+    "drive": null
+  },
+  "db.r5.8xlarge": {
+    "name": "db.r5.8xlarge",
+    "cpu": 32,
+    "cpu_ghz": 3.1,
+    "ram_gib": 256,
+    "net_mbps": 10000,
+    "drive": null
+  },
+  "db.r5.12xlarge": {
+    "name": "db.r5.12xlarge",
+    "cpu": 48,
+    "cpu_ghz": 3.1,
+    "ram_gib": 384,
+    "net_mbps": 10000,
+    "drive": null
+  },
+  "db.r5.16xlarge": {
+    "name": "db.r5.16xlarge",
+    "cpu": 64,
+    "cpu_ghz": 3.1,
+    "ram_gib": 512,
+    "net_mbps": 13600,
+    "drive": null
+  },
+  "db.r5.24xlarge": {
+    "name": "db.r5.24xlarge",
+    "cpu": 96,
+    "cpu_ghz": 3.1,
+    "ram_gib": 768,
+    "net_mbps": 19000,
+    "drive": null
   }
 },
 "drives": {
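Note (editorial, not part of the commit): the new shapes keep the r5 family's nominal 8 GiB of RAM per vCPU; the smaller sizes report slightly less, presumably memory reserved by the database host. A quick check:

# RAM-per-vCPU check for a few of the new db.r5 shapes (values from the diff above)
shapes = {
    "db.r5.large": (2, 15.71),
    "db.r5.xlarge": (4, 31.65),
    "db.r5.8xlarge": (32, 256),
    "db.r5.24xlarge": (96, 768),
}
for name, (cpu, ram_gib) in shapes.items():
    print(f"{name}: {ram_gib / cpu:.2f} GiB per vCPU")  # ~7.9-8.0 across the family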
4 changes: 2 additions & 2 deletions service_capacity_modeling/interface.py
@@ -276,11 +276,11 @@ class Instance(ExcludeUnsetModel):
 
     @property
     def family(self):
-        return self.name.split(self.family_separator)[0]
+        return self.name[:self.name.rindex(self.family_separator)]
 
     @property
     def size(self):
-        return self.name.split(self.family_separator)[1]
+        return self.name.split(self.family_separator)[-1]
 
 
 class Service(ExcludeUnsetModel):
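The switch from a first-separator split to rindex is what lets multi-segment names like the new db.r5 shapes resolve to the right family. A quick sketch (assuming family_separator is "."):

name = "db.r5.xlarge"
sep = "."  # assumed value of Instance.family_separator

# Old behavior: split on the first separator.
assert name.split(sep)[0] == "db"    # wrong family for RDS shapes
assert name.split(sep)[1] == "r5"    # wrong size

# New behavior: split on the last separator.
assert name[: name.rindex(sep)] == "db.r5"  # family
assert name.split(sep)[-1] == "xlarge"      # size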
31 changes: 26 additions & 5 deletions service_capacity_modeling/models/org/netflix/aurora.py
@@ -91,6 +91,24 @@ def _rds_required_disk_ios(disk_size_gib: int, db_type: str, btree_fan_out: int
     return math.log(pages, btree_fan_out)
 
 
+# This is a start; we should iterate based on the actual workload
+def _estimate_io_cost(
+    db_type: str,
+    desires: CapacityDesires,
+    read_io_price: float,
+    write_io_price: float,
+    cache_hit_rate: float = 0.8,
+):
+    # Postgres reads 8 KiB pages; MySQL (InnoDB) reads 16 KiB pages
+    if db_type == "postgres":
+        read_byte_per_io = 8192
+    else:
+        read_byte_per_io = 16384
+
+    write_byte_per_io = 4096
+
+    # Reads smaller than a page still cost a full page read
+    r_io = desires.query_pattern.estimated_read_per_second.mid * math.ceil(
+        desires.query_pattern.estimated_mean_read_size_bytes.mid / read_byte_per_io
+    )
+    # Assuming writes can be batched
+    w_io = (
+        desires.query_pattern.estimated_write_per_second.mid
+        * desires.query_pattern.estimated_mean_write_size_bytes.mid
+        / write_byte_per_io
+    )
+
+    # Only cache misses are billed as read IOs
+    r_cost = r_io * (1 - cache_hit_rate) * read_io_price
+    w_cost = w_io * write_io_price
+    return r_cost + w_cost
 
 
 def _compute_aurora_region(
     instance: Instance,
     drive: Drive,  # always Aurora storage
@@ -101,6 +119,8 @@ def _compute_aurora_region(
     required_disk_ios,
     required_disk_space,
     core_reference_ghz: float,
+    db_type: str,
+    desires: CapacityDesires,
 ) -> Optional[RegionClusterCapacity]:
     """Computes a regional cluster of an Aurora service
@@ -129,10 +149,9 @@ def _compute_aurora_region(
         attached_drive.size_gib = max(1, required_disk_space(needed_disk_gib))  # todo: figure out the IO vs disk tradeoff
         attached_drives.append(attached_drive)
 
-    # print(f"hardware {attached_drive}, driver cost: {attached_drive.annual_cost}")
-
-    # todo: add IO cost
-    total_annual_cost = instance.annual_cost + attached_drive.annual_cost
+    io_cost = _estimate_io_cost(
+        db_type,
+        desires,
+        drive.annual_cost_per_read_io[0][1],
+        drive.annual_cost_per_write_io[0][1],
+    )
+    total_annual_cost = instance.annual_cost + attached_drive.annual_cost + io_cost
 
     logger.debug(
         "For (cpu, memory_gib, disk_gib) = (%s, %s, %s) need ( %s, %s, %s)",
@@ -161,7 +180,7 @@ def _estimate_aurora_regional(
     extra_model_arguments: Dict[str, Any],
 ) -> Optional[CapacityPlan]:
     instance_family = instance.family
-    if instance_family not in ("x2g", "r6g", "r6i", "r5", "t4g"):  # TODO: split db instance and ec2 instance
+    if instance_family not in ("db.x2g", "db.r6g", "db.r6i", "db.r5", "db.t4g"):
         return None
 
     if drive.name != "aurora":
@@ -186,6 +205,8 @@
         * math.ceil(0.1 * rps),
         required_disk_space=lambda x: x * 1.2,  # Unscientific random guess!
         core_reference_ghz=requirement.core_reference_ghz,
+        db_type=db_type,
+        desires=desires,
     )
 
     if not cluster:
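To make the new cost formula concrete, here is a worked example with invented numbers (the prices and workload below are hypothetical, not from the commit):

import math

# Hypothetical postgres workload: 1,000 reads/s of ~4 KiB, 500 writes/s of ~1 KiB.
read_per_s, mean_read_bytes = 1000, 4096
write_per_s, mean_write_bytes = 500, 1024
read_byte_per_io, write_byte_per_io = 8192, 4096  # postgres page size, write batch size
cache_hit_rate = 0.8

# A read smaller than one page still costs a full page read.
r_io = read_per_s * math.ceil(mean_read_bytes / read_byte_per_io)  # 1000 IO/s
# Writes are assumed batchable, so fractional IOs are allowed.
w_io = write_per_s * mean_write_bytes / write_byte_per_io          # 125 IO/s

# Only the 20% of reads that miss the cache reach storage and get billed.
print(r_io * (1 - cache_hit_rate))  # 200 read IO/s billed

# Annual cost = billed IO rate times the drive's annual cost per IO/s, which the
# model takes from drive.annual_cost_per_read_io[0][1] and ...write_io[0][1].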
14 changes: 7 additions & 7 deletions tests/netflix/test_aurora.py
@@ -93,17 +93,17 @@ def test_small_footprint():
         region="us-east-1",
         desires=small_footprint,
     )
-    assert cap_plan[0].candidate_clusters.regional[0].instance.name == "r5.xlarge"
+    assert cap_plan[0].candidate_clusters.regional[0].instance.name == "db.r5.xlarge"
 
-    # two instance plus storage
-    assert cap_plan[0].candidate_clusters.annual_costs["aurora-cluster.regional-clusters"] == 830 * 2 + 60 * 1.2
+    # two instances plus storage and IO
+    assert (
+        3500
+        < cap_plan[0].candidate_clusters.annual_costs["aurora-cluster.regional-clusters"]
+        < 4500
+    )
 
 
 def test_medium_footprint():
     cap_plan = planner.plan_certain(
         model_name="org.netflix.aurora", region="us-east-1", desires=mid_footprint
     )
-    assert cap_plan[0].candidate_clusters.regional[0].instance.name == "r5.8xlarge"
+    assert cap_plan[0].candidate_clusters.regional[0].instance.name == "db.r5.8xlarge"
 
 
 def test_large_footprint():
@@ -112,7 +112,7 @@ def test_large_footprint():
         region="us-east-1",
         desires=large_footprint,
     )
-    assert cap_plan[0].candidate_clusters.regional[0].instance.name == "r5.8xlarge"
+    assert cap_plan[0].candidate_clusters.regional[0].instance.name == "db.r5.8xlarge"
 
 
 def test_tier_3():
@@ -121,7 +121,7 @@ def test_tier_3():
         region="us-east-1",
         desires=tier_3,
     )
-    assert cap_plan[0].candidate_clusters.regional[0].instance.name == "r5.4xlarge"
+    assert cap_plan[0].candidate_clusters.regional[0].instance.name == "db.r5.4xlarge"
 
 
 def test_cap_plan():
@@ -222,4 +222,4 @@ def test_cap_plan():
         region="us-east-1",
         desires=my_desire,
     )
-    assert cap_plan[0].candidate_clusters.regional[0].instance.name == "r5.8xlarge"
+    assert cap_plan[0].candidate_clusters.regional[0].instance.name == "db.r5.8xlarge"
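The exact-equality cost assertion had to become a range because the plan now includes a workload-dependent IO estimate. A rough check of the band, using the prices in this commit and the ~60 GiB of storage implied by the old assertion:

# Sketch of why (3500, 4500) is the asserted band for the small footprint:
instances = 2 * 1771.3  # two db.r5.xlarge at the annual cost added above
storage = 60 * 1.2      # same storage term the old exact assertion used
print(instances + storage)  # ~3614.6 before IO; the band's headroom absorbs
                            # the estimate from _estimate_io_cost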
