Merge pull request #53 from Netflix-Skunkworks/feature/shengweiw/aurora-hardware

Feature/shengweiw/aurora hardware
ShengweiWang committed May 22, 2023
2 parents 5853c76 + a6dc62a commit 46eb1f5
Showing 5 changed files with 108 additions and 15 deletions.
@@ -60,7 +60,15 @@
     "r5dn.xlarge": {"annual_cost": 1037},
     "r5dn.2xlarge": {"annual_cost": 2073.6},
     "r5dn.4xlarge": {"annual_cost": 4147.3},
-    "r5dn.8xlarge": {"annual_cost": 8295}
+    "r5dn.8xlarge": {"annual_cost": 8295},
+    "db.r5.large": {"annual_cost": 885.7},
+    "db.r5.xlarge": {"annual_cost": 1771.3},
+    "db.r5.2xlarge": {"annual_cost": 3543},
+    "db.r5.4xlarge": {"annual_cost": 7086},
+    "db.r5.8xlarge": {"annual_cost": 14171.7},
+    "db.r5.12xlarge": {"annual_cost": 21258},
+    "db.r5.16xlarge": {"annual_cost": 28343.3},
+    "db.r5.24xlarge": {"annual_cost": 42516}
   },
   "drives": {
     "gp2": {"annual_cost_per_gib": 1.2},
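Note (editorial, not part of the commit): the new db.r5 prices scale essentially linearly with instance size from the db.r5.large anchor, which is a quick way to sanity-check the table:

# Sanity check: db.r5 annual costs above are ~linear in size,
# anchored at db.r5.large (885.7). Small deviations are rounding.
base = 885.7
for size, factor in [("xlarge", 2), ("2xlarge", 4), ("4xlarge", 8),
                     ("8xlarge", 16), ("12xlarge", 24),
                     ("16xlarge", 32), ("24xlarge", 48)]:
    print(f"db.r5.{size}: ~{base * factor:.1f}")  # matches the table to within rounding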
64 changes: 64 additions & 0 deletions service_capacity_modeling/hardware/profiles/shapes/aws.json
@@ -718,6 +718,70 @@
       "read_io_per_s": 466666, "write_io_per_s": 233333,
       "block_size_kib": 4, "single_tenant": true
     }
+  },
+  "db.r5.large": {
+    "name": "db.r5.large",
+    "cpu": 2,
+    "cpu_ghz": 3.1,
+    "ram_gib": 15.71,
+    "net_mbps": 500,
+    "drive": null
+  },
+  "db.r5.xlarge": {
+    "name": "db.r5.xlarge",
+    "cpu": 4,
+    "cpu_ghz": 3.1,
+    "ram_gib": 31.65,
+    "net_mbps": 1000,
+    "drive": null
+  },
+  "db.r5.2xlarge": {
+    "name": "db.r5.2xlarge",
+    "cpu": 8,
+    "cpu_ghz": 3.1,
+    "ram_gib": 63.62,
+    "net_mbps": 2000,
+    "drive": null
+  },
+  "db.r5.4xlarge": {
+    "name": "db.r5.4xlarge",
+    "cpu": 16,
+    "cpu_ghz": 3.1,
+    "ram_gib": 128,
+    "net_mbps": 4000,
+    "drive": null
+  },
+  "db.r5.8xlarge": {
+    "name": "db.r5.8xlarge",
+    "cpu": 32,
+    "cpu_ghz": 3.1,
+    "ram_gib": 256,
+    "net_mbps": 10000,
+    "drive": null
+  },
+  "db.r5.12xlarge": {
+    "name": "db.r5.12xlarge",
+    "cpu": 48,
+    "cpu_ghz": 3.1,
+    "ram_gib": 384,
+    "net_mbps": 10000,
+    "drive": null
+  },
+  "db.r5.16xlarge": {
+    "name": "db.r5.16xlarge",
+    "cpu": 64,
+    "cpu_ghz": 3.1,
+    "ram_gib": 512,
+    "net_mbps": 13600,
+    "drive": null
+  },
+  "db.r5.24xlarge": {
+    "name": "db.r5.24xlarge",
+    "cpu": 96,
+    "cpu_ghz": 3.1,
+    "ram_gib": 768,
+    "net_mbps": 19000,
+    "drive": null
   }
 },
 "drives": {
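Note (editorial, not part of the commit): the new shapes keep the r5 family's nominal 8 GiB of RAM per vCPU; the smaller sizes report slightly less, presumably memory reserved by the database host. A quick check:

# RAM-per-vCPU check for a few of the new db.r5 shapes (values from the diff above)
shapes = {
    "db.r5.large": (2, 15.71),
    "db.r5.xlarge": (4, 31.65),
    "db.r5.8xlarge": (32, 256),
    "db.r5.24xlarge": (96, 768),
}
for name, (cpu, ram_gib) in shapes.items():
    print(f"{name}: {ram_gib / cpu:.2f} GiB per vCPU")  # ~7.9-8.0 across the family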
4 changes: 2 additions & 2 deletions service_capacity_modeling/interface.py
@@ -276,11 +276,11 @@ class Instance(ExcludeUnsetModel):
 
     @property
     def family(self):
-        return self.name.split(self.family_separator)[0]
+        return self.name[:self.name.rindex(self.family_separator)]
 
     @property
     def size(self):
-        return self.name.split(self.family_separator)[1]
+        return self.name.split(self.family_separator)[-1]
 
 
 class Service(ExcludeUnsetModel):
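The switch from a first-separator split to rindex is what lets multi-segment names like the new db.r5 shapes resolve to the right family. A quick sketch (assuming family_separator is "."):

name = "db.r5.xlarge"
sep = "."  # assumed value of Instance.family_separator

# Old behavior: split on the first separator.
assert name.split(sep)[0] == "db"    # wrong family for RDS shapes
assert name.split(sep)[1] == "r5"    # wrong size

# New behavior: split on the last separator.
assert name[: name.rindex(sep)] == "db.r5"  # family
assert name.split(sep)[-1] == "xlarge"      # size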
31 changes: 26 additions & 5 deletions service_capacity_modeling/models/org/netflix/aurora.py
@@ -91,6 +91,24 @@ def _rds_required_disk_ios(disk_size_gib: int, db_type: str, btree_fan_out: int
     return math.log(pages, btree_fan_out)
 
 
+# This is a start; we should iterate based on the actual workload
+def _estimate_io_cost(
+    db_type: str,
+    desires: CapacityDesires,
+    read_io_price: float,
+    write_io_price: float,
+    cache_hit_rate: float = 0.8,
+):
+    # Postgres reads 8 KiB pages; MySQL (InnoDB) reads 16 KiB pages
+    if db_type == "postgres":
+        read_byte_per_io = 8192
+    else:
+        read_byte_per_io = 16384
+
+    write_byte_per_io = 4096
+
+    # Reads smaller than a page still cost a full page read
+    r_io = desires.query_pattern.estimated_read_per_second.mid * math.ceil(
+        desires.query_pattern.estimated_mean_read_size_bytes.mid / read_byte_per_io
+    )
+    # Assuming writes can be batched
+    w_io = (
+        desires.query_pattern.estimated_write_per_second.mid
+        * desires.query_pattern.estimated_mean_write_size_bytes.mid
+        / write_byte_per_io
+    )
+
+    # Only cache misses are billed as read IOs
+    r_cost = r_io * (1 - cache_hit_rate) * read_io_price
+    w_cost = w_io * write_io_price
+    return r_cost + w_cost
 
 
 def _compute_aurora_region(
     instance: Instance,
     drive: Drive,  # always Aurora storage
@@ -101,6 +119,8 @@ def _compute_aurora_region(
     required_disk_ios,
     required_disk_space,
     core_reference_ghz: float,
+    db_type: str,
+    desires: CapacityDesires,
 ) -> Optional[RegionClusterCapacity]:
     """Computes a regional cluster of an Aurora service
@@ -129,10 +149,9 @@ def _compute_aurora_region(
         attached_drive.size_gib = max(1, required_disk_space(needed_disk_gib))  # todo: figure out the IO vs disk tradeoff
         attached_drives.append(attached_drive)
 
-    # print(f"hardware {attached_drive}, driver cost: {attached_drive.annual_cost}")
-
-    # todo: add IO cost
-    total_annual_cost = instance.annual_cost + attached_drive.annual_cost
+    io_cost = _estimate_io_cost(
+        db_type,
+        desires,
+        drive.annual_cost_per_read_io[0][1],
+        drive.annual_cost_per_write_io[0][1],
+    )
+    total_annual_cost = instance.annual_cost + attached_drive.annual_cost + io_cost
 
     logger.debug(
         "For (cpu, memory_gib, disk_gib) = (%s, %s, %s) need ( %s, %s, %s)",
@@ -161,7 +180,7 @@ def _estimate_aurora_regional(
     extra_model_arguments: Dict[str, Any],
 ) -> Optional[CapacityPlan]:
     instance_family = instance.family
-    if instance_family not in ("x2g", "r6g", "r6i", "r5", "t4g"):  # TODO: split db instance and ec2 instance
+    if instance_family not in ("db.x2g", "db.r6g", "db.r6i", "db.r5", "db.t4g"):
         return None
 
     if drive.name != "aurora":
@@ -186,6 +205,8 @@
         * math.ceil(0.1 * rps),
         required_disk_space=lambda x: x * 1.2,  # Unscientific random guess!
         core_reference_ghz=requirement.core_reference_ghz,
+        db_type=db_type,
+        desires=desires,
     )
 
     if not cluster:
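To make the new cost formula concrete, here is a worked example with invented numbers (the prices and workload below are hypothetical, not from the commit):

import math

# Hypothetical postgres workload: 1,000 reads/s of ~4 KiB, 500 writes/s of ~1 KiB.
read_per_s, mean_read_bytes = 1000, 4096
write_per_s, mean_write_bytes = 500, 1024
read_byte_per_io, write_byte_per_io = 8192, 4096  # postgres page size, write batch size
cache_hit_rate = 0.8

# A read smaller than one page still costs a full page read.
r_io = read_per_s * math.ceil(mean_read_bytes / read_byte_per_io)  # 1000 IO/s
# Writes are assumed batchable, so fractional IOs are allowed.
w_io = write_per_s * mean_write_bytes / write_byte_per_io          # 125 IO/s

# Only the 20% of reads that miss the cache reach storage and get billed.
print(r_io * (1 - cache_hit_rate))  # 200 read IO/s billed

# Annual cost = billed IO rate times the drive's annual cost per IO/s, which the
# model takes from drive.annual_cost_per_read_io[0][1] and ...write_io[0][1].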
14 changes: 7 additions & 7 deletions tests/netflix/test_aurora.py
@@ -93,17 +93,17 @@ def test_small_footprint():
         region="us-east-1",
         desires=small_footprint,
     )
-    assert cap_plan[0].candidate_clusters.regional[0].instance.name == "r5.xlarge"
+    assert cap_plan[0].candidate_clusters.regional[0].instance.name == "db.r5.xlarge"
 
-    # two instance plus storage
-    assert cap_plan[0].candidate_clusters.annual_costs["aurora-cluster.regional-clusters"] == 830 * 2 + 60 * 1.2
+    # two instances plus storage and IO
+    assert (
+        3500
+        < cap_plan[0].candidate_clusters.annual_costs["aurora-cluster.regional-clusters"]
+        < 4500
+    )
 
 
 def test_medium_footprint():
     cap_plan = planner.plan_certain(
         model_name="org.netflix.aurora", region="us-east-1", desires=mid_footprint
     )
-    assert cap_plan[0].candidate_clusters.regional[0].instance.name == "r5.8xlarge"
+    assert cap_plan[0].candidate_clusters.regional[0].instance.name == "db.r5.8xlarge"
 
 
 def test_large_footprint():
@@ -112,7 +112,7 @@ def test_large_footprint():
         region="us-east-1",
         desires=large_footprint,
     )
-    assert cap_plan[0].candidate_clusters.regional[0].instance.name == "r5.8xlarge"
+    assert cap_plan[0].candidate_clusters.regional[0].instance.name == "db.r5.8xlarge"
 
 
 def test_tier_3():
@@ -121,7 +121,7 @@ def test_tier_3():
         region="us-east-1",
         desires=tier_3,
     )
-    assert cap_plan[0].candidate_clusters.regional[0].instance.name == "r5.4xlarge"
+    assert cap_plan[0].candidate_clusters.regional[0].instance.name == "db.r5.4xlarge"
 
 
 def test_cap_plan():
@@ -222,4 +222,4 @@ def test_cap_plan():
         region="us-east-1",
         desires=my_desire,
     )
-    assert cap_plan[0].candidate_clusters.regional[0].instance.name == "r5.8xlarge"
+    assert cap_plan[0].candidate_clusters.regional[0].instance.name == "db.r5.8xlarge"
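The exact-equality cost assertion had to become a range because the plan now includes a workload-dependent IO estimate. A rough check of the band, using the prices in this commit and the ~60 GiB of storage implied by the old assertion:

# Sketch of why (3500, 4500) is the asserted band for the small footprint:
instances = 2 * 1771.3  # two db.r5.xlarge at the annual cost added above
storage = 60 * 1.2      # same storage term the old exact assertion used
print(instances + storage)  # ~3614.6 before IO; the band's headroom absorbs
                            # the estimate from _estimate_io_cost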
