Skip to content

Commit

Permalink
Add io cost for Aurora
Browse files Browse the repository at this point in the history
  • Loading branch information
ShengweiWang committed May 16, 2023
1 parent 30937b4 commit a6dc62a
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 7 deletions.
29 changes: 25 additions & 4 deletions service_capacity_modeling/models/org/netflix/aurora.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,24 @@ def _rds_required_disk_ios(disk_size_gib: int, db_type: str, btree_fan_out: int
return math.log(pages, btree_fan_out)


# This is a start, we should iterate based on the actual work load
def _estimate_io_cost(db_type: str, desires, read_io_price: float, write_io_price: float, cache_hit_rate: float = 0.8):
if db_type == "postgres":
read_byte_per_io = 8192
else:
read_byte_per_io = 16384

write_byte_per_io = 4096

r_io = desires.query_pattern.estimated_read_per_second.mid * math.ceil(desires.query_pattern.estimated_mean_read_size_bytes.mid / read_byte_per_io)
# Assuming write can be batched
w_io = desires.query_pattern.estimated_write_per_second.mid * desires.query_pattern.estimated_mean_write_size_bytes.mid / write_byte_per_io

r_cost = r_io * (1 - cache_hit_rate) * read_io_price
w_cost = w_io * write_io_price
return r_cost + w_cost


def _compute_aurora_region(
instance: Instance,
drive: Drive, # always to be Aurora Storage
Expand All @@ -101,6 +119,8 @@ def _compute_aurora_region(
required_disk_ios,
required_disk_space,
core_reference_ghz: float,
db_type:str,
desires:CapacityDesires
) -> Optional[RegionClusterCapacity]:
"""Computes a regional cluster of a Aurora service
Expand Down Expand Up @@ -129,10 +149,9 @@ def _compute_aurora_region(
attached_drive.size_gib = max(1, required_disk_space(needed_disk_gib)) # todo: Figure out the IO vs disk
attached_drives.append(attached_drive)

# print(f"hardware {attached_drive}, driver cost: {attached_drive.annual_cost}")

# todo: add IO cost
total_annual_cost = instance.annual_cost + attached_drive.annual_cost
io_cost = _estimate_io_cost(db_type, desires, drive.annual_cost_per_read_io[0][1],
drive.annual_cost_per_write_io[0][1])
total_annual_cost = instance.annual_cost + attached_drive.annual_cost + io_cost

logger.debug(
"For (cpu, memory_gib, disk_gib) = (%s, %s, %s) need ( %s, %s, %s)",
Expand Down Expand Up @@ -186,6 +205,8 @@ def _estimate_aurora_regional(
* math.ceil(0.1 * rps),
required_disk_space=lambda x: x * 1.2, # Unscientific random guess!
core_reference_ghz=requirement.core_reference_ghz,
db_type=db_type,
desires=desires
)

if not cluster:
Expand Down
5 changes: 2 additions & 3 deletions tests/netflix/test_aurora.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,8 @@ def test_small_footprint():
)
assert cap_plan[0].candidate_clusters.regional[0].instance.name == "db.r5.xlarge"

# two instance plus storage
print(cap_plan[0].candidate_clusters.annual_costs["aurora-cluster.regional-clusters"])
assert (3000 < cap_plan[0].candidate_clusters.annual_costs["aurora-cluster.regional-clusters"] < 4000)
# two instances plus storage and IO
assert (3500 < cap_plan[0].candidate_clusters.annual_costs["aurora-cluster.regional-clusters"] < 4500)


def test_medium_footprint():
Expand Down

0 comments on commit a6dc62a

Please sign in to comment.