Skip to content

Commit

Permalink
Merge pull request #46 from Netflix-Skunkworks/bugfix/josephl/fix-network-costs
Browse files Browse the repository at this point in the history

Fix off-by-N error on intra-region network costs
  • Loading branch information
abersnaze committed Feb 27, 2023
2 parents f5fdbff + 7085052 commit dd6392e
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 14 deletions.
17 changes: 13 additions & 4 deletions service_capacity_modeling/models/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@ def network_services(
result = []
# Network transfer is for every other zone and then for every region
# other than us as well.
num_zones = copies_per_region - 1
num_regions = context.num_regions - 1
num_zones = max(copies_per_region - 1, 0)
num_regions = max(context.num_regions - 1, 0)

# have bytes and / second
size = desires.query_pattern.estimated_mean_write_size_bytes.mid
Expand All @@ -109,6 +109,8 @@ def network_services(

txfer_gib = (wps * size / (1024 * 1024 * 1024)) * (SECONDS_IN_YEAR)

# For each cross region replication we have to pay to move bytes
# inter region. This is the number of regions minus 1
inter_txfer = context.services.get("net.inter.region", None)
if inter_txfer:
price_per_gib = inter_txfer.annual_cost_per_gib
Expand All @@ -120,14 +122,21 @@ def network_services(
)
)

# Same zone is free, but we pay for replication from our zone to others
intra_txfer = context.services.get("net.intra.region", None)
if intra_txfer:
price_per_gib = intra_txfer.annual_cost_per_gib
result.append(
ServiceCapacity(
service_type=f"{service_type}.net.intra.region",
annual_cost=(price_per_gib * txfer_gib * num_zones),
service_params={"txfer_gib": txfer_gib, "num_zones": num_zones},
annual_cost=(
price_per_gib * txfer_gib * num_zones * context.num_regions
),
service_params={
"txfer_gib": txfer_gib,
"num_zones": num_zones,
"num_regions": context.num_regions,
},
)
)
return result
Expand Down
22 changes: 17 additions & 5 deletions tests/netflix/test_cassandra_uncertain.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,16 @@ def test_uncertain_planning():
lr = mid_plan.least_regret[0]
lr_cluster = lr.candidate_clusters.zonal[0]
assert 8 <= lr_cluster.count * lr_cluster.instance.cpu <= 64
assert 5_000 <= lr.candidate_clusters.total_annual_cost < 45_000
assert (
5_000 <= lr.candidate_clusters.annual_costs["cassandra.zonal-clusters"] < 45_000
)

sr = mid_plan.least_regret[1]
sr_cluster = sr.candidate_clusters.zonal[0]
assert 8 <= sr_cluster.count * sr_cluster.instance.cpu <= 64
assert 5_000 <= sr.candidate_clusters.total_annual_cost < 45_000
assert (
5_000 <= sr.candidate_clusters.annual_costs["cassandra.zonal-clusters"] < 45_000
)

tiny_plan = planner.plan(
model_name="org.netflix.cassandra",
Expand All @@ -56,7 +60,9 @@ def test_uncertain_planning():
lr = tiny_plan.least_regret[0]
lr_cluster = lr.candidate_clusters.zonal[0]
assert 2 <= lr_cluster.count * lr_cluster.instance.cpu < 16
assert 1_000 < lr.candidate_clusters.total_annual_cost < 6_000
assert (
1_000 < lr.candidate_clusters.annual_costs["cassandra.zonal-clusters"] < 6_000
)


def test_increasing_qps_simple():
Expand Down Expand Up @@ -89,7 +95,9 @@ def test_increasing_qps_simple():

lr = cap_plan.least_regret[0].candidate_clusters.zonal[0]
lr_cpu = lr.count * lr.instance.cpu
lr_cost = cap_plan.least_regret[0].candidate_clusters.total_annual_cost
lr_cost = cap_plan.least_regret[0].candidate_clusters.annual_costs[
"cassandra.zonal-clusters"
]
lr_family = lr.instance.family
if lr.instance.drive is None:
assert sum(dr.size_gib for dr in lr.attached_drives) >= 200
Expand Down Expand Up @@ -184,7 +192,11 @@ def test_very_small_has_disk():
for lr in cap_plan.least_regret:
lr_cluster = lr.candidate_clusters.zonal[0]
assert 2 <= lr_cluster.count * lr_cluster.instance.cpu < 16
assert 1_000 < lr.candidate_clusters.total_annual_cost < 6_000
assert (
1_000
< lr.candidate_clusters.annual_costs["cassandra.zonal-clusters"]
< 6_000
)
if lr_cluster.instance.drive is None:
assert sum(dr.size_gib for dr in lr_cluster.attached_drives) > 10
else:
Expand Down
15 changes: 11 additions & 4 deletions tests/netflix/test_evcache.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ def test_evcache_replication():
model_name="org.netflix.evcache",
region="us-east-1",
desires=high_qps,
num_regions=3,
extra_model_arguments={"cross_region_replication": "sets"},
)
assert len(plan.least_regret) >= 2
Expand All @@ -84,22 +85,26 @@ def test_evcache_replication():
assert all(k in lr.requirements.regrets for k in ("spend", "mem", "disk"))
assert lr.requirements.zonal[0].disk_gib.mid > 200

# EVCache should be pretty cheap for 100k RPS with 10k WPS
# EVCache compute should be pretty cheap for 100k RPS with 10k WPS
assert lr.candidate_clusters.annual_costs["evcache.zonal-clusters"] < 10000

set_inter_region = lr.candidate_clusters.annual_costs["evcache.net.inter.region"]

# With replication should have network costs
assert 10000 < set_inter_region < 15000
assert (
10000 < lr.candidate_clusters.annual_costs["evcache.net.intra.region"] < 15000
20000 < lr.candidate_clusters.annual_costs["evcache.net.intra.region"] < 50000
)

delete_plan = planner.plan(
model_name="org.netflix.evcache",
region="us-east-1",
desires=high_qps,
extra_model_arguments={"cross_region_replication": "evicts"},
num_regions=3,
extra_model_arguments={
"cross_region_replication": "evicts",
"copies_per_region": 3,
},
)

lr = delete_plan.least_regret[0]
Expand All @@ -110,4 +115,6 @@ def test_evcache_replication():

# With replication should have network costs
assert 2000 < evict_inter_region < 6000
assert 2000 < lr.candidate_clusters.annual_costs["evcache.net.intra.region"] < 6000
assert (
12000 < lr.candidate_clusters.annual_costs["evcache.net.intra.region"] < 18000
)
2 changes: 1 addition & 1 deletion tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,4 +168,4 @@ def test_network_services():
cost_by_service[service.service_type] = service.annual_cost

assert 3 * 1500 < cost_by_service["test.net.inter.region"] < 3 * 1500 + 100
assert 2 * 1500 < cost_by_service["test.net.intra.region"] < 2 * 1500 + 100
assert 2 * 4 * 1500 < cost_by_service["test.net.intra.region"] < 2 * 4 * 1500 + 100

0 comments on commit dd6392e

Please sign in to comment.