diff --git a/service_capacity_modeling/hardware/profiles/shapes/aws.json b/service_capacity_modeling/hardware/profiles/shapes/aws.json
index b44f23f..79b6299 100644
--- a/service_capacity_modeling/hardware/profiles/shapes/aws.json
+++ b/service_capacity_modeling/hardware/profiles/shapes/aws.json
@@ -254,7 +254,7 @@
         "ram_gib": 15.48,
         "net_mbps": 781,
         "drive": {
-            "name": "ephem", "size_gib": 436.5,
+            "name": "ephem", "size_gib": 436,
             "read_io_latency_ms": {
                 "minimum_value":0.05,
                 "low":0.10, "mid":0.125, "high":0.17,
@@ -271,7 +271,7 @@
         "ram_gib": 30.955,
         "net_mbps": 1875,
         "drive": {
-            "name": "ephem", "size_gib": 873.0,
+            "name": "ephem", "size_gib": 873,
             "read_io_latency_ms": {
                 "minimum_value": 0.05,
                 "low": 0.10, "mid": 0.125, "high": 0.17,
diff --git a/service_capacity_modeling/interface.py b/service_capacity_modeling/interface.py
index de6568b..f6657a0 100644
--- a/service_capacity_modeling/interface.py
+++ b/service_capacity_modeling/interface.py
@@ -555,11 +555,11 @@ class DataShape(ExcludeUnsetModel):
 
     # How much fixed memory must be provisioned per instance for the
     # application (e.g. for process heap memory)
-    reserved_instance_app_mem_gib: int = 2
+    reserved_instance_app_mem_gib: float = 2
 
     # How much fixed memory must be provisioned per instance for the
     # system (e.g. for kernel and other system processes)
-    reserved_instance_system_mem_gib: int = 1
+    reserved_instance_system_mem_gib: float = 1
 
     # How durable does this dataset need to be. We want to provision
     # sufficient replication and backups of data to achieve the target
diff --git a/setup.py b/setup.py
index 48fc70e..5f33c4f 100644
--- a/setup.py
+++ b/setup.py
@@ -9,7 +9,7 @@
     description="Contains utilities for modeling database capacity on a cloud",
     packages=setuptools.find_packages(exclude=("tests*", "notebooks*")),
     install_requires=[
-        "pydantic",
+        "pydantic>=1.0,<2.0",
         "scipy",
         "numpy",
         'importlib_resources; python_version < "3.7"',
diff --git a/tests/netflix/test_crdb.py b/tests/netflix/test_crdb.py
index 17aa7a9..b226fca 100644
--- a/tests/netflix/test_crdb.py
+++ b/tests/netflix/test_crdb.py
@@ -1,32 +1,35 @@
 from service_capacity_modeling.capacity_planner import planner
+from service_capacity_modeling.hardware import shapes
 from service_capacity_modeling.interface import CapacityDesires
 from service_capacity_modeling.interface import DataShape
+from service_capacity_modeling.interface import FixedInterval
 from service_capacity_modeling.interface import Interval
 from service_capacity_modeling.interface import QueryPattern
+from service_capacity_modeling.models.common import working_set_from_drive_and_slo
+from service_capacity_modeling.models.org.netflix import nflx_cockroachdb_capacity_model
+from service_capacity_modeling.stats import dist_for_interval
 
-
-def test_crdb_basic():
-    basic = CapacityDesires(
-        service_tier=1,
-        query_pattern=QueryPattern(
-            estimated_read_per_second=Interval(
-                low=100, mid=1000, high=10000, confidence=0.98
-            ),
-            estimated_write_per_second=Interval(
-                low=100, mid=1000, high=10000, confidence=0.98
-            ),
+simple_desire = CapacityDesires(
+    service_tier=1,
+    query_pattern=QueryPattern(
+        estimated_read_per_second=Interval(
+            low=100, mid=1000, high=10000, confidence=0.98
         ),
-        data_shape=DataShape(
-            estimated_state_size_gib=Interval(
-                low=10, mid=100, high=1000, confidence=0.98
-            ),
+        estimated_write_per_second=Interval(
+            low=100, mid=1000, high=10000, confidence=0.98
         ),
-    )
+    ),
+    data_shape=DataShape(
+        estimated_state_size_gib=Interval(low=10, mid=100, high=1000, confidence=0.98),
+    ),
+)
 
+
+def test_crdb_simple():
     plan = planner.plan(
         model_name="org.netflix.cockroachdb",
         region="us-east-1",
-        desires=basic,
+        desires=simple_desire,
     )
 
     lr = plan.least_regret[0]
@@ -44,6 +47,38 @@ def test_crdb_basic():
     assert lr_cluster.count * lr_cluster.instance.cpu >= 4
 
 
+def test_crdb_working_set():
+    ephem = shapes.region("us-east-1").instances["i4i.xlarge"].drive
+    ebs = shapes.region("us-east-1").drives["gp3"]
+    super_slow_drive = ebs.copy(deep=True)
+    # Simulate a very slow drive
+    super_slow_drive.name = "slow"
+    super_slow_drive.read_io_latency_ms = FixedInterval(
+        low=5, mid=8, high=20, confidence=0.9
+    )
+
+    latency_sensitive = nflx_cockroachdb_capacity_model.default_desires(
+        simple_desire, {}
+    )
+    results = {}
+    for drive in (ephem, ebs, super_slow_drive):
+        working_set = working_set_from_drive_and_slo(
+            drive_read_latency_dist=dist_for_interval(drive.read_io_latency_ms),
+            read_slo_latency_dist=dist_for_interval(
+                latency_sensitive.query_pattern.read_latency_slo_ms
+            ),
+            estimated_working_set=None,
+            # CRDB has looser latency SLOs but we still want a lot of the data
+            # hot in cache. Target the 95th percentile of disk latency to
+            # keep in RAM.
+            target_percentile=0.95,
+        ).mid
+        results[drive.name] = working_set
+    assert results["ephem"] < 0.05
+    assert results["gp3"] < 0.05
+    assert results["slow"] > 0.5
+
+
 def test_crdb_footprint():
     space = CapacityDesires(
         service_tier=1,
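
Reviewer note: for a back-of-the-envelope feel for what test_crdb_working_set asserts, the sketch below reproduces the shape of the calculation with plain scipy. It is an illustrative approximation only: fraction_needing_ram, the lognormal shape parameter, the drive mid latencies, and the ~2ms SLO are assumptions for this sketch and are not taken from working_set_from_drive_and_slo itself. The direction matches the new asserts: the faster the drive, the smaller the fraction of data that must stay hot in RAM to meet the read SLO.

# Reviewer sketch (not part of the patch): rough approximation of the
# working-set idea exercised by test_crdb_working_set. All numbers and the
# lognormal shapes below are assumptions, not values from the library.
from scipy import stats


def fraction_needing_ram(drive_mid_ms, slo_mid_ms, target_percentile=0.95):
    """Hypothetical helper: share of reads whose disk latency would exceed the
    latency the SLO allows at the target percentile; that share is the data we
    would rather keep hot in RAM."""
    drive = stats.lognorm(s=0.5, scale=drive_mid_ms)  # assumed latency shape
    slo = stats.lognorm(s=0.5, scale=slo_mid_ms)      # assumed SLO shape
    allowed_ms = slo.ppf(target_percentile)           # latency budget at p95
    return float(drive.sf(allowed_ms))                # P(disk read > budget)


if __name__ == "__main__":
    # Assumed mid latencies: fast ephemeral NVMe, gp3-like EBS, and the
    # deliberately slow drive from the test, against an assumed ~2ms read SLO.
    for name, mid_ms in (("ephem", 0.125), ("gp3", 0.6), ("slow", 8.0)):
        print(name, round(fraction_needing_ram(mid_ms, slo_mid_ms=2.0), 3))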