In [1]:
from service_capacity_modeling.hardware import shapes
from pprint import pprint

In [2]:
# Sanity check: echo the imported `shapes` object so its repr becomes the cell output.
shapes

<service_capacity_modeling.hardware.HardwareShapes at 0x7f77a81594c0>

In [3]:
# Look up the io2 drive definition for us-east-1 once and reuse it below.
d = shapes.hardware.regions['us-east-1'].drives['io2']

# Price a hypothetical volume on a copy so the attribute assignments below land
# on `de`, not on the catalog entry `d`. `model_copy()` is the pydantic v2
# replacement for the deprecated `.copy()`; this notebook already relies on the
# v2 `model_dump()` API. The copy is shallow, which is enough here because only
# top-level attributes are reassigned.
de = d.model_copy()
de.size_gib = 100
de.read_io_per_s = 33000
print(de.annual_cost)

# Show the unmodified catalog entry as the cell's last expression rather than
# calling `pprint(...)`: later cells run `import pprint`, which rebinds that
# name to the module, so re-running this cell after them would raise
# "TypeError: 'module' object is not callable".
d.model_dump()

Loading shape=aws from /home/jolynch/pg/service-capacity-modeling/service_capacity_modeling/hardware/profiles/shapes/aws
Loading /home/jolynch/pg/service-capacity-modeling/service_capacity_modeling/hardware/profiles/pricing/aws/3yr-reserved.json
25662.0
{'annual_cost_per_gib': 1.5,
 'annual_cost_per_read_io': [(32000.0, 0.78),
                             (64000.0, 0.552),
                             (160000.0, 0.384),
                             (256000.0, 0.384)],
 'annual_cost_per_write_io': [(32000.0, 0.78),
                              (64000.0, 0.552),
                              (160000.0, 0.384),
                              (256000.0, 0.384)],
 'compatible_families': [],
 'lifecycle': <Lifecycle.alpha: 'alpha'>,
 'max_scale_size_gib': 16384,
 'name': 'io2',
 'read_io_latency_ms': {'allow_simulate': False,
                        'confidence': 0.9,
                        'high': 1.2,
                        'low': 0.5,
                        'maximum_value': 2.0,
      

In [4]:
from service_capacity_modeling.interface import CapacityDesires
from service_capacity_modeling.interface import FixedInterval, Interval
from service_capacity_modeling.interface import QueryPattern, DataShape

# Workload description that the later cells pass to `planner.plan(desires=...)`:
# few reads, many writes, and a very large state size.
db_desires = CapacityDesires(
    # This service is important to the business, not critical (tier 0)
    service_tier=1,
    query_pattern=QueryPattern(
        # Not sure exactly how much QPS we will do, but we think around
        # 100 reads per second (somewhere between 10 and 1,000) and around
        # 250,000 writes per second (somewhere between 125,000 and 500,000).
        estimated_read_per_second=Interval(
            low=10, mid=100, high=1000, confidence=0.9
        ),
        estimated_write_per_second=Interval(
            low=125000, mid=250000, high=500000, confidence=0.9
        ),
    ),
    # Not sure how much data, but we think around 876,000 GiB
    # (somewhere between 400,000 and 1,752,000 GiB).
    data_shape=DataShape(
        estimated_state_size_gib=Interval(low=400000, mid=876000, high=1752000, confidence=0.9),
    ),
)

In [5]:
from service_capacity_modeling.capacity_planner import planner
from service_capacity_modeling.models.org import netflix
from service_capacity_modeling.interface import Lifecycle
import pprint

# Load up the Netflix capacity models
planner.register_group(netflix.models)

# Plan an Elasticsearch deployment for the workload described by `db_desires`.
cap_plan = planner.plan(
    model_name="org.netflix.elasticsearch",
    region="us-east-1",
    desires=db_desires,
    # Simulate the possible requirements 256 times
    simulations=256,
    # Request 5 results to be returned
    num_results=5,
    lifecycles=[Lifecycle.alpha, Lifecycle.stable],
    instance_families=["i3en", "r5"],
)

# The range of requirements in hardware resources (CPU, RAM, Disk, etc ...)
requirements = cap_plan.requirements

# The ordered list of least regretful choices for the requirement
least_regret = cap_plan.least_regret

# Show the range of requirements for a single zone
pprint.pprint(requirements.zonal[0].model_dump())

# Show our least regretful choices of hardware in least regret order
# So for example if we can buy the first set of computers we would prefer
# to do that but we might not have availability in that family in which
# case we'd buy the second one.
for rank, candidate in enumerate(least_regret, start=1):
    zonal_clusters = candidate.candidate_clusters.zonal
    # NOTE(review): the message says "zones" but the number is the count of
    # zonal cluster entries -- confirm whether one entry corresponds to one zone.
    print(f"Our #{rank} choice is {len(zonal_clusters)} zones of:")
    # Dump only the first cluster seen for each cluster_type.
    seen = set()
    for cluster in zonal_clusters:
        if cluster.cluster_type in seen:
            continue
        seen.add(cluster.cluster_type)
        pprint.pprint(cluster.model_dump())

{'core_reference_ghz': 2.3,
 'cpu_cores': {'confidence': 0.9,
               'high': 2.0,
               'low': 2.0,
               'maximum_value': 2.0,
               'mid': 2.0,
               'minimum_value': 2.0},
 'disk_gib': {'confidence': 0.9,
              'high': 0.0,
              'low': 0.0,
              'maximum_value': 0.0,
              'mid': 0.0,
              'minimum_value': 0.0},
 'mem_gib': {'confidence': 0.9,
             'high': 24.0,
             'low': 24.0,
             'maximum_value': 24.0,
             'mid': 24.0,
             'minimum_value': 24.0},
 'network_mbps': {'confidence': 0.9,
                  'high': 0.0,
                  'low': 0.0,
                  'maximum_value': 0.0,
                  'mid': 0.0,
                  'minimum_value': 0.0},
 'requirement_type': 'elasticsearch-master-zonal'}
Our #1 choice is 6 zones of:
{'annual_cost': 830.0,
 'attached_drives': [],
 'cluster_type': 'elasticsearch-master',
 'count': 1,
 'instance': {'annual_

In [6]:
from service_capacity_modeling.capacity_planner import planner
from service_capacity_modeling.models.org import netflix
from service_capacity_modeling.interface import Lifecycle
import pprint

# Load up the Netflix capacity models
planner.register_group(netflix.models)

# Same plan as the previous cell, but with instance_families=["r5"] and
# drives=["io2"].
cap_plan = planner.plan(
    model_name="org.netflix.elasticsearch",
    region="us-east-1",
    desires=db_desires,
    # Simulate the possible requirements 256 times
    simulations=256,
    # Request 5 results to be returned
    num_results=5,
    lifecycles=[Lifecycle.alpha, Lifecycle.stable],
    instance_families=["r5"],
    drives=["io2"],
)

# The range of requirements in hardware resources (CPU, RAM, Disk, etc ...)
requirements = cap_plan.requirements

# The ordered list of least regretful choices for the requirement
least_regret = cap_plan.least_regret

# Show the range of requirements for a single zone
pprint.pprint(requirements.zonal[0].model_dump())

# Show our least regretful choices of hardware in least regret order
# So for example if we can buy the first set of computers we would prefer
# to do that but we might not have availability in that family in which
# case we'd buy the second one.
for rank, candidate in enumerate(least_regret, start=1):
    zonal_clusters = candidate.candidate_clusters.zonal
    # NOTE(review): the message says "zones" but the number is the count of
    # zonal cluster entries -- confirm whether one entry corresponds to one zone.
    print(f"Our #{rank} choice is {len(zonal_clusters)} zones of:")
    # Dump only the first cluster seen for each cluster_type.
    seen = set()
    for cluster in zonal_clusters:
        if cluster.cluster_type in seen:
            continue
        seen.add(cluster.cluster_type)
        pprint.pprint(cluster.model_dump())

{'core_reference_ghz': 2.3,
 'cpu_cores': {'confidence': 0.9,
               'high': 2.0,
               'low': 2.0,
               'maximum_value': 2.0,
               'mid': 2.0,
               'minimum_value': 2.0},
 'disk_gib': {'confidence': 0.9,
              'high': 0.0,
              'low': 0.0,
              'maximum_value': 0.0,
              'mid': 0.0,
              'minimum_value': 0.0},
 'mem_gib': {'confidence': 0.9,
             'high': 24.0,
             'low': 24.0,
             'maximum_value': 24.0,
             'mid': 24.0,
             'minimum_value': 24.0},
 'network_mbps': {'confidence': 0.9,
                  'high': 0.0,
                  'low': 0.0,
                  'maximum_value': 0.0,
                  'mid': 0.0,
                  'minimum_value': 0.0},
 'requirement_type': 'elasticsearch-master-zonal'}
Our #1 choice is 6 zones of:
{'annual_cost': 830.0,
 'attached_drives': [],
 'cluster_type': 'elasticsearch-master',
 'count': 1,
 'instance': {'annual_