In [None]:
import pprint
from pydantic import BaseModel

def mprint(x):
    """Pretty-print ``x`` with :mod:`pprint`.

    Pydantic models are converted to a dict first (``exclude_unset=True``,
    which per pydantic leaves out fields that were never explicitly set) and
    printed in their own key order; everything else is handed to
    ``pprint.pprint`` unchanged.
    """
    if not isinstance(x, BaseModel):
        pprint.pprint(x)
        return

    as_dict = x.dict(exclude_unset=True)
    # sort_dicts=False keeps the keys in the order the dict provides them.
    pprint.pprint(as_dict, sort_dicts=False)

In [None]:
from service_capacity_modeling.interface import CapacityDesires
from service_capacity_modeling.interface import FixedInterval, Interval
from service_capacity_modeling.interface import QueryPattern, DataShape

# Traffic: we are not sure exactly how much QPS we will do, but we think
# around 10,000 reads and 10,000 writes per second.
expected_traffic = QueryPattern(
    estimated_read_per_second=Interval(
        low=1_000, mid=10_000, high=100_000, confidence=0.98
    ),
    estimated_write_per_second=Interval(
        low=1_000, mid=10_000, high=100_000, confidence=0.98
    ),
)

# Data: not sure how much, but we think it'll be around 100 GiB.
expected_data = DataShape(
    estimated_state_size_gib=Interval(low=10, mid=100, high=1_000, confidence=0.98),
)

desires = CapacityDesires(
    # This service is critical to the business
    service_tier=1,
    query_pattern=expected_traffic,
    data_shape=expected_data,
)

In [None]:
from service_capacity_modeling.capacity_planner import planner
from service_capacity_modeling.models.org import netflix

# Register the Netflix capacity models with the planner
planner.register_group(netflix.models)

# Plan Cassandra in us-east-1 for the desires defined above. explain=True is
# requested because the following cells read plan.explanation.
plan = planner.plan(
    desires=desires,
    model_name="org.netflix.cassandra",
    region="us-east-1",
    simulations=1024,
    explain=True,
)

In [None]:
# Pull the Cassandra-specific pieces out of the plan's explanation:
# `worlds` is iterated below as (cluster, desire, regret) triples and
# `defaults` is the desires object recorded for that model.
explanation = plan.explanation
worlds = explanation.regret_clusters_by_model["org.netflix.cassandra"]
defaults = explanation.desires_by_model["org.netflix.cassandra"]

def summarize(cluster, regret):
    """Print a one-line summary of a candidate cluster and its regret.

    The line shows the node count (number of zonal entries times the first
    entry's ``count``), the first entry's instance name, the total annual
    cost and the given regret. Nothing is returned.
    """
    candidate = cluster.candidate_clusters
    cost = candidate.total_annual_cost
    zones = candidate.zonal
    # Only the first zonal entry is inspected; its count and instance are
    # taken as representative of every zone.
    first_zone = zones[0]
    count = len(zones) * first_zone.count
    instance = first_zone.instance.name
    print(f"{count:>3} {instance:>10} costing {cost} -> {regret}")

# Track the lowest- and highest-regret worlds as (regret, desire, cluster)
# tuples. The sentinels below are replaced by the first world that beats them.
min_regret = (float('inf'), None, None)
max_regret = (0, None, None)

for cluster, desire, regret in worlds:
    summarize(cluster, regret)
    if regret < min_regret[0]:
        min_regret = (regret, desire, cluster)
    if regret > max_regret[0]:
        max_regret = (regret, desire, cluster)

print("Minimum Regret Choice:")
# summarize() prints its own line and returns None, so call it directly
# instead of pretty-printing its return value (which emitted a stray "None").
# The cluster slot is still None only when `worlds` was empty.
if min_regret[2] is not None:
    summarize(min_regret[2], min_regret[0])

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from service_capacity_modeling.stats import dist_for_interval

def _plot_estimate(ax, xs, dist, expected, sampled, tag, curve_label, xlabel):
    """Draw one estimate's density curve plus its expected and sampled mid-points."""
    ax.plot(xs, dist.pdf(xs), 'b-', lw=2, label=curve_label)
    ax.axvline(x=expected.mid, color='k', linestyle='--', label=f"{tag}-avg")
    ax.axvline(x=sampled.mid, color='r', linestyle='-', label=f"{tag}-world")
    ax.set_xlabel(xlabel)
    ax.legend()


def plot_desires(desires: CapacityDesires, concrete_desire: CapacityDesires):
    """Plot the estimate distributions of ``desires`` against one concrete world.

    Five stacked panels are drawn: writes/s, reads/s, mean read size, mean
    write size and state size. Each panel shows the density returned by
    ``dist_for_interval`` for the estimate, a dashed black line at the
    estimate's ``mid`` and a solid red line at the ``mid`` of the same field
    in ``concrete_desire``.

    Args:
        desires: Desires whose interval estimates define the distributions.
        concrete_desire: One sampled world to mark on each distribution.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    # Writes
    wps = desires.query_pattern.estimated_write_per_second
    wps_dist = dist_for_interval(wps)
    world_wps = concrete_desire.query_pattern.estimated_write_per_second

    w_size = desires.query_pattern.estimated_mean_write_size_bytes
    w_size_dist = dist_for_interval(w_size)
    world_w_size = concrete_desire.query_pattern.estimated_mean_write_size_bytes

    # Reads
    rps = desires.query_pattern.estimated_read_per_second
    world_rps = concrete_desire.query_pattern.estimated_read_per_second
    rps_dist = dist_for_interval(rps)

    r_size = desires.query_pattern.estimated_mean_read_size_bytes
    r_size_dist = dist_for_interval(r_size)
    world_r_size = concrete_desire.query_pattern.estimated_mean_read_size_bytes

    # Space
    space = desires.data_shape.estimated_state_size_gib
    space_dist = dist_for_interval(space)
    world_space = concrete_desire.data_shape.estimated_state_size_gib

    # Echo the raw mid-points (and the world's write-size value) as text.
    print(wps.mid, world_wps.mid, rps.mid, world_rps.mid, world_w_size)
    fig, axs = plt.subplots(5, figsize=(15, 20))

    # Fixed x-ranges for the three kinds of axis; they are not derived from
    # the intervals, so estimates outside these ranges are clipped.
    qps_x = np.linspace(500, 50_000, 10000)
    size_x = np.linspace(16, 2048, 10000)
    space_x = np.linspace(10, 1_000, 10000)

    # One row per panel, top to bottom. The legend labels now match the
    # quantity each panel actually shows.
    panels = [
        (qps_x, wps_dist, wps, world_wps, "wps", "WPS", "Writes (1/s)"),
        (qps_x, rps_dist, rps, world_rps, "rps", "RPS", "Reads (1/s)"),
        (size_x, r_size_dist, r_size, world_r_size, "rsize", "RS", "Read Size (bytes)"),
        (size_x, w_size_dist, w_size, world_w_size, "wsize", "WS", "Write Size (bytes)"),
        (space_x, space_dist, space, world_space, "space", "Space", "Data Size (GiB)"),
    ]
    for ax, (xs, dist, expected, sampled, tag, curve_label, xlabel) in zip(axs, panels):
        _plot_estimate(ax, xs, dist, expected, sampled, tag, curve_label, xlabel)

    plt.show()
    
# Compare the model's default desires with the desire of the first world
# (element 1 of the first entry in `worlds`).
plot_desires(desires=defaults, concrete_desire=worlds[0][1])