### 🛠️ 1. Initialize notebook variables

Configures everything that's needed for deployment. 

**Modify entries under _1) User-defined parameters_ and _3) Define the APIs and their operations and policies_**.

In [None]:
import utils
from apimtypes import *

# 1) User-defined parameters (change these as needed)
# Location handed to the notebook helper together with the resource group name.
rg_location = 'eastus2'
# Passed to utils.get_infra_rg_name() below when deriving the resource group name.
index = 1
# Infrastructure this sample is deployed onto; must be one of the infrastructures listed for nb_helper below.
deployment = INFRASTRUCTURE.APIM_ACA
# [ENTER DESCRIPTIVE TAG(S)]
tags = ['load-balancing']
# OPTIONAL: ENTER A PREFIX FOR THE APIS TO REDUCE COLLISION POTENTIAL WITH OTHER SAMPLES
api_prefix = 'lb-'

# 2) Service-defined parameters (please do not change these)
sample_folder = 'load-balancing'
rg_name = utils.get_infra_rg_name(deployment, index)
nb_helper = utils.NotebookHelper(
    sample_folder,
    rg_name,
    rg_location,
    deployment,
    [INFRASTRUCTURE.AFD_APIM_PE, INFRASTRUCTURE.APIM_ACA],
)

# 3) Define the APIs and their operations and policies

# Policies - one base policy file is read once and then formatted per backend pool,
# varying only the retry count and the backend (pool) id.
pol_aca_backend_pool_load_balancing = utils.read_policy_xml(
    'aca-backend-pool-load-balancing.xml',
    sample_name = sample_folder,
)
pol_aca_backend_pool_prioritized = pol_aca_backend_pool_load_balancing.format(
    retry_count = 1,
    backend_id = 'aca-backend-pool-web-api-429-prioritized',
)
pol_aca_backend_pool_prioritized_and_weighted = pol_aca_backend_pool_load_balancing.format(
    retry_count = 2,
    backend_id = 'aca-backend-pool-web-api-429-prioritized-and-weighted',
)
pol_aca_backend_pool_weighted_equal = pol_aca_backend_pool_load_balancing.format(
    retry_count = 1,
    backend_id = 'aca-backend-pool-web-api-429-weighted-50-50',
)
pol_aca_backend_pool_weighted_unequal = pol_aca_backend_pool_load_balancing.format(
    retry_count = 1,
    backend_id = 'aca-backend-pool-web-api-429-weighted-80-20',
)

# Standard GET Operation (the same operation object is shared by every API below)
get = GET_APIOperation('This is a standard GET')

# ACA Backend Pools - one API per pool; names and paths all carry api_prefix.
apis: List[API] = [
    API(
        f'{api_prefix}prioritized-aca-pool',
        'Prioritized backend pool',
        f'/{api_prefix}prioritized',
        'This is the API for the prioritized backend pool.',
        policyXml = pol_aca_backend_pool_prioritized,
        operations = [get],
        tags = tags,
    ),
    API(
        f'{api_prefix}prioritized-weighted-aca-pool',
        'Prioritized & weighted backend pool',
        f'/{api_prefix}prioritized-weighted',
        'This is the API for the prioritized & weighted backend pool.',
        policyXml = pol_aca_backend_pool_prioritized_and_weighted,
        operations = [get],
        tags = tags,
    ),
    API(
        f'{api_prefix}weighted-equal-aca-pool',
        'Weighted backend pool (equal)',
        f'/{api_prefix}weighted-equal',
        'This is the API for the weighted (equal) backend pool.',
        policyXml = pol_aca_backend_pool_weighted_equal,
        operations = [get],
        tags = tags,
    ),
    API(
        f'{api_prefix}weighted-unequal-aca-pool',
        'Weighted backend pool (unequal)',
        f'/{api_prefix}weighted-unequal',
        'This is the API for the weighted (unequal) backend pool.',
        policyXml = pol_aca_backend_pool_weighted_unequal,
        operations = [get],
        tags = tags,
    ),
]

utils.print_ok('Notebook initialized')

### 🚀 2. Create deployment using Bicep

Creates the Bicep deployment in the previously specified resource group. A Bicep parameters file will be created prior to execution.

In [None]:
import utils

# 1) Define the Bicep parameters with serialized APIs
# Each API object is converted to a plain dict via its to_dict() method before being handed to Bicep.
serialized_apis = [api.to_dict() for api in apis]
bicep_parameters = {'apis': {'value': serialized_apis}}

# 2) Deploy the bicep template
output = nb_helper.deploy_bicep(bicep_parameters)

# Extract the deployment outputs that the following cells rely on.
# NOTE: these names are only assigned when the deployment returned JSON data;
# the completion message below is printed either way.
if output.json_data:
    apim_name = output.get('apimServiceName', 'APIM Service Name')
    apim_gateway_url = output.get('apimResourceGatewayURL', 'APIM API Gateway URL')
    app_insights_name = output.get('applicationInsightsName', 'Application Insights Name')
    apim_apis = output.getJson('apiOutputs', 'APIs')

utils.print_ok('Deployment completed')

### ✅ 3. Verify API Request Success

Assert that the deployment was successful by making simple calls to Azure Front Door or API Management.

In [None]:
import json
import time
import utils
from apimrequests import ApimRequests
from apimtesting import ApimTesting

def zzzs():
    """Announce and then perform a fixed five-second pause between sets of calls.

    The pause is meant to let the backend timeouts reset so that the next set
    of calls starts against a recovered backend pool.
    """
    pause_s = 5
    utils.print_message(f'Waiting for {pause_s} seconds for the backend timeouts to reset before starting the next set of calls', blank_above = True)
    time.sleep(pause_s)

tests = ApimTesting("Load Balancing Sample Tests", sample_folder, deployment)

# Preflight: Check if the infrastructure architecture deployment uses Azure Front Door. If so, assume that APIM is not directly accessible and use the Front Door URL instead.
endpoint_url = utils.test_url_preflight_check(deployment, rg_name, apim_gateway_url)

# Build the request paths from api_prefix so they keep matching the API paths when the prefix is changed.
path_prioritized          = f'/{api_prefix}prioritized'
path_prioritized_weighted = f'/{api_prefix}prioritized-weighted'
path_weighted_equal       = f'/{api_prefix}weighted-equal'
path_weighted_unequal     = f'/{api_prefix}weighted-unequal'

# NOTE(review): the apim_apis indices used below assume that the 'apiOutputs' deployment output preserves the
# order in which the APIs were defined (0 = prioritized, 1 = prioritized & weighted, 2 = weighted equal,
# 3 = weighted unequal) - confirm against the Bicep template.

# Quick test to verify load balancing API is accessible.
# All requests go to endpoint_url (the preflight result) rather than straight to apim_gateway_url.
reqs = ApimRequests(endpoint_url, apim_apis[0]['subscriptionPrimaryKey'])
output = reqs.singleGet(path_prioritized, msg = 'Quick test of load balancing API')
# We expect to see a priority 1 backend (at index 0) with a count of 1 as this is the first request.
quick_test_result = json.loads(output)
tests.verify(quick_test_result['index'], 0)
tests.verify(quick_test_result['count'], 1)

# The following test assertions are rather basic. The real verification comes in the charts in the subsequent cell.

# 1) Prioritized API calls
utils.print_message('1/5: Starting API calls for prioritized distribution')
api_results_prioritized = reqs.multiGet(path_prioritized, runs = 15, msg = 'Calling prioritized APIs')
tests.verify(len(api_results_prioritized), 15)

# 2) Weighted API calls (50/50)
zzzs()
utils.print_message('2/5: Starting API calls for weighted distribution (50/50)', blank_above = True)
reqs = ApimRequests(endpoint_url, apim_apis[2]['subscriptionPrimaryKey'])
api_results_weighted_equal = reqs.multiGet(path_weighted_equal, runs = 15, msg = 'Calling weighted (equal) APIs')
tests.verify(len(api_results_weighted_equal), 15)

# 3) Weighted API calls (80/20)
zzzs()
utils.print_message('3/5: Starting API calls for weighted distribution (80/20)', blank_above = True)
reqs = ApimRequests(endpoint_url, apim_apis[3]['subscriptionPrimaryKey'])
api_results_weighted_unequal = reqs.multiGet(path_weighted_unequal, runs = 15, msg = 'Calling weighted (unequal) APIs')
tests.verify(len(api_results_weighted_unequal), 15)

# 4) Prioritized & weighted API calls
zzzs()
utils.print_message('4/5: Starting API calls for prioritized & weighted distribution', blank_above = True)
reqs = ApimRequests(endpoint_url, apim_apis[1]['subscriptionPrimaryKey'])
api_results_prioritized_and_weighted = reqs.multiGet(path_prioritized_weighted, runs = 20, msg = 'Calling prioritized & weighted APIs')
tests.verify(len(api_results_prioritized_and_weighted), 20)

# 5) Prioritized & weighted API calls (500ms sleep) - reuses the request object (and subscription key) from step 4
zzzs()
utils.print_message('5/5: Starting API calls for prioritized & weighted distribution (500ms sleep)', blank_above = True)
api_results_prioritized_and_weighted_sleep = reqs.multiGet(path_prioritized_weighted, runs = 20, msg = 'Calling prioritized & weighted APIs', sleepMs = 500)
tests.verify(len(api_results_prioritized_and_weighted_sleep), 20)

tests.print_summary()

utils.print_ok('All done!')

### 🔍 Analyze Load Balancing results

In the prioritized & weighted scenario, the priority 1 backend is used until TPM exhaustion sets in; requests are then distributed nearly equally across the two priority 2 backends, which have 50/50 weights.

Please note that the first request of the lab can take a bit longer and should be discounted in terms of duration.

In [None]:
import charts

# Axis labels and the closing footnote are identical for all five charts.
run_axis_label = 'Run #'
duration_axis_label = 'Response Time (ms)'
outlier_footnote = 'The average response time is calculated excluding statistical outliers above the 95th percentile (the first request usually takes longer).'

# Chart 1: prioritized pool
charts.BarChart(
    api_results = api_results_prioritized,
    title = 'Prioritized Distribution',
    x_label = run_axis_label,
    y_label = duration_axis_label,
    fig_text = (
        'The chart shows a total of 15 requests across a prioritized backend pool with two backends.\n'
        'Each backend, in sequence, was able to serve five requests for a total of ten requests until the pool became unhealthy (all backends were exhausted).\n'
        + outlier_footnote
    )
).plot()

# Chart 2: equally-weighted pool
charts.BarChart(
    api_results = api_results_weighted_equal,
    title = 'Weighted Distribution (50/50)',
    x_label = run_axis_label,
    y_label = duration_axis_label,
    fig_text = (
        'The chart shows a total of 15 requests across an equally-weighted backend pool with two backends.\n'
        'Each backend, alternatingly, was able to serve five requests for a total of ten requests until the pool became unhealthy (all backends were exhausted).\n'
        + outlier_footnote
    )
).plot()

# Chart 3: unequally-weighted pool
charts.BarChart(
    api_results = api_results_weighted_unequal,
    title = 'Weighted Distribution (80/20)',
    x_label = run_axis_label,
    y_label = duration_axis_label,
    fig_text = (
        'The chart shows a total of 15 requests across an unequally-weighted backend pool with two backends.\n'
        'Each backend was able to serve requests for a total of ten requests until the pool became unhealthy (all backends were exhausted).\n'
        + outlier_footnote
    )
).plot()

# Chart 4: prioritized & weighted pool, calls issued back to back
charts.BarChart(
    api_results = api_results_prioritized_and_weighted,
    title = 'Prioritized & Weighted Distribution',
    x_label = run_axis_label,
    y_label = duration_axis_label,
    fig_text = (
        'The chart shows a total of 20 requests across a prioritized and equally-weighted backend pool with three backends.\n'
        'The first backend is set up as the only priority 1 backend. It serves its five requests before the second and third backends - each part of\n'
        'priority 2 and weight equally - commence taking requests.\n'
        + outlier_footnote
    )
).plot()

# Chart 5: same pool as chart 4, but with a 500ms sleep after each request
charts.BarChart(
    api_results = api_results_prioritized_and_weighted_sleep,
    title = 'Prioritized & Weighted Distribution (500ms sleep)',
    x_label = run_axis_label,
    y_label = duration_axis_label,
    fig_text = (
        'The chart shows a total of 20 requests across a prioritized and equally-weighted backend pool with three backends (same as previously).\n'
        'The key difference to the previous chart is that each request is now followed by a 500ms sleep, which allows timed-out backends to recover.\n'
        + outlier_footnote
    )
).plot()
