In [1]:
import pandas as pd
import numpy as np
import warnings
import json
import re
import requests
import pprint
import operator
import copy

# Silence every warning category for the rest of the session so cell output stays compact.
# NOTE(review): this also hides deprecation warnings from the imported libraries — consider a narrower filter.
warnings.filterwarnings('ignore')
# Never elide columns when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

In [2]:
# General info of the placement: where to reach the monitoring stack and
# which cluster / namespace the planner operates on.
vm_external_ip = "34.141.8.255" # External IP of the host machine the data is fetched from
kiali_port = 32002
prometheus_port = 32003

namespace = "default" # Namespace of the application

cluster_id = "onlineboutique" # Cluster name

cluster_pool = "default-pool" # Node pool

project_id = "single-verve-297917" # Project ID

zone = "europe-west3-b" # Project zone

vm_threshold_per_pod = 0.1 # Threshold for reserving sufficient resources for each pod

# Command that connects kubectl to the cluster. It is derived from the values
# above so that editing cluster_id / zone / project_id cannot leave it stale.
connection_command = "gcloud container clusters get-credentials {} --zone {} --project {}".format(
    cluster_id, zone, project_id
)

In [21]:
# Information metrics from Prometheus about the current nodes and pods

# Prometheus instant-query endpoint
url_prometheus = "http://"+vm_external_ip+":"+str(prometheus_port)+"/api/v1/query"

# Reference PromQL expressions (documentation only, not executed in this cell):
# RAM USAGE PERCENT
# (1 - (node_memory_MemAvailable_bytes / (node_memory_MemTotal_bytes)))* 100
# CPU USAGE PERCENT
# (1 - avg(rate(node_cpu_seconds_total{mode="idle"}[30m])) by (instance)) * 100
#
# PODS CPU USAGE PERCENT (EXCEPT NODE-EXPORTERS)
# avg(rate(container_cpu_usage_seconds_total{pod!~"billowing.*", namespace='default'}[30m])) by (pod) *100
#
# PODS MEMORY USAGE (EXCEPT NODE-EXPORTERS)
# avg(container_memory_max_usage_bytes{namespace="default", pod!~"billowing.*"}) by(pod)

# Queries that are actually sent to Prometheus
query_node_cpu = {"query":"avg(rate(node_cpu_seconds_total{mode='idle'}[30m])) by (instance)"}
query_node_ram = {"query":"node_memory_MemAvailable_bytes"}

# HTTP headers sent with every Prometheus request
headers_prometheus = {
    'cache-control': "no-cache"
}

# Fetch the available memory (bytes) reported for every node
response = requests.request("GET", url_prometheus, headers=headers_prometheus, params=query_node_ram)
response_status = response.status_code
# Fail here with a clear HTTP error instead of a KeyError further down
response.raise_for_status()
result=json.loads(response.text)

number_of_hosts = len(result["data"]["result"])
host_machines = []        # node names, in the order Prometheus returned them
node_available_ram = {}   # node name -> available bytes, formatted with one decimal
print("Number of hosts : " + str(number_of_hosts))
for sample in result["data"]["result"]:
    node_name = sample["metric"]["kubernetes_node"]
    host_machines.append(node_name)
    node_available_ram[node_name] = format(float(sample["value"][1]), '.1f')

# The values are strings, so compare them numerically; a plain max() would
# order them lexicographically (e.g. '9.0' > '7408697344.0').
max(node_available_ram.values(), key=float, default=None)

Number of hosts : 4


'7408697344.0'

In [4]:
# Fetch the per-instance average idle-CPU rate (query_node_cpu) from Prometheus
response = requests.request("GET", url_prometheus, headers=headers_prometheus, params=query_node_cpu)
response_status = response.status_code
result = json.loads(response.text)

# NOTE(review): samples are paired with host_machines purely by position —
# confirm that both queries return the nodes in the same order.
cpu_samples = result["data"]["result"]
node_available_cpu = {
    host_machines[idx]: format(float(cpu_samples[idx]["value"][1]), '.4f')
    for idx in range(number_of_hosts)
}
node_available_cpu

{'gke-onlineboutique-default-pool-db17c72b-phxm': '0.6418',
 'gke-onlineboutique-default-pool-db17c72b-4hwg': '0.6869',
 'gke-onlineboutique-default-pool-db17c72b-jl1c': '0.6464',
 'gke-onlineboutique-default-pool-db17c72b-tds3': '0.6727'}

In [5]:
# Sum of the CPU cores requested by containers, grouped per node
app_request = {"query":"sum(kube_pod_container_resource_requests_cpu_cores) by (node)"}

response = requests.request("GET", url_prometheus, headers=headers_prometheus, params=app_request)
response_status = response.status_code
result = json.loads(response.text)

# node name -> requested cores, formatted with three decimals
node_request_cpu = {
    sample['metric']['node']: format(float(sample['value'][1]), '.3f')
    for sample in result['data']['result']
}
node_request_cpu

{'gke-onlineboutique-default-pool-db17c72b-jl1c': '0.731',
 'gke-onlineboutique-default-pool-db17c72b-phxm': '1.343',
 'gke-onlineboutique-default-pool-db17c72b-4hwg': '0.893',
 'gke-onlineboutique-default-pool-db17c72b-tds3': '0.831'}

In [6]:
# Sum of the memory bytes requested by containers, grouped per node
app_request = {"query":"sum(kube_pod_container_resource_requests_memory_bytes) by (node)"}

response = requests.request("GET", url_prometheus, headers=headers_prometheus, params=app_request)
response_status = response.status_code
result = json.loads(response.text)

# node name -> requested bytes, formatted with three decimals
node_request_ram = {
    sample['metric']['node']: format(float(sample['value'][1]), '.3f')
    for sample in result['data']['result']
}
node_request_ram

{'gke-onlineboutique-default-pool-db17c72b-tds3': '705691648.000',
 'gke-onlineboutique-default-pool-db17c72b-jl1c': '1063256064.000',
 'gke-onlineboutique-default-pool-db17c72b-phxm': '3210739712.000',
 'gke-onlineboutique-default-pool-db17c72b-4hwg': '851443712.000'}

In [7]:
# Sum of the memory bytes requested per pod in the 'default' namespace
app_request = {"query":"sum(kube_pod_container_resource_requests_memory_bytes{namespace='default'}) by (pod)"}

response = requests.request("GET", url_prometheus, headers=headers_prometheus, params=app_request)
response_status = response.status_code
result = json.loads(response.text)

# pod name -> requested bytes, formatted with three decimals
pod_request_ram = {
    sample['metric']['pod']: format(float(sample['value'][1]), '.3f')
    for sample in result['data']['result']
}
pod_request_ram

{'productcatalogservice-7fcf4f8cc-jf2jg': '109051904.000',
 'checkoutservice-784bfc794f-qch75': '109051904.000',
 'currencyservice-5898885559-q4xh8': '109051904.000',
 'emailservice-6bd8b47657-ltptz': '109051904.000',
 'loadgenerator-84cbcd768c-5ktn6': '310378496.000',
 'frontend-764c5c755f-6f52w': '109051904.000',
 'recommendationservice-79f5f4bbf5-wqwff': '272629760.000',
 'cartservice-d7db78c66-27hmx': '109051904.000',
 'adservice-7cbc9bd9-fwqz7': '230686720.000',
 'shippingservice-b5879cdbf-5mlx7': '109051904.000',
 'paymentservice-6c676df669-z7s4d': '109051904.000',
 'redis-cart-74594bd569-6lw6x': '251658240.000'}

In [8]:
# Sum of the CPU cores requested per pod in the 'default' namespace
app_request = {"query":"sum(kube_pod_container_resource_requests_cpu_cores{namespace='default'}) by (pod)"}

response = requests.request("GET", url_prometheus, headers=headers_prometheus, params=app_request)
response_status = response.status_code
result = json.loads(response.text)

# pod name -> requested cores, formatted with three decimals
pod_request_cpu = {
    sample['metric']['pod']: format(float(sample['value'][1]), '.3f')
    for sample in result['data']['result']
}
pod_request_cpu

{'currencyservice-5898885559-q4xh8': '0.110',
 'paymentservice-6c676df669-z7s4d': '0.110',
 'redis-cart-74594bd569-6lw6x': '0.080',
 'loadgenerator-84cbcd768c-5ktn6': '0.310',
 'shippingservice-b5879cdbf-5mlx7': '0.110',
 'checkoutservice-784bfc794f-qch75': '0.110',
 'frontend-764c5c755f-6f52w': '0.110',
 'recommendationservice-79f5f4bbf5-wqwff': '0.110',
 'cartservice-d7db78c66-27hmx': '0.210',
 'productcatalogservice-7fcf4f8cc-jf2jg': '0.110',
 'adservice-7cbc9bd9-fwqz7': '0.210',
 'emailservice-6bd8b47657-ltptz': '0.110'}

In [9]:
# Allocatable memory reported for every node
app_request = {"query":"kube_node_status_allocatable{resource='memory'}"}

response = requests.request("GET", url_prometheus, headers=headers_prometheus, params=app_request)
response_status = response.status_code
result = json.loads(response.text)

# node name -> allocatable memory, kept as the raw string Prometheus returned
node_allocated_ram = {
    sample['metric']['node']: sample['value'][1]
    for sample in result['data']['result']
}
node_allocated_ram

{'gke-onlineboutique-default-pool-db17c72b-4hwg': '6340206592',
 'gke-onlineboutique-default-pool-db17c72b-jl1c': '6340198400',
 'gke-onlineboutique-default-pool-db17c72b-phxm': '6340206592',
 'gke-onlineboutique-default-pool-db17c72b-tds3': '6340206592'}

In [10]:
# Allocatable CPU reported for every node
app_request = {"query":"kube_node_status_allocatable{resource='cpu'}"}

response = requests.request("GET", url_prometheus, headers=headers_prometheus, params=app_request)
response_status = response.status_code
result = json.loads(response.text)

# node name -> allocatable CPU, kept as the raw string Prometheus returned
node_allocated_cpu = {
    sample['metric']['node']: sample['value'][1]
    for sample in result['data']['result']
}
node_allocated_cpu

{'gke-onlineboutique-default-pool-db17c72b-4hwg': '1.93',
 'gke-onlineboutique-default-pool-db17c72b-jl1c': '1.93',
 'gke-onlineboutique-default-pool-db17c72b-phxm': '1.93',
 'gke-onlineboutique-default-pool-db17c72b-tds3': '1.93'}

In [11]:
# Node names, in the key order of node_allocated_cpu
host_list = list(node_allocated_cpu)
host_list

['gke-onlineboutique-default-pool-db17c72b-4hwg',
 'gke-onlineboutique-default-pool-db17c72b-jl1c',
 'gke-onlineboutique-default-pool-db17c72b-phxm',
 'gke-onlineboutique-default-pool-db17c72b-tds3']

In [12]:
# POD CPU USAGE
deployment_pods = []
pod_usage_cpu = {}
initial_placement = {}
for i in range(number_of_hosts):
    query_pod_cpu = {"query":"avg(rate(container_cpu_usage_seconds_total{kubernetes_io_hostname='"+str(host_machines[i])+"',pod!~'billowing.*', namespace='default'}[30m])) by (pod)"}
    pod_usage_cpu[host_machines[i]] = {}
    
    # cURL command for Pod Cpu Usage
    response = requests.request("GET", url_prometheus, headers=headers_prometheus, params=query_pod_cpu)
    response_status = response.status_code
    result=json.loads(response.text)
    
    initial_placement[host_machines[i]] = []
    service_list = []
    number_of_pods = len(result["data"]["result"])
    for k in range(number_of_pods):
         service_list.append(result["data"]["result"][k]["metric"]["pod"])
         initial_placement[host_machines[i]].append(service_list[k])
         pod_usage_cpu[host_machines[i]][service_list[k]] = format(float(result["data"]["result"][k]["value"][1]), '.4f')
    deployment_pods.append(service_list)
    service_list.clear()
initial_placement

{'gke-onlineboutique-default-pool-db17c72b-phxm': ['frontend-764c5c755f-6f52w',
  'recommendationservice-79f5f4bbf5-wqwff',
  'emailservice-6bd8b47657-ltptz',
  'loadgenerator-84cbcd768c-5ktn6'],
 'gke-onlineboutique-default-pool-db17c72b-4hwg': ['shippingservice-b5879cdbf-5mlx7',
  'adservice-7cbc9bd9-fwqz7'],
 'gke-onlineboutique-default-pool-db17c72b-jl1c': ['currencyservice-5898885559-q4xh8',
  'paymentservice-6c676df669-z7s4d',
  'checkoutservice-784bfc794f-qch75',
  'redis-cart-74594bd569-6lw6x'],
 'gke-onlineboutique-default-pool-db17c72b-tds3': ['productcatalogservice-7fcf4f8cc-jf2jg',
  'cartservice-d7db78c66-27hmx']}

In [13]:
# POD RAM USAGE
pod_usage_ram = {}

for i in range(number_of_hosts):
    query_pod_ram = {"query":"avg(container_memory_max_usage_bytes{instance='"+host_machines[i]+"', namespace='default', pod!~'billowing.*'}) by(pod)"}
    pod_usage_ram[host_machines[i]] = {}
    
    # cURL command for Pod Ram Usage
    response = requests.request("GET", url_prometheus, headers=headers_prometheus, params=query_pod_ram)
    response_status = response.status_code
    result=json.loads(response.text)
    
    number_of_pods = len(result["data"]["result"])
    for k in range(number_of_pods):
         pod = result["data"]["result"][k]["metric"]["pod"]
         pod_usage_ram[host_machines[i]][pod] = format(float(result["data"]["result"][k]["value"][1]), '.1f')
pod_usage_ram

{'gke-onlineboutique-default-pool-db17c72b-phxm': {'emailservice-6bd8b47657-ltptz': '45655040.0',
  'loadgenerator-84cbcd768c-5ktn6': '46078976.0',
  'frontend-764c5c755f-6f52w': '34457600.0',
  'recommendationservice-79f5f4bbf5-wqwff': '46362624.0'},
 'gke-onlineboutique-default-pool-db17c72b-4hwg': {'shippingservice-b5879cdbf-5mlx7': '30121984.0',
  'adservice-7cbc9bd9-fwqz7': '89632768.0'},
 'gke-onlineboutique-default-pool-db17c72b-jl1c': {'checkoutservice-784bfc794f-qch75': '33368064.0',
  'redis-cart-74594bd569-6lw6x': '26094592.0',
  'currencyservice-5898885559-q4xh8': '43339776.0',
  'paymentservice-6c676df669-z7s4d': '38755328.0'},
 'gke-onlineboutique-default-pool-db17c72b-tds3': {'cartservice-d7db78c66-27hmx': '49955840.0',
  'productcatalogservice-7fcf4f8cc-jf2jg': '31676416.0'}}

In [14]:
# Graph integration from Kiali - services and affinities

# Kiali graph endpoint
url_kiali = "http://"+vm_external_ip+":"+str(kiali_port)+"/kiali/api/namespaces/graph"

# The graph type must be "workload"; the duration window can be changed.
query_string_kiali = {"duration":"30m","namespaces":namespace,"graphType":"workload"}

headers_kiali = {'cache-control': "no-cache"}

# Fetch the graph
response = requests.request("GET", url_kiali, headers=headers_kiali, params=query_string_kiali)
response_status = response.status_code
result = json.loads(response.text)

# INFO NOTE: redis-cart does not appear in the Kiali graph. cartservice and
#            redis-cart presumably communicate internally, so the two pods
#            should be kept together and treated as one.

# Map graph node ids to names:
#   services_id        -> nodes of the namespace that carry both "app" and "traffic"
#   unused_services_id -> every other node of the namespace (named by "app" if
#                         present, otherwise by "service")
services_id = {}
unused_services_id = {}
for node in result["elements"]["nodes"]:
    node_data = node["data"]
    if node_data["namespace"] != namespace:
        continue
    node_id = node_data["id"]
    if "app" in node_data and "traffic" in node_data:
        services_id[node_id] = node_data["app"]
    elif "app" in node_data:
        unused_services_id[node_id] = node_data["app"]
    else:
        unused_services_id[node_id] = node_data["service"]

In [15]:
# Graph edges - affinities
# service_affinities[source][destination]     -> traffic rate of the edge (as returned by Kiali)
# service_response_times[source][destination] -> "responseTime" of the edge
service_affinities = {}
service_response_times = {}
total_edjes = len(result["elements"]["edges"])
for edge in result["elements"]["edges"]:
    edge_data = edge["data"]
    source_id = edge_data["source"]           # Source ID
    destination_id = edge_data["target"]      # Destination ID

    # Ignore edges that touch a node from the unused-services dictionary
    if source_id in unused_services_id or destination_id in unused_services_id:
        continue

    # Only edges whose two endpoints are both known services are tracked
    if source_id not in services_id or destination_id not in services_id:
        continue

    source_name = services_id[source_id]
    destination_name = services_id[destination_id]
    if source_name not in service_affinities:
        service_affinities[source_name] = {}
        service_response_times[source_name] = {}

    # Anything that is not reported as http is read from the grpc rates
    protocol = "http" if edge_data["traffic"]["protocol"] == "http" else "grpc"
    service_affinities[source_name][destination_name] = edge_data["traffic"]["rates"][protocol]
    service_response_times[source_name][destination_name] = edge_data["responseTime"]
pprint.pprint(service_response_times)
pprint.pprint(service_affinities)

{'checkoutservice': {'cartservice': '9',
                     'currencyservice': '5',
                     'emailservice': '10',
                     'paymentservice': '7',
                     'productcatalogservice': '5',
                     'shippingservice': '5'},
 'frontend': {'adservice': '8',
              'cartservice': '8',
              'checkoutservice': '155',
              'currencyservice': '5',
              'productcatalogservice': '5',
              'recommendationservice': '10',
              'shippingservice': '5'},
 'loadgenerator': {'frontend': '95'},
 'recommendationservice': {'productcatalogservice': '5'}}
{'checkoutservice': {'cartservice': '0.14',
                     'currencyservice': '0.23',
                     'emailservice': '0.07',
                     'paymentservice': '0.07',
                     'productcatalogservice': '0.16',
                     'shippingservice': '0.14'},
 'frontend': {'adservice': '1.14',
              'cartservice': '1.75',
   

In [16]:
# Sort Affinities
# Order every source's destinations by traffic rate, highest first. The rates
# are strings, so they are compared as floats; comparing the raw strings would
# rank '8.96' above '10.5'.
sorted_service_affinities = {
    source: dict(sorted(destinations.items(), key=lambda item: float(item[1]), reverse=True))
    for source, destinations in service_affinities.items()
}


# Assemble all affinities in one "source->destination" mapping, in descending order
total_affinities = {}
for source_key, destinations in sorted_service_affinities.items():
    for destination_key, rate in destinations.items():
        total_affinities[source_key+"->"+destination_key] = float(rate)
total_affinities = dict(sorted(total_affinities.items(), key=operator.itemgetter(1),reverse=True))
total_affinities

{'frontend->productcatalogservice': 8.96,
 'frontend->currencyservice': 5.74,
 'loadgenerator->frontend': 1.96,
 'frontend->cartservice': 1.75,
 'recommendationservice->productcatalogservice': 1.39,
 'frontend->recommendationservice': 1.39,
 'frontend->adservice': 1.14,
 'frontend->shippingservice': 0.41,
 'checkoutservice->currencyservice': 0.23,
 'checkoutservice->productcatalogservice': 0.16,
 'checkoutservice->cartservice': 0.14,
 'checkoutservice->shippingservice': 0.14,
 'checkoutservice->emailservice': 0.07,
 'checkoutservice->paymentservice': 0.07,
 'frontend->checkoutservice': 0.07}

In [17]:
def modify_pod_requests(resource_dict):
    """Re-key a per-pod resource mapping by service name.

    Pod names follow the pattern ``<service>-<ID>-<SubID>``; the service name
    is everything before the last two hyphen-separated segments, so service
    names that themselves contain hyphens (``redis-cart``, ``a-b-c``) are kept
    intact. Names with fewer than three segments are used unchanged.

    Parameters:
        resource_dict: mapping of pod name -> numeric value (number or
            numeric string).

    Returns:
        dict mapping service name -> the value formatted with three decimals.
        If several pods map to the same service, the last one wins.

    Raises:
        ValueError: if a value cannot be converted to float.
    """
    curr_dict = {}
    for pod_name, value in resource_dict.items():
        # Split off at most the two trailing segments (ID and SubID)
        parts = pod_name.rsplit("-", 2)
        curr_service = parts[0] if len(parts) == 3 else pod_name
        curr_dict[curr_service] = format(float(value), '.3f')

    return curr_dict

In [18]:
def modify_pod_resources(resource_dict):
    """Flatten a per-host, per-pod resource mapping into one keyed by service name.

    Pod names follow the pattern ``<service>-<ID>-<SubID>``; the service name
    is everything before the last two hyphen-separated segments, so service
    names that themselves contain hyphens (``redis-cart``, ``a-b-c``) are kept
    intact. Names with fewer than three segments are used unchanged.

    Parameters:
        resource_dict: mapping of host -> {pod name -> numeric value (number
            or numeric string)}.

    Returns:
        dict mapping service name -> the value formatted with three decimals.
        The host level is dropped; if a service occurs more than once, the
        last occurrence wins.

    Raises:
        ValueError: if a value cannot be converted to float.
    """
    curr_dict = {}
    for pods_on_host in resource_dict.values():
        for pod_name, value in pods_on_host.items():
            # Split off at most the two trailing segments (ID and SubID)
            parts = pod_name.rsplit("-", 2)
            curr_service = parts[0] if len(parts) == 3 else pod_name
            curr_dict[curr_service] = format(float(value), '.3f')

    return curr_dict

In [19]:
# Heuristic Based Affinity Planner
# A modified First-Fit algorithm to produce a better service placement according to affinities and resources
# Input: Sorted Affinities, Resource demands and VM available resources
# Output: A new placement solution for current problem

In [20]:
# Heuristic affinity planner.
# Walks the affinities from strongest to weakest and, for every pair of
# services that live on different nodes, tries to co-locate them: first by
# moving the destination next to the source, or, when the destination has
# already been moved, by moving the source next to the destination. A move is
# applied only if the pod's CPU and RAM requests are strictly below the free
# capacity of the target node.
moved_services = [] # List to store which services have been moved (source-destination services)
final_pod_cpu = modify_pod_requests(pod_request_cpu)   # service -> requested CPU
final_pod_ram = modify_pod_requests(pod_request_ram)   # service -> requested RAM
final_placement = copy.deepcopy(initial_placement)     # node -> pods, updated as pods move

# Free capacity per node = allocatable - already requested
final_node_available_cpu = {}
final_node_available_ram = {}
for host in host_list:
    final_node_available_cpu[host] = float(node_allocated_cpu[host]) - float(node_request_cpu[host])
    final_node_available_ram[host] = float(node_allocated_ram[host]) - float(node_request_ram[host])

print("----------INITIAL PLACEMENT------------")
pprint.pprint(final_placement)
print("NODE AVAILABLED CPU")
pprint.pprint(final_node_available_cpu)
print("NODE AVAILABLED RAM")
pprint.pprint(final_node_available_ram)
print("---------------------------------------")

for key in total_affinities:

    # Keys have the form "source->destination"
    partition_key = key.partition('->')
    source_service = partition_key[0]
    dest_service = partition_key[2]

    # Reset the per-affinity lookup results so nothing leaks from the previous pair
    source_host = ""
    dest_host = ""
    source_pod = ""
    dest_pod = ""

    # Locate both services on the CURRENT placement. Looking them up in the
    # initial usage snapshot would return the original node of a pod that has
    # already been moved and send its partner to the wrong node.
    for host in host_list:
        for service in final_placement[host]:
            # Pattern: service_name-ID-SubID
            # (same name derivation as before, kept unchanged on purpose)
            split_string = re.split("-", service)
            if(len(split_string) == 3):
                curr_service = split_string[0]
            else:
                curr_service = split_string[0] + '-'+ split_string[1]

            if source_service == curr_service:
                source_host = host
                source_pod = service

            if dest_service == curr_service:
                dest_host = host
                dest_pod = service

    # Skip affinities whose source or destination has no pod in the placement
    if not source_host or not dest_host:
        continue

    # Already co-located: nothing to do
    if dest_host == source_host:
        continue

    # Requests of both services and free capacity of both nodes
    source_cpu = float(final_pod_cpu[source_service])
    source_ram = float(final_pod_ram[source_service])
    dest_cpu = float(final_pod_cpu[dest_service])
    dest_ram = float(final_pod_ram[dest_service])
    available_node_source_cpu = float(final_node_available_cpu[source_host])
    available_node_source_ram = float(final_node_available_ram[source_host])
    available_node_dest_cpu = float(final_node_available_cpu[dest_host])
    available_node_dest_ram = float(final_node_available_ram[dest_host])

    moved_Flag = False
    # Destination has not been moved yet: try to bring it to the source's node
    if dest_service not in moved_services:
        if((dest_cpu < available_node_source_cpu) and (dest_ram < available_node_source_ram)):
            # CPU resources update
            final_node_available_cpu[source_host] = float(final_node_available_cpu[source_host]) - dest_cpu
            final_node_available_cpu[dest_host] = float(final_node_available_cpu[dest_host]) + dest_cpu

            # RAM resources update
            final_node_available_ram[source_host] = float(final_node_available_ram[source_host]) - dest_ram
            final_node_available_ram[dest_host] = float(final_node_available_ram[dest_host]) + dest_ram

            # Host services transfer and update
            final_placement[dest_host].remove(dest_pod)
            final_placement[source_host].append(dest_pod)
            moved_Flag = True
    # Destination already moved: try to bring the source to the destination's node
    # NOTE(review): the source is only considered when the destination was
    # already moved, not when the destination simply did not fit — confirm intent.
    elif source_service not in moved_services:
        if((source_cpu < available_node_dest_cpu) and (source_ram < available_node_dest_ram)):
            # CPU resources update
            final_node_available_cpu[source_host] = float(final_node_available_cpu[source_host]) + source_cpu
            final_node_available_cpu[dest_host] = float(final_node_available_cpu[dest_host]) - source_cpu

            # RAM resources update
            final_node_available_ram[source_host] = float(final_node_available_ram[source_host]) + source_ram
            final_node_available_ram[dest_host] = float(final_node_available_ram[dest_host]) - source_ram

            # Host services transfer and update
            final_placement[source_host].remove(source_pod)
            final_placement[dest_host].append(source_pod)
            moved_Flag = True

    # If a move was applied, mark both services as moved
    if moved_Flag:
        moved_services.append(dest_service)
        moved_services.append(source_service)

print("")
print("------------FINAL PLACEMENT------------")
pprint.pprint(final_placement)
print("NODE AVAILABLED CPU")
pprint.pprint(final_node_available_cpu)
print("NODE AVAILABLED RAM")
pprint.pprint(final_node_available_ram)

----------INITIAL PLACEMENT------------
{'gke-onlineboutique-default-pool-db17c72b-4hwg': ['shippingservice-b5879cdbf-5mlx7',
                                                   'adservice-7cbc9bd9-fwqz7'],
 'gke-onlineboutique-default-pool-db17c72b-jl1c': ['currencyservice-5898885559-q4xh8',
                                                   'paymentservice-6c676df669-z7s4d',
                                                   'checkoutservice-784bfc794f-qch75',
                                                   'redis-cart-74594bd569-6lw6x'],
 'gke-onlineboutique-default-pool-db17c72b-phxm': ['frontend-764c5c755f-6f52w',
                                                   'recommendationservice-79f5f4bbf5-wqwff',
                                                   'emailservice-6bd8b47657-ltptz',
                                                   'loadgenerator-84cbcd768c-5ktn6'],
 'gke-onlineboutique-default-pool-db17c72b-tds3': ['productcatalogservice-7fcf4f8cc-jf2jg',
                