In [23]:
import yaml
import os
import subprocess

In [None]:
def bayesian_optimisation(n_iters, sample_loss, bounds, n_pre_samples=5,
                          random_search=False, alpha=1e-5, epsilon=1e-7):
    """
    Uses Gaussian Processes to optimise the loss function `sample_loss`.

    Arguments:
    ----------
        n_iters: int
            Number of GP-guided evaluations performed after the initial
            random ones.
        sample_loss: callable
            Called with one parameter vector (1-D array of length n_params);
            its return value is stored as the score of that point.
        bounds: array-like, shape (n_params, 2)
            Column 0 holds the lower and column 1 the upper bound of each
            parameter.
        n_pre_samples: int
            Number of uniformly drawn points evaluated before the first GP fit.
        random_search: int or False
            When truthy it is used as the number of uniformly drawn candidates
            scored by `expected_improvement` in every iteration. When falsy,
            `sample_next_hyperparameter` proposes the next point instead.
        alpha: float
            Forwarded to `GaussianProcessRegressor(alpha=...)`.
        epsilon: float
            Per-coordinate tolerance under which a proposed point is treated
            as a duplicate of an already evaluated one.

    Returns:
    --------
        xp: array of shape (n_pre_samples + n_iters, n_params) with every
            evaluated point, in evaluation order.
        yp: array with the matching `sample_loss` values.
    """
    # Accept plain nested lists as well as arrays; arrays pass through as-is.
    bounds = np.asarray(bounds)
    n_params = bounds.shape[0]
    lower, upper = bounds[:, 0], bounds[:, 1]

    x_list = []
    y_list = []

    # Seed the surrogate with uniformly drawn points inside the bounds.
    for params in np.random.uniform(lower, upper, (n_pre_samples, n_params)):
        x_list.append(params)
        y_list.append(sample_loss(params))

    xp = np.array(x_list)
    yp = np.array(y_list)

    # Create the GP
    kernel = gp.kernels.Matern()
    model = gp.GaussianProcessRegressor(kernel=kernel,
                                        alpha=alpha,
                                        n_restarts_optimizer=10,
                                        normalize_y=True)

    for _ in range(n_iters):

        model.fit(xp, yp)

        # Sample next hyperparameter
        if random_search:
            x_random = np.random.uniform(lower, upper, size=(random_search, n_params))
            # expected_improvement is defined outside this block; presumably it
            # returns the negated improvement (minimiser convention), hence the
            # sign flip before argmax -- TODO confirm.
            ei = -1 * expected_improvement(x_random, model, yp, greater_is_better=True, n_params=n_params)
            next_sample = x_random[np.argmax(ei), :]
        else:
            next_sample = sample_next_hyperparameter(expected_improvement, model, yp, greater_is_better=True, bounds=bounds, n_restarts=100)

        # Duplicates will break the GP. A candidate is a duplicate only when
        # ALL of its coordinates match one previously evaluated row; testing
        # coordinates independently would discard points that merely share a
        # single value with an earlier sample.
        is_duplicate = np.any(np.all(np.abs(next_sample - xp) <= epsilon, axis=1))
        if is_duplicate:
            next_sample = np.random.uniform(lower, upper, n_params)

        # Sample loss for new set of parameters
        cv_score = sample_loss(next_sample)

        # Update lists
        x_list.append(next_sample)
        y_list.append(cv_score)

        # Update xp and yp
        xp = np.array(x_list)
        yp = np.array(y_list)

    return xp, yp

In [None]:
def sample_loss(params):
    """Score one (C, gamma) pair by mean 3-fold cross-validated ROC AUC.

    `params[0]` and `params[1]` are used as base-10 exponents of the SVC's
    `C` and `gamma`. The estimator is evaluated on the notebook-level `data`
    and `target`.
    """
    classifier = SVC(C=10 ** params[0], gamma=10 ** params[1], random_state=12345)
    fold_scores = cross_val_score(classifier, X=data, y=target, scoring='roc_auc', cv=3)
    return fold_scores.mean()

In [None]:
# Search box: five parameters, each ranging over [0, 10000].
# NOTE(review): the sample_loss defined in this notebook reads only params[0]
# and params[1] and uses them as base-10 exponents, which does not fit bounds
# this wide -- confirm which loss function this run is meant to use.
bounds = np.array([[0, 10000]] * 5)

# 3 random warm-up points, then 30 iterations; random_search=100000 is the
# number of random candidates scored per iteration.
xp, yp = bayesian_optimisation(30, sample_loss, bounds,
                               n_pre_samples=3,
                               random_search=100000)

In [24]:
# YAML file that IstioConfig loads and rewrites.
config_file = "destination-rules.yaml"
# Shell command prefix; IstioConfig.apply() appends the file name to it.
kubectl_apply_cmd = "kubectl apply -f "

class IstioConfig:
    """Editable in-memory copy of a multi-document YAML file of Istio rules.

    Every document is expected to carry
    ``spec.trafficPolicy.connectionPool`` with ``http`` and ``tcp`` mappings;
    a missing level raises ``KeyError``. The setters take the document index
    ``i`` and write a single field. Nothing is written to disk until
    ``save()`` and nothing reaches the cluster until ``apply()``.
    """

    def __init__(self, file_name):
        """Load all YAML documents contained in `file_name`."""
        self.file_name = file_name
        with open(file_name, "r") as stream:
            # NOTE(review): FullLoader can build more than plain data types;
            # if the file only ever holds plain mappings, yaml.safe_load_all
            # would be the safer choice.
            self.docs = list(yaml.load_all(stream, Loader=yaml.FullLoader))

    def save(self):
        """Write all documents back to `self.file_name`, overwriting it."""
        with open(self.file_name, "w") as stream:
            yaml.dump_all(self.docs, stream)

    def apply(self):
        """Save the file, then run the kubectl apply command on it via the shell."""
        self.save()
        os.system(kubectl_apply_cmd + self.file_name)

    def reset(self):
        """Empty the http and tcp settings of every document (does not save)."""
        for i in range(self.size()):
            pool = self._get_connection_pool(i)
            pool["http"] = {}
            pool["tcp"] = {}

    def size(self):
        """Return the number of loaded documents."""
        return len(self.docs)

    def get_docs(self):
        """Return the list of loaded documents (the live list, not a copy)."""
        return self.docs

    def _get_connection_pool(self, i):
        """Return the connectionPool mapping of document `i`."""
        # Uses this instance's documents rather than a notebook-level variable,
        # so the class works regardless of what the instance is called.
        return self.docs[i]["spec"]["trafficPolicy"]["connectionPool"]

    def _get_http(self, i):
        """Return the connectionPool.http mapping of document `i`."""
        return self._get_connection_pool(i)["http"]

    def _get_tcp(self, i):
        """Return the connectionPool.tcp mapping of document `i`."""
        return self._get_connection_pool(i)["tcp"]

    def _set_tcp_keepalive(self, i, key, val):
        """Set one tcp.tcpKeepalive field, creating the mapping when absent."""
        self._get_tcp(i).setdefault("tcpKeepalive", {})[key] = val

    #### tcp ####
    def set_tcp_max_connections(self, i, val):
        """Set tcp.maxConnections (int32; original note: 100)."""
        self._get_tcp(i)["maxConnections"] = val

    def set_tcp_connect_timeout(self, i, val):
        """Set tcp.connectTimeout; `val` is an int in ms (original note: 30)."""
        self._get_tcp(i)["connectTimeout"] = str(val) + "ms"

    def set_tcp_keeplive_probes(self, i, val):
        """Set tcp.tcpKeepalive.probes; `val` is a plain probe count."""
        # probes is a count, not a duration, so it is stored without a unit
        # and under its own key instead of overwriting "time".
        self._set_tcp_keepalive(i, "probes", val)

    def set_tcp_keeplive_time(self, i, val):
        """Set tcp.tcpKeepalive.time; `val` is an int in ms (original note: 7200s)."""
        self._set_tcp_keepalive(i, "time", str(val) + "ms")

    def set_tcp_keeplive_interval(self, i, val):
        """Set tcp.tcpKeepalive.interval; `val` is an int in ms (original note: 75s)."""
        self._set_tcp_keepalive(i, "interval", str(val) + "ms")

    #### http ####
    def set_http_http1_max_pending_requests(self, i, val):
        """Set http.http1MaxPendingRequests (int32)."""
        self._get_http(i)["http1MaxPendingRequests"] = val

    def set_http_http2_max_requests(self, i, val):
        """Set http.http2MaxRequests (int32)."""
        self._get_http(i)["http2MaxRequests"] = val

    def set_http_max_requests_per_connection(self, i, val):
        """Set http.maxRequestsPerConnection (int32)."""
        self._get_http(i)["maxRequestsPerConnection"] = val

    def set_http_max_retries(self, i, val):
        """Set http.maxRetries (int32)."""
        self._get_http(i)["maxRetries"] = val

    def set_http_idle_timeout(self, i, val):
        """Set http.idleTimeout; `val` is an int in ms."""
        self._get_http(i)["idleTimeout"] = str(val) + "ms"

    def set_http_use_client_protocol(self, i, val):
        """Set http.useClientProtocol (bool)."""
        self._get_http(i)["useClientProtocol"] = val

In [25]:
def generate_load(num_clients, num_reqs):
    """Run the `load-test` Docker image against GATEWAY_URL and return its output.

    `num_reqs` is passed as `-r` and `num_clients` as `-c`; the text the shell
    command produced is returned as a string.
    """
    template = "docker run load-test -h {url} -r {num_reqs} -c {num_clients}"
    command = template.format(url=GATEWAY_URL, num_reqs=num_reqs, num_clients=num_clients)
    return subprocess.getoutput(command)

In [26]:
from subprocess import Popen
from prometheus_api_client import PrometheusConnect

# Command that opens the Prometheus dashboard through istioctl.
# NOTE(review): the absolute path is specific to the machine this ran on.
prometheus_host_cmd = ["/home/jupyter/.istioctl/bin/istioctl", "dashboard", "prometheus"]
# Address the Prometheus API client below connects to.
prometheus_host = "http://localhost:9090"

# Launch istioctl in the background (its Popen handle is kept in `host`),
# then create the Prometheus API client.
host = subprocess.Popen(prometheus_host_cmd)
client = PrometheusConnect(url =prometheus_host, disable_ssl=True)

Unable to listen on port 9090: Listeners failed to create with the following errors: [unable to create listener: Error listen tcp4 127.0.0.1:9090: bind: address already in use unable to create listener: Error listen tcp6 [::1]:9090: bind: address already in use]


http://localhost:45649


In [28]:
# Re-create the client on the port shown in the previous cell's output
# (port 9090 was reported as already in use there).
client = PrometheusConnect(url ="http://localhost:45649", disable_ssl=True)

In [29]:
# host:port that generate_load() hands to the load-test container via -h.
GATEWAY_URL = '34.133.80.64:80'
# Rules file to edit, and the shell prefix IstioConfig.apply() runs it with.
config_file = "destination-rules.yaml"
kubectl_apply_cmd = "kubectl apply -f "
# Load the rules file into the instance used by the cells below.
ic = IstioConfig(config_file)

In [37]:
import random
import time
def generate_data(ic, prometheus_client, num_data, num_clients, num_reqs, config_upper_bound):
    '''
        Apply one random Istio configuration, generate load with
        {num_clients} clients and {num_reqs} requests, then read the
        resulting latencies from Prometheus.

        NOTE: {num_data} is accepted but currently unused -- each call
        produces exactly one sample; call the function repeatedly to
        collect more.

        @param ic                 IstioConfig-like object (reset/set_*/apply)
        @param prometheus_client  object providing custom_query(query=...)
        @param num_data           unused, see note above
        @param num_clients        number of concurrent load-test clients
        @param num_reqs           number of load-test requests
        @param config_upper_bound settings are drawn without replacement from
                                  range(1, config_upper_bound); must be >= 7,
                                  otherwise random.sample raises ValueError
        @return states, latencies
                states: one list of 6 drawn values per configured endpoint
                latencies: list holding one list of 4 floats (NaN when the
                expected metric series are not available)
    '''
    NUM_ENDPOINTS = 14
    # Document 4 is left at its reset state (presumably the front-end rule,
    # judging by the recorded kubectl output -- TODO confirm).
    SKIPPED_ENDPOINT = 4
    SETTINGS_PER_ENDPOINT = 6
    # Positions, within the query result, of the series that are recorded
    # (presumably the 99th-percentile latencies -- TODO confirm).
    LATENCY_SERIES = [2, 5, 8, 14]

    states = []
    performances = []

    # randomly config the istio
    ic.reset()

    for i in range(NUM_ENDPOINTS):
        if i == SKIPPED_ENDPOINT:
            continue
        random_vals = random.sample(range(1, config_upper_bound), SETTINGS_PER_ENDPOINT)
        ic.set_tcp_max_connections(i, random_vals[0])
        ic.set_tcp_connect_timeout(i, random_vals[1])
        ic.set_http_http1_max_pending_requests(i, random_vals[2])
        ic.set_http_http2_max_requests(i, random_vals[3])
        ic.set_http_max_requests_per_connection(i, random_vals[4])
        ic.set_http_max_retries(i, random_vals[5])
        states.append(random_vals)

    ic.apply()

    # generate load
    print("num clients {}, num reqs {}".format(num_clients, num_reqs))

    # The load generator's textual output is not needed here.
    generate_load(num_clients, num_reqs)
    # Short pause before querying Prometheus.
    time.sleep(3)
    metrics = prometheus_client.custom_query(query="microservices_demo_user_request_latency_microseconds")

    # A non-empty but short result would make the positional lookups raise
    # IndexError, so require every expected position to be present.
    if len(metrics) > max(LATENCY_SERIES):
        latencies_99_quantile = [float(metrics[i]['value'][1]) for i in LATENCY_SERIES]
    else:
        # cannot fetch performance metrics
        latencies_99_quantile = [float("nan")] * len(LATENCY_SERIES)
    performances.append(latencies_99_quantile)

    return states, performances

In [38]:
# One sample with 10 clients and 200 requests; settings are drawn from
# range(1, 10000). The third argument (num_data=10) is not used by
# generate_data, so a single latency row comes back.
states, latencies = generate_data(ic, client, 10, 10, 200, 10000)

destinationrule.networking.istio.io/carts configured
destinationrule.networking.istio.io/carts-db configured
destinationrule.networking.istio.io/catalogue configured
destinationrule.networking.istio.io/catalogue-db configured
destinationrule.networking.istio.io/front-end unchanged
destinationrule.networking.istio.io/orders configured
destinationrule.networking.istio.io/orders-db configured
destinationrule.networking.istio.io/payment configured
destinationrule.networking.istio.io/queue-master configured
destinationrule.networking.istio.io/rabbitmq configured
destinationrule.networking.istio.io/session-db configured
destinationrule.networking.istio.io/shipping configured
destinationrule.networking.istio.io/user configured
destinationrule.networking.istio.io/user-db configured
num clients 10, num reqs 200


In [40]:
# Display the latency rows returned by generate_data.
latencies

[[0.00926263, 0.010547914, 0.010141945000000001, 0.016321986]]

In [None]:
# Random Forest: randomized hyperparameter search.
# NOTE(review): RandomForestRegressor, X and y are not defined in the cells
# shown here, and the keys below are Istio connection-pool field names rather
# than RandomForestRegressor constructor parameters -- presumably this cell
# is an unfinished adaptation of a tuning template; confirm before running.
from scipy.stats import randint as sp_randint
from sklearn.model_selection import RandomizedSearchCV
# Define the hyperparameter configuration space
# (sp_randint(low, high) draws integers from [low, high)).
tune_params = {
    'http1MaxPendingRequests': sp_randint(0,10000), # original note: 2^32-1 (presumably the field's real upper limit)
    "http2MaxRequests":sp_randint(0,10000),
    'maxRequestsPerConnection': sp_randint(0,10000),
    "maxRetries":sp_randint(0,10000),
    "maxConnections":sp_randint(0,10000),
    # "idleTimeout":sp_randint(0,1000)
    # "useClientProtocol":['mse','mae']
}
n_iter_search=20 # number of sampled parameter settings; increase if time permits
clf = RandomForestRegressor(random_state=0)
# 3-fold CV scored by negative MSE; the sign is flipped again when printing.
Random = RandomizedSearchCV(clf, param_distributions=tune_params,n_iter=n_iter_search,cv=3,scoring='neg_mean_squared_error')
Random.fit(X, y)
print(Random.best_params_)
print("MSE:"+ str(-Random.best_score_))

In [None]:
# Random Forest: same randomized search with two placeholder parameters.
# NOTE(review): RandomForestRegressor, X and y are not defined in the cells
# shown here, and 'test1'/'test2' do not look like RandomForestRegressor
# constructor parameters -- confirm before running.
from scipy.stats import randint as sp_randint
from sklearn.model_selection import RandomizedSearchCV
# Define the hyperparameter configuration space
# (sp_randint(low, high) draws integers from [low, high)).
tune_params = {
    'test1': sp_randint(0,10),
    "test2":sp_randint(0,10),
}
n_iter_search=20 # number of sampled parameter settings; increase if time permits
clf = RandomForestRegressor(random_state=0)
# 3-fold CV scored by negative MSE; the sign is flipped again when printing.
Random = RandomizedSearchCV(clf, param_distributions=tune_params,n_iter=n_iter_search,cv=3,scoring='neg_mean_squared_error')
Random.fit(X, y)
print(Random.best_params_)
print("MSE:"+ str(-Random.best_score_))

In [43]:
# Two hand-entered rows of four values each, compared in the cells below
# (presumably latency rows from two generate_data runs -- TODO confirm).
l1 = [0.013327259000000001, 0.014051314, 0.013654487000000002, 0.033338778]
l2 = [0.012540949000000001, 0.013926724000000001, 0.013654487000000002, 0.030811651000000002]

In [44]:
# Mean of the four l1 values.
sum(l1) / 4

0.0185929595

In [45]:
# Mean of the four l2 values.
sum(l2) / 4

0.017733452750000003

In [46]:

# Absolute difference between the two means (l1 mean minus l2 mean).
0.0185929595 - 0.017733452750000003

0.0008595067499999956

In [47]:
# Difference relative to the l1 mean (about 4.6 %).
0.0008595067499999956 / 0.0185929595

0.046227538440020574

2021-12-13T01:14:20.712763Z	error	klog	lost connection to pod
