In [3]:
import numpy as np
import random

In [39]:
class EpsilonGreedy():

    def __init__(self, context_system_dict, epsilon):
        # For each context, a list of all configurations available
        # For each arm a mean reward value R and
        # the number of times an arm has been played N
        # {C1: {S1: {'R': 0, 'N', 0}, S2: {'R': 0, 'N': 0}...}, C2: {...}, ...}
        self.epsilon = epsilon

        self.context_dict = {}
        for context, system in context_system_dict.items():
            system_dict = {}
            for system_config in system:
                system_str = ''.join([str(i) for i in system_config])
                system_dict[system_str] = {'R': 0, 'N': 0} # R, N
            self.context_dict[context] = system_dict
            

    def select_arm(self, context):
        
        if np.random.rand() < self.epsilon:
            available_system_configs = self.context_dict[context]
            return random.choice(list(available_system_configs.keys()))
        else:
            context_arms = self.context_dict[context]
            best_arm_config = ''
            best_arm_R = -1
            for config, values in context_arms.items():
                if values['R'] > best_arm_R:
                    best_arm_config = config
                    best_arm_R = values['R']
            return best_arm_config

    def update_arm(self, context, configuration, reward):
        alpha = 0.3
        gamma = 0.9

        r = self.context_dict[context][configuration]['R']
        n = self.context_dict[context][configuration]['N']
        max_next_reward = 0
        # TODO context here needs to be derived from the extended context (current Context + chosen Configuration)
        for _, values in  self.context_dict[context].items():
            if values['R'] > max_next_reward:
                max_next_reward = values['R']
        self.context_dict[context][configuration]['R'] = r + alpha(reward + gamma * max_next_reward - r)
        self.context_dict[context][configuration]['N'] = n + 1
    

In [5]:

class Thompson():

    def __init__(self, n):
        self.R, self.N = self.init_bandit

    def init_bandit(n):
        R = np.zeros(n)
        N = np.zeros(n)
        return R, N

    def select_arm():
        pass

    def run_bandit():
        pass

In [6]:
import numpy as np

class ThompsonSamplingGaussian:
    def __init__(self, n_arms, known_variance=1.0):
        self.n_arms = n_arms
        self.known_variance = known_variance
        self.means = np.zeros(n_arms)  # Posterior means for each arm
        self.precision = np.ones(n_arms)  # Posterior precisions (inverse variance)

    def select_arm(self):
        samples = np.random.normal(self.means, 1 / np.sqrt(self.precision))
        return np.argmax(samples)

    def update(self, chosen_arm, reward):
        self.precision[chosen_arm] += 1 / self.known_variance
        self.means[chosen_arm] = ((self.means[chosen_arm] * (self.precision[chosen_arm] - 1)) + reward) / self.precision[chosen_arm]

# Define different reward distributions for each arm
true_means = [1.0, 2.0, -1.0, 0.5]  # Example means for each arm
true_variances = [0.5, 1.0, 2.0, 0.1]  # Example variances for each arm

# Simulate rewards from different distributions
def simulate_reward(arm):
    return np.random.normal(true_means[arm], np.sqrt(true_variances[arm]))

# Example usage
n_arms = len(true_means)
agent = ThompsonSamplingGaussian(n_arms)
frequency = [0 for _ in range(len(true_means))]
for t in range(1000):
    chosen_arm = agent.select_arm()
    frequency[chosen_arm] += 1
    reward = simulate_reward(chosen_arm)  # Simulated reward from the true distribution of the chosen arm
    agent.update(chosen_arm, reward)

# Printing out the estimated means and precisions after running the simulation
print("Estimated Means:", agent.means)
print("Precisions:", agent.precision)
print("Frequency", frequency)


Estimated Means: [ 0.89908517  2.00743351 -0.33847329  0.36681283]
Precisions: [  8. 988.   4.   4.]
Frequency [7, 987, 3, 3]


In [40]:
import time
%run swim.ipynb
%run feature_model_oo.ipynb

# run adaptation logic
feature_model = FM('swim_fm.json')
context_system_dict = feature_model.get_context_system_dictionary()
print(feature_model.ordered_names)
print(feature_model.system_ordered_names)
print(feature_model.context_ordered_names)

for context, config in context_system_dict.items():
    print(context, config)

swim_client = SwimClient()
swim_client.connect('localhost', 4242)
simulator_interface = SwimSimulatorInterface(swim_client, feature_model)


epsilon_greedy_cmab = EpsilonGreedy(context_system_dict, 0.9)

def servers_and_dimmer_from_config(config):
    servers = 0
    dimmer = 0
    for config_value, name in zip(config, feature_model.system_ordered_names):
        print(config_value, name)
        if ("servers_" in name) and (config_value == "1"):
            servers = feature_model.numerical_feature_name_to_value_range(name)[0]
        if ("dimmer_" in name) and (config_value == "1"):
            dimmer = feature_model.numerical_feature_name_to_value_range(name)[0]
    return servers, dimmer


for i in range(1):

    # 1. get context
    try:
        config, context, performance = simulator_interface.monitor()
        print(config, context, performance)
    except TypeError as err:
        print(err)

    # 2. get best next arm and reconfigure
    selected_arm = epsilon_greedy_cmab.select_arm(feature_model.translate_binary_context_to_str(context))
    print(selected_arm)
    # TODO send config to server
    print(servers_and_dimmer_from_config(selected_arm))
    #for selected_arm

    # 3. trigger for monitoring (delayed) - sleep until adapted
    # if number of servers changed -> sleep 60 + x secs
    # if dimmer changed -> sleep x secs
    # else sleep Y secs

    # 4. monitor performance
    config, context, performance = simulator_interface.monitor()

    # 5. update arms

swim_client.disconnect()


['root', 'system', 'context', 'servers', 'dimmer', 'requestArrivalRate', 'servers_0', 'servers_1', 'servers_2', 'servers_3', 'servers_4', 'servers_5', 'servers_6', 'servers_7', 'servers_8', 'servers_9', 'servers_10', 'dimmer_0', 'dimmer_1', 'dimmer_2', 'dimmer_3', 'dimmer_4', 'dimmer_5', 'dimmer_6', 'dimmer_7', 'dimmer_8', 'dimmer_9', 'requestArrivalRate_0', 'requestArrivalRate_25', 'requestArrivalRate_50', 'requestArrivalRate_75', 'requestArrivalRate_100']
['servers', 'dimmer', 'servers_0', 'servers_1', 'servers_2', 'servers_3', 'servers_4', 'servers_5', 'servers_6', 'servers_7', 'servers_8', 'servers_9', 'servers_10', 'dimmer_0', 'dimmer_1', 'dimmer_2', 'dimmer_3', 'dimmer_4', 'dimmer_5', 'dimmer_6', 'dimmer_7', 'dimmer_8', 'dimmer_9']
['requestArrivalRate', 'requestArrivalRate_0', 'requestArrivalRate_25', 'requestArrivalRate_50', 'requestArrivalRate_75', 'requestArrivalRate_100']
101000 [[1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 0, 0, 0, 0, 

In [38]:
import time
%run swim.ipynb

class Arm():

    def __init__(self, r, n):
        self.r = r
        self.n = n

class EpsilonGreedyNoFM():

    def __init__(self, context_system_dict, epsilon):
        # For each context, a list of all configurations available
        # For each arm a mean reward value R and
        # the number of times an arm has been played N
        # {C1: {S1: {'R': 0, 'N', 0}, S2: {'R': 0, 'N': 0}...}, C2: {...}, ...}
        self.epsilon = epsilon

        self.context_dict = {}
        for context, system in context_system_dict.items():
            system_dict = {}
            for system_config in system:
                system_dict[(system_config['servers'], system_config['dimmer'])] = Arm(0, 0)
            self.context_dict[context] = system_dict

    def select_arm(self, context):
        
        if np.random.rand() < self.epsilon:
            available_system_configs = self.context_dict[context]
            return random.choice(list(available_system_configs.keys()))
        else:
            context_arms = self.context_dict[context]
            best_arm_config = ''
            best_arm_R = -100
            for config, arm in context_arms.items():
                if arm.r > best_arm_R:
                    best_arm_config = config
                    best_arm_R = arm.r
            return best_arm_config

    def update_arm(self, context, configuration, reward):
        alpha = 0.3
        gamma = 0.9

        r = self.context_dict[context][configuration].r
        n = self.context_dict[context][configuration].n
        max_next_reward = 0
        # TODO context here needs to be derived from the extended context (current Context + chosen Configuration)
        for _, arm in  self.context_dict[context].items():
            if arm.r > max_next_reward:
                max_next_reward = arm.r
        self.context_dict[context][configuration].r = r + alpha(reward + gamma * max_next_reward - r)
        self.context_dict[context][configuration].n = n + 1
    

class AdaptationLogic:

    MAX_SERVERS = 10
    DIMMER_INTERVALS = 10
    MAX_REQUEST_RATE = 60
    REQUEST_RATE_INTERVALS = 12

    def __init__(self):

        context_system_dict = {}
        configs = []
        for servers in range(self.MAX_SERVERS):
            for dimmer in range(self.DIMMER_INTERVALS):
                configs.append({'servers': servers, 'dimmer': dimmer / self.DIMMER_INTERVALS})
        for context in range(int(self.MAX_REQUEST_RATE / self.REQUEST_RATE_INTERVALS)):
            context_system_dict[int(context * self.MAX_REQUEST_RATE / self.REQUEST_RATE_INTERVALS)] = configs
        print(context_system_dict)

    def run(self, num_runs=1):
        swim_client = SwimClient()
        swim_client.connect('localhost', 4242)
        simulator_interface = SwimSimulatorInterfaceNoFM(swim_client)
        epsilon_greedy_cmab = EpsilonGreedyNoFM(context_system_dict, 0.9)

        servers = -1
        dimmer = -1
            
        for run in range(num_runs):
            # 1. get context
            monitored_values = simulator_interface.monitor_values()

            # 2. get best next arm and reconfigure
            current_request_arrival_rate = monitored_values.request_arrival_rate
            rounded_RAR = self.round_context_from_monitor(current_request_arrival_rate)
            print(rounded_RAR)
            selected_arm = epsilon_greedy_cmab.select_arm(rounded_RAR)

            # 3. trigger for monitoring (delayed) - sleep until adapted
            # if number of servers changed -> sleep 60 + x secs
            # if dimmer changed -> sleep x secs
            # else sleep Y secs

            # 4. monitor performance
            monitored_values = simulator_interface.monitor_values()

            # 5. update arms
        
        swim_client.disconnect()

    def round_context_from_monitor(self, request_rate):
        if request_rate > self.MAX_REQUEST_RATE:
            request_rate = self.MAX_REQUEST_RATE

        rounded_value = (request_rate // self.REQUEST_RATE_INTERVALS) * self.REQUEST_RATE_INTERVALS

        return int(rounded_value)
    
adaptation_logic = AdaptationLogic()
adaptation_logic.run()


{0: [{'servers': 0, 'dimmer': 0.0}, {'servers': 0, 'dimmer': 0.1}, {'servers': 0, 'dimmer': 0.2}, {'servers': 0, 'dimmer': 0.3}, {'servers': 0, 'dimmer': 0.4}, {'servers': 0, 'dimmer': 0.5}, {'servers': 0, 'dimmer': 0.6}, {'servers': 0, 'dimmer': 0.7}, {'servers': 0, 'dimmer': 0.8}, {'servers': 0, 'dimmer': 0.9}, {'servers': 1, 'dimmer': 0.0}, {'servers': 1, 'dimmer': 0.1}, {'servers': 1, 'dimmer': 0.2}, {'servers': 1, 'dimmer': 0.3}, {'servers': 1, 'dimmer': 0.4}, {'servers': 1, 'dimmer': 0.5}, {'servers': 1, 'dimmer': 0.6}, {'servers': 1, 'dimmer': 0.7}, {'servers': 1, 'dimmer': 0.8}, {'servers': 1, 'dimmer': 0.9}, {'servers': 2, 'dimmer': 0.0}, {'servers': 2, 'dimmer': 0.1}, {'servers': 2, 'dimmer': 0.2}, {'servers': 2, 'dimmer': 0.3}, {'servers': 2, 'dimmer': 0.4}, {'servers': 2, 'dimmer': 0.5}, {'servers': 2, 'dimmer': 0.6}, {'servers': 2, 'dimmer': 0.7}, {'servers': 2, 'dimmer': 0.8}, {'servers': 2, 'dimmer': 0.9}, {'servers': 3, 'dimmer': 0.0}, {'servers': 3, 'dimmer': 0.1}, {'s

TypeError: list indices must be integers or slices, not str