In [1]:
import numpy as np
import pandas as pd
import time

In [2]:
def policy(pod, latency):
    return pod + latency

In [3]:
'''
build_table(3, 5, 2000, 5)


pod initial_latency	end_latency	      3     4	    5
  3               0	       2000    2003  2004	 2005
  4               0	       2000    2004  2005	 2006
  5               0	       2000    2005  2006	 2007
  3            2001	       4000    4003  4004	 4005
  4            2001	       4000    4004  4005	 4006
  5            2001	       4000    4005  4006	 4007
  3            4001	       8000    8003  8004	 8005
  4            4001	       8000    8004  8005	 8006
  5            4001	       8000    8005  8006	 8007
  3            8001	      16000   16003 16004	16005
  4            8001	      16000   16004 16005	16006
  5            8001	      16000   16005 16006	16007
  3           16001	      32000   32003 32004	32005
  4           16001	      32000   32004 32005	32006
  5           16001	      32000   32005 32006	32007
'''
def build_table(min_pods, max_pods, initial_lat, interval_size):
    intervals = []
    intervals.append((0, initial_lat))
    for x in range(interval_size - 1):
        initial_interval_value = intervals[-1][1]
        last_interval_value = initial_interval_value * 2 
        intervals.append((initial_interval_value + 1, last_interval_value))

    table = []

    for y in intervals:
        for x in range(min_pods, max_pods + 1):
            options = np.zeros(max_pods + 1 - min_pods)
            options = [policy(z + x, y[1]) for z in range(len(options))]
            
            table.append([x, y[0], y[1]] + list(options))

    labels = ['pod', 'initial_latency', 'end_latency']
    actions = list(np.arange(min_pods, max_pods + 1).astype(np.str_))

    return pd.DataFrame(table, columns=labels+actions)

In [4]:
'''
find_option(table, 5, 3000)

   3	    4	    5
2003	 2004	 2005
2004	 2005	 2006
2005	 2006	 2007
'''
def find_options(table, pod, latency):
    return table[(table['pod'] == pod) & (table['initial_latency'] <= latency) & (table['end_latency'] >= latency)].iloc[:,3::]

In [5]:
'''
find_best_action(table, 5, 3000)

3
'''
def find_best_action(table, pod, latency):
    options = find_options(table, pod, latency)
    
    return int(np.min(options).head(1).index.item())

In [6]:
def update_action_result(table, pod, latency, action, result):
        table.loc[(table['pod'] == pod) & (table['initial_latency'] <= latency) & (table['end_latency'] >= latency), str(action)] = result
        print("updating pod", pod, " latency ", latency, " result ", result)
        return table

In [7]:
min_pods = 3
max_pods = 5
initial_latency = 2000
interval_size = 5
reward = 0

epsilon = 0.5

In [8]:
coeficiente_diel = 120000
pods = 1

In [9]:
def get_latency():
    return coeficiente_diel

In [10]:
def get_pods():
    global pods
    return pods

In [11]:
def set_pods(new_pods):
    global coeficiente_diel
    global pods
    pods = new_pods
    coeficiente_diel = coeficiente_diel/new_pods

In [12]:
table = build_table(min_pods, max_pods, initial_latency, interval_size)
table

Unnamed: 0,pod,initial_latency,end_latency,3,4,5
0,3,0,2000,2003,2004,2005
1,4,0,2000,2004,2005,2006
2,5,0,2000,2005,2006,2007
3,3,2001,4000,4003,4004,4005
4,4,2001,4000,4004,4005,4006
5,5,2001,4000,4005,4006,4007
6,3,4001,8000,8003,8004,8005
7,4,4001,8000,8004,8005,8006
8,5,4001,8000,8005,8006,8007
9,3,8001,16000,16003,16004,16005


In [13]:
for x in range(10):
    latency = get_latency()
    pods = get_pods()
    
    result = policy(pods, latency)
    
    print("\n--- Exec ", x)
    print("latency ", latency)
    print("pods ", pods)
    print("result ", result)
    
    if np.random.random() > epsilon:
            # GET THE ACTION
        action = find_best_action(table, pods, latency)
        print("choose action ", action)
    else:
        action = np.random.randint(min_pods, max_pods + 1)
        print("random ", action)
            
    set_pods(action)
#     time.sleep(3)
    
    new_latency = get_latency()
    new_result = policy(action, new_latency)
    
    print("new_latency ", new_latency)
    print("new_result ", new_result)        
    
#     global table
    table = update_action_result(table, pods, latency, action, new_result)
    
    if new_result < result:
        reward += 5
        print("reward ", reward)  
    else:
        reward -= 5
        print("reward ", reward)   


--- Exec  0
latency  120000
pods  1
result  120001
random  5
new_latency  24000.0
new_result  24005.0
updating pod 5  latency  120000  result  24005.0
reward  5

--- Exec  1
latency  24000.0
pods  5
result  24005.0
random  5
new_latency  4800.0
new_result  4805.0
updating pod 5  latency  24000.0  result  4805.0
reward  10

--- Exec  2
latency  4800.0
pods  5
result  4805.0
random  5
new_latency  960.0
new_result  965.0
updating pod 5  latency  4800.0  result  965.0
reward  15

--- Exec  3
latency  960.0
pods  5
result  965.0
random  4
new_latency  240.0
new_result  244.0
updating pod 4  latency  960.0  result  244.0
reward  20

--- Exec  4
latency  240.0
pods  4
result  244.0
random  3
new_latency  80.0
new_result  83.0
updating pod 3  latency  240.0  result  83.0
reward  25

--- Exec  5
latency  80.0
pods  3
result  83.0
choose action  3
new_latency  26.666666666666668
new_result  29.666666666666668
updating pod 3  latency  80.0  result  29.666666666666668
reward  30

--- Exec  6
lat