# Optimal Resource Allocation in Public and Private Cloud

## Define System Model (MDP)

In [None]:
class System: # represents the MDP
    """Edge/cloud VM allocation model that the MDP is built on.

    The edge node owns ``E`` VMs. Each call to :meth:`step` serves one user
    demand by splitting it between the cloud and the edge, charges the
    resulting cost, and then advances time by one slot, releasing edge VMs
    whose requested duration has elapsed.

    Attributes:
        E: total number of VMs at the edge.
        et: number of edge VMs currently available.
        t: number of demands handled since the last reset.
        H: allocation records, each a list ``[edge_vms, remaining_slots]``.
        Pe, Pf, Pc: unit costs applied to, respectively, each idle edge VM,
            each busy edge VM and each VM taken from the cloud.
        verbose: when True, ``step`` prints a trace of its bookkeeping.
    """

    def __init__(self, E, Pefc) -> None:
        """Create a system with ``E`` edge VMs and cost triple ``Pefc = (Pe, Pf, Pc)``."""
        self.E = E # total VMs at edge
        self.H = [] # Record List
        self.Pe, self.Pf, self.Pc = Pefc # cost parameters
        self.verbose=False
        # Start in the same state reset() produces, so that step() is usable
        # on a freshly constructed instance instead of raising AttributeError.
        self.et = self.E # currently available VMs
        self.t = 0   # time step

    def reset(self):
        """Free every edge VM, clear the records, rewind time; return ``self``."""
        self.et = self.E # currently available VMs
        self.H.clear() # clear record list (in place, the list object is kept)
        self.t = 0   # time step
        return self

    def step(self, d, l, a):
        """Handle one user demand and advance time by one slot.

        Args:
            d: number of VMs requested.
            l: duration requested, in time slots.
            a: action, the ratio of the demand to allocate from the cloud.

        Returns:
            The cost of this step: ``c * Pc`` for the ``c`` cloud VMs plus the
            edge cost ``busy * Pf + idle * Pe``, evaluated right after the
            allocation and before any VM is released.
        """
        self.t+=1
        verb = self.verbose
        if verb:
            print(f'{self.t=}')
            print(f'\tDemand, {d, l}')
            print(f'\tAction, {a}')

        c = int(d*a) # vms allocated from cloud (fraction truncated)
        e = d - c    # vms allocated from edge
        r = self.et - e # remaining VMs after allocation
        # Not enough edge VMs: use all that are left and push the shortfall
        # to the cloud.
        if r<0:
            e = self.et # allocated all from edge
            c += (-r) # take remaining from cloud
        self.et -= e
        if verb:
            print(f'\tAllocated from Cloud, {c=}')
            print(f'\tAllocated from Edge, {e=}')
            print(f'\tRemaining, {self.et=}')
        # generate allocation record (only when edge VMs are actually held)
        if e > 0:
            self.H.append([e, l]) # a mutable [vms, remaining] pair
            if verb:
                print(f'\t\tAllocation Record, {[e,l]}')
                print(f'\t\tAllocation Record List {self.H=}')

        # cost at edge node: busy VMs at Pf, idle VMs at Pe
        Ce = (self.E-self.et)*self.Pf + (self.et)*self.Pe

        # cost at private cloud
        Cpri = c*self.Pc + Ce
        if verb:
            print(f'\tCost At Edge Node, {Ce=}')
            print(f'\tCost At Private Cloud, {Cpri=}')

        # ---- end of round: age every record and release the expired ones ----
        n=0 # no of busy vms (waiting to be released)
        active = []
        for rec in self.H:
            rec[-1] -= 1
            # `<= 0` rather than `== 0`: a record created with a non-positive
            # duration would otherwise count down forever and leak its VMs.
            if rec[-1] <= 0:
                n += rec[0] # reclaim
            else:
                active.append(rec)
        # Slice assignment keeps the identity of self.H for external holders.
        self.H[:] = active

        if verb:
            print(f'\tUpdated Allocation Record List {self.H=}')
            print(f'\tVMs waiting to be released {n=}')

        self.et += n # update available
        if verb: print(f'\tVMs available at next time slot {self.et=}')
        return Cpri


## Define Environment for RL

In [None]:
import numpy as np
class Environment: # Encapsulates an MDP for agent interaction
    """Episodic agent-facing wrapper around a ``System`` simulator.

    The state is the integer vector ``S = (et, t, d, l)``: available edge
    VMs, elapsed steps, and the pending demand (VM count, duration). An
    action is an index into ``A``, the list of cloud-allocation ratios.
    Demands are drawn uniformly from all ``(d, l)`` pairs with
    ``1 <= d <= D`` and ``1 <= l <= L``; an episode lasts until the
    simulator's step counter reaches ``T``.
    """

    def __init__(self, E, D, L, T, Pefc, seed=None ) -> None:
        """Build the demand table, action set, simulator and seeded RNG."""
        self.E, self.D, self.L, self.T = E, D, L, T

        self.nS = self.T * self.D * self.L * (self.E + 1)
        self.nD = self.D * self.L

        # DL[i, j] is the demand (d=i+1, l=j+1); DL_ is the same table
        # flattened to one (d, l) pair per row.
        demand_grid = [[(d, l) for l in range(1, L + 1)] for d in range(1, D + 1)]
        self.DL = np.array(demand_grid)
        grid_shape = self.DL.shape
        self.DL_ = self.DL.reshape(grid_shape[0] * grid_shape[1], grid_shape[2])
        self.dls = np.arange(len(self.DL_))  # row indices to sample from

        self.S = np.zeros(4, dtype=int) #(etdl)
        # Ratios 0.0, 0.1, ..., 1.0 (tenths, identical to the decimal literals).
        self.A = [tenth / 10 for tenth in range(11)]
        self.nA = len(self.A)
        self.sim = System(E, Pefc)
        self.rng = np.random.default_rng(seed)

    def reset(self):
        """Start a new episode and return the (shared, mutable) state array."""
        self.sim.reset()
        first_demand = self.DL_[self.rng.choice(self.dls)]
        self.S[0] = self.sim.et
        self.S[1] = self.sim.t
        self.S[2:] = first_demand
        return self.S

    def step(self, action):
        """Apply action index ``action`` to the pending demand.

        Returns ``(S, reward, done)`` where the reward is the negated step
        cost as a float, and ``S`` is the same array object that is updated
        in place on every call. In the terminal state the demand entries
        are zeroed and no new demand is sampled.
        """
        ratio = self.A[action]
        cost = self.sim.step(d=self.S[2], l=self.S[3], a=ratio)
        done = not (self.sim.t < self.T)

        self.S[0] = self.sim.et
        self.S[1] = self.sim.t
        if done:
            self.S[2:] = 0
        else:
            self.S[2:] = self.DL_[self.rng.choice(self.dls)]
        reward = float(-cost)
        return self.S, reward, done

## Initialize Environment

In [None]:
# Build the environment: 19 edge VMs, demands of 1-4 VMs lasting 1-3 slots,
# 3-step episodes, cost triple (Pe, Pf, Pc) = (0.03, 0.20, 3.00), and a
# demand generator seeded with 13.
env = Environment(E=19, D=4, L=3, T=3, Pefc=(0.03, 0.20, 3.00), seed=13)
env.sim.verbose=False  # keep the simulator's per-step trace switched off
env.__dict__  # bare expression: shown as the cell output in a notebook

## Q-Learning Algorithm

In [None]:
def Q_Learning(
        mdp,    # environment exposing reset(), step(a) and nA
        πe,     # behaviour policy: state tuple -> action index
        α,      # learning rate
        γ,      # discount factor
        N,      # number of learning rounds (environment steps)
    ):
    """Tabular Q-learning over ``N`` environment steps.

    States are keyed by ``tuple(state)``; a state gets an all-zero row of
    ``mdp.nA`` action values the first time it is observed. Whenever an
    episode ends the environment is reset and learning continues, so ``N``
    counts steps, not episodes.

    Returns:
        dict mapping each observed state tuple to its list of Q-values.
    """
    def blank_row():
        # One zero-initialised value per action.
        return [0.0] * mdp.nA

    Q = {}
    state = tuple(mdp.reset())
    Q.setdefault(state, blank_row())

    for _ in range(N):
        action = πe(state)
        observation, reward, done = mdp.step(action)
        successor = tuple(observation)
        Q.setdefault(successor, blank_row())

        # Blend the old estimate with the one-step bootstrapped target.
        target = reward + γ * max(Q[successor])
        Q[state][action] = (1-α) * Q[state][action] + (α) * target

        if not done:
            state = successor
            continue

        # Episode finished: restart from a fresh initial state.
        state = tuple(mdp.reset())
        Q.setdefault(state, blank_row())
    return Q


### Run Q-Learning Algorithm

In [None]:
# Learn from a uniformly random behaviour policy with learning rate 0.5,
# discount 1.0 and 1000 environment steps. The policy draws from its own
# seeded generator (instead of the unseeded global np.random state) so that,
# with a freshly built seeded environment, the learnt Q-table is reproducible.
explore_rng = np.random.default_rng(13)
q = Q_Learning(env, lambda s: int(explore_rng.integers(0, env.nA)), 0.5, 1.0, 1000)

### Learnt Q-Table

In [None]:
# Bare expression: a notebook shows the learnt table (state tuple -> Q-values).
q

### Learnt policy

In [None]:
def Q_Policy(mdp, Q):
    """Run one episode acting greedily on ``Q`` and return the total reward.

    Args:
        mdp: environment exposing ``reset()``, ``step(a)`` and ``nA``.
        Q: mapping from state tuple to a sequence of per-action values.

    Each step prints the state, its Q-values, the chosen action, the reward,
    the next state, the done flag and the running return. ``Q`` is not
    modified.
    """
    s = tuple(mdp.reset())
    done = False
    ret = 0.0
    while not done:
        # A state never visited during learning has no row; treat it as
        # all zeros (the same initial values learning would have given it)
        # rather than failing with KeyError.
        q = Q.get(s)
        if q is None:
            q = [0.0 for _ in range(mdp.nA)]
        a = np.argmax(q)
        s_, r,  done = mdp.step(a)
        s_ = tuple(s_)
        ret+=r
        print(f'{s=}, {q=}, {a=}, {r=}, {s_=}, {done=}, {ret=}')
        # Advance to the observed state so the next action is chosen from
        # the current state's values, not the initial state's.
        s = s_
    return ret

### Run Learnt policy

In [None]:
# Roll out one episode with the learnt Q-table and print its total reward
# (rewards are negated costs, so values closer to zero are better).
ret = Q_Policy(env, q)
print(ret)

# Environment Simulation

(for worked out examples)

In [None]:
# Stand-alone simulator with 80 edge VMs; reset() returns the instance itself,
# so S starts with et=80, t=0 and an empty record list.
S = System(E=80, Pefc=(0.03, 0.20, 3.00)).reset()
print(S.__dict__)

In [None]:
# Demand of 30 VMs for 2 slots with ratio 0.4: int(30*0.4) = 12 VMs come from
# the cloud and 18 from the edge. Run right after the reset cell, this leaves
# 62 edge VMs available and one record with 1 slot remaining.
cost = S.step(d=30, l=2, a=0.4)
print(S.__dict__)

In [None]:
# Demand of 10 VMs for 1 slot with ratio 0.7: 7 VMs from the cloud, 3 from the
# edge. When the cells are run in order, both outstanding records expire at
# the end of this step, so all 80 edge VMs are available again afterwards.
cost = S.step(d=10, l=1, a=0.7)
print(S.__dict__)

In [None]:
# Demand of 20 VMs for 2 slots with ratio 0.8: 16 VMs from the cloud, 4 from
# the edge; the new edge record still has 1 slot left after this step.
cost = S.step(d=20, l=2, a=0.8)
print(S.__dict__)