## Q values visualization
### $\rho^{*} = \lim_{N \rightarrow \infty} \frac{1}{N} \mathbb{E}_{\pi^*}\left[\sum_{i=1}^{N} c_i\right]$, which is the long-run average cost per step under the optimal policy $\pi^*$
### $Q^{\pi^*}(s, a) = \sum_{i=1}^{\infty} \mathbb{E}_{\pi^*}\big[c_i - \rho^{*} \ \big\vert\ s, a \big]$

In [None]:
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
from scipy.optimize import fsolve, minimize
from scipy.integrate import quad
from scipy import linspace, meshgrid, arange, empty, concatenate, newaxis, shape
from collections import deque
import nbimporter

In [1]:
# Returns transition probabilities from any state (n1, n2)
def transition_probabilities(model_pars, state, action):
    """Return the one-step transition probabilities out of ``state``.

    Parameters
    ----------
    model_pars : sequence
        ``(lam, mu, cores, p1, p2, alpha, M)``. ``p1`` and ``p2`` are passed
        to ``speed_up`` for the first and second queue respectively
        (presumably per-class speed-up parameters; confirm against the
        definition of ``speed_up``).
    state : sequence of two ints
        Current occupancy ``(n1, n2)``.
    action : scalar
        Anything ``float()`` accepts. ``cores * action`` is used for the
        first queue and ``cores * (1 - action)`` for the second.

    Returns
    -------
    list
        Five probabilities, in the order: n1 increases by one, n2 increases
        by one, n1 decreases by one, n2 decreases by one, state unchanged.
    """
    lam, mu, cores, p1, p2, alpha, M = model_pars
    n1, n2 = state

    action = float(action)

    # The event probabilities get their own names. Reusing ``p1``/``p2`` for
    # them would overwrite the model parameters before they reach speed_up().
    arrival_1 = lam*alpha if n1 < M else 0
    arrival_2 = lam*(1-alpha) if n2 < M else 0

    # The n > 0 guards also protect the division by n1 / n2.
    if n1 > 0:
        departure_1 = min(n1, cores*action)*mu*speed_up(p1, cores*action/n1)
    else:
        departure_1 = 0
    if n2 > 0:
        departure_2 = min(n2, cores*(1-action))*mu*speed_up(p2, cores*(1-action)/n2)
    else:
        departure_2 = 0

    # Whatever probability mass is left keeps the state unchanged.
    stay = 1 - arrival_1 - arrival_2 - departure_1 - departure_2
    return [arrival_1, arrival_2, departure_1, departure_2, stay]

# Samples the successor of the current state when the given action is applied
def next_state(model_pars, current_state, action):
    """Draw a random next state from ``current_state`` under ``action``.

    The five candidate successors are: n1 + 1, n2 + 1, n1 - 1, n2 - 1 and
    "no change". One of them is drawn with the probabilities returned by
    ``transition_probabilities(model_pars, current_state, action)``.

    Returns the chosen successor as a two-element list ``[n1, n2]``.
    """
    n1, n2 = current_state
    probs = transition_probabilities(model_pars, current_state, action)

    # Candidates are listed in the same order as the probability vector.
    candidates = [
        [n1+1, n2],
        [n1, n2+1],
        [n1-1, n2],
        [n1, n2-1],
        [n1, n2],
    ]

    drawn = np.random.choice(len(candidates), size=1, p=probs)
    return candidates[drawn[0]]

# Calculates average cost under policy pi
def empirical_average_cost(model_pars, pi, n_iter=100000):
    """Estimate the average per-step cost of policy ``pi`` by simulation.

    A single trajectory is simulated from state ``[0, 0]``. At every step the
    action is read from ``pi[n1, n2]``, the next state is sampled with
    ``next_state``, and the cost ``n1 + n2`` of the state just reached is
    accumulated.

    Parameters
    ----------
    model_pars : sequence
        Model parameters, forwarded unchanged to ``next_state``.
    pi : 2-D array-like
        Policy table indexed as ``pi[n1, n2]``.
    n_iter : int, optional
        Number of simulated transitions (default 100000).

    Returns
    -------
    float
        Accumulated cost divided by ``n_iter``.

    Raises
    ------
    ValueError
        If ``n_iter`` is not positive (the average would be undefined).
    """
    if n_iter <= 0:
        raise ValueError("n_iter must be a positive integer")

    state = [0, 0]
    empirical_total_cost = 0
    for _ in range(n_iter):
        action = pi[state[0], state[1]]
        state = next_state(model_pars, state, action)
        empirical_total_cost += (state[0] + state[1])
    return empirical_total_cost/n_iter

def Q(model_pars, avg_cost, pi, n_iter=10000):
    """Estimate relative values for every start state by simulation.

    For each start state ``[i, j]`` with ``0 <= i, j <= M``, one trajectory of
    ``n_iter`` transitions is simulated. At every step the action is read from
    ``pi[n1, n2]`` for the current state, and ``(n1 + n2) - avg_cost`` of the
    state just reached is accumulated.

    Parameters
    ----------
    model_pars : sequence
        Model parameters, forwarded to ``next_state``. The last entry is
        ``M``, the largest index along each axis of the result.
    avg_cost : float
        Baseline subtracted from every per-step cost.
    pi : 2-D array-like
        Policy table indexed as ``pi[n1, n2]``.
    n_iter : int, optional
        Number of simulated transitions per start state (default 10000).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(M+1, M+1)``. Entry ``[i, j]`` is the accumulated
        relative cost from start state ``[i, j]`` divided by ``n_iter``.

    Raises
    ------
    ValueError
        If ``n_iter`` is not positive.
    """
    if n_iter <= 0:
        raise ValueError("n_iter must be a positive integer")

    *_, M = model_pars

    Q_values = np.zeros((M+1, M+1))

    for i in range(M+1):
        for j in range(M+1):
            state = [i, j]
            total_relative_cost = 0
            for _ in range(n_iter):
                # Look up the action for the *current* state. Passing the
                # whole policy table as the action cannot be converted to a
                # scalar downstream.
                action = pi[state[0], state[1]]
                state = next_state(model_pars, state, action)
                total_relative_cost += (state[0] + state[1]) - avg_cost
            # NOTE(review): the total is divided by n_iter, so entries are the
            # single-trajectory sum scaled by 1/n_iter, not the sum itself.
            # Confirm this scaling is intended.
            Q_values[i, j] = total_relative_cost/n_iter
        print("Q values for n1 = ", i, " are done")

    return Q_values

In [None]:
# Step 1: estimate the average cost of the policy by simulation.
empirical_avg_cost = empirical_average_cost(model_pars=model_pars, pi=pi_optimal)
print("Average cost = ", empirical_avg_cost)
print(" ")

# Step 2: use that estimate as the baseline for the per-state values.
Q_values = Q(model_pars=model_pars, avg_cost=empirical_avg_cost, pi=pi_optimal)

In [None]:
# Create meshgrid for i, j values
# Q_values is indexed as Q_values[n1, n2], so the grids must use matrix ('ij')
# indexing. With the default 'xy' indexing the surface is drawn transposed
# relative to the n1/n2 axis labels.
n1_values = np.arange(M+1)
n2_values = np.arange(M+1)
N1, N2 = np.meshgrid(n1_values, n2_values, indexing='ij')

# Create 3D plot
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, projection='3d')

# Plot the surface: the height at (n1, n2) is Q_values[n1, n2]
ax.plot_surface(N1, N2, Q_values, cmap='viridis')

# Labels and title
ax.set_xlabel('n1')
ax.set_ylabel('n2')
ax.set_zlabel('Q[n1,n2]')
ax.set_title('3D Plot of Q[n1,n2]')

plt.show()

In [None]:
# Draw one curve of Q against n2 for each selected n1 (a row of Q_values).
# NOTE(review): row 20 is hard-coded; this presumably relies on M >= 20.
x = np.arange(M+1)
for n1_fixed in (0, 20):
    plt.plot(x, Q_values[n1_fixed, :], label=f"Q[n1, n2] for n1 = {n1_fixed}")

plt.title("Q-values for different n1")
plt.xlabel("n2")
plt.ylabel("Q[n1, n2]")
plt.legend()
plt.show()