In [17]:
import numpy as np
from logger import setup_logger

In [24]:
# Logger obtained from the local `logger` module's setup_logger under the name "Agent";
# the random-policy cell below uses it to report each step's reward and action.
rl_logger = setup_logger("Agent")

# Create Bandit Class

In [19]:
class Bandit:
    """Stationary k-armed bandit with Gaussian rewards.

    Every arm ``a`` has a fixed true value ``q_star[a]`` that is drawn once
    from N(0, 1) when the bandit is built. Pulling an arm returns a noisy
    reward drawn from N(q_star[a], 1).

    Attributes:
        k: Number of arms.
        q_star: Array of length ``k`` with the true value of each arm.
        optimal_action: Index of the arm with the largest true value.
    """

    def __init__(self, k: int = 10, seed: "int | None" = None):
        """Draw the true arm values and record the best arm.

        Args:
            k: Number of arms; must be at least 1.
            seed: Optional seed for a private random generator owned by this
                bandit. When omitted, all draws come from NumPy's global
                random state, so ``np.random.seed`` keeps controlling the
                bandit exactly as before.

        Raises:
            ValueError: If ``k`` is smaller than 1.
        """
        if k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")

        self.k = k
        # None means "use NumPy's global random state" (legacy behaviour).
        self._rng = None if seed is None else np.random.default_rng(seed)
        self.q_star = self._normal(loc=0.0, size=k)
        self.optimal_action = np.argmax(self.q_star)

    def _normal(self, loc, size=None):
        """Draw unit-variance normal samples from the private or the global RNG."""
        source = np.random if self._rng is None else self._rng
        return source.normal(loc=loc, scale=1.0, size=size)

    def step(self, action: int) -> float:
        """Pull one arm and return its noisy reward.

        Args:
            action: Index of the arm to pull, in ``[0, k)``.

        Returns:
            A sample from N(q_star[action], 1).

        Raises:
            IndexError: If ``action`` is outside ``[0, k)``. Negative indices
                are rejected explicitly; plain array indexing would silently
                wrap them around to another arm.
        """
        if not 0 <= action < self.k:
            raise IndexError(
                f"action {action!r} is out of range for a {self.k}-armed bandit"
            )
        return self._normal(loc=self.q_star[action])

In [20]:
# Build the 10-armed bandit instance that the following cells inspect and interact with.
bandit = Bandit(k=10)

### Bandit Info

In [28]:
# Summarise the bandit: the number of arms, the true value q*(a) of every arm
# (rounded to two decimals), and the index of the arm with the highest true value.
print(f"#Arms: {bandit.k}\n")

for i, val in enumerate(bandit.q_star) :
    print(f"q*({i}) = {val:.2f}")

print(f"\nOptimal Action: {bandit.optimal_action}")

#Arms: 10

q*(0) = 0.82
q*(1) = -1.52
q*(2) = -0.43
q*(3) = -0.74
q*(4) = -0.70
q*(5) = -2.14
q*(6) = -0.63
q*(7) = 0.60
q*(8) = 2.56
q*(9) = 0.39

Optimal Action: 8


In [21]:
# Seed NumPy's global random state so the random draws in the cells below are repeatable.
# NOTE(review): `bandit` is constructed in an earlier cell, before this seed is set, so its
# q_star values (and hence the optimal action) still differ on every fresh kernel run —
# consider seeding before the bandit is created.
np.random.seed(42)

# Approach: Random Policy

In [25]:
# Random-policy baseline: for a handful of steps, pick an arm uniformly at random,
# pull it, and log the reward, the chosen arm, and whether it was the best arm.
for t in range(5) :
    # Sample over the bandit's actual number of arms rather than a hard-coded 10,
    # so the policy stays valid if the bandit is rebuilt with a different k.
    action = np.random.choice(bandit.k)
    reward = bandit.step(action)
    rl_logger.info(f"Reward: {reward}")
    rl_logger.info(f"Action: {action}")
    rl_logger.info("Optimal" if action == bandit.optimal_action else "Not Optimal")

2025-05-12 18:25:45,125 - Agent - INFO - Reward: -1.622660734104696
2025-05-12 18:25:45,127 - Agent - INFO - Action: 0
2025-05-12 18:25:45,127 - Agent - INFO - Not Optimal
2025-05-12 18:25:45,128 - Agent - INFO - Reward: 0.17539516086189688
2025-05-12 18:25:45,128 - Agent - INFO - Action: 2
2025-05-12 18:25:45,129 - Agent - INFO - Not Optimal
2025-05-12 18:25:45,129 - Agent - INFO - Reward: -0.6539301384183166
2025-05-12 18:25:45,130 - Agent - INFO - Action: 6
2025-05-12 18:25:45,130 - Agent - INFO - Not Optimal
2025-05-12 18:25:45,130 - Agent - INFO - Reward: -0.34779248458497514
2025-05-12 18:25:45,131 - Agent - INFO - Action: 4
2025-05-12 18:25:45,131 - Agent - INFO - Not Optimal
2025-05-12 18:25:45,131 - Agent - INFO - Reward: -0.9531689153062436
2025-05-12 18:25:45,132 - Agent - INFO - Action: 2
2025-05-12 18:25:45,132 - Agent - INFO - Not Optimal
