In [1]:
# Class for the medical bottles experiment.
# First version by JAGR.
# Uses the sample-average method to update action values (Q-values).

import random

class bottle:
    """One bottle (action) in the medical bottles experiment.

    A bottle succeeds with a fixed probability ``real_prob``. It also keeps a
    running estimate of its own value, ``q_value``, computed as the sample
    average of every reward passed to :meth:`learn`.

    Attributes:
        name: Label shown when the bottle is printed.
        real_prob: Success probability used by :meth:`try_bottle`.
        q_value: Current value estimate; 0.0 until the first ``learn`` call.
        count: Number of rewards recorded through ``learn``.
        accum_reward: Sum of all rewards recorded through ``learn``.
    """

    def __init__(self, name: str, real_prob: float):
        """Create a bottle with no recorded experience.

        Args:
            name: Label for the bottle.
            real_prob: Probability that a single try yields reward 1.
        """
        self.name = name
        self.real_prob = real_prob

        self.q_value = 0.0       # Initial estimate of the value of the bottle
        self.count = 0           # Number of rewards recorded so far
        self.accum_reward = 0.0  # Total reward obtained from this bottle

    def try_bottle(self) -> int:
        """Simulate one try of the bottle.

        Returns:
            1 if a uniform draw from ``random.random()`` falls below
            ``real_prob``, otherwise 0. Does not modify the bottle's estimate;
            pass the result to :meth:`learn` to record it.
        """
        return 1 if random.random() < self.real_prob else 0

    def learn(self, reward: float) -> None:
        """Record ``reward`` and refresh ``q_value`` as the sample average.

        Args:
            reward: Reward obtained from the latest try of this bottle.
        """
        self.count += 1
        self.accum_reward += reward
        # count has just been incremented from a non-negative value, so it is
        # at least 1 here and the division needs no guard.
        self.q_value = self.accum_reward / self.count

    def __str__(self) -> str:
        """Return a one-line summary with the estimate and the usage count."""
        return f"Bottle {self.name} -> Q: {self.q_value:.4f} (Chosen {self.count} times)"
    


In [None]:
# Create bottles with different real probabilities

# Seed the global RNG so that Restart & Run All reproduces the same numbers.
# Set SEED = None to seed from system entropy and get a different run each time.
SEED = 42
random.seed(SEED)

bottles = [
    bottle("P", 0.25),
    bottle("Y", 0.75),
    bottle("B", 0.50),
]

# Pure exploration: at every step pick a bottle uniformly at random, try it,
# and feed the observed reward back into that bottle's sample-average estimate.
n_steps = 10000
for _ in range(n_steps):
    selected_bottle = random.choice(bottles)
    reward = selected_bottle.try_bottle()
    selected_bottle.learn(reward)

# Report the learned estimate and usage count of each bottle.
for current_bottle in bottles:
    print(current_bottle)
