# In [None]:
# Example distribution model - HoaDNT@fe.edu.vn
import numpy as np

class Bandit:
    """Multi-armed bandit whose arms pay out normally distributed rewards."""

    def __init__(self, true_means):
        """Keep the per-arm reward means.

        Args:
            true_means: Indexable collection; entry ``i`` is the mean reward
                of arm ``i``. It is stored as-is (not copied).
        """
        self.true_means = true_means

    def pull_arm(self, arm):
        """Draw one reward for the given arm.

        Args:
            arm: Index into ``true_means`` of the arm to pull.

        Returns:
            A sample from a normal distribution centred on the arm's mean
            with a standard deviation of 1.
        """
        arm_mean = self.true_means[arm]
        return np.random.normal(loc=arm_mean, scale=1)

class DistributionModel:
    """Running per-arm estimate of the reward mean and variance.

    Attributes:
        num_arms: Number of arms being tracked.
        mean_rewards: Array of running mean rewards, one entry per arm
            (0 until the arm has been observed).
        variance_rewards: Array of running population variances, one entry
            per arm (1 until the arm has been observed).
        pull_counts: Array with the number of rewards seen for each arm.
    """

    def __init__(self, num_arms):
        """Create an empty model for ``num_arms`` arms."""
        self.num_arms = num_arms
        self.mean_rewards = np.zeros(num_arms)
        self.variance_rewards = np.ones(num_arms)
        self.pull_counts = np.zeros(num_arms, dtype=int)
        # Per-arm sum of squared deviations from the running mean; dividing
        # it by the pull count gives the variance.
        self._sq_dev_sums = np.zeros(num_arms)

    def update_distribution(self, arm, reward):
        """Fold one observed reward into the statistics of ``arm``.

        Uses Welford's online algorithm, so no reward history is stored and
        each arm is tracked independently of the others.

        Args:
            arm: Index of the arm that produced the reward.
            reward: The observed scalar reward.
        """
        self.pull_counts[arm] += 1
        n = self.pull_counts[arm]

        # Deviation from the mean *before* this reward is included.
        delta = reward - self.mean_rewards[arm]
        self.mean_rewards[arm] += delta / n

        # Multiply by the deviation from the *updated* mean (Welford's step).
        self._sq_dev_sums[arm] += delta * (reward - self.mean_rewards[arm])

        # Population variance (ddof=0), the same convention np.var uses.
        self.variance_rewards[arm] = self._sq_dev_sums[arm] / n

# Ground-truth mean reward of each arm (arm index == position in the list)
true_means = [1.0, 2.0]

# Environment that draws rewards from a unit-variance normal around those means
bandit = Bandit(true_means)

# Estimator with one mean/variance slot per arm
distribution_model = DistributionModel(len(true_means))

# Pick an arm uniformly at random on every step and feed the observed reward
# to the model
num_pulls = 1000
for _ in range(num_pulls):
    arm = np.random.randint(0, len(true_means))
    reward = bandit.pull_arm(arm)
    distribution_model.update_distribution(arm, reward)

# Report the statistics held by the model after all pulls
print("Updated Distribution Model:")
for label, values in (
    ("Mean Rewards:", distribution_model.mean_rewards),
    ("Variance of Rewards:", distribution_model.variance_rewards),
):
    print(label, values)