In [4]:
import numpy as np

def alexey_strategy(simulations, rounds, p):
    """Estimate the mean total reward of Thompson sampling on a Bernoulli bandit.

    Each arm starts with a uniform Beta(1, 1) prior.  Every round one value is
    sampled per arm from Beta(1 + wins, 1 + losses), the arm with the largest
    sample is played, and its single Bernoulli outcome both updates the
    posterior counts and counts towards the reward.

    Args:
        simulations: Number of independent runs to average over.
        rounds: Number of pulls in each run.
        p: Sequence of success probabilities, one per arm.

    Returns:
        Total reward per run, averaged over all runs.

    Raises:
        ZeroDivisionError: If ``simulations`` is 0.
        ValueError: If ``p`` is empty and ``rounds`` is positive (no arm to pick).

    Randomness comes from the global ``np.random`` state.
    """
    n_arms = len(p)
    total_reward = 0.0

    for _sim in range(simulations):
        wins = np.zeros(n_arms, dtype=int)
        losses = np.zeros(n_arms, dtype=int)

        for _round in range(rounds):
            # One vectorized draw gives a posterior sample for every arm.
            theta = np.random.beta(1 + wins, 1 + losses)
            chosen_arm = np.argmax(theta)

            # Draw the outcome exactly once: the reward that is paid out must
            # be the same outcome the posterior is updated with.
            if np.random.rand() < p[chosen_arm]:
                wins[chosen_arm] += 1
            else:
                losses[chosen_arm] += 1

        # Every win is worth one unit, so the run's reward is the win count.
        total_reward += wins.sum()

    return total_reward / simulations


def polina_strategy(simulations, rounds, p):
    """Estimate the mean total reward of a greedy posterior-mean strategy.

    Each round the arm with the highest estimate
    ``(1 + wins) / (2 + wins + losses)`` is played; ties are broken uniformly
    at random.  The single Bernoulli outcome of the pull both updates the
    counts and counts towards the reward.

    Args:
        simulations: Number of independent runs to average over.
        rounds: Number of pulls in each run.
        p: Sequence of success probabilities, one per arm.

    Returns:
        Total reward per run, averaged over all runs.

    Raises:
        ZeroDivisionError: If ``simulations`` is 0.
        ValueError: If ``p`` is empty and ``rounds`` is positive (no arm to pick).

    Randomness comes from the global ``np.random`` state.
    """
    n_arms = len(p)
    total_reward = 0.0

    for _sim in range(simulations):
        wins = np.zeros(n_arms, dtype=int)
        losses = np.zeros(n_arms, dtype=int)

        for _round in range(rounds):
            # Estimated success probability of every arm, computed in one shot.
            exp_values = (1 + wins) / (2 + wins + losses)
            best_arms = np.flatnonzero(exp_values == exp_values.max())

            # Only consume randomness for tie-breaking when there is a tie.
            if len(best_arms) == 1:
                chosen_arm = best_arms[0]
            else:
                chosen_arm = np.random.choice(best_arms)

            # Draw the outcome exactly once: the reward that is paid out must
            # be the same outcome the estimate is updated with.
            if np.random.rand() < p[chosen_arm]:
                wins[chosen_arm] += 1
            else:
                losses[chosen_arm] += 1

        # Every win is worth one unit, so the run's reward is the win count.
        total_reward += wins.sum()

    return total_reward / simulations


# Experiment setup: number of Monte Carlo runs, pulls per run, and the
# success probability of each arm.
simulations = 10000
rounds = 200
p = [0.3, 0.4, 0.5]

# Run Alexey's strategy first, then Polina's, on the same configuration.
alexey_result = alexey_strategy(simulations, rounds, p)
polina_result = polina_strategy(simulations, rounds, p)

# Report both estimates rounded to two decimals.
print(f"а) Ожидаемый выигрыш Алексея: {alexey_result:.2f} фридрихсдоров")
print(f"б) Ожидаемый выигрыш Полины: {polina_result:.2f} фридрихсдоров")

а) Ожидаемый выигрыш Алексея: 90.43 фридрихсдоров
б) Ожидаемый выигрыш Полины: 90.50 фридрихсдоров
