### Online Content Recommendation System

In [1]:
import random
import math


class User:
    """A recommendation target together with its per-content reward history."""

    def __init__(self, user_id):
        """Create a user identified by ``user_id`` with no recorded history."""
        self.user_id = user_id
        # content_id -> (cumulative reward, number of impressions)
        self.preferences = {}

    def update_preference(self, content_id, reward):
        """Add ``reward`` to the running total for ``content_id``.

        The impression count for that content is incremented by one. Content
        seen for the first time starts from a ``(0, 0)`` record.
        """
        # setdefault registers the zero record before the arithmetic runs.
        running_total, shown = self.preferences.setdefault(content_id, (0, 0))
        self.preferences[content_id] = (running_total + reward, shown + 1)

    def get_preference(self, content_id):
        """Return ``(cumulative reward, impressions)`` for ``content_id``.

        Content the user has never been shown yields ``(0, 0)``.
        """
        try:
            return self.preferences[content_id]
        except KeyError:
            return (0, 0)


class Content:
    """A recommendable item: an identifier plus the category it belongs to."""

    def __init__(self, content_id, category):
        """Store the item's ``content_id`` and ``category`` as attributes."""
        self.content_id, self.category = content_id, category


class UCB1Bandit:
    """Pick content for a user with the UCB1 multi-armed bandit rule.

    Each content item is treated as an arm. An arm's score is its mean
    observed reward plus an exploration bonus that shrinks as the arm
    accumulates impressions, so rarely shown items still get tried.
    """

    def __init__(self, exploration_constant=1.0):
        """Store the weight applied to the exploration bonus.

        Args:
            exploration_constant: Multiplier on the confidence term; larger
                values favour content with fewer impressions.
        """
        self.exploration_constant = exploration_constant

    def choose_content(self, user, content_options):
        """Return the option with the highest UCB1 score for ``user``.

        Args:
            user: Object exposing ``get_preference(content_id)``, which
                returns a ``(cumulative_reward, impressions)`` pair.
            content_options: Iterable of objects with a ``content_id``
                attribute. It may be a one-shot iterator; the objects do not
                need to be hashable.

        Returns:
            The chosen element of ``content_options``. Options with zero
            impressions score infinity, and ties go to the option that comes
            first in iteration order.

        Raises:
            ValueError: If ``content_options`` is empty.
        """
        # Materialise once: the options are needed for both the impression
        # total and the scoring pass, and a generator can only be read once.
        options = list(content_options)
        if not options:
            raise ValueError("content_options must contain at least one item")

        stats = [user.get_preference(option.content_id) for option in options]
        total_impressions = sum(impressions for _, impressions in stats)

        scores = [
            self._ucb_score(total_reward, impressions, total_impressions)
            for total_reward, impressions in stats
        ]
        # max() keeps the first maximal pair, so earlier options win ties.
        best_option, _ = max(zip(options, scores), key=lambda pair: pair[1])
        return best_option

    def _ucb_score(self, total_reward, impressions, total_impressions):
        """Return the UCB1 score for one option's reward statistics."""
        if impressions == 0:
            return float("inf")  # Explore untested content
        # impressions > 0 implies total_impressions >= 1, so log() is defined.
        mean_reward = total_reward / impressions
        bonus = self.exploration_constant * math.sqrt(
            math.log(total_impressions) / impressions
        )
        return mean_reward + bonus


# Example Usage
# Two users and three content items, each keyed by its own identifier.
users = {uid: User(uid) for uid in (1, 2)}

contents = {
    cid: Content(cid, category)
    for cid, category in (("A", "News"), ("B", "Sports"), ("C", "Tech"))
}

bandit = UCB1Bandit()

# The catalogue does not change during the simulation, so snapshot it once.
available_contents = list(contents.values())

for user_id, user in users.items():
    for _ in range(10):  # Simulate 10 rounds of recommendations
        chosen_content = bandit.choose_content(user, available_contents)
        reward = random.uniform(0, 1)  # Simulate a random reward
        print(f"User {user_id} recommended: {chosen_content.content_id}, Reward: {reward:.2f}")
        # Feed the observed reward back so later rounds can use it.
        user.update_preference(chosen_content.content_id, reward)

# Print final preferences
for user_id in users:
    print(f"User {user_id} Preferences: {users[user_id].preferences}")


User 1 recommended: A, Reward: 0.78
User 1 recommended: B, Reward: 0.62
User 1 recommended: C, Reward: 0.28
User 1 recommended: A, Reward: 0.99
User 1 recommended: B, Reward: 0.39
User 1 recommended: A, Reward: 0.94
User 1 recommended: A, Reward: 0.81
User 1 recommended: C, Reward: 0.00
User 1 recommended: A, Reward: 0.35
User 1 recommended: B, Reward: 0.38
User 2 recommended: A, Reward: 0.08
User 2 recommended: B, Reward: 0.79
User 2 recommended: C, Reward: 0.04
User 2 recommended: B, Reward: 0.17
User 2 recommended: B, Reward: 0.27
User 2 recommended: A, Reward: 0.69
User 2 recommended: C, Reward: 0.69
User 2 recommended: A, Reward: 0.29
User 2 recommended: C, Reward: 0.10
User 2 recommended: B, Reward: 0.31
User 1 Preferences: {'A': (3.869092427980904, 5), 'B': (1.3836313969589782, 3), 'C': (0.2826405323337502, 2)}
User 2 Preferences: {'A': (1.0541793168437796, 3), 'B': (1.539280713377267, 4), 'C': (0.8255414437975654, 3)}
