In [1]:
import numpy as np
import random

class KArmBandit:
    def __init__(self, k, epsilon=0.1):
        self.k = k  # Number of articles (arms)
        self.epsilon = epsilon  # Exploration probability
        self.q_values = np.zeros(k)  # Estimated value of each arm
        self.arm_counts = np.zeros(k)  # Counts for each arm

    def select_arm(self):
        # Epsilon-greedy policy
        if random.random() < self.epsilon:
            return random.randint(0, self.k - 1)  # Explore: random selection
        else:
            return np.argmax(self.q_values)  # Exploit: select best arm

    def update_q_value(self, arm, reward):
        # Update the action-value estimate using incremental formula
        self.arm_counts[arm] += 1
        self.q_values[arm] += (reward - self.q_values[arm]) / self.arm_counts[arm]

    def simulate_rewards(self, views_range=(1, 100)):
        """
        Simulates views for each arm as a random number within a specified range.
        In a real system, this would be based on actual view counts.
        """
        return np.random.randint(views_range[0], views_range[1], size=self.k)



In [4]:
# Parameters
num_arms = 10  # Number of articles
num_iterations = 1000
bandit = KArmBandit(num_arms)

# Simulate user views for each article (reward)
for _ in range(num_iterations):
    # Select an article (arm) to promote
    chosen_arm = bandit.select_arm()
    
    # Simulate view counts for each article as reward
    rewards = bandit.simulate_rewards()
    reward = rewards[chosen_arm]
    
    # Update the Q-value (estimated reward) for the chosen arm
    bandit.update_q_value(chosen_arm, reward)

    # Print or log data as needed (optional)
    print(f"Chosen Article: {chosen_arm}, Views: {reward}, Q-Values: {bandit.q_values}")

# Final Q-values (estimated views for each article)
print("Estimated views for each article:", bandit.q_values)


Chosen Article: 0, Views: 15, Q-Values: [15.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
Chosen Article: 0, Views: 20, Q-Values: [17.5  0.   0.   0.   0.   0.   0.   0.   0.   0. ]
Chosen Article: 0, Views: 54, Q-Values: [29.66666667  0.          0.          0.          0.          0.
  0.          0.          0.          0.        ]
Chosen Article: 5, Views: 85, Q-Values: [29.66666667  0.          0.          0.          0.         85.
  0.          0.          0.          0.        ]
Chosen Article: 5, Views: 50, Q-Values: [29.66666667  0.          0.          0.          0.         67.5
  0.          0.          0.          0.        ]
Chosen Article: 5, Views: 59, Q-Values: [29.66666667  0.          0.          0.          0.         64.66666667
  0.          0.          0.          0.        ]
Chosen Article: 5, Views: 19, Q-Values: [29.66666667  0.          0.          0.          0.         53.25
  0.          0.          0.          0.        ]
Chosen Article: 5, Views: 16, Q-Values: 