In [1]:
import numpy as np
import random

class MovieRecommenderKArmBandit:
    """Epsilon-greedy k-armed bandit in which every movie is one arm.

    The bandit tracks, per movie, the sum of observed rewards and the number
    of times the movie was recommended. With probability ``epsilon`` it
    recommends a uniformly random movie; otherwise it recommends the movie
    with the highest observed mean reward.
    """

    def __init__(self, num_movies, epsilon=0.1):
        """Create a bandit with no observations.

        Args:
            num_movies: Total number of movies (arms).
            epsilon: Exploration rate compared against ``random.random()``.
        """
        self.num_movies = num_movies  # Total number of movies (arms)
        self.epsilon = epsilon        # Exploration rate
        self.movie_rewards = np.zeros(num_movies)  # Sum of rewards per movie
        self.movie_counts = np.zeros(num_movies)   # Count of selections per movie

    def _average_rewards(self):
        """Return the exact mean reward per movie (0.0 for untried movies)."""
        # A masked divide avoids both the division-by-zero warning and the
        # bias that a "+ 1e-5" fudge in the denominator adds to every mean
        # (which made 1 reward out of 1 pull rank below 2 out of 2).
        return np.divide(
            self.movie_rewards,
            self.movie_counts,
            out=np.zeros_like(self.movie_rewards, dtype=float),
            where=self.movie_counts > 0,
        )

    def recommend_movie(self):
        """Pick a movie index with the epsilon-greedy rule.

        Returns:
            The chosen movie index as a plain ``int`` in both branches.
            Ties in the exploit branch resolve to the lowest index.
        """
        if random.random() < self.epsilon:
            # Explore: select a random movie
            return random.randrange(self.num_movies)
        # Exploit: select the movie with the highest average reward.
        # np.argmax yields a numpy integer; convert so callers always get int.
        return int(np.argmax(self._average_rewards()))

    def update_feedback(self, movie_id, reward):
        """Record ``reward`` as the outcome of recommending ``movie_id``."""
        self.movie_rewards[movie_id] += reward
        self.movie_counts[movie_id] += 1

    def print_recommendation_stats(self):
        """Print the average reward and selection count of every movie."""
        stats = zip(self._average_rewards(), self.movie_counts)
        for i, (average, count) in enumerate(stats):
            print(f"Movie {i}: Average Reward = {average:.2f}, Counts = {int(count)}")

# Simulate a recommendation environment
def simulate_movie_recommendation(recommender, num_rounds=1000):
    """Drive ``recommender`` through ``num_rounds`` simulated interactions.

    A hidden engagement probability is drawn for every movie and printed.
    In each round the recommender proposes a movie, a 0/1 reward is sampled
    from that movie's probability, and the reward is fed back through
    ``recommender.update_feedback``.
    """
    # Ground truth of the simulation: one engagement probability per movie
    true_engagement_rates = np.random.rand(recommender.num_movies)
    print("True Engagement Rates for Movies:", true_engagement_rates)

    for _round in range(num_rounds):
        choice = recommender.recommend_movie()

        # The simulated user engages with probability equal to the true rate
        engaged = random.random() < true_engagement_rates[choice]

        # Report the binary outcome back to the bandit
        recommender.update_feedback(choice, 1 if engaged else 0)

# Catalogue size: each movie is one arm of the bandit
num_movies = 10

# Build the epsilon-greedy recommender (explores 10% of the time)
recommender = MovieRecommenderKArmBandit(num_movies=num_movies, epsilon=0.1)

# Let it interact with the simulated audience for 1000 rounds
simulate_movie_recommendation(recommender=recommender, num_rounds=1000)

# Report what the recommender learned about every movie
print("\nRecommendation Statistics:")
recommender.print_recommendation_stats()


True Engagement Rates for Movies: [0.84158406 0.42403806 0.59145165 0.19289109 0.19929931 0.38544966
 0.06423425 0.29916287 0.90534737 0.28666259]

Recommendation Statistics:
Movie 0: Average Reward = 0.87, Counts = 158
Movie 1: Average Reward = 0.43, Counts = 7
Movie 2: Average Reward = 0.46, Counts = 13
Movie 3: Average Reward = 0.08, Counts = 12
Movie 4: Average Reward = 0.10, Counts = 10
Movie 5: Average Reward = 0.50, Counts = 8
Movie 6: Average Reward = 0.50, Counts = 4
Movie 7: Average Reward = 0.40, Counts = 5
Movie 8: Average Reward = 0.91, Counts = 772
Movie 9: Average Reward = 0.18, Counts = 11
