In [1]:
#Problem statement:
#Implementing a recommendation system using a k-armed bandit.
#The use case is a movie recommendation system that recommends a movie to a user based on which genres of movies they prefer to watch.

In [None]:
import random

# The arms of the bandit: one per movie genre that can be recommended
genres = [
    'action',
    'comedy',
    'drama',
    'sci-fi',
    'horror',
]

# Per-genre running state, keyed by genre name:
#   reward_estimates - current average reward observed for the genre
#   counts           - how many times the genre has been recommended
reward_estimates = dict.fromkeys(genres, 0.0)
counts = dict.fromkeys(genres, 0)

def choose_genre(epsilon):
    """
    Choose a genre using the epsilon-greedy strategy.

    With probability ``epsilon`` a genre is drawn uniformly at random from
    ``genres`` (explore). Otherwise one of the genres holding the highest
    current value in ``reward_estimates`` is returned (exploit).

    Args:
    epsilon (float): Exploration probability. Because ``random.random()``
        yields values in [0.0, 1.0), a value <= 0 never explores and a
        value >= 1 always explores.

    Returns:
    str: Chosen genre.
    """
    if random.random() < epsilon:
        # Explore: Choose a random genre
        return random.choice(genres)

    # Exploit: Choose among the genres with the highest estimated reward.
    # max() on its own returns the first maximal key, so ties (for example
    # the initial state where every estimate is 0.0) would always resolve to
    # the first genre. Breaking ties at random removes that ordering bias.
    best_estimate = max(reward_estimates.values())
    leaders = [
        genre
        for genre, estimate in reward_estimates.items()
        if estimate == best_estimate
    ]
    return random.choice(leaders)
def update_reward(genre, reward):
    """
    Fold one observed reward into the running average for a genre.

    Increments the genre's entry in ``counts`` and then moves its entry in
    ``reward_estimates`` toward ``reward`` by 1/count of the difference,
    which keeps the estimate equal to the mean of all rewards seen so far.

    Args:
    genre (str): The genre to update.
    reward (int): The reward received (0 or 1).
    """
    # Record the new pull first; the step size below depends on it
    pulls = counts[genre] + 1
    counts[genre] = pulls

    # Incremental mean: new = old + (sample - old) / n
    previous = reward_estimates[genre]
    reward_estimates[genre] = previous + (reward - previous) / pulls

def simulate_recommendation_process(n_interactions, epsilon=0.4, reward_fn=None):
    """
    Simulate a movie recommendation process using epsilon-greedy strategy.

    Each interaction picks a genre with ``choose_genre``, obtains a reward
    for it, feeds the reward to ``update_reward`` and prints the outcome
    together with the current ``reward_estimates``.

    Args:
    n_interactions (int): Number of recommendations to simulate.
    epsilon (float): Exploration probability, default is 0.4.
    reward_fn (callable, optional): Called as ``reward_fn(genre)`` and
        expected to return the reward for recommending that genre. When
        omitted, the reward is drawn uniformly from {0, 1} regardless of
        the genre, so it carries no information about genre preference.
    """
    for _ in range(n_interactions):
        genre = choose_genre(epsilon)
        if reward_fn is None:
            # Simulating a reward based on user engagement (assume binary reward for simplicity)
            reward = random.choice([0, 1])
        else:
            # Caller-supplied reward model, e.g. per-genre click probabilities
            reward = reward_fn(genre)
        update_reward(genre, reward)
        print(f"Recommended: {genre}, Reward: {reward}, Updated Estimates: {reward_estimates}")

# Example usage: simulate 20 recommendations with a 0.3 exploration probability
simulate_recommendation_process(n_interactions=20, epsilon=0.3)