<a href="https://colab.research.google.com/github/LuisPJ/An-lisis-de-datos/blob/main/Algoritmo_%CE%B5_Greedy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np

class EpsilonGreedy:
    def __init__(self, n_restaurants, epsilon):
        self.n_restaurants = n_restaurants
        self.epsilon = epsilon
        self.visits = np.zeros(n_restaurants)
        self.satisfaction = np.zeros(n_restaurants)

    def choose_restaurant(self):
        if np.random.random() < self.epsilon:
            return np.random.choice(self.n_restaurants)  # Explore
        else:
            return np.argmax(self.satisfaction / (self.visits + 1e-5))  # Exploit

    def update(self, restaurant, score):
        self.visits[restaurant] += 1
        self.satisfaction[restaurant] += score

n_restaurants = 3
epsilon = 0.1
n_days = 120

true_avg_satisfaction = np.array([8, 6, 9])
true_stddev_satisfaction = np.array([1, 2, 1.5])

total_satisfaction_arr = []
for i in range(50):  # Run the simulation 50 times
    epsilon_greedy_restaurant = EpsilonGreedy(n_restaurants, epsilon)
    total_satisfaction = 0

    for _ in range(n_days):
        restaurant = epsilon_greedy_restaurant.choose_restaurant()
        score = np.random.normal(loc=true_avg_satisfaction[restaurant], scale=true_stddev_satisfaction[restaurant])
        epsilon_greedy_restaurant.update(restaurant, score)
        total_satisfaction += score

    print("Total Satisfaction (Epsilon-Greedy):", total_satisfaction)
    total_satisfaction_arr.append(total_satisfaction)

# Calculate average satisfaction
np.mean(total_satisfaction_arr) / n_days, np.std(total_satisfaction_arr) / n_days

Total Satisfaction (Epsilon-Greedy): 1012.7696899637901
Total Satisfaction (Epsilon-Greedy): 1016.3702814274035
Total Satisfaction (Epsilon-Greedy): 1051.189279560986
Total Satisfaction (Epsilon-Greedy): 1016.7121199894984
Total Satisfaction (Epsilon-Greedy): 964.4418855428883
Total Satisfaction (Epsilon-Greedy): 1076.5909733970334
Total Satisfaction (Epsilon-Greedy): 1082.5146453146888
Total Satisfaction (Epsilon-Greedy): 1009.1521712164861
Total Satisfaction (Epsilon-Greedy): 1049.0072720824999
Total Satisfaction (Epsilon-Greedy): 1046.9845206426348
Total Satisfaction (Epsilon-Greedy): 1001.9924138888967
Total Satisfaction (Epsilon-Greedy): 1080.8838033416232
Total Satisfaction (Epsilon-Greedy): 947.233817729299
Total Satisfaction (Epsilon-Greedy): 1024.7795448552092
Total Satisfaction (Epsilon-Greedy): 1056.5944841957887
Total Satisfaction (Epsilon-Greedy): 979.9630565289017
Total Satisfaction (Epsilon-Greedy): 993.5580333891783
Total Satisfaction (Epsilon-Greedy): 1059.002940913439

(8.54651309885245, 0.3557040039071172)

In [2]:
import numpy as np

class UCB:
    def __init__(self, n_restaurants):
        self.n_restaurants = n_restaurants
        self.visits = np.zeros(n_restaurants)
        self.satisfaction = np.zeros(n_restaurants)
        self.total_trials = 0

    def choose_restaurant(self):
        if self.total_trials < self.n_restaurants:
            return self.total_trials  # First, visit each restaurant at least once

        ucb_values = np.zeros(self.n_restaurants)
        for restaurant in range(self.n_restaurants):
            avg_score = self.satisfaction[restaurant] / (self.visits[restaurant] + 1e-5)
            confidence_bound = np.sqrt(2 * np.log(self.total_trials + 1) / (self.visits[restaurant] + 1e-5))
            ucb_values[restaurant] = avg_score + confidence_bound

        return np.argmax(ucb_values)

    def update(self, restaurant, score):
        self.visits[restaurant] += 1
        self.satisfaction[restaurant] += score
        self.total_trials += 1

n_restaurants = 3
n_days = 120

true_avg_satisfaction = np.array([8, 6, 9])
true_stddev_satisfaction = np.array([1, 2, 1.5])

total_satisfaction_arr = []
for i in range(50):  # Run the simulation 50 times
    ucb_restaurant = UCB(n_restaurants)
    total_satisfaction = 0

    for _ in range(n_days):
        restaurant = ucb_restaurant.choose_restaurant()
        score = np.random.normal(loc=true_avg_satisfaction[restaurant], scale=true_stddev_satisfaction[restaurant])
        ucb_restaurant.update(restaurant, score)
        total_satisfaction += score

    print("Total Satisfaction (UCB):", total_satisfaction)
    total_satisfaction_arr.append(total_satisfaction)

# Calculate average satisfaction
np.mean(total_satisfaction_arr) / n_days, np.std(total_satisfaction_arr) / n_days

Total Satisfaction (UCB): 1069.8373828316141
Total Satisfaction (UCB): 1048.1151443608094
Total Satisfaction (UCB): 1032.8295897447383
Total Satisfaction (UCB): 1068.2554158643113
Total Satisfaction (UCB): 1068.6768946866805
Total Satisfaction (UCB): 1058.8944307274476
Total Satisfaction (UCB): 1056.8468642597259
Total Satisfaction (UCB): 1086.056020817943
Total Satisfaction (UCB): 1077.7416327166643
Total Satisfaction (UCB): 1046.6865651024461
Total Satisfaction (UCB): 1075.3571595715218
Total Satisfaction (UCB): 1063.7791970040853
Total Satisfaction (UCB): 1068.857798327497
Total Satisfaction (UCB): 1081.8321924172405
Total Satisfaction (UCB): 1068.9791285261324
Total Satisfaction (UCB): 1049.416645886939
Total Satisfaction (UCB): 1074.6430903508565
Total Satisfaction (UCB): 1085.1024312214724
Total Satisfaction (UCB): 1076.9972928072698
Total Satisfaction (UCB): 1067.697402649218
Total Satisfaction (UCB): 1055.871211003861
Total Satisfaction (UCB): 1046.280574509486
Total Satisfacti

(8.876820452585429, 0.1721559508072293)

In [3]:
import numpy as np

class ThompsonSampling:
    def __init__(self, n_restaurants):
        self.n_restaurants = n_restaurants
        self.visits = np.zeros(n_restaurants)
        self.satisfaction = np.zeros(n_restaurants)
        self.alpha = np.ones(n_restaurants)  # Beta distribution parameters
        self.beta = np.ones(n_restaurants)

    def choose_restaurant(self):
        sampled_values = np.random.beta(self.alpha, self.beta)
        return np.argmax(sampled_values)

    def update(self, restaurant, score):
        self.visits[restaurant] += 1
        self.satisfaction[restaurant] += score
        # Update the beta distribution based on the satisfaction score
        if score > np.mean(self.satisfaction / (self.visits + 1e-5)):
            self.alpha[restaurant] += 1  # success
        else:
            self.beta[restaurant] += 1  # failure

n_restaurants = 3
n_days = 120

true_avg_satisfaction = np.array([8, 6, 9])
true_stddev_satisfaction = np.array([1, 2, 1.5])

total_satisfaction_arr = []
for i in range(50):  # Run the simulation 50 times
    thompson_sampling_restaurant = ThompsonSampling(n_restaurants)
    total_satisfaction = 0

    for _ in range(n_days):
        restaurant = thompson_sampling_restaurant.choose_restaurant()
        score = np.random.normal(loc=true_avg_satisfaction[restaurant], scale=true_stddev_satisfaction[restaurant])
        thompson_sampling_restaurant.update(restaurant, score)
        total_satisfaction += score

    print("Total Satisfaction (Thompson Sampling):", total_satisfaction)
    total_satisfaction_arr.append(total_satisfaction)

# Calculate average satisfaction
np.mean(total_satisfaction_arr) / n_days, np.std(total_satisfaction_arr) / n_days

Total Satisfaction (Thompson Sampling): 1024.3225121242926
Total Satisfaction (Thompson Sampling): 1072.9072283949095
Total Satisfaction (Thompson Sampling): 1064.0822541040473
Total Satisfaction (Thompson Sampling): 1056.5705609117665
Total Satisfaction (Thompson Sampling): 1073.4409688865824
Total Satisfaction (Thompson Sampling): 1034.3491037788037
Total Satisfaction (Thompson Sampling): 1039.4856300545653
Total Satisfaction (Thompson Sampling): 1044.9703878020102
Total Satisfaction (Thompson Sampling): 1004.19534845405
Total Satisfaction (Thompson Sampling): 1069.7141904855023
Total Satisfaction (Thompson Sampling): 1046.3007821060528
Total Satisfaction (Thompson Sampling): 976.4279335823903
Total Satisfaction (Thompson Sampling): 1031.9127797616666
Total Satisfaction (Thompson Sampling): 958.0711766688589
Total Satisfaction (Thompson Sampling): 1061.1139787208724
Total Satisfaction (Thompson Sampling): 1047.2318493429475
Total Satisfaction (Thompson Sampling): 1050.0739064108318
T

(8.624916080063525, 0.26181585558360987)