Multi-Armed Bandit Problem with UCB Algorithm

Pranav Rao 2211cs020420


In [1]:
import math
import random

class MultiArmedBandit:
    def __init__(self, n_arms):
        self.n_arms = n_arms
        self.counts = [0] * n_arms  # Number of times each arm is pulled
        self.values = [0.0] * n_arms  # Estimated rewards for each arm

    def select_arm(self):
        total_counts = sum(self.counts)
        if total_counts < self.n_arms:
            return total_counts  # Explore untried arms first
        ucb_values = [
            self.values[i] + math.sqrt(2 * math.log(total_counts) / self.counts[i])
            for i in range(self.n_arms)
        ]
        return ucb_values.index(max(ucb_values))  # Select the arm with max UCB

    def update(self, arm, reward):
        self.counts[arm] += 1
        n = self.counts[arm]
        value = self.values[arm]
        self.values[arm] = ((n - 1) / n) * value + (1 / n) * reward

# Game Simulation
def simulate_game(bandit, reward_probabilities, n_rounds):
    total_reward = 0
    for _ in range(n_rounds):
        arm = bandit.select_arm()
        reward = 1 if random.random() < reward_probabilities[arm] else 0
        bandit.update(arm, reward)
        total_reward += reward
    return total_reward

# Example usage
n_arms = 3
reward_probabilities = [0.2, 0.5, 0.8]  # Hidden probabilities of rewards for each arm
bandit = MultiArmedBandit(n_arms)
total_reward = simulate_game(bandit, reward_probabilities, 1000)
print(f"Total reward after 1000 rounds: {total_reward}")


Total reward after 1000 rounds: 733


 UCB for IoT-Based Smart Home Energy Optimization

In [2]:
import random
import math

class SmartHomeOptimizer:
    def __init__(self, n_devices):
        self.n_devices = n_devices
        self.counts = [0] * n_devices
        self.efficiency = [0.0] * n_devices

    def select_device(self):
        total_counts = sum(self.counts)
        if total_counts < self.n_devices:
            return total_counts  # Explore untried devices
        ucb_values = [
            self.efficiency[i] + math.sqrt(2 * math.log(total_counts) / self.counts[i])
            for i in range(self.n_devices)
        ]
        return ucb_values.index(max(ucb_values))

    def update(self, device, efficiency):
        self.counts[device] += 1
        n = self.counts[device]
        value = self.efficiency[device]
        self.efficiency[device] = ((n - 1) / n) * value + (1 / n) * efficiency

# Simulate real-time energy usage
def simulate_energy_optimizer(optimizer, energy_efficiency, n_rounds):
    total_efficiency = 0
    for _ in range(n_rounds):
        device = optimizer.select_device()
        efficiency = random.uniform(0, energy_efficiency[device])  # Simulate efficiency
        optimizer.update(device, efficiency)
        total_efficiency += efficiency
    return total_efficiency

# Example usage
n_devices = 3
energy_efficiency = [0.6, 0.8, 0.9]  # Max efficiency levels for devices
optimizer = SmartHomeOptimizer(n_devices)
total_efficiency = simulate_energy_optimizer(optimizer, energy_efficiency, 1000)
print(f"Total energy efficiency after 1000 rounds: {total_efficiency}")


Total energy efficiency after 1000 rounds: 395.82163348576444


Chess-like Game with PAC Algorithm

In [3]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Simplified chess board states and optimal moves
# Example: [state, optimal_move]
data = [
    [[1, 0, 0, 0], 0],  # Move left
    [[0, 1, 0, 0], 1],  # Move right
    [[0, 0, 1, 0], 2],  # Move forward
    [[0, 0, 0, 1], 3]   # Move backward
]

# Prepare dataset
X = [d[0] for d in data]  # Board states
y = [d[1] for d in data]  # Optimal moves
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train PAC model
model = DecisionTreeClassifier()
model.fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_test)
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")

# Predict next move
new_state = [0, 1, 0, 0]  # Example new board state
predicted_move = model.predict([new_state])[0]
print(f"Predicted move for the new state: {predicted_move}")


Accuracy: 0.0
Predicted move for the new state: 3
