<a href="https://colab.research.google.com/github/Akshithaaa11/Assignments/blob/main/RL_150.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# 1) UCB action selection for a simulated game with hidden reward probabilities
import numpy as np

# Simulating the game environment
class GameEnvironment:
    """Simulated game in which each action pays 1 or 0 at a hidden rate."""

    def __init__(self, n_actions):
        """Draw one hidden win probability per action.

        Args:
            n_actions: Number of distinct actions the game offers.
        """
        # Each entry is the chance that the matching action yields reward 1.
        self.probabilities = np.random.rand(n_actions)
        self.n_actions = n_actions

    def play(self, action):
        """Play ``action`` once.

        Args:
            action: Index into ``self.probabilities``.

        Returns:
            1 if a fresh uniform draw falls below the action's win
            probability, otherwise 0.
        """
        draw = np.random.rand()
        if draw < self.probabilities[action]:
            return 1
        return 0


# UCB Algorithm
class UCB:
    """Upper-confidence-bound action selector backed by running mean rewards."""

    def __init__(self, n_actions):
        """Start with zero pulls and zero estimated value for every action.

        Args:
            n_actions: Number of actions to choose between.
        """
        self.n_actions = n_actions
        # counts[a]: how many times action a has been updated.
        self.counts = np.zeros(n_actions)
        # values[a]: running mean of the rewards recorded for action a.
        self.values = np.zeros(n_actions)

    def select_action(self, t):
        """Pick the action to play at round ``t``.

        Args:
            t: Zero-based round index supplied by the caller.

        Returns:
            ``t`` itself while ``t < n_actions`` (so each action is visited
            once in order), otherwise the index with the highest
            value-plus-exploration-bonus score.
        """
        warming_up = t < self.n_actions
        if warming_up:
            return t
        # The small constant in the denominator guards against division by
        # zero for an action whose count is still 0.
        bonus = np.sqrt((2 * np.log(t)) / (self.counts + 1e-5))
        scores = self.values + bonus
        return np.argmax(scores)

    def update(self, action, reward):
        """Record ``reward`` for ``action`` and refresh its running mean.

        Args:
            action: Index of the action that was played.
            reward: Reward observed for that play.
        """
        pulls = self.counts[action] + 1
        self.counts[action] = pulls
        previous = self.values[action]
        # Incremental mean: move the estimate 1/pulls of the way to the reward.
        self.values[action] = previous + (reward - previous) / pulls


# Simulation
def simulate_game(n_actions, n_rounds):
    """Run a UCB agent against a fresh game and print a summary.

    Args:
        n_actions: Number of actions in the game.
        n_rounds: Number of rounds to play.

    Prints the summed reward, the agent's estimated action values and the
    per-action play counts. Returns nothing.
    """
    env = GameEnvironment(n_actions)
    agent = UCB(n_actions)

    payoffs = []
    for step in range(n_rounds):
        chosen = agent.select_action(step)
        payoff = env.play(chosen)
        agent.update(chosen, payoff)
        payoffs.append(payoff)
    total_reward = sum(payoffs)

    print(f"Total Reward: {total_reward}")
    print(f"Estimated Action Values: {agent.values}")
    print(f"Action Counts: {agent.counts}")

# Demo run: 5 actions played for 1000 rounds; results are printed by the call.
simulate_game(n_actions=5, n_rounds=1000)


Total Reward: 940
Estimated Action Values: [0.14285714 0.97941176 0.86896552 0.99226804 0.83185841]
Action Counts: [ 14. 340. 145. 388. 113.]


In [2]:
# 2) UCB selection of the most efficient device in a simulated IoT setup
import numpy as np
class IoTEnvironment:
    """Simulated set of devices, each with a fixed efficiency plus noise."""

    def __init__(self, n_devices):
        """Draw one efficiency value per device.

        Args:
            n_devices: Number of devices in the environment.
        """
        # efficiency[d] is the noise-free reading for device d.
        self.efficiency = np.random.rand(n_devices)
        self.n_devices = n_devices

    def use_device(self, device):
        """Return one noisy efficiency reading for ``device``.

        Args:
            device: Index into ``self.efficiency``.

        Returns:
            The device's efficiency plus a normal draw with mean 0 and
            standard deviation 0.1.
        """
        noise = np.random.normal(0, 0.1)
        return self.efficiency[device] + noise
class IoTUCB:
    """Upper-confidence-bound device selector with running mean efficiencies."""

    def __init__(self, n_devices):
        """Start with zero uses and zero estimated efficiency per device.

        Args:
            n_devices: Number of devices to choose between.
        """
        self.n_devices = n_devices
        # counts[d]: how many readings have been recorded for device d.
        self.counts = np.zeros(n_devices)
        # values[d]: running mean of the readings recorded for device d.
        self.values = np.zeros(n_devices)

    def select_device(self, t):
        """Pick the device to use at iteration ``t``.

        Args:
            t: Zero-based iteration index supplied by the caller.

        Returns:
            ``t`` itself while ``t < n_devices`` (so each device is visited
            once in order), otherwise the index with the highest
            estimate-plus-exploration-bonus score.
        """
        if t >= self.n_devices:
            # The small constant keeps the division defined when a count is 0.
            exploration = np.sqrt((2 * np.log(t)) / (self.counts + 1e-5))
            return np.argmax(self.values + exploration)
        return t

    def update(self, device, reward):
        """Record ``reward`` for ``device`` and refresh its running mean.

        Args:
            device: Index of the device that was used.
            reward: Efficiency reading observed for that use.
        """
        uses = self.counts[device] + 1
        self.counts[device] = uses
        estimate = self.values[device]
        # Incremental mean: shift the estimate by 1/uses of the error.
        self.values[device] = estimate + (reward - estimate) / uses


# Simulation
def optimize_energy(n_devices, n_iterations):
    """Run a UCB selector over a fresh IoT environment and print a summary.

    Args:
        n_devices: Number of devices in the environment.
        n_iterations: Number of select/use/update iterations to run.

    Prints the selector's estimated efficiencies and per-device usage
    counts. Returns nothing.
    """
    env = IoTEnvironment(n_devices)
    selector = IoTUCB(n_devices)

    step = 0
    while step < n_iterations:
        chosen = selector.select_device(step)
        reading = env.use_device(chosen)
        selector.update(chosen, reading)
        step += 1

    print(f"Estimated Device Efficiencies: {selector.values}")
    print(f"Device Usage Counts: {selector.counts}")

# Demo run: 3 devices over 500 iterations; results are printed by the call.
optimize_energy(n_devices=3, n_iterations=500)


Estimated Device Efficiencies: [0.74688353 0.58691404 0.91308495]
Device Usage Counts: [ 99.  48. 353.]


In [4]:
# 3) Decision-tree move prediction on simplified 4x4 board states
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Generating simplified board states
def generate_board_states(n_samples):
    """Build a synthetic dataset of flattened 4x4 boards and move labels.

    Each board is filled with random 0s (empty) and 1s (pawn), then one
    randomly chosen cell is overwritten with 2 (king). The label for each
    board is drawn uniformly from the four move names and does not depend
    on the board contents.

    Args:
        n_samples: Number of (board, label) pairs to generate.

    Returns:
        A pair ``(X, y)`` of NumPy arrays: ``X`` holds one flattened
        16-cell board per row, ``y`` holds the matching move labels.
    """
    moves = ["move_left", "move_right", "move_up", "move_down"]
    features, labels = [], []
    for _ in range(n_samples):
        grid = np.random.choice([0, 1], size=(4, 4))
        # Row is drawn before column, matching the order of the random draws.
        row = np.random.randint(4)
        col = np.random.randint(4)
        grid[row, col] = 2
        features.append(grid.flatten())
        labels.append(np.random.choice(moves))
    return np.array(features), np.array(labels)

# Generate dataset
# Build a 1000-sample dataset of flattened boards (X) and move labels (y).
X, y = generate_board_states(1000)

# Hold out 20% of the samples for testing; random_state fixes the split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a decision tree limited to depth 5 on the training portion.
clf = DecisionTreeClassifier(max_depth=5)
clf.fit(X_train, y_train)

# Score the fitted tree on the held-out samples and report accuracy.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

# Build one more random board the same way the dataset boards are built:
# random 0/1 cells with a single cell overwritten by 2 for the king.
test_board = np.random.choice([0, 1], size=(4, 4))
king_position = (np.random.randint(4), np.random.randint(4))
test_board[king_position] = 2
print("Test Board:")
print(test_board)

# predict expects a 2-D input, so the flattened board is wrapped in a list.
prediction = clf.predict([test_board.flatten()])
print(f"Predicted Move: {prediction[0]}")


Model Accuracy: 0.29
Test Board:
[[1 0 0 0]
 [1 1 0 1]
 [2 1 0 1]
 [0 0 1 0]]
Predicted Move: move_down
