In [2]:
import numpy as np
import random

class WindyGridworld:
    """Deterministic gridworld in which each column displaces the agent along y.

    States are ``(x, y)`` tuples with ``0 <= x < width`` and ``0 <= y < height``.
    ``wind[x]`` is added to ``y`` on every move that starts in column ``x``.

    Attributes:
        width: Number of columns (valid ``x`` values).
        height: Number of rows (valid ``y`` values).
        wind: Per-column wind strength; a private copy of the constructor argument.
        start: Initial state, ``(0, 3)``.
        goal: Terminal state, ``(7, 3)``.
        actions: ``(dx, dy)`` displacement for each action index.
    """

    def __init__(self, width=10, height=7, wind=[0, 0, 0, 1, 1, 1, 2, 2, 1, 0]):
        """Build the grid.

        Args:
            width: Number of columns.
            height: Number of rows.
            wind: Sequence of per-column wind strengths; needs at least
                ``width`` entries because ``step`` indexes it by ``x``.

        Raises:
            ValueError: If ``wind`` has fewer than ``width`` entries.
        """
        # Copy the sequence: the default argument is a single list object shared
        # by every call, so storing it directly would let one instance's
        # mutation leak into all others.
        wind = list(wind)
        if len(wind) < width:
            raise ValueError(
                f"wind must have at least {width} entries (one per column), got {len(wind)}"
            )

        self.width = width
        self.height = height
        self.wind = wind
        self.start = (0, 3)
        self.goal = (7, 3)
        # (dx, dy) per action index: 0 -> y-1, 1 -> y+1, 2 -> x-1, 3 -> x+1.
        # The wind pushes in the +y direction.
        self.actions = [(0, -1), (0, 1), (-1, 0), (1, 0)]

    def step(self, state, action):
        """Apply one action from ``state``.

        Args:
            state: Current ``(x, y)`` position.
            action: Index into ``self.actions``.

        Returns:
            Tuple ``(new_state, reward, done)`` where ``reward`` is always ``-1``
            and ``done`` is True when ``new_state`` equals ``self.goal``.
        """
        x, y = state
        dx, dy = self.actions[action]

        # Wind is taken from the column the agent is leaving, not the one it enters.
        blown_y = y + self.wind[x]

        # Clamp both coordinates so the agent stays on the grid.
        new_x = max(0, min(self.width - 1, x + dx))
        new_y = max(0, min(self.height - 1, blown_y + dy))

        new_state = (new_x, new_y)
        reward = -1  # Every move gets a reward of -1
        done = new_state == self.goal
        return new_state, reward, done

class Agent:
    """Tabular epsilon-greedy Q-learning agent for a grid environment.

    The environment object must expose ``width``, ``height``, ``actions``,
    ``start`` and ``step(state, action) -> (next_state, reward, done)``.
    """

    def __init__(self, env, alpha=0.1, gamma=1.0, epsilon=0.1, n=1):
        """Create an agent with an all-zero action-value table.

        Args:
            env: Environment to act in (see class docstring for required members).
            alpha: Step size applied to each temporal-difference error.
            gamma: Discount factor applied to the bootstrapped next-state value.
            epsilon: Probability of choosing a uniformly random action.
            n: Stored on the instance; no method of this class reads it.
        """
        self.env = env
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.n = n
        # Q-table indexed as q_values[x, y, action].
        self.q_values = np.zeros((env.width, env.height, len(env.actions)))
        # Filled with action index 1; not read or updated by any method here.
        self.policy = np.full((env.width, env.height), 1)

    def choose_action(self, state):
        """Return an action index for ``state`` using epsilon-greedy selection.

        Ties between equal Q-values resolve to the lowest action index
        (``np.argmax`` behaviour). The result is always a plain Python ``int``.
        """
        if random.random() < self.epsilon:
            return random.randint(0, len(self.env.actions) - 1)  # Explore

        x, y = state
        return int(np.argmax(self.q_values[x, y]))  # Exploit

    def update(self, state, action, reward, next_state, done=False):
        """Apply one Q-learning update for the observed transition.

        Args:
            state: ``(x, y)`` the action was taken from.
            action: Index of the action taken.
            reward: Reward received for the transition.
            next_state: ``(x, y)`` reached after the action.
            done: True when ``next_state`` is terminal. In that case the target
                is the reward alone, with no bootstrapping from ``next_state``.
        """
        x, y = state
        nx, ny = next_state
        # A terminal state has no future return, so do not bootstrap from it.
        next_value = 0.0 if done else np.max(self.q_values[nx, ny])
        self.q_values[x, y, action] += self.alpha * (
            reward + self.gamma * next_value - self.q_values[x, y, action]
        )

    def train(self, episodes=500):
        """Run ``episodes`` episodes from ``env.start``, updating Q after every step.

        Each episode continues until the environment reports ``done``; there is
        no step limit.
        """
        for episode in range(episodes):
            state = self.env.start
            done = False
            while not done:
                action = self.choose_action(state)
                next_state, reward, done = self.env.step(state, action)
                self.update(state, action, reward, next_state, done=done)
                state = next_state

    def get_optimal_policy(self):
        """Return the greedy action index for every cell.

        Returns:
            Integer array of shape ``(env.width, env.height)`` indexed ``[x, y]``.
        """
        # Vectorised argmax over the action axis replaces the per-cell loop.
        return np.argmax(self.q_values, axis=2).astype(int)

# Seed the module-level RNG that Agent.choose_action draws from, so training
# (and therefore the printed policy) is reproducible after a kernel restart.
random.seed(42)

# Initialize environment and agent
env = WindyGridworld()
agent = Agent(env)
agent.train()

# Display the greedy policy learned after training.
# The array holds action indices and is indexed [x, y], so each printed row
# corresponds to one grid column.
optimal_policy = agent.get_optimal_policy()
print("Optimal Policy:")
print(optimal_policy)

import numpy as np
import scipy.stats
from sklearn.model_selection import train_test_split, ShuffleSplit
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_digits

# NOTE(review): this ensemble experiment is repeated verbatim in the following
# cell (In [3]); consider keeping a single copy.

# Load dataset (you can replace this with another dataset)
digits = load_digits()
X, y = digits.data, digits.target

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create subsets using ShuffleSplit: n_subsets random draws of subset_size
# training samples each
n_subsets = 1000
subset_size = 100
ss = ShuffleSplit(n_splits=n_subsets, train_size=subset_size, random_state=42)

# One row of test-set predictions per tree. Allocate with the label dtype so
# class labels are stored as integers rather than floats.
tree_predictions = np.zeros((n_subsets, len(y_test)), dtype=y_test.dtype)

# Train 1,000 Decision Trees, one per subset
for i, (subset_idx, _) in enumerate(ss.split(X_train)):
    X_subset, y_subset = X_train[subset_idx], y_train[subset_idx]

    # max_depth=10 is a fixed choice; no hyperparameter search is run here.
    # random_state=i gives every tree its own reproducible seed.
    tree = DecisionTreeClassifier(max_depth=10, random_state=i)
    tree.fit(X_subset, y_subset)

    # Store test set predictions
    tree_predictions[i] = tree.predict(X_test)

# Majority voting across the 1,000 trees (most frequent label per test sample)
final_predictions = scipy.stats.mode(tree_predictions, axis=0).mode.flatten()

# Evaluate accuracy
ensemble_accuracy = accuracy_score(y_test, final_predictions)
print(f"Ensemble Model Accuracy: {ensemble_accuracy:.4f}")



Optimal Policy:
[[0 3 3 3 0 1 1]
 [2 1 3 0 2 2 2]
 [3 0 2 0 1 1 0]
 [3 1 0 3 3 3 3]
 [0 3 3 3 3 3 3]
 [0 0 3 3 3 3 3]
 [0 0 0 3 0 3 3]
 [0 3 0 0 2 3 3]
 [3 0 2 0 0 1 3]
 [2 1 2 0 0 0 0]]
Ensemble Model Accuracy: 0.9333


In [3]:
import numpy as np
import scipy.stats
from sklearn.model_selection import train_test_split, ShuffleSplit
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_digits

# NOTE(review): the same ensemble experiment also appears at the end of the
# previous cell (In [2]); consider keeping a single copy.

# Load dataset (you can replace this with another dataset)
digits = load_digits()
X, y = digits.data, digits.target

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create subsets using ShuffleSplit: n_subsets random draws of subset_size
# training samples each
n_subsets = 1000
subset_size = 100
ss = ShuffleSplit(n_splits=n_subsets, train_size=subset_size, random_state=42)

# One row of test-set predictions per tree. Allocate with the label dtype so
# class labels are stored as integers rather than floats.
tree_predictions = np.zeros((n_subsets, len(y_test)), dtype=y_test.dtype)

# Train 1,000 Decision Trees, one per subset
for i, (subset_idx, _) in enumerate(ss.split(X_train)):
    X_subset, y_subset = X_train[subset_idx], y_train[subset_idx]

    # max_depth=10 is a fixed choice; no hyperparameter search is run here.
    # random_state=i gives every tree its own reproducible seed.
    tree = DecisionTreeClassifier(max_depth=10, random_state=i)
    tree.fit(X_subset, y_subset)

    # Store test set predictions
    tree_predictions[i] = tree.predict(X_test)

# Majority voting across the 1,000 trees (most frequent label per test sample)
final_predictions = scipy.stats.mode(tree_predictions, axis=0).mode.flatten()

# Evaluate accuracy
ensemble_accuracy = accuracy_score(y_test, final_predictions)
print(f"Ensemble Model Accuracy: {ensemble_accuracy:.4f}")



Ensemble Model Accuracy: 0.9333
