In [1]:
import gym

env = gym.make('CartPole-v1')


In [2]:
import numpy as np

class QLearningAgent:
    def __init__(self, state_space, action_space, learning_rate=0.1, discount_factor=0.99, exploration_rate=1.0, exploration_decay=0.995):
        self.state_space = state_space
        self.action_space = action_space
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate
        self.exploration_decay = exploration_decay
        self.q_table = np.zeros(state_space + [action_space])

    def choose_action(self, state):
        if np.random.rand() < self.exploration_rate:
            return np.random.choice(self.action_space)
        return np.argmax(self.q_table[state])

    def update_q_table(self, state, action, reward, next_state):
        best_next_action = np.argmax(self.q_table[next_state])
        td_target = reward + self.discount_factor * self.q_table[next_state][best_next_action]
        td_error = td_target - self.q_table[state][action]
        self.q_table[state][action] += self.learning_rate * td_error

    def decay_exploration(self):
        self.exploration_rate *= self.exploration_decay


In [3]:
num_episodes = 1000
max_steps_per_episode = 100

agent = QLearningAgent(state_space=[env.observation_space.n], action_space=env.action_space.n)

for episode in range(num_episodes):
    state = env.reset()
    state = tuple(state)
    total_reward = 0

    for step in range(max_steps_per_episode):
        action = agent.choose_action(state)
        next_state, reward, done, _ = env.step(action)
        next_state = tuple(next_state)
        agent.update_q_table(state, action, reward, next_state)
        state = next_state
        total_reward += reward

        if done:
            break

    agent.decay_exploration()
    print(f"Episode {episode + 1}: Total Reward = {total_reward}")


AttributeError: 'Box' object has no attribute 'n'

In [None]:
total_rewards = []
success_rate = 0

for episode in range(num_episodes):
    state = env.reset()
    state = tuple(state)
    total_reward = 0

    for step in range(max_steps_per_episode):
        action = agent.choose_action(state)
        next_state, reward, done, _ = env.step(action)
        next_state = tuple(next_state)
        state = next_state
        total_reward += reward

        if done:
            if total_reward >= max_steps_per_episode:
                success_rate += 1
            break

    total_rewards.append(total_reward)

average_reward = np.mean(total_rewards)
success_rate = (success_rate / num_episodes) * 100

print(f"Average Reward: {average_reward}")
print(f"Success Rate: {success_rate}%")


In [6]:
# Introduction
# This notebook demonstrates a simple reinforcement learning example using Q-learning.

# Environment Setup
import gym
import numpy as np
import matplotlib.pyplot as plt

# Create the environment
env = gym.make('FrozenLake-v1', is_slippery=False)

# Defining the RL Environment
state_space = env.observation_space.n
action_space = env.action_space.n

# Agent Design
q_table = np.zeros((state_space, action_space))
learning_rate = 0.1
discount_factor = 0.99
epsilon = 1.0
epsilon_decay = 0.995
min_epsilon = 0.01
episodes = 1000

# Training the Agent
rewards = []
for episode in range(episodes):
    state = env.reset()
    total_reward = 0
    done = False
    while not done:
        if np.random.rand() < epsilon:
            action = env.action_space.sample()
        else:
            action = np.argmax(q_table[state])
        
        next_state, reward, done, other1, other2 = env.step(action)
        q_table[state, action] = q_table[state, action] + learning_rate * (reward + discount_factor * np.max(q_table[next_state]) - q_table[state, action])
        state = next_state
        total_reward += reward
    
    epsilon = max(min_epsilon, epsilon * epsilon_decay)
    rewards.append(total_reward)

# Evaluation
average_rewards = np.mean(rewards)
print(f'Average reward over {episodes} episodes: {average_rewards}')

# Visualization
plt.plot(rewards)
plt.xlabel('Episode')
plt.ylabel('Total Reward')
plt.title('Rewards over Time')
plt.show()

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices