In [7]:
pip install gym numpy tensorflow torch stable-baselines3

Note: you may need to restart the kernel to use updated packages.


In [2]:
import warnings

import numpy as np
import gymnasium as gym

# Silence DeprecationWarning messages for the rest of the session.
warnings.simplefilter(action='ignore', category=DeprecationWarning)

# Seed NumPy's global RNG: the epsilon-greedy policy in the training loop draws
# from it (np.random.rand / np.random.choice), so this makes runs repeatable.
SEED = 42
np.random.seed(SEED)

# Create Environment
env = gym.make("FrozenLake-v1", is_slippery=False)
state_size = env.observation_space.n
action_size = env.action_space.n

# One row per state and one column per action; every estimate starts at zero.
Q_table = np.zeros((state_size, action_size))

alpha = 0.1  # Learning rate
gamma = 0.99  # Discount factor
epsilon = 1.0  # Exploration rate
epsilon_decay = 0.995  # Multiplicative decay applied to epsilon after each episode
epsilon_min = 0.01  # Lower bound for epsilon
episodes = 2000  # Number of games


# Tabular Q-learning: play `episodes` games, acting epsilon-greedily and
# updating Q_table after every step.
for episode in range(episodes):
    state, _ = env.reset()
    done = False

    while not done:
        # Choose action using epsilon-greedy strategy
        if np.random.rand() < epsilon:
            action = np.random.choice(action_size)  # Exploration
        else:
            action = np.argmax(Q_table[state])  # Exploitation

        # Take action and observe reward. The step API reports two separate
        # end-of-episode flags; the episode must stop when either is set,
        # otherwise a truncated episode would keep being stepped without a reset.
        next_state, reward, terminated, truncated, _ = env.step(action)
        done = terminated or truncated

        # Update Q-table using Bellman equation
        Q_table[state, action] = Q_table[state, action] + alpha * (
            reward + gamma * np.max(Q_table[next_state]) - Q_table[state, action]
        )

        state = next_state

    # Decay epsilon to reduce exploration over time
    epsilon = max(epsilon * epsilon_decay, epsilon_min)

print("Training complete!")


Training complete!


In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import random
import numpy as np
import gymnasium as gym
from collections import deque

import warnings
warnings.simplefilter(action='ignore', category=DeprecationWarning)


# Build the CartPole-v1 environment and read the sizes used to construct the
# Q-network below: the length of an observation vector and the number of
# discrete actions.
# NOTE: this rebinds `env`, `state_size` and `action_size`, which the
# FrozenLake cell above also defines.
env = gym.make("CartPole-v1")
state_size = env.observation_space.shape[0]
action_size = env.action_space.n

class DQN(nn.Module):
    """Fully connected Q-network with two ReLU hidden layers.

    Args:
        state_size: Number of input features.
        action_size: Number of outputs (one value per action).
        hidden_size: Width of both hidden layers. Defaults to 24.
    """

    def __init__(self, state_size, action_size, hidden_size=24):
        super().__init__()
        self.fc1 = nn.Linear(state_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, action_size)

    def forward(self, x):
        """Return the raw output of the final linear layer for input `x`."""
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        # No activation on the output layer.
        return self.fc3(x)


# Q-network, its Adam optimizer, and the mean-squared-error loss used by replay().
model = DQN(state_size, action_size)
optimizer = optim.Adam(model.parameters(), lr=0.001)
loss_function = nn.MSELoss()

# Replay buffer: a bounded deque, so once 2000 transitions are stored the
# oldest ones are dropped as new ones are appended.
memory = deque(maxlen=2000)
gamma = 0.95  # Discount factor
epsilon = 1.0  # Exploration rate
epsilon_min = 0.01  # Lower bound for epsilon
epsilon_decay = 0.995  # Multiplicative decay applied to epsilon after each episode
batch_size = 64  # Transitions sampled per replay() call
episodes = 1000  # Number of training episodes


def remember(state, action, reward, next_state, done):
    """Append one (state, action, reward, next_state, done) tuple to `memory`."""
    transition = (state, action, reward, next_state, done)
    memory.append(transition)

def replay():
    """Fit the Q-network on a random minibatch drawn from `memory`.

    Returns immediately while `memory` holds fewer than `batch_size`
    transitions. Otherwise samples `batch_size` transitions and, for each one,
    takes a single optimizer step on the MSE between the network's value for
    the taken action and the target

        reward                                  if done
        reward + gamma * max_a Q(next_state, a) otherwise
    """
    if len(memory) < batch_size:
        return

    minibatch = random.sample(memory, batch_size)
    for state, action, reward, next_state, done in minibatch:
        target = reward
        if not done:
            # The target is treated as a constant (only its scalar value is
            # used), so skip building an autograd graph for this forward pass.
            with torch.no_grad():
                next_q = model(torch.tensor(next_state, dtype=torch.float32))
            target += gamma * next_q.max().item()

        # Value the network currently assigns to the action that was taken.
        output = model(torch.tensor(state, dtype=torch.float32))[action]

        # Ensure target has the same shape as output
        loss = loss_function(output.view(-1), torch.tensor([target], dtype=torch.float32))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


# DQN training: play `episodes` games with an epsilon-greedy policy, store every
# transition, and run one replay() pass at the end of each episode.
for episode in range(episodes):
    state, _ = env.reset()
    done = False
    total_reward = 0

    while not done:
        if np.random.rand() < epsilon:
            action = random.randrange(action_size)  # Explore
        else:
            # Inference only: no gradients are needed to pick an action.
            with torch.no_grad():
                action = torch.argmax(model(torch.FloatTensor(state))).item()  # Exploit

        next_state, reward, terminated, truncated, _ = env.step(action)
        # Store `terminated` rather than `terminated or truncated`: replay()
        # skips the bootstrap term when the stored flag is true, which is only
        # correct for a real terminal state, not for an episode that was cut off.
        remember(state, action, reward, next_state, terminated)
        state = next_state
        total_reward += reward
        # End the episode on either flag so a truncated episode is not stepped
        # further without a reset.
        done = terminated or truncated

    replay()
    epsilon = max(epsilon * epsilon_decay, epsilon_min)

    print(f"Episode {episode+1}, Total Reward: {total_reward}")

print("Training complete!")




Episode 1, Total Reward: 18.0
Episode 2, Total Reward: 23.0
Episode 3, Total Reward: 20.0
Episode 4, Total Reward: 21.0
Episode 5, Total Reward: 27.0
Episode 6, Total Reward: 31.0
Episode 7, Total Reward: 15.0
Episode 8, Total Reward: 20.0
Episode 9, Total Reward: 15.0
Episode 10, Total Reward: 15.0
Episode 11, Total Reward: 36.0
Episode 12, Total Reward: 10.0
Episode 13, Total Reward: 13.0
Episode 14, Total Reward: 15.0
Episode 15, Total Reward: 22.0
Episode 16, Total Reward: 21.0
Episode 17, Total Reward: 35.0
Episode 18, Total Reward: 21.0
Episode 19, Total Reward: 11.0
Episode 20, Total Reward: 13.0
Episode 21, Total Reward: 35.0
Episode 22, Total Reward: 19.0
Episode 23, Total Reward: 21.0
Episode 24, Total Reward: 20.0
Episode 25, Total Reward: 37.0
Episode 26, Total Reward: 37.0
Episode 27, Total Reward: 24.0
Episode 28, Total Reward: 12.0
Episode 29, Total Reward: 18.0
Episode 30, Total Reward: 45.0
Episode 31, Total Reward: 16.0
Episode 32, Total Reward: 15.0
Episode 33, Total