<a href="https://colab.research.google.com/github/Lupoyo/projectNew/blob/main/Deep_Convolutional_Q_Learning_for_Pac_Man_Complete_Code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Deep Convolutional Q-Learning for Pac-Man

## Part 0 - Installing the required packages and importing the libraries

### Installing Gymnasium

In [None]:
# Install Gymnasium with its Atari and Box2D extras, then the legacy `gym` package.
# NOTE(review): per the recorded output of this cell, `pip install gym[atari]`
# uninstalls ale-py 0.10.1 (pulled in by gymnasium[atari], which requested
# ale-py>=0.9) and installs ale-py 0.8.1 instead (gym[atari] requests
# ale-py~=0.8.0). Both packages cannot have the ale-py they ask for at the same
# time — decide which one the notebook targets and drop the other install.
!pip install gymnasium
!pip install "gymnasium[atari, accept-rom-license]"
# swig is presumably required to build the Box2D extra installed next — TODO confirm
!apt-get install -y swig
!pip install gymnasium[box2d]
!pip install gym[atari]
!pip install --upgrade gym

Collecting ale-py>=0.9 (from gymnasium[accept-rom-license,atari])
  Using cached ale_py-0.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.6 kB)
Using cached ale_py-0.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.1 MB)
Installing collected packages: ale-py
  Attempting uninstall: ale-py
    Found existing installation: ale-py 0.7.5
    Uninstalling ale-py-0.7.5:
      Successfully uninstalled ale-py-0.7.5
Successfully installed ale-py-0.10.1


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
swig is already the newest version (4.0.2-1ubuntu1).
0 upgraded, 0 newly installed, 0 to remove and 49 not upgraded.
Collecting ale-py~=0.8.0 (from gym[atari])
  Downloading ale_py-0.8.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.1 kB)
Downloading ale_py-0.8.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m26.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ale-py
  Attempting uninstall: ale-py
    Found existing installation: ale-py 0.10.1
    Uninstalling ale-py-0.10.1:
      Successfully uninstalled ale-py-0.10.1
Successfully installed ale-py-0.8.1




### Importing the libraries

In [None]:
import os
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from collections import deque
from torch.utils.data import DataLoader, TensorDataset

## Part 1 - Building the AI

### Creating the architecture of the Neural Network

In [None]:
class Network(nn.Module):
  """Convolutional Q-network mapping a batch of RGB frames to one Q-value per action.

  Four Conv2d + BatchNorm2d + ReLU stages are followed by three fully connected
  layers. `fc1` expects a flattened 10 x 10 x 128 feature map, which is what the
  convolution stack produces for 128 x 128 inputs (128 -> 31 -> 14 -> 12 -> 10).

  Args:
    action_size: number of outputs of the final layer (one per action).
    seed: value passed to `torch.manual_seed` before the layers are created,
      so the initial weights are reproducible. Note that this seeds the
      global torch RNG.
  """

  def __init__(self, action_size, seed = 42):
    super().__init__()
    # torch.manual_seed returns the global generator; it is kept on the instance.
    self.seed = torch.manual_seed(seed)

    # Convolutional feature extractor (input has 3 channels).
    self.conv1 = nn.Conv2d(in_channels = 3, out_channels = 32, kernel_size = 8, stride = 4)
    self.bn1 = nn.BatchNorm2d(num_features = 32)
    self.conv2 = nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = 4, stride = 2)
    self.bn2 = nn.BatchNorm2d(num_features = 64)
    self.conv3 = nn.Conv2d(in_channels = 64, out_channels = 64, kernel_size = 3, stride = 1)
    self.bn3 = nn.BatchNorm2d(num_features = 64)
    self.conv4 = nn.Conv2d(in_channels = 64, out_channels = 128, kernel_size = 3, stride = 1)
    self.bn4 = nn.BatchNorm2d(num_features = 128)

    # Fully connected head producing the Q-values.
    self.fc1 = nn.Linear(in_features = 10 * 10 * 128, out_features = 512)
    self.fc2 = nn.Linear(in_features = 512, out_features = 256)
    self.fc3 = nn.Linear(in_features = 256, out_features = action_size)

  def forward(self, state):
    """Return the Q-values, shape (batch, action_size), for a batch of frames."""
    conv_stages = (
      (self.conv1, self.bn1),
      (self.conv2, self.bn2),
      (self.conv3, self.bn3),
      (self.conv4, self.bn4),
    )
    features = state
    for convolution, batch_norm in conv_stages:
      features = torch.relu(batch_norm(convolution(features)))

    # Collapse channel and spatial dimensions, keeping the batch dimension.
    flattened = features.view(features.shape[0], -1)
    hidden = torch.relu(self.fc1(flattened))
    hidden = torch.relu(self.fc2(hidden))
    return self.fc3(hidden)

## Part 2 - Training the AI

### Setting up the environment

In [None]:

# NOTE(review): nothing in the visible notebook imports `atari_py`; this
# install looks unused — confirm before keeping it.
!pip install atari-py  # For Atari environments

Collecting atari-py
  Downloading atari-py-0.2.9.tar.gz (540 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/540.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m532.5/540.6 kB[0m [31m17.8 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m540.6/540.6 kB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: atari-py
  Building wheel for atari-py (setup.py) ... [?25l[?25hdone
  Created wheel for atari-py: filename=atari_py-0.2.9-cp310-cp310-linux_x86_64.whl size=2872390 sha256=be8b45c14b59368903b65eb7ab1341aca8f0da53954038337799989258f5a714
  Stored in directory: /root/.cache/pip/wheels/75/6f/04/1f3bf5255580101e16ff487564354dddcdd23ec3b43b775b7a
Successfully built atari-py
Installing collected packages: atari-py
Successfully installed atari-py-0.2.9


In [None]:
# NOTE(review): repeats the `pip install --upgrade gym` already issued at the
# end of the first install cell of this notebook.
!pip install --upgrade gym

Collecting gym
  Downloading gym-0.26.2.tar.gz (721 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/721.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m716.8/721.7 kB[0m [31m22.8 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m721.7/721.7 kB[0m [31m16.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: gym
  Building wheel for gym (pyproject.toml) ... [?25l[?25hdone
  Created wheel for gym: filename=gym-0.26.2-py3-none-any.whl size=827695 sha256=7ed5ae35716fb26089811b25e5540e6d2a6a0ad9e9418436e3606b0f6200a686
  Stored in directory: /root/.cache/pip/wheels/b9/22/6d/3e7b32d98451b4cd9d12417052affbeeeea012955d437da1da
Successfully built gym
Installing col

In [None]:
import gymnasium as gym

# List all available environments in gymnasium
# (every registered id is printed in one go, so the cell output is very long;
# the recorded output is cut off mid-list.)
available_envs = list(gym.envs.registry.keys())
print(available_envs)


['CartPole-v0', 'CartPole-v1', 'MountainCar-v0', 'MountainCarContinuous-v0', 'Pendulum-v1', 'Acrobot-v1', 'phys2d/CartPole-v0', 'phys2d/CartPole-v1', 'phys2d/Pendulum-v0', 'LunarLander-v3', 'LunarLanderContinuous-v3', 'BipedalWalker-v3', 'BipedalWalkerHardcore-v3', 'CarRacing-v3', 'Blackjack-v1', 'FrozenLake-v1', 'FrozenLake8x8-v1', 'CliffWalking-v0', 'Taxi-v3', 'tabular/Blackjack-v0', 'tabular/CliffWalking-v0', 'Reacher-v2', 'Reacher-v4', 'Reacher-v5', 'Pusher-v2', 'Pusher-v4', 'Pusher-v5', 'InvertedPendulum-v2', 'InvertedPendulum-v4', 'InvertedPendulum-v5', 'InvertedDoublePendulum-v2', 'InvertedDoublePendulum-v4', 'InvertedDoublePendulum-v5', 'HalfCheetah-v2', 'HalfCheetah-v3', 'HalfCheetah-v4', 'HalfCheetah-v5', 'Hopper-v2', 'Hopper-v3', 'Hopper-v4', 'Hopper-v5', 'Swimmer-v2', 'Swimmer-v3', 'Swimmer-v4', 'Swimmer-v5', 'Walker2d-v2', 'Walker2d-v3', 'Walker2d-v4', 'Walker2d-v5', 'Ant-v2', 'Ant-v3', 'Ant-v4', 'Ant-v5', 'Humanoid-v2', 'Humanoid-v3', 'Humanoid-v4', 'Humanoid-v5', 'Humano

In [None]:
import gymnasium as gym

# Try creating the MsPacman environment
# NOTE(review): the recorded output of this cell is
# "NameNotFound: Environment `MsPacman` doesn't exist.", so none of the
# assignments below ran in the saved session. Presumably the Atari environments
# were not registered with gymnasium at this point — confirm against the
# installed gymnasium / ale-py versions.
env = gym.make('MsPacman-v0')
state_shape = env.observation_space.shape
# First entry of the observation shape, or 1 when the shape is empty.
# NOTE(review): for an image observation this would be the length of one image
# axis, not a flat feature count — confirm this is what downstream code wants.
state_size = state_shape[0] if len(state_shape) > 0 else 1
number_actions = env.action_space.n

print('State shape: ', state_shape)
print('State size: ', state_size)
print('Number of actions: ', number_actions)

NameNotFound: Environment `MsPacman` doesn't exist.

In [None]:
# Use the explicit %pip magic: a bare `pip ...` line only works through
# IPython's automagic, which is applied to single-line cells only and can be
# switched off, whereas %pip always targets the running kernel's environment.
%pip install --upgrade gymnasium



### Initializing the hyperparameters

In [None]:
# Step size handed to the Adam optimizer of the DCQN agent's local Q-network.
learning_rate = 5e-4
# Number of transitions sampled from replay memory per learning step; learning
# only starts once the memory holds more than this many transitions.
minibatch_size = 64
# Discount applied to the next state's best target Q-value in the TD target.
discount_factor = 0.99

### Preprocessing the frames

In [None]:
from PIL import Image
from torchvision import transforms

# Built once instead of on every call: the pipeline has no per-frame state, and
# preprocess_frame runs several times per environment step.
_FRAME_TRANSFORM = transforms.Compose([transforms.Resize((128, 128)), transforms.ToTensor()])

def preprocess_frame(frame):
  """Convert one raw environment frame into a network-ready tensor.

  Args:
    frame: array accepted by `PIL.Image.fromarray`
      (presumably an H x W x 3 uint8 RGB frame — verify against the environment).

  Returns:
    The frame resized to 128 x 128 and converted with `ToTensor`, with a
    leading batch dimension of size 1 added by `unsqueeze(0)`.
  """
  return _FRAME_TRANSFORM(Image.fromarray(frame)).unsqueeze(0)

### Implementing the DCQN class

In [None]:
class Agent():
  """Deep Convolutional Q-Learning agent with replay memory and a target network.

  Relies on the module-level `Network`, `preprocess_frame`, `learning_rate`,
  `minibatch_size` and `discount_factor` definitions.

  Args:
    action_size: number of discrete actions.
    interpolation_parameter: soft-update rate used to pull the target network
      towards the local network after every learning step.
  """

  def __init__(self, action_size, interpolation_parameter = 1e-3):
    self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    self.action_size = action_size
    self.interpolation_parameter = interpolation_parameter
    self.local_qnetwork = Network(action_size).to(self.device)
    self.target_qnetwork = Network(action_size).to(self.device)
    # Start both networks from the same weights explicitly rather than relying
    # on them happening to share the same default seed.
    self.target_qnetwork.load_state_dict(self.local_qnetwork.state_dict())
    self.optimizer = optim.Adam(self.local_qnetwork.parameters(), lr = learning_rate)
    # Replay memory; the oldest transitions are dropped once 10000 are stored.
    self.memory = deque(maxlen = 10000)

  def step(self, state, action, reward, next_state, done):
    """Store one transition and, once enough are stored, learn from a random minibatch.

    `state` and `next_state` are raw frames; they are preprocessed here and kept
    in memory as CPU tensors.
    """
    state = preprocess_frame(state)
    next_state = preprocess_frame(next_state)
    self.memory.append((state, action, reward, next_state, done))
    if len(self.memory) > minibatch_size:
      experiences = random.sample(self.memory, k = minibatch_size)
      self.learn(experiences, discount_factor)

  def act(self, state, epsilon = 0.):
    """Pick an action epsilon-greedily for one raw frame.

    Returns the argmax of the local network's Q-values with probability
    1 - epsilon, otherwise a uniformly random action index.
    """
    state = preprocess_frame(state).to(self.device)
    # Evaluate in eval mode so BatchNorm uses its running statistics for the
    # single-frame batch, then switch back to train mode for learning.
    self.local_qnetwork.eval()
    with torch.no_grad():
      action_values = self.local_qnetwork(state)
    self.local_qnetwork.train()
    if random.random() > epsilon:
      return np.argmax(action_values.cpu().data.numpy())
    else:
      return random.choice(np.arange(self.action_size))

  def learn(self, experiences, discount_factor):
    """Run one optimisation step on a minibatch, then soft-update the target network.

    Args:
      experiences: iterable of (state, action, reward, next_state, done) tuples.
      discount_factor: discount applied to the next state's best target Q-value.
    """
    states, actions, rewards, next_states, dones = zip(*experiences)
    states = torch.from_numpy(np.vstack(states)).float().to(self.device)
    actions = torch.from_numpy(np.vstack(actions)).long().to(self.device)
    rewards = torch.from_numpy(np.vstack(rewards)).float().to(self.device)
    next_states = torch.from_numpy(np.vstack(next_states)).float().to(self.device)
    dones = torch.from_numpy(np.vstack(dones).astype(np.uint8)).float().to(self.device)
    # TD target: r + gamma * max_a' Q_target(s', a'), with no bootstrap on terminal steps.
    next_q_targets = self.target_qnetwork(next_states).detach().max(1)[0].unsqueeze(1)
    q_targets = rewards + discount_factor * next_q_targets * (1 - dones)
    q_expected = self.local_qnetwork(states).gather(1, actions)
    loss = F.mse_loss(q_expected, q_targets)
    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()
    # Without this the target network would keep its initial weights forever and
    # the TD targets would never reflect anything the agent has learned.
    self.soft_update(self.local_qnetwork, self.target_qnetwork, self.interpolation_parameter)

  def soft_update(self, local_model, target_model, interpolation_parameter):
    """Blend target parameters towards local ones: target <- tau * local + (1 - tau) * target."""
    with torch.no_grad():
      for target_param, local_param in zip(target_model.parameters(), local_model.parameters()):
        target_param.copy_(interpolation_parameter * local_param + (1.0 - interpolation_parameter) * target_param)

### Initializing the DCQN agent

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import random
import numpy as np

# Define a simple neural network for Q-value approximation
class QNetwork(nn.Module):
    """Small fully connected Q-network: state vector in, one Q-value per action out.

    Args:
        state_size: number of features in a single state vector.
        action_size: number of discrete actions (width of the output layer).
    """

    def __init__(self, state_size, action_size):
        super().__init__()
        hidden_units = 64
        self.fc1 = nn.Linear(in_features=state_size, out_features=hidden_units)
        self.fc2 = nn.Linear(in_features=hidden_units, out_features=hidden_units)
        self.fc3 = nn.Linear(in_features=hidden_units, out_features=action_size)

    def forward(self, state):
        """Return the Q-values of every action for the given batch of states."""
        hidden = self.fc1(state).relu()
        hidden = self.fc2(hidden).relu()
        q_values = self.fc3(hidden)
        return q_values

# Define the Agent class
class Agent:
    """DQN agent for vector observations, with replay memory and a target network.

    Args:
        state_size: number of features in a state vector.
        action_size: number of discrete actions.
        buffer_size: maximum number of transitions kept in replay memory.
        batch_size: number of transitions sampled per learning step.
        gamma: discount factor used in the TD target.
        tau: soft-update rate pulling the target network towards the local one.
        update_every: learn once every this many calls to `step`.
    """

    def __init__(self, state_size, action_size, buffer_size=100000, batch_size=64,
                 gamma=0.99, tau=1e-3, update_every=4):
        self.state_size = state_size
        self.action_size = action_size
        self.batch_size = batch_size
        self.gamma = gamma
        self.tau = tau
        self.update_every = update_every
        self.qnetwork = QNetwork(state_size, action_size)  # Network being trained
        # Slowly-moving copy used to compute TD targets; starts out identical.
        self.target_qnetwork = QNetwork(state_size, action_size)
        self.target_qnetwork.load_state_dict(self.qnetwork.state_dict())
        self.optimizer = optim.Adam(self.qnetwork.parameters(), lr=0.001)
        self.loss_fn = nn.MSELoss()
        self.memory = deque(maxlen=buffer_size)  # Replay memory of transitions
        self.t_step = 0  # Counts calls to step() modulo update_every

    def act(self, state, epsilon=0.0):
        """Choose an action epsilon-greedily.

        `epsilon` defaults to 0.0 (purely greedy) so the agent can be called as
        `agent.act(state)` at evaluation time.
        """
        if random.random() < epsilon:
            return random.choice(range(self.action_size))  # Exploration: random action

        # Exploitation: action with the highest Q-value for this single state.
        state_tensor = torch.as_tensor(np.asarray(state), dtype=torch.float32).unsqueeze(0)
        with torch.no_grad():
            q_values = self.qnetwork(state_tensor)
        return int(q_values.argmax(dim=1).item())

    def step(self, state, action, reward, next_state, done):
        """Record one transition and learn from a random minibatch every `update_every` calls."""
        self.memory.append((
            np.asarray(state, dtype=np.float32),
            int(action),
            float(reward),
            np.asarray(next_state, dtype=np.float32),
            float(done),
        ))
        self.t_step = (self.t_step + 1) % self.update_every
        if self.t_step == 0 and len(self.memory) >= self.batch_size:
            self._learn(random.sample(self.memory, k=self.batch_size))

    def _learn(self, experiences):
        """Run one gradient step on the TD error, then soft-update the target network."""
        states, actions, rewards, next_states, dones = zip(*experiences)
        states = torch.from_numpy(np.stack(states))
        next_states = torch.from_numpy(np.stack(next_states))
        actions = torch.tensor(actions, dtype=torch.int64).unsqueeze(1)
        rewards = torch.tensor(rewards, dtype=torch.float32).unsqueeze(1)
        dones = torch.tensor(dones, dtype=torch.float32).unsqueeze(1)

        # TD target: r + gamma * max_a' Q_target(s', a'), no bootstrap on terminal steps.
        with torch.no_grad():
            next_q = self.target_qnetwork(next_states).max(dim=1, keepdim=True)[0]
            q_targets = rewards + self.gamma * next_q * (1.0 - dones)
        q_expected = self.qnetwork(states).gather(1, actions)

        loss = self.loss_fn(q_expected, q_targets)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # target <- tau * local + (1 - tau) * target
        with torch.no_grad():
            for target_param, local_param in zip(self.target_qnetwork.parameters(),
                                                 self.qnetwork.parameters()):
                target_param.mul_(1.0 - self.tau).add_(local_param, alpha=self.tau)

# Create the environment (replace with your environment)
import gym
env = gym.make('CartPole-v1')

# Initialize agent with state and action sizes
state_size = env.observation_space.shape[0]
action_size = env.action_space.n
agent = Agent(state_size, action_size)

# Training loop
number_episodes = 2000
maximum_number_timesteps_per_episode = 10000
epsilon_starting_value = 1.0
epsilon_ending_value = 0.01
epsilon_decay_value = 0.995
epsilon = epsilon_starting_value
scores_on_100_episodes = deque(maxlen=100)
# CartPole-v1 episodes are truncated after 500 steps, so 500 is the per-episode
# ceiling; the environment's registered reward threshold is 475.
solved_average_score = 475.0

for episode in range(1, number_episodes + 1):
    state, _ = env.reset()  # Reset the environment and get the initial state
    score = 0

    # Iterate through timesteps for the current episode
    for t in range(maximum_number_timesteps_per_episode):
        action = agent.act(state, epsilon)  # Agent chooses an action
        # step() returns (obs, reward, terminated, truncated, info)
        next_state, reward, terminated, truncated, _ = env.step(action)
        # The agent is told only about true termination, as before
        agent.step(state, action, reward, next_state, terminated)
        state = next_state  # Update the state
        score += reward  # Accumulate the score

        # Stop on failure *or* on the time limit; stepping past a truncated
        # episode without reset() is outside the environment's contract.
        if terminated or truncated:
            break

    # Track scores over the last 100 episodes
    scores_on_100_episodes.append(score)
    average_score = np.mean(scores_on_100_episodes)

    # Decay epsilon (epsilon-greedy exploration strategy)
    epsilon = max(epsilon_ending_value, epsilon_decay_value * epsilon)

    # Print average score for the last 100 episodes
    print('\rEpisode {}\tAverage Score: {:.2f}'.format(episode, average_score), end="")

    # Every 100 episodes, print the average score
    if episode % 100 == 0:
        print('\rEpisode {}\tAverage Score: {:.2f}'.format(episode, average_score))

    # Check if the environment is solved; only once a full 100-episode window exists,
    # so the average is meaningful and `episode - 100` cannot go negative.
    window_is_full = len(scores_on_100_episodes) == scores_on_100_episodes.maxlen
    if window_is_full and average_score >= solved_average_score:
        print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(episode - 100, average_score))
        torch.save(agent.qnetwork.state_dict(), 'checkpoint.pth')  # Save model weights
        break


Episode 100	Average Score: 19.75
Episode 200	Average Score: 13.24
Episode 300	Average Score: 11.24
Episode 400	Average Score: 10.42
Episode 500	Average Score: 9.89
Episode 600	Average Score: 9.81
Episode 700	Average Score: 9.54
Episode 800	Average Score: 9.45
Episode 900	Average Score: 9.38
Episode 1000	Average Score: 9.32
Episode 1100	Average Score: 9.29
Episode 1200	Average Score: 9.38
Episode 1300	Average Score: 9.49
Episode 1400	Average Score: 9.45
Episode 1500	Average Score: 9.40
Episode 1600	Average Score: 9.41
Episode 1700	Average Score: 9.42
Episode 1800	Average Score: 9.33
Episode 1900	Average Score: 9.45
Episode 2000	Average Score: 9.52


In [None]:
import gym

# NOTE(review): the recorded output shows this call failing with
# 'We're Unable to find the game "MsPacman"' — the ROM was not available to the
# legacy `gym` package in the saved session.
env = gym.make('MsPacmanDeterministic-v0')  # Create the MsPacman environment

Error: We're Unable to find the game "MsPacman". Note: Gym no longer distributes ROMs. If you own a license to use the necessary ROMs for research purposes you can download them via `pip install gym[accept-rom-license]`. Otherwise, you should try importing "MsPacman" via the command `ale-import-roms`. If you believe this is a mistake perhaps your copy of "MsPacman" is unsupported. To check if this is the case try providing the environment variable `PYTHONWARNINGS=default::ImportWarning:ale_py.roms`. For more information see: https://github.com/mgbellemare/Arcade-Learning-Environment#rom-management

### Training the DCQN agent

## Part 3 - Visualizing the results

In [None]:
import gym
import glob
import io
import base64
import imageio
from IPython.display import HTML, display

def show_video_of_model(agent, env_name):
    """Play one greedy episode and save the rendered frames to 'video.mp4'.

    Args:
        agent: object exposing `act(state, epsilon)`; it is called with
            epsilon 0.0 so actions are chosen greedily.
        env_name: id passed to `gym.make`.
    """
    env = gym.make(env_name, render_mode='rgb_array')  # Ensure ROMs are installed or specified
    frames = []
    try:
        state, _ = env.reset()
        done = False
        while not done:
            frames.append(env.render())
            # Pass epsilon explicitly: not every agent in this notebook
            # gives it a default value.
            action = agent.act(state, 0.0)
            state, reward, terminated, truncated, _ = env.step(action)
            # End the rollout on the time limit too, not only on termination.
            done = terminated or truncated
    finally:
        # Release the environment even if the rollout raises.
        env.close()
    imageio.mimsave('video.mp4', frames, fps=30)

# NOTE(review): `agent` at this point is the agent built for 'CartPole-v1' in
# the training cell, while this call asks for an MsPacman environment —
# presumably its observations do not match what that agent's network expects;
# verify. The recorded output also shows the environment failing to load (ROM
# not found).
show_video_of_model(agent, 'MsPacmanDeterministic-v0')

def show_video():
    """Embed an MP4 from the working directory in the notebook output.

    Displays the alphabetically first '*.mp4' file as an inline, base64-encoded
    HTML5 video; prints a message when no such file exists.
    """
    # Sorted so the choice is deterministic when several videos are present.
    mp4list = sorted(glob.glob('*.mp4'))
    if not mp4list:
        print("Could not find video")
        return

    # Read-only access is enough, and the context manager closes the handle.
    with open(mp4list[0], 'rb') as video_file:
        encoded = base64.b64encode(video_file.read())
    display(HTML(data=f'''<video alt="test" autoplay
                loop controls style="height: 400px;">
                <source src="data:video/mp4;base64,{encoded.decode('ascii')}" type="video/mp4" />
             </video>'''))

# Display an '*.mp4' file from the working directory inline, if one exists.
show_video()

Error: We're Unable to find the game "MsPacman". Note: Gym no longer distributes ROMs. If you own a license to use the necessary ROMs for research purposes you can download them via `pip install gym[accept-rom-license]`. Otherwise, you should try importing "MsPacman" via the command `ale-import-roms`. If you believe this is a mistake perhaps your copy of "MsPacman" is unsupported. To check if this is the case try providing the environment variable `PYTHONWARNINGS=default::ImportWarning:ale_py.roms`. For more information see: https://github.com/mgbellemare/Arcade-Learning-Environment#rom-management