In [None]:
# Mount Google Drive inside the Colab VM at /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')
# Make the project folder on Drive the working directory. Later cells use the
# relative import `agents.cnn_dqn` and the relative output path "plots/...",
# which presumably resolve against this directory -- verify if the folder moves.
%cd /content/gdrive/My Drive/EVA_MiLab_Hackathon


Mounted at /content/gdrive
/content/gdrive/My Drive/EVA_MiLab_Hackathon


In [None]:
!pip install swig
!pip install "gymnasium[box2d]==1.0.0"

Collecting swig
  Downloading swig-4.3.0-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl.metadata (3.5 kB)
Downloading swig-4.3.0-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl (1.9 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.9 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m61.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: swig
Successfully installed swig-4.3.0
Collecting box2d-py==2.3.5 (from gymnasium[box2d]==1.0.0)
  Downloading box2d-py-2.3.5.tar.gz (374 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m374.4/374.4 kB[0m [31m24.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: box2d-py
  Building wheel for box2d-py (setup.py) ... [?25l[?25hdone
  Created wheel for box2d-py: filename=box2d_py-2.3.5-cp311-cp311-linux_x86_64.whl size=2379448 s

In [None]:
from itertools import count
import matplotlib
import matplotlib.pyplot as plt
from time import gmtime, strftime
import gymnasium as gym
import numpy as np
import torch
from matplotlib import pyplot as plt



from agents.cnn_dqn import CNN_DQN_Agent


def plot_durations(episode_durations, show_result=False, save_path=None):
    """Plot episode durations and, once available, their 100-episode mean.

    Draws on matplotlib figure 1. Relies on the module-level flag
    ``is_ipython`` and, when that flag is true, on the module-level name
    ``display`` (``IPython.display``); both must be defined before calling.

    Args:
        episode_durations: Sequence of per-episode durations (numbers).
        show_result: When False the figure is cleared first and titled
            'Training...'; when True the figure is titled 'Result' and the
            curves are drawn on top of whatever figure 1 already contains.
        save_path: Optional destination for ``plt.savefig``. If it is a
            filesystem path, missing parent directories are created first.
    """
    # Local import keeps this definition usable without touching the
    # notebook's shared import cell.
    import os

    plt.figure(1)
    durations_t = torch.tensor(episode_durations, dtype=torch.float)
    if show_result:
        plt.title('Result')
    else:
        plt.clf()
        plt.title('Training...')
    plt.xlabel('Episode')
    plt.ylabel('Duration')
    plt.plot(durations_t.numpy())
    # Take 100 episode averages and plot them too
    if len(durations_t) >= 100:
        means = durations_t.unfold(0, 100, 1).mean(1).view(-1)
        # Left-pad with zeros so the mean curve lines up with the episode axis.
        means = torch.cat((torch.zeros(99), means))
        plt.plot(means.numpy())

    plt.pause(0.001)  # pause a bit so that plots are updated
    if is_ipython:
        if not show_result:
            # Live update: show the figure, then mark the output for
            # replacement by the next display call.
            display.display(plt.gcf())
            display.clear_output(wait=True)
        else:
            display.display(plt.gcf())

    # Save the plot if a save path is provided
    if save_path:
        # savefig does not create directories; without this a missing
        # "plots/" folder would raise only after the whole training run.
        # File-like objects (also accepted by savefig) are left alone.
        if isinstance(save_path, (str, os.PathLike)):
            parent_dir = os.path.dirname(save_path)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)
        plt.savefig(save_path)
        print(f"Plot saved at {save_path}")


def train_agent(episodes, run_name):
    """Train a CNN_DQN_Agent on CarRacing-v3 for a number of episodes.

    Reads the module-level names ``hyperparameters`` (keyword arguments
    forwarded to the agent constructor) and ``device`` (torch device used for
    the state and reward tensors); both must be defined before calling.

    Args:
        episodes: Number of episodes to run.
        run_name: Identifier passed to the agent and used in the file name of
            the final training plot ("plots/<run_name>_training_plot.png").
    """
    env = gym.make("CarRacing-v3", render_mode="rgb_array", lap_complete_percent=0.95, domain_randomize=False, continuous=True)

    # try/finally so the environment is released even if training raises.
    try:
        agent = CNN_DQN_Agent(
            input_shape=env.observation_space.shape,
            action_space=env.action_space,
            # Use the argument, not the RUN_NAME global, so the agent and the
            # saved plot always share the name the caller asked for.
            run_name=run_name,
            **hyperparameters
        )
        # agent.load_checkpoint()

        for episode in range(episodes):
            state, info = env.reset()
            state = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
            total_reward = 0

            for t in count():
                action = agent.select_action(state)

                observation, reward, terminated, truncated, info = env.step(action.cpu().numpy())
                reward = torch.tensor([reward], device=device)
                done = terminated or truncated

                if terminated:
                    # Terminal transition: there is no successor state.
                    next_state = None
                else:
                    next_state = torch.tensor(observation, dtype=torch.float32, device=device).unsqueeze(0)

                # Calling .to() on a None next_state raised AttributeError on
                # every terminated episode; pass None through unchanged.
                # NOTE(review): assumes agent.memory / agent.train_step accept
                # a None next_state for terminal transitions -- confirm in
                # agents.cnn_dqn.
                agent.memory.push(
                    state.to(agent.device),
                    action.to(agent.device),
                    None if next_state is None else next_state.to(agent.device),
                    reward.to(agent.device),
                )
                agent.train_step()
                state = next_state
                # reward is a tensor, so total_reward becomes a tensor after
                # the first step; the final print relies on that.
                total_reward += reward

                if done:
                    agent.episode_durations.append(t + 1)
                    plot_durations(agent.episode_durations)

                    # Save plot at the end of training
                    if episode == episodes - 1:  # Last episode
                        plot_durations(agent.episode_durations, show_result=True, save_path=f"plots/{run_name}_training_plot.png")
                    break

            agent.log_reward(episode, total_reward)

            # Checkpoint every 50 episodes, skipping episode 0.
            if episode % 50 == 0 and episode > 0:
                agent.save_checkpoint(episode)

            print(f"Episode {episode}: Total Reward: {total_reward.cpu().item()}")
    finally:
        env.close()



In [None]:
# --- Compute device ---------------------------------------------------------
# train_agent() reads `device` as a global when building state/reward tensors.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device {device}")

# --- Notebook display support -----------------------------------------------
# plot_durations() reads `is_ipython` and, only when it is true, `display`.
is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython:
    from IPython import display

# --- Agent settings ---------------------------------------------------------
# Forwarded verbatim as keyword arguments to CNN_DQN_Agent by train_agent().
# The trailing notes record the author's stated intent for each value.
hyperparameters = dict(
    batch_size=128,                    # chosen for more stable training
    gamma=0.99,                        # weight long-term rewards more heavily
    epsilon_start=1.0,
    epsilon_end=0.1,
    tau=0.01,                          # faster soft updates
    epsilon_decay_steps=25000,         # balance exploration and exploitation
    learning_rate=0.0003,              # unchanged from the previous setting
    replay_buffer_size=50000,          # store more experience
    steps_per_target_net_update=1000,  # update the target net less frequently
)

# --- Launch -----------------------------------------------------------------
# The run name carries a UTC timestamp taken when this cell executes.
RUN_NAME = f"CNN_DQN_{strftime('%Y%m%d%H%M%S', gmtime())}"
train_agent(episodes=500, run_name=RUN_NAME)

Episode 499: Total Reward: 14.942222595214844


<Figure size 640x480 with 0 Axes>