In [None]:
import torch
import gymnasium as gym
import dqn as d
import importlib
import cv2 
from gymnasium.wrappers import GrayscaleObservation, ResizeObservation, FrameStackObservation
import threading
import matplotlib.pyplot as plt
from IPython.display import clear_output
import time
import plotly.graph_objects as go
importlib.reload(d)

In [None]:
def plot_rewards_threaded(interval=1.0, smooth_window=10):
    """Show a live Plotly chart of smoothed episode rewards.

    A daemon thread wakes every ``interval`` seconds, reads the notebook-level
    ``episode_rewards`` list and redraws the single trace of a ``FigureWidget``.
    ``episode_rewards`` is looked up by name on every refresh, so it must be
    defined before the first refresh fires.

    Args:
        interval: Seconds between two refreshes of the chart.
        smooth_window: How many *preceding* episodes are averaged together
            with the current one; every plotted point is therefore the mean
            of up to ``smooth_window + 1`` rewards.

    Returns:
        threading.Event: call ``.set()`` on it to stop the background thread.
        The worker draws one last time before exiting so the chart ends up
        showing every recorded episode.
    """
    fig = go.FigureWidget()
    fig.add_scatter(x=[], y=[], mode='lines+markers', name='Smoothed Reward')
    fig.update_layout(
        title='Episode Rewards Over Time',
        xaxis_title='Episode',
        yaxis_title='Reward',
        yaxis_range=[0, None]
    )
    display(fig)

    # Without a stop handle every re-run of the calling cell would leave one
    # more thread polling forever; the event lets the caller shut it down.
    stop_event = threading.Event()

    def refresh():
        # Snapshot once: the training loop appends to the list concurrently,
        # and the smoothing below must see one consistent length.
        rewards = list(episode_rewards)
        if not rewards:
            return
        smoothed = []
        for i in range(len(rewards)):
            window = rewards[max(0, i - smooth_window):i + 1]
            smoothed.append(sum(window) / len(window))
        with fig.batch_update():
            fig.data[0].x = list(range(len(smoothed)))
            fig.data[0].y = smoothed

    def plot_loop():
        # Event.wait returns False on timeout (keep polling) and True once
        # the event is set (leave the loop).
        while not stop_event.wait(interval):
            refresh()
        refresh()  # final draw so the last episodes are not lost

    t = threading.Thread(target=plot_loop, daemon=True)
    t.start()
    return stop_event

In [None]:
# Training environment: discrete-action CarRacing rendered off-screen, then
# frame-skipped, converted to grayscale, resized to 84x84 and stacked 4 deep.
env = gym.make("CarRacing-v3", render_mode="rgb_array", continuous=False)
env = d.SkipFrame(env, skip=4)
env = GrayscaleObservation(env)
env = ResizeObservation(env, (84, 84))
env = FrameStackObservation(env, stack_size=4)

action_dim = env.action_space.n  # 5 discrete actions

# Take the state shape from the wrapped environment instead of repeating the
# wrapper settings by hand, so it cannot drift when the resize / stack values
# above are changed. With the settings above this is (4, 84, 84).
state_shape = tuple(env.observation_space.shape)

# NOTE(review): 800_000 transitions of (4, 84, 84) frames is ~22.6 GB per
# stored state array even at 1 byte per pixel -- confirm ReplayBuffer's dtype
# and layout fit in memory.
buffer = d.ReplayBuffer(capacity=800_000, state_dim=state_shape)
agent = d.DQNAgent(state_shape, action_dim, buffer, ddqn=True)

# Passed to agent.decay(episode, factor) after every training episode;
# presumably controls the exploration schedule -- confirm in dqn.py.
factor = 0.7
num_episodes = 2000

In [None]:
# Per-episode returns; the live plot thread reads this list by name.
episode_rewards = []
plot_rewards_threaded()

# Early-stop thresholds for a single episode.
GRACE_STEPS = 300          # negative-reward steps are only counted after this many steps
MAX_NEGATIVE_STEPS = 25    # abort once this many counted steps had a negative reward

try:
    for episode in range(num_episodes):
        state, _ = env.reset()
        terminated = truncated = False
        cumm_reward = 0
        step_num = 0
        neg_count = 0
        while not (terminated or truncated):
            action = agent.act(state)
            next_state, reward, terminated, truncated, _ = env.step(action)

            step_num += 1
            cumm_reward += reward

            # Store and learn from the transition BEFORE the early-stop
            # checks: breaking first would drop exactly the transitions that
            # caused the episode to be cut short, so they would never reach
            # the replay buffer.
            buffer.push(state, action, reward, next_state, terminated)
            agent.train()
            state = next_state

            # Abort as soon as the running return drops below zero.
            if cumm_reward < 0:
                break

            # After the grace period, abort once enough steps were negative.
            if step_num > GRACE_STEPS:
                if reward < 0:
                    neg_count += 1
                if neg_count >= MAX_NEGATIVE_STEPS:
                    break

        agent.decay(episode, factor)

        episode_rewards.append(cumm_reward)
finally:
    # Release the environment even when the cell is interrupted or a step
    # raises, which is the usual way a long notebook training run ends.
    env.close()

In [None]:
# Load a previously saved agent from 'models/DQN_model.pth' into `agent`.
# NOTE(review): in top-to-bottom order this runs right after the training
# cell, so it presumably replaces whatever was just trained -- confirm that is
# intended. The path also differs from the 'model.pth' written by the save
# cell at the end of the notebook.
agent.load_full_model('models/DQN_model.pth')

In [None]:
# Evaluation environment: same preprocessing as training, but rendered to a
# window (render_mode="human") so the run can be watched.
env = gym.make("CarRacing-v3", render_mode="human", continuous=False)
env = d.SkipFrame(env, skip=4)
env = GrayscaleObservation(env)
env = ResizeObservation(env, (84, 84))
env = FrameStackObservation(env, stack_size=4)
test_episodes = 1

try:
    for episode in range(test_episodes):
        state, _ = env.reset()
        terminated = truncated = False
        while not (terminated or truncated):
            # agent.policy is used here instead of agent.act; presumably the
            # greedy, non-exploring action -- confirm in dqn.py.
            action = agent.policy(state)
            # Only the next observation and the end-of-episode flags are
            # needed while watching the agent drive.
            state, _, terminated, truncated, _ = env.step(action)
finally:
    # Always close the environment so the render window does not stay open
    # when the cell is interrupted or a step raises.
    env.close()

In [None]:
# Save the current agent to 'model.pth' in the working directory.
# NOTE(review): the load cell earlier in the notebook reads
# 'models/DQN_model.pth', so this file is not the one that cell picks up --
# confirm which path is intended.
agent.save_full_model('model.pth')