In [1]:
from mss import mss
import cv2
import numpy as np
import time
import pyautogui
import pytesseract
from matplotlib import pyplot as plt
import gymnasium as gym
from gym import Env 
from gymnasium import spaces
import os
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common import env_checker
from stable_baselines3 import DQN


In [83]:
class DinoWeb(gym.Env):
    """Gymnasium environment driven by screen capture and simulated key presses.

    Observations are grayscale screenshots of ``game_location`` resized to
    100x83 pixels and returned as a ``(1, 83, 100)`` float32 array. Actions
    are sent to the focused window through ``pyautogui``. The episode ends
    when one sampled pixel of the ``done_location`` capture equals
    ``DONE_COLOR``.

    All screen coordinates are hard-coded; they presumably match the
    author's monitor/browser layout and must be re-calibrated elsewhere.
    """

    # Key pressed for each discrete action. Action 2 ("none") presses nothing.
    ACTION_KEYS = {0: 'up', 1: 'down'}
    # (row, column) sampled inside the ``done_location`` capture.
    DONE_PIXEL_INDEX = (15, 400)
    # Channel values that mark "game over" at DONE_PIXEL_INDEX.
    # NOTE(review): the original comment asked to replace this with the
    # actual game-over colour -- confirm it against a real capture.
    DONE_COLOR = (83, 83, 83)

    def __init__(self):
        """Create the action/observation spaces and the screen grabber."""
        super().__init__()
        self.action_space = spaces.Discrete(3)
        self.observation_space = spaces.Box(low=0, high=255, shape=(1, 83, 100), dtype=np.float32)
        self.cap = mss()
        # Screen region used for observations and for render().
        self.game_location = {"top": 300, "left": 0, "width": 600, "height": 500}
        # Screen region inspected by get_done().
        self.done_location = {"top": 405, "left": 430, "width": 600, "height": 50}
        # Not read by any method of this class; kept for backward compatibility.
        self.done_pixel = (666, 411)

    def step(self, action):
        """Apply ``action`` and return the Gymnasium 5-tuple.

        Args:
            action: 0 (press 'up'), 1 (press 'down') or 2 (no key press).
                Anything accepted by ``int()`` works, e.g. the 0-d array
                returned by ``model.predict``.

        Returns:
            ``(obs, reward, terminated, truncated, info)`` where ``reward``
            is always 1, ``truncated`` is always False and ``info`` is empty.

        Raises:
            KeyError: if ``action`` is not 0, 1 or 2.
        """
        # Arrays are unhashable and cannot be used as dict keys, so normalise
        # the action to a plain int first.
        action = int(action)
        if action != 2:
            pyautogui.press(self.ACTION_KEYS[action])
        # Termination is checked before the observation is captured.
        done, _ = self.get_done()
        obs = self.get_observation()
        reward = 1
        info = {}

        return obs, reward, done, False, info

    def render(self):
        """Show the current game region in an OpenCV window; 'q' closes it."""
        cv2.imshow('Dino', np.array(self.cap.grab(self.game_location))[:,:,:3])
        if cv2.waitKey(1) & 0xFF == ord('q'):
            self.close()

    def reset(self, seed=None, options=None):
        """Restart the game and return ``(observation, info)``.

        Args:
            seed: forwarded to ``gym.Env.reset`` so the environment's RNG is
                seeded as the Gymnasium API specifies.
            options: accepted for API compatibility; unused.
        """
        super().reset(seed=seed)
        time.sleep(1.5)
        # Click at a fixed screen position, then press space.
        # Presumably this focuses the game window and restarts the run.
        pyautogui.click(300, 300)
        pyautogui.press('space')
        return self.get_observation(), {}

    def close(self):
        """Destroy any OpenCV windows opened by ``render``.

        The screen grabber in ``self.cap`` is left open on purpose:
        ``render`` calls this method when 'q' is pressed and the environment
        keeps being used afterwards.
        """
        cv2.destroyAllWindows()
        cv2.waitKey(1)

    def get_observation(self):
        """Capture the game region as a ``(1, 83, 100)`` float32 grayscale array."""
        # Keep the first three channels; they are treated as BGR below.
        raw = np.array(self.cap.grab(self.game_location))[:,:,:3]
        gray = cv2.cvtColor(raw, cv2.COLOR_BGR2GRAY)
        # cv2.resize takes (width, height); the result has shape (83, 100).
        resized = cv2.resize(gray, (100, 83))
        channel = np.reshape(resized, (1, 83, 100)).astype(np.float32)
        return channel

    def get_done(self):
        """Return ``(done, pixel)`` for the sampled game-over pixel.

        ``done`` is True when the pixel at ``DONE_PIXEL_INDEX`` of the
        ``done_location`` capture equals ``DONE_COLOR``. ``pixel`` is that
        pixel's three channel values, returned for calibration/debugging.
        """
        screen = np.array(self.cap.grab(self.done_location))[:,:,:3]
        row, col = self.DONE_PIXEL_INDEX
        done_pixel_color = screen[row, col]
        done = bool(np.array_equal(done_pixel_color, self.DONE_COLOR))
        return done, done_pixel_color

In [72]:
# Create the environment here so this cell (and every later cell that uses
# `env`) works on a fresh kernel instead of relying on leftover state.
env = DinoWeb()
env_checker.check_env(env)



In [6]:
class TrainAndLoggingCallback(BaseCallback):
    """Save the model being trained every ``check_freq`` callback calls.

    Args:
        check_freq: number of callback calls between two checkpoints.
        save_path: directory receiving the checkpoints, or None to disable
            saving entirely.
        verbose: verbosity level forwarded to ``BaseCallback``.
    """

    def __init__(self, check_freq, save_path, verbose=0):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        """Create the checkpoint directory when a save path was given."""
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        """Write a checkpoint on every ``check_freq``-th call; always return True."""
        # Same None guard as _init_callback: without it os.path.join(None, ...)
        # raises TypeError at the first checkpoint.
        if self.save_path is not None and self.n_calls % self.check_freq == 0:
            # The 'moddel' spelling is kept so existing checkpoint paths stay valid.
            model_path = os.path.join(self.save_path, 'moddel{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [7]:
# Directory passed to DQN as `tensorboard_log`.
LOG_DIR = './logs/'
# Directory passed to the checkpoint callback as `save_path`.
CHECKPOINT_DIR = './train/'

In [8]:
# Checkpoint callback: saves into CHECKPOINT_DIR every 1000 callback calls.
callback = TrainAndLoggingCallback(
    save_path=CHECKPOINT_DIR,
    check_freq=1000,
)

In [74]:
# DQN agent with a CNN policy on the screen-capture environment.
# normalize_images=False is needed because the env declares float32
# observations rather than uint8 images.
model = DQN(
    'CnnPolicy',
    env,
    policy_kwargs=dict(normalize_images=False),
    buffer_size=300000,
    learning_starts=1000,
    tensorboard_log=LOG_DIR,
    verbose=1,
)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




In [75]:
# Train for 100,000 timesteps, passing the checkpoint callback to the learner.
model.learn(callback=callback, total_timesteps=100_000)

Logging to ./logs/DQN_3
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 36.8     |
|    ep_rew_mean      | 36.8     |
|    exploration_rate | 0.986    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 5        |
|    time_elapsed     | 28       |
|    total_timesteps  | 147      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 43.5     |
|    ep_rew_mean      | 43.5     |
|    exploration_rate | 0.967    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 5        |
|    time_elapsed     | 61       |
|    total_timesteps  | 348      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 47.6     |
|    ep_rew_mean      | 47.6     |
|    exploration_rate | 0.946    |
| time/               |        

<stable_baselines3.dqn.dqn.DQN at 0x13a787440>

In [None]:
# Play 10 episodes with the trained model and report each episode's return.
for episode in range(10):
    obs, info = env.reset()
    done = False
    total_reward = 0
    while not done:
        action, _states = model.predict(obs)
        print(action)
        obs, reward, terminated, truncated, info = env.step(int(action))
        # An episode is over when it terminates OR is truncated; checking
        # only the first flag would loop forever on a truncated episode.
        done = terminated or truncated
        total_reward += reward
    print(f'Episode {episode} Total Reward: {total_reward}')