In [9]:
from mss import mss
import pydirectinput
import cv2
import numpy as np
import pytesseract
from matplotlib import pyplot as plt
import time
from gym import Env
from gym.spaces import Box, Discrete
import os 
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3 import DQN
# Location of the Tesseract executable used by pytesseract for OCR.
# The path is machine-specific, so it can be overridden with the TESSERACT_CMD
# environment variable; the original hard-coded path is kept as the fallback.
pytesseract.pytesseract.tesseract_cmd = os.environ.get(
    'TESSERACT_CMD',
    r'C:\Users\33771\anaconda3\envs\tesseract\Library\bin\tesseract.exe',
)

In [10]:
class DinoGame(Env):
    """Gym environment that drives a game through screen capture and key presses.

    Observations are grayscale screenshots of ``game_location``; actions are
    key presses sent with pydirectinput; the end of an episode is detected by
    running OCR on ``done_location`` and looking for the game-over text.
    """

    def __init__(self):
        super().__init__()

        # One grayscale channel, 83 rows x 100 columns, pixel values 0-255.
        self.observation_space = Box(low=0, high=255, shape=(1, 83, 100), dtype=np.uint8)
        # 0 = press space, 1 = press down, 2 = do nothing (see step()).
        self.action_space = Discrete(3)

        # Screen-capture handle and the two screen regions (in pixels) it grabs.
        self.cap = mss()
        self.game_location = {'top': 300, 'left': 0, 'width': 600, 'height': 500}
        self.done_location = {'top': 405, 'left': 630, 'width': 660, 'height': 70}

    def step(self, action):
        """Apply one action and return ``(observation, reward, done, info)``.

        Args:
            action: index into the action table (0, 1 or 2). Anything that
                ``int()`` accepts is allowed, including numpy scalars and
                0-d arrays.

        Returns:
            The new observation, a constant reward of 1, the game-over flag
            and an empty info dict.
        """
        actions = {
            0: "space",
            1: "down",
            2: "no_op"
        }
        # A 0-d numpy array (as model.predict can return) is unhashable and
        # would raise TypeError on the dict lookup below, so normalise first.
        action = int(action)
        if action != 2:
            pydirectinput.press(actions[action])
        # Game-over is checked before the observation is captured.
        done, _ = self.get_done()
        new_observation = self.get_observation()
        reward = 1
        info = {}

        return new_observation, reward, done, info

    def render(self, mode="human"):
        """Show the current game region in an OpenCV window.

        Args:
            mode: accepted so that callers passing gym's conventional ``mode``
                argument do not fail; the value is ignored.

        Pressing ``q`` in the window closes it.
        """
        # mss returns four channels; keep the first three for display.
        cv2.imshow('Game', np.array(self.cap.grab(self.game_location))[:, :, :3])
        if cv2.waitKey(1) & 0xFF == ord('q'):
            self.close()

    def reset(self):
        """Restart the game and return the first observation."""
        # Pause, click at (150, 150), then press space to start a new run.
        time.sleep(1)
        pydirectinput.click(x=150, y=150)
        pydirectinput.press("space")
        return self.get_observation()

    def close(self):
        """Close any OpenCV windows opened by render()."""
        cv2.destroyAllWindows()

    def get_observation(self):
        """Capture the game region as a ``(1, 83, 100)`` grayscale array."""
        raw = np.array(self.cap.grab(self.game_location))[:, :, :3]
        gray = cv2.cvtColor(raw, cv2.COLOR_BGR2GRAY)
        # cv2.resize takes (width, height), producing an 83 x 100 image.
        resized = cv2.resize(gray, (100, 83))
        # Add the leading channel axis declared in observation_space.
        channel = np.reshape(resized, (1, 83, 100))
        return channel

    def get_done(self):
        """Run OCR on the game-over region.

        Returns:
            ``(done, done_cap)`` where ``done`` is True when the first four
            characters of the OCR text match a known game-over string, and
            ``done_cap`` is the captured image that was analysed.
        """
        done_cap = np.array(self.cap.grab(self.done_location))[:, :, :3]
        # 'GAHE' is listed next to 'GAME' as an alternative spelling to match.
        done_strings = ['GAME', 'GAHE']
        res = pytesseract.image_to_string(done_cap)[:4]
        done = res in done_strings

        return done, done_cap

In [11]:
# Create the environment; this also opens the mss screen-capture handle.
env = DinoGame()

In [44]:
# Manual check: capture a single observation from the current screen.
obs = env.get_observation()

In [45]:
# Manual check: run the game-over OCR once; done_cap is the captured region.
done,done_cap = env.get_done()

In [48]:
# Smoke-test the environment: play 10 episodes with randomly sampled actions
# and report the reward accumulated in each one.

for episode in range(10):
    total_reward = 0
    obs = env.reset()
    done = False

    while not done:
        sampled_action = env.action_space.sample()
        obs, reward, done, info = env.step(sampled_action)
        total_reward = total_reward + reward
    print(f'Total Reward for episode {episode} is {total_reward}')

Total Reward for episode 0 is 17
Total Reward for episode 1 is 21
Total Reward for episode 2 is 24
Total Reward for episode 3 is 24
Total Reward for episode 4 is 37
Total Reward for episode 5 is 24
Total Reward for episode 6 is 19
Total Reward for episode 7 is 28
Total Reward for episode 8 is 27
Total Reward for episode 9 is 43


In [23]:

class TrainAndLoggingCallback(BaseCallback):
    """Callback that saves the model every ``check_freq`` callback calls.

    Args:
        check_freq: number of calls between two checkpoints.
        save_path: directory the checkpoints are written to, or None to
            disable saving.
        verbose: verbosity level forwarded to BaseCallback.
    """

    def __init__(self, check_freq, save_path, verbose=1):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        """Create the checkpoint directory if one was configured."""
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        """Save a checkpoint when due; always returns True."""
        # _init_callback treats save_path as optional, so do the same here
        # instead of passing None to os.path.join.
        if self.save_path is None:
            return True

        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model3_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [24]:
# Directory handed to the training callback as its checkpoint save path.
CHECKPOINT_DIR = './train/'
# Directory handed to DQN as its TensorBoard log location.
LOG_DIR = './logs/'

In [25]:
# Save a checkpoint into CHECKPOINT_DIR every 1000 callback calls.
callback = TrainAndLoggingCallback(check_freq=1000,save_path=CHECKPOINT_DIR)

In [29]:
# Build a DQN agent with a CNN policy on the screen-capture environment.
# NOTE(review): each observation is 1*83*100 uint8 = 8300 bytes, so a
# 1,200,000-entry replay buffer needs roughly 10 GB per stored observation
# array -- confirm the machine has enough RAM or lower buffer_size.
model = DQN('CnnPolicy',env,tensorboard_log=LOG_DIR,verbose=1,buffer_size=1200000,learning_starts=1000)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [30]:
# Train for 200,000 timesteps; the callback writes periodic checkpoints.
model.learn(total_timesteps=200000,callback=callback)

Logging to ./logs/DQN_2
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 24.2     |
|    ep_rew_mean      | 24.2     |
|    exploration_rate | 0.995    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 2        |
|    time_elapsed     | 41       |
|    total_timesteps  | 97       |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.4     |
|    ep_rew_mean      | 22.4     |
|    exploration_rate | 0.991    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 2        |
|    time_elapsed     | 77       |
|    total_timesteps  | 179      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.8     |
|    ep_rew_mean      | 23.8     |
|    exploration_rate | 0.986    |
| time/               |        

<stable_baselines3.dqn.dqn.DQN at 0x1fb70afa8e0>

In [36]:
# Load a saved checkpoint from the train directory.
# NOTE(review): the callback in this notebook writes files named
# 'best_model3_<n>', so 'best_model2_200000' presumably comes from an earlier
# run -- confirm this is the intended checkpoint.
model = DQN.load(os.path.join('train','best_model2_200000'))

In [39]:
# Play one episode with the loaded model and report the accumulated reward.
total_reward = 0
obs = env.reset()
done = False

while not done:
    # predict() returns (action, state); only the action is needed here.
    action, _unused_state = model.predict(obs)
    obs, reward, done, info = env.step(int(action))
    time.sleep(0.01)
    total_reward = total_reward + reward
print(f'Total Reward of episode is {total_reward}')
time.sleep(1)

Total Reward of episode is 21
