# Chrome Dino Game-Playing Agent

## Importing Libraries

In [None]:
from mss import mss
import pydirectinput
import cv2
import numpy as np
import pytesseract 
import matplotlib.pyplot as plt
import time
from gym import Env
from gym.wrappers import FrameStack
from gym.spaces import Box, Discrete
import os
from stable_baselines3 import DQN
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common import env_checker

## Creating the Environment

In [None]:
class Game(Env):
    """Gym environment that drives the Chrome Dino game via screen capture.

    Observations are grayscale screenshots of a hard-coded screen region,
    resized and reshaped to ``(1, 83, 100)`` uint8. Actions are sent to the
    focused window as key presses. Episode termination is detected by running
    OCR on a second hard-coded screen region.

    All screen regions are absolute pixel coordinates, so they must be
    adjusted to the monitor layout of the machine running the game.
    """

    # Location of the Tesseract binary used for every OCR call.
    _TESSERACT_CMD = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
    # Key pressed for each action id; action 2 is the no-op and presses nothing.
    _ACTION_KEYS = {0: 'space', 1: 'down', 2: 'no_op'}
    # First four OCR characters that count as the game-over banner
    # ('GAHE' is accepted alongside 'GAME').
    _DONE_STRINGS = ('GAME', 'GAHE')

    def __init__(self):
        """Set up spaces, the screen grabber and the capture regions."""
        super().__init__()
        self.observation_space = Box(low=0, high=255, shape=(1, 83, 100), dtype=np.uint8)
        self.action_space = Discrete(3)
        self.cap = mss()
        self.game_location = {'top': 1200, 'left': 0, 'width': 1600, 'height': 800}
        self.done_location = {'top': 1010, 'left': 1130, 'width': 1660, 'height': 170}
        self.reward_location = {'top': 415, 'left': 3400, 'width': 370, 'height': 600}
        # Configure pytesseract once here instead of on every step/get_done call.
        pytesseract.pytesseract.tesseract_cmd = self._TESSERACT_CMD

    def step(self, action):
        """Apply ``action`` and return ``(observation, reward, done, info)``.

        Args:
            action: 0 presses 'space', 1 presses 'down', 2 does nothing.

        Returns:
            The new observation, a constant reward of 1, the game-over flag
            and an empty info dict.
        """
        if action != 2:
            pydirectinput.press(self._ACTION_KEYS[action])

        done, _ = self.get_done()
        new_observation = self.get_observation()

        # Constant per-step reward. The score OCR is deliberately NOT run
        # here: its result was never used, and it cost a screen grab plus a
        # Tesseract call on every step. See _read_score_text() if a
        # score-based reward is wanted later.
        reward = 1
        info = {}

        return new_observation, reward, done, info

    def _read_score_text(self):
        """OCR the score region and return the text with its first two characters dropped.

        Not called by step(); kept so a score-based reward can be wired in.
        The returned string is not validated, so callers must handle
        non-numeric OCR output before converting it to int.
        """
        pic = np.array(self.cap.grab(self.reward_location))[:, :, :3]
        pic = cv2.cvtColor(pic, cv2.COLOR_BGR2GRAY)
        pic = cv2.resize(pic, (200, 300))
        # NOTE(review): a 1x1 kernel and size-1 filters look like no-ops;
        # they are kept as tuning hooks - confirm before relying on them.
        pic = cv2.erode(pic, np.ones((1, 1), np.uint8), iterations=1)
        pic = cv2.medianBlur(pic, 1)
        pic = cv2.bilateralFilter(pic, 1, sigmaColor=130, sigmaSpace=130)
        return pytesseract.image_to_string(pic).strip()[2:]

    def render(self):
        """Show the current game region in an OpenCV window; 'q' closes it."""
        cv2.imshow('Game', np.array(self.cap.grab(self.game_location))[:, :, :3])
        if cv2.waitKey(1) & 0xFF == ord('q'):
            self.close()

    def reset(self):
        """Restart the game and return the first observation.

        Clicks at screen position (150, 150) and then presses 'space'.
        """
        time.sleep(0.1)
        pydirectinput.click(x=150, y=150)
        pydirectinput.press('space')
        return self.get_observation()

    def get_observation(self):
        """Grab the game region and return it as a (1, 83, 100) grayscale array."""
        raw = np.array(self.cap.grab(self.game_location))[:, :, :3]
        gray = cv2.cvtColor(raw, cv2.COLOR_BGR2GRAY)
        # cv2.resize takes (width, height), yielding an 83x100 array.
        resized = cv2.resize(gray, (100, 83))
        return np.reshape(resized, (1, 83, 100))

    def get_done(self):
        """Return ``(done, done_cap)``.

        ``done`` is True when the first four OCR characters of the game-over
        region match one of the known banner strings. ``done_cap`` is the
        captured image of that region.
        """
        done_cap = np.array(self.cap.grab(self.done_location))[:, :, :3]
        res = pytesseract.image_to_string(done_cap)[:4]
        done = res in self._DONE_STRINGS
        return done, done_cap

    def close(self):
        """Destroy any OpenCV windows opened by render()."""
        cv2.destroyAllWindows()
    

In [None]:
# Create the environment instance shared by the training and testing cells.
env = Game()

## Setting up Callback Function

In [None]:
class TrainAndLoggingCallback(BaseCallback):
    """Callback that saves the model every ``check_freq`` calls.

    Args:
        check_freq: Save a checkpoint whenever ``n_calls`` is a multiple of
            this value.
        save_path: Directory for checkpoints, or None to disable saving.
        verbose: Verbosity level forwarded to ``BaseCallback``.
    """

    def __init__(self, check_freq, save_path, verbose=1):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        """Create the checkpoint directory if a save path was given."""
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        """Save a checkpoint on every ``check_freq``-th call; always continue training."""
        # Honour save_path=None here as _init_callback does; previously this
        # path reached os.path.join(None, ...) and raised TypeError.
        if self.save_path is not None and self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, f'best_model_{self.n_calls}')
            self.model.save(model_path)
        return True

In [None]:
# Directory where the training callback writes model checkpoints.
CHECKPOINT_DIR = './RL_MODEL2/'
# Directory passed to the model as its TensorBoard log location.
LOG_DIR = './logs/'

In [None]:
# Checkpoint the model into CHECKPOINT_DIR every 1000 callback calls.
callback = TrainAndLoggingCallback(check_freq=1000, save_path = CHECKPOINT_DIR)

## Building and Training DQN

In [None]:
# DQN agent with a CNN policy over the screen-capture observations.
# NOTE(review): 1,200,000 transitions of (1, 83, 100) uint8 frames is roughly
# 10 GB per stored observation array - confirm this fits in RAM.
model = DQN(
    'CnnPolicy',
    env,
    learning_rate=0.000001,
    buffer_size=1200000,
    learning_starts=100,
    tensorboard_log=LOG_DIR,
    verbose=1,
)

In [None]:
# Train for 50,000 timesteps, passing `callback` for periodic checkpointing.
model.learn(total_timesteps=50000, callback=callback)

## Testing the Model

In [None]:
# Replace the in-memory model with the checkpoint saved at callback call 3000.
model = DQN.load('./RL_MODEL2/best_model_3000')

In [None]:
# Play four evaluation episodes with the loaded model and report each
# episode's accumulated reward.
for episode in range(4):
    obs = env.reset()
    done = False
    total_reward = 0

    while not done:
        action, _ = model.predict(obs)
        obs, reward, done, info = env.step(int(action))
        time.sleep(0.01)
        total_reward += reward
    # Report the accumulated total; the previous message printed `reward`,
    # which is only the last step's reward.
    print(f"Total reward for episode {episode} is {total_reward}")
    # Short pause before the next episode's reset.
    time.sleep(1)

#### MISC TO BE IGNORED (Testing OCR on Score)

In [None]:
# Scratch copies of the screen regions (absolute pixel coordinates).
reward_location = {'top':415, 'left':3400, 'width':370, 'height':600}
game_location = {'top':1200, 'left':0, 'width':1600, 'height':800}
done_location = {'top':1010, 'left':1130, 'width':1660, 'height':170}
# Grab one frame and drop the fourth channel.
# NOTE(review): this captures game_location although the section is about
# score OCR - confirm whether reward_location was intended.
pic = np.array(mss().grab(game_location))[:,:,:3]

In [None]:
# The capture is converted to grayscale with COLOR_BGR2GRAY elsewhere in this
# notebook, i.e. it is in BGR order; matplotlib expects RGB, so convert for
# display only (`pic` itself is left untouched for the next cell).
plt.imshow(cv2.cvtColor(pic, cv2.COLOR_BGR2RGB))

In [None]:
# Preprocess the captured frame before OCR: grayscale, resize to 200x300
# (width x height), then erode / median-blur / bilateral-filter.
pic = cv2.cvtColor(pic, cv2.COLOR_BGR2GRAY)
pic = cv2.resize(pic, (200,300))
# NOTE(review): a 1x1 kernel and an aperture/diameter of 1 look like no-ops;
# presumably placeholders for tuning - confirm.
kernel = np.ones((1, 1), np.uint8)
kernel1 = 1
#pic = cv2.dilate(pic, kernel, iterations=1)
pic = cv2.erode(pic, kernel, iterations=1)
pic = cv2.medianBlur(pic, kernel1)
pic = cv2.bilateralFilter(pic, kernel1, sigmaColor=120, sigmaSpace=120)

In [None]:
# `pic` is single-channel after preprocessing; render it in grayscale rather
# than matplotlib's default false-colour colormap so it looks like the OCR input.
plt.imshow(pic, cmap='gray')

In [None]:
# Run OCR on the preprocessed image; the result is a raw string.
reward = pytesseract.image_to_string(pic)

In [None]:
# Display the raw OCR text as the cell output.
reward

In [None]:
# Trim surrounding whitespace, drop the first character, and parse the rest.
# int() raises ValueError if the remaining text is not a valid integer.
reward = reward.strip()
reward = reward[1:]
reward = int(reward)