# 1. Install and Import Dependencies

In [None]:
#INSTALL

!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
!pip install opencv-python
!pip install mss
!pip install Pillow
!pip install pytesseract
!pip install mss
!pip install pydirectinput
!pip install gym

In [None]:
#IMPORTS

import os
import time

import cv2
import numpy as np
import pydirectinput
import pytesseract
from gym import Env
from gym.spaces import Box, Discrete
from matplotlib import pyplot as plt
from mss import mss
from stable_baselines3.common import env_checker

# Location of the Tesseract OCR executable used by pytesseract. Set the
# TESSERACT_CMD environment variable to override the default Windows path.
pytesseract.pytesseract.tesseract_cmd = os.environ.get(
    'TESSERACT_CMD',
    'C:\\Program Files\\Tesseract-OCR\\tesseract.exe',
)

# 2. Build the Environment

## 2.1 Create Environment

In [None]:
class WebGame(Env):
    """Gym environment that plays an on-screen game via screen capture and key presses.

    Observations are single-channel grayscale frames of shape (1, 83, 100) and
    dtype uint8, grabbed from ``game_location``. There are three discrete
    actions: 0 presses 'space', 1 presses 'down', and 2 sends no key. An
    episode is reported as done when the OCR text read from ``done_location``
    starts with "G".

    Args:
        game_location: Optional screen region (dict with 'top', 'left',
            'width', 'height') to capture as the observation. Defaults to the
            region originally hard-coded in this notebook.
        done_location: Optional screen region (same keys) that is OCR-scanned
            to detect the end of an episode. Defaults to the region originally
            hard-coded in this notebook.
    """

    # Key sent to the game for each discrete action; 'no_op' sends nothing.
    ACTION_KEYS = {0: 'space', 1: 'down', 2: 'no_op'}

    def __init__(self, game_location=None, done_location=None):
        super().__init__()
        self.observation_space = Box(low=0, high=255, shape=(1,83,100), dtype=np.uint8)
        self.action_space = Discrete(3)
        self.cap = mss()
        # Copy caller-supplied regions so later changes to their dicts do not leak in.
        self.game_location = (
            dict(game_location) if game_location is not None
            else {'top': 300, 'left': 0, 'width': 600, 'height': 500}
        )
        self.done_location = (
            dict(done_location) if done_location is not None
            else {'top': 405, 'left': 630, 'width': 300, 'height': 70}
        )

    def step(self, action):
        """Apply ``action`` and return ``(observation, reward, done, info)``.

        The reward is a constant 1 for every step taken; ``info`` is an empty dict.
        Raises KeyError if ``action`` is not one of 0, 1 or 2.
        """
        # Accept numpy integers / size-1 arrays as well as plain ints.
        key = self.ACTION_KEYS[int(action)]
        if key != 'no_op':
            pydirectinput.press(key)

        # The end-of-episode check runs before the new frame is grabbed.
        done, _ = self.get_done()
        new_obs = self.get_observation()

        reward = 1
        info = {}

        return new_obs, reward, done, info

    def reset(self):
        """Click at (150, 150), press 'space', and return the first observation."""
        pydirectinput.click(x=150, y=150)
        pydirectinput.press('space')

        return self.get_observation()

    def get_observation(self):
        """Grab ``game_location`` and return it as a (1, 83, 100) uint8 grayscale array."""
        # Keep only the first three channels of the capture before converting to gray.
        raw = np.array(self.cap.grab(self.game_location))[:,:,:3].astype(np.uint8)
        gray = cv2.cvtColor(raw, cv2.COLOR_BGR2GRAY)
        # Derive the target size from the declared observation space so the two
        # cannot drift apart; cv2.resize expects (width, height).
        channels, height, width = self.observation_space.shape
        resized = cv2.resize(gray, (width, height))

        return np.reshape(resized, (channels, height, width))

    def get_done(self):
        """Return ``(done, done_cap)`` where ``done_cap`` is the raw capture of ``done_location``.

        ``done`` is True when the first character of the OCR result is "G".
        """
        done_cap = np.array(self.cap.grab(self.done_location))
        done_strings = ["G"]
        res = pytesseract.image_to_string(done_cap)[:1]
        done = res in done_strings

        return done, done_cap

    def close(self):
        """Release the screen-capture handle held by this environment."""
        self.cap.close()
        super().close()

## 2.2 Check Environment

In [None]:
# Create the environment in this cell so it runs on a fresh kernel, then
# validate it with the Stable-Baselines3 environment checker.
env = WebGame()
env_checker.check_env(env)

## 2.3 Test Environment

In [None]:
# Play 10 episodes with random actions as a smoke test of the environment.
# Expects `env` to be a WebGame instance.
for episode in range(10):
    obs = env.reset()
    done = False
    total_reward = 0
    while not done:
        # WebGame.step returns exactly four values: (observation, reward, done, info).
        obs, reward, done, info = env.step(env.action_space.sample())
        total_reward += reward
    print('Total Reward for episode {} is {}'.format(episode, total_reward))

# 3. Train the Model

## 3.1 Create Callback

In [None]:
#IMPORTS

import os 

from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common import env_checker

In [None]:
class TrainAndLoggingCallback(BaseCallback):
    """Callback that saves the model being trained every ``check_freq`` calls.

    Args:
        check_freq: Save a checkpoint whenever the callback's call counter
            (``self.n_calls``) is a multiple of this value.
        save_path: Directory that receives the checkpoints, created on
            initialisation if missing. When None, nothing is saved.
        verbose: Verbosity level forwarded to ``BaseCallback``.
    """

    def __init__(self, check_freq, save_path, verbose=1):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        """Create the checkpoint directory when a save path was given."""
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        """Save ``best_model_<n_calls>`` on every ``check_freq``-th call; always return True."""
        # Skip saving when no path was configured, mirroring _init_callback;
        # os.path.join would otherwise raise on a None path.
        if self.save_path is not None and self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [None]:
# Directories for saved checkpoints and for the training logs.
CHECKPOINT_DIR, LOG_DIR = ".train", ".logs"

# Checkpoint the model into CHECKPOINT_DIR every 10,000 callback calls.
callback = TrainAndLoggingCallback(
    save_path=CHECKPOINT_DIR,
    check_freq=10000,
)

## 3.2 Build DQN

In [None]:
#IMPORTS

from stable_baselines3 import DQN
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack

In [None]:
# Instantiate the screen-capture game environment that the model is built on
# and later evaluated against.
env = WebGame()

In [None]:
# Build a Deep Q-Network agent with a CNN policy on top of `env`.
# Logs go to LOG_DIR; the replay buffer holds 10,000 transitions and
# learning starts from the very first step.
model = DQN(
    'CnnPolicy',
    env,
    buffer_size=10000,
    learning_starts=0,
    verbose=1,
    tensorboard_log=LOG_DIR,
)

## 3.3 Train Model

In [None]:
#LEARNING SECTION

# Train for 90,000 timesteps, passing `callback` so it is invoked during training.
model.learn(total_timesteps=90000, callback=callback)

## 3.4 Load Trained Model

In [None]:
#LOADING AND EXECUTION SECTION

# The training callback writes checkpoints into CHECKPOINT_DIR as
# 'best_model_<n_calls>', so the final checkpoint must be loaded from there
# rather than from the current working directory.
model = DQN.load(os.path.join(CHECKPOINT_DIR, 'best_model_90000'))

# 4. Execute the Model

In [None]:
# Let the loaded model play 10 episodes, pausing one second between them.
for episode in range(10):
    total_reward = 0
    done = False
    observation = env.reset()
    while True:
        # Ask the model for an action given the latest frame, then apply it.
        action, _ = model.predict(observation)
        step_result = env.step(int(action))
        observation, reward, done, info = step_result
        total_reward += reward
        if done:
            break
    summary = 'Total Reward for episode {} is {}'.format(episode+1, total_reward)
    print(summary)
    time.sleep(1)