# 1. Install and Import Dependencies

In [315]:
#!pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113

In [316]:
#%pip install stable-baselines3[extra] protobuf==3.20.*

In [317]:
#%pip install mss pydirectinput pytesseract

In [318]:

#%pip install gymnasium

In [319]:
import os
from mss import mss
import pydirectinput
import cv2
import numpy as np
import pytesseract
from matplotlib import pyplot as plt
import time
#import gymnasium as gym
#from gymnasium import spaces
import gym
from gym import spaces
from TerrarianEyes import TerrarianEyes
%matplotlib inline

# 2. Build the Environment

## 2.1 Create Environment

In [320]:
%reload_ext autoreload
%autoreload 2
class TerrEnv(gym.Env):
    """Gym environment that drives a running Terraria window.

    The game is observed through full-screen captures that are handed to
    ``TerrarianEyes`` and is controlled with synthetic keyboard/mouse input
    sent through ``pydirectinput``.

    Observation: a dict with a ``map`` grid (67 x 120) and an ``inventory``
    grid (9 x 10), both declared as int8 values in [0, 11].

    Actions (``Discrete(7)``): 0 no-op, 1-4 key presses ('space', 'w', 'd',
    'a'), 5 attack an enemy in range, 6 cut a tree in range.
    """

    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 4}

    def __init__(self):
        super().__init__()
        # Setup spaces
        self.observation_space = spaces.Dict(
            {
                'map' :       spaces.Box(low=0, high=11, shape=(67, 120), dtype=np.int8),
                'inventory' : spaces.Box(low=0, high=11, shape=(9,10), dtype=np.int8),
            }
        )
        self.action_space = spaces.Discrete(7)
        # Capture game frames
        self.cap = mss()
        self.game_location = {'top': 0, 'left': 0, 'width': 1920, 'height': 1080}
        # Screen region that is OCR'd by get_done() to detect the end of an episode.
        self.done_location = {'top': 460, 'left': 760, 'width': 400, 'height': 70}
        self.action_map = {
            0: 'no_op',
            1: 'space',
            2: 'w',
            3: 'd',
            4: 'a',
            5: 'attack',
            6: 'cut'
        }
        # Create Instance
        tiles_weights_path = os.path.join('runs', 'train', 'yolov5s6-tiles', 'weights', 'best.pt')
        objects_weights_path = os.path.join('runs', 'train', 'yolov5l6-objects', 'weights', 'best.pt')
        self.eyes = TerrarianEyes(tiles_weights_path, objects_weights_path)
        # Last captured frame; stays None until get_observation() has run once.
        self.current_frame = None
        # Episode clock: set by reset(), compared against time_limit (seconds).
        self.timer = None
        self.time_limit = 120
        # Day clock: once day_limit seconds have passed, get_done() resets the
        # in-game time through the game's menus.
        self.day_timer = time.time()
        self.day_limit = 360

    def step(self, action):
        """Apply ``action`` to the game and return ``(observation, reward, done, info)``.

        Rewards: 0 for a no-op or for an attack/cut with no target in range,
        1 for a movement key, 2 for an attack, 3 for a cut. 2 is subtracted
        when the health read after the step is lower than the one read before.

        Raises:
            AssertionError: if called before ``reset()``.
            ValueError: if ``action`` is not one of the keys of ``action_map``.
        """
        if self.timer is None:
            raise AssertionError("Cannot call env.step() before calling reset()")
        # Accept numpy integer scalars as well as plain ints.
        action = int(action)
        if action not in self.action_map:
            raise ValueError(
                "Invalid action {!r}; expected one of {}".format(action, sorted(self.action_map))
            )

        # Health as known from the previous observation.
        health = self.eyes.map.getHealth()
        reward = 0
        if 1 <= action <= 4:
            pydirectinput.press(self.action_map[action])
            reward = 1
        elif action == 5:  # attack
            attack, x, y = self.eyes.map.isEnemyOnAttackRange()
            if attack:
                # Aim at the reported enemy position and use hotbar slot '1'.
                self._use_hotbar_item(x, y, '1', 16)
                reward = 2
        elif action == 6:  # cut wood
            cut, x, y = self.eyes.map.isTreeOnCutRange()
            if cut:
                # Aim at the reported tree position and use hotbar slot '3'.
                self._use_hotbar_item(x, y, '3', 8)
                reward = 3

        done, _ = self.get_done()
        observation = self.get_observation()
        new_health = self.eyes.map.getHealth()
        if new_health - health < 0:
            # Penalize losing health during this step.
            reward = reward - 2
        info = {}
        return observation, reward, done, info

    def reset(self):
        """Start a new episode and return the first observation.

        Sleeps 10 seconds, clicks at screen position (150, 250), restarts the
        episode clock and captures a fresh observation so that the returned
        state reflects the screen after the reset.
        """
        # NOTE(review): the sleep presumably waits for the respawn and the click
        # presumably focuses the game window — confirm.
        time.sleep(10)
        pydirectinput.click(x=150, y=250)
        self.timer = time.time()
        return self.get_observation()

    def render(self):
        """Show the most recently captured frame; pressing 'q' closes the window."""
        if self.current_frame is None:
            # Nothing has been captured yet, so there is nothing to show.
            return
        cv2.imshow('Game', self.current_frame)
        key = cv2.waitKey(1)
        if key == ord('q'):
            self.close()

    def close(self):
        """Destroy every OpenCV window opened by ``render``."""
        cv2.destroyAllWindows()

    def _get_obs(self):
        """Return the observation dict from the current ``TerrarianEyes`` state (no capture)."""
        return {"map": self.eyes.map.current_map, "inventory": self.eyes.inventory.inventory}

    def get_observation(self):
        """Grab the game screen, refresh map and inventory, and return the observation dict."""
        # mss returns a 4-channel image; keep only the first three channels.
        raw = np.array(self.cap.grab(self.game_location))[:,:,:3].astype(np.uint8)
        self.current_frame = raw
        self.eyes.updateMap(raw)
        self.eyes.updateInventory(raw)
        return self._get_obs()

    def get_done(self):
        """Decide whether the episode is over.

        If the day clock has expired, the in-game time is first reset through
        the menus. The episode is then done when the episode time limit has
        elapsed, or when the OCR of ``done_location`` starts with 'You'
        (presumably the death message — confirm).

        Returns:
            ``(done, done_cap)`` where ``done_cap`` is the captured region used
            for OCR, or ``None`` when no OCR was performed.
        """
        if time.time() - self.day_timer > self.day_limit:
            self._reset_day()

        if self.timer is not None and time.time() - self.timer > self.time_limit:
            return True, None

        done_cap = np.array(self.cap.grab(self.done_location))
        done_strings = ['You', 'You ']
        res = pytesseract.image_to_string(done_cap)[:4]
        return res in done_strings, done_cap

    def _reset_day(self):
        """Set the in-game time back to dawn through the menus and restart the day clock."""
        # Open inventory
        pydirectinput.press('Esc')
        # Click Power menu
        self._click_at(50, 315)
        # Click other place in the power menu to reset view
        self._click_at(50, 530)
        # Click time menu
        self._click_at(50, 630)
        # Click dawn
        self._click_at(115, 655)
        # Close inventory
        pydirectinput.press('Esc')
        self.day_timer = time.time()

    def _use_hotbar_item(self, x, y, hotbar_key, x_offset):
        """Point the mouse at map cell (x, y), select ``hotbar_key`` and left-click."""
        # 16 is presumably the on-screen size of one map cell in pixels — confirm.
        pydirectinput.moveTo((x+1)*16 + x_offset, y*16 + 8)
        pydirectinput.press(hotbar_key)
        self._left_click()

    def _click_at(self, x, y):
        """Move the mouse to screen position (x, y) and left-click."""
        pydirectinput.moveTo(x, y)
        self._left_click()

    @staticmethod
    def _left_click():
        """Press and release the left mouse button at the current position."""
        pydirectinput.mouseDown(button='left')
        pydirectinput.mouseUp(button='left')

## 2.2 Test Environment

In [321]:
# Instantiate the environment; the constructor builds TerrarianEyes from the
# two YOLOv5 weight files under runs/train/.
env = TerrEnv()

YOLOv5  2023-3-26 Python-3.9.2rc1 torch-1.13.1+cu116 CUDA:0 (NVIDIA GeForce RTX 3070 Ti, 8192MiB)

Fusing layers... 
Model summary: 206 layers, 12319756 parameters, 0 gradients, 16.1 GFLOPs
Fusing layers... 
Model summary: 346 layers, 76157124 parameters, 0 gradients, 110.0 GFLOPs


In [322]:
#obs=env.get_observation()
#env.eyes.updateMap(env.observation)
#env.eyes.map.getHealth()

In [323]:
#plt.imshow(cv2.cvtColor(obs[0], cv2.COLOR_GRAY2BGR))

In [324]:
#done, done_cap = env.get_done()

In [325]:
#plt.imshow(done_cap)

In [326]:
#pytesseract.image_to_string(done_cap)[:4]

In [327]:
# Smoke-test the environment: play 10 episodes with uniformly random actions
# and print the return of each one.
for episode in range(10):
    obs = env.reset()
    done = False
    total_reward = 0
    while not done:
        # Step with the action that was sampled, not with a second sample.
        action = env.action_space.sample()
        obs, reward, done, info = env.step(action)
        total_reward += reward
    print('Total Reward for episode {} is {}'.format(episode, total_reward))

KeyboardInterrupt: 

# 3. Train the Model

## 3.1 Create Callback

In [292]:
# Import os for file path management
import os 
# Import Base Callback for saving models
from stable_baselines3.common.callbacks import BaseCallback
# Check Environment    
from stable_baselines3.common import env_checker
from gym.utils.env_checker import check_env

In [293]:
# Validate the environment with Gym's checker; the Stable-Baselines3 checker
# is kept below as a commented-out alternative.
#env_checker.check_env(env)
check_env(env)

In [294]:
class TrainAndLoggingCallback(BaseCallback):
    """Save a checkpoint of the model every ``check_freq`` callback steps.

    Args:
        check_freq: number of callback calls between two checkpoints.
        save_path: directory for the checkpoints; ``None`` disables saving.
        verbose: verbosity level forwarded to ``BaseCallback``.
    """

    def __init__(self, check_freq, save_path, verbose=1):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        """Create the checkpoint directory if one was configured."""
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        """Write ``best_model_<n_calls>`` when due; always let training continue."""
        # Skip saving when no directory was given, mirroring _init_callback;
        # os.path.join(None, ...) would otherwise raise TypeError.
        if self.save_path is not None and self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [295]:
# Directory handed to the training callback as its checkpoint location.
CHECKPOINT_DIR = './train/'
# Directory handed to DQN as its TensorBoard log location.
LOG_DIR = './logs/'

In [296]:
# Checkpoint the model into CHECKPOINT_DIR every 1000 callback steps.
callback = TrainAndLoggingCallback(check_freq=1000, save_path=CHECKPOINT_DIR)

## 3.2 Build DQN and Train

In [297]:
from stable_baselines3 import DQN
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack

In [None]:
# Build the environment used for training. This rebinds `env`, replacing the
# instance created in section 2.2.
env = TerrEnv()

In [299]:
# DQN with the multi-input policy, as required by the Dict observation space.
# NOTE(review): buffer_size (1,200,000) exceeds the 100,000 timesteps requested
# by the learn() call below, so the replay buffer cannot fill during that run —
# confirm that this size (and its memory footprint) is intended.
model = DQN('MultiInputPolicy', env, tensorboard_log=LOG_DIR, verbose=1, buffer_size=1200000, learning_starts=10)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [300]:
# Train for 100,000 timesteps; `callback` is invoked during training to write checkpoints.
model.learn(total_timesteps=100000, callback=callback)

Logging to ./logs/DQN_1


KeyboardInterrupt: 

In [None]:
# `load` is a classmethod that returns a new model rather than updating the
# receiver, so the result must be rebound for the checkpoint to be used.
# The file name follows the 'best_model_<n_calls>' pattern written by the callback.
model = DQN.load('train_first/best_model_50000', env=env)

# 4. Test out Model

In [None]:
# Let the model play five episodes and report the return of each one.
for episode in range(5):
    total_reward = 0
    obs = env.reset()
    done = False
    while True:
        # predict() returns (action, state); only the action is needed here.
        action = model.predict(obs)[0]
        obs, reward, done, info = env.step(int(action))
        time.sleep(0.01)
        total_reward += reward
        if done:
            break
    print('Total Reward for episode {} is {}'.format(episode, total_reward))
    # Short pause between episodes.
    time.sleep(2)