In [18]:
from core.common.agent import Agent
from core.common.util import *
from collections import deque
import random
import numpy as np

# Inherit Agent class as a parent class
class DeepSARSAgent(Agent):
    """Deep SARSA agent: on-policy TD control with a neural Q-function.

    The agent picks actions epsilon-greedily from ``model`` and, one step
    later, fits the network on a single (S, A, R, S', A') transition where
    A' is the action that was really taken in S'.

    NOTE(review): ``self.step`` is read in ``backward`` but never set in this
    class; it is assumed to be maintained by the ``Agent`` base class.
    """

    # Detailed description about input parameters see API Doc
    def __init__(self, action_size, model, load_model=False, discount_factor=0.99, learning_rate=0.001,
                 epsilon=1, epsilon_decay=0.999, epsilon_min=0.01,
                 file_path='', training_mode=True, **kwargs):
        """Store hyper-parameters and optionally restore saved weights.

        Args:
            action_size: Number of discrete actions (width of the model output).
            model: Network exposing ``predict``, ``fit``, ``compile``,
                ``load_weights`` and ``save_weights``.
            load_model: If true, load weights from ``file_path`` when it exists.
            discount_factor: Gamma used in the TD target.
            learning_rate: Stored only; this class never reads it (the
                optimizer is supplied separately to ``compile``).
            epsilon: Initial exploration probability.
            epsilon_decay: Multiplicative decay applied on every update.
            epsilon_min: Lower bound for epsilon.
            file_path: Weight file used when ``load_model`` is true.
            training_mode: If false, exploration is disabled in ``forward``.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        # Imported here so the class does not depend on ``os`` leaking in
        # through the module's wildcard import.
        import os

        # Call constructor of parent's class
        super().__init__(**kwargs)

        # Set parameters from inputs
        self.load_model = load_model
        self.action_size = action_size
        self.model = model
        self.discount_factor = discount_factor
        self.learning_rate = learning_rate
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.epsilon_min = epsilon_min
        self.training_mode = training_mode
        self.file_path = file_path

        # Set the epsilon as minimum value, if not training mode.
        if not self.training_mode:
            self.epsilon = self.epsilon_min

        # Memory for training: holds the previous and the current
        # [observation, action, reward, terminal] records.
        self.observations = deque(maxlen=2)
        self.recent_observation = None
        self.recent_action = None

        # A missing weight file is skipped silently (best effort restore).
        if self.load_model and os.path.isfile(file_path):
            self.load_weights(file_path)

    # Get an action to be taken from observation
    def forward(self, observation):
        """Choose an action for ``observation`` and remember the pair.

        Returns:
            A one-element list ``[action]``.
        """
        # Take a random action with probability = epsilon (training only)
        if self.training_mode and np.random.rand() <= self.epsilon:
            action = random.randrange(self.action_size)
        else:
            # Take the best action with probability = (1 - epsilon)
            state = np.float32(observation)
            q_values = self.model.predict(np.expand_dims(state, 0))
            action = np.argmax(q_values[0])

        # Remember what was seen and done; backward() stores it with the reward.
        self.recent_observation = observation
        self.recent_action = action

        return [action]

    # Updates the agent's network
    def backward(self, reward, terminal):
        """Record the latest outcome and train on the previous transition.

        Args:
            reward: Reward received for the most recent action.
            terminal: Whether the most recent action ended the episode.
        """
        self.observations.append([self.recent_observation, self.recent_action, reward, terminal])

        # On the very first step there is no earlier transition to learn from.
        if self.step == 0:
            return

        # Decay epsilon, never letting it fall below the configured floor.
        if self.epsilon > self.epsilon_min:
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

        # Oldest record = (S, A, R, done) of the previous step.
        prev_observation, action, prev_reward, done = self.observations.popleft()
        state_batch = np.expand_dims(np.float32(prev_observation), 0)

        # Compute Q values for target network update
        # Q(S,A) <- Q(S,A) + alpha(R + gamma*Q(S',A') - Q(S,A))
        target = self.model.predict(state_batch)[0]
        if done:
            target[action] = prev_reward
        else:
            # S' and A' are the observation/action recorded by the latest
            # forward() call. Re-sampling an action here would break the
            # on-policy tuple and clobber recent_observation/recent_action.
            next_batch = np.expand_dims(np.float32(self.recent_observation), 0)
            next_q_values = self.model.predict(next_batch)[0]
            target[action] = prev_reward + self.discount_factor * next_q_values[self.recent_action]

        target = np.reshape(target, [1, self.action_size])
        self.model.fit(state_batch, target, epochs=1, verbose=0)

    # Compile the model
    def compile(self, optimizer, metrics=None):
        """Compile the underlying model with an MSE loss.

        Args:
            optimizer: Optimizer passed to ``model.compile``.
            metrics: Optional list of metrics passed to ``model.compile``.
        """
        self.model.compile(optimizer=optimizer, loss='mse', metrics=metrics or [])

    # Load trained weight from an HDF5 file.
    def load_weights(self, filepath):
        """Load model weights from ``filepath``."""
        self.model.load_weights(filepath)

    # Save trained weight to an HDF5 file.
    def save_weights(self, filepath, overwrite):
        """Save model weights to ``filepath``; ``overwrite`` is passed through."""
        self.model.save_weights(filepath, overwrite)

In [19]:
from saida_gym.starcraft.avoidReavers import AvoidReavers
# Create the AvoidReavers environment.
# NOTE(review): the keyword was misspelled `vsersion`; presumably the intended
# argument is `version` - confirm against the saida_gym AvoidReavers signature.
env = AvoidReavers(action_type=0, move_angle=30, move_dist=2, frames_per_step=24, version=0, verbose=0)

FileNotFoundError: [WinError 2] 지정된 파일을 찾을 수 없습니다: '..\\..\\..\\cpp\\Release\\SAIDA\\SAIDA.exe'