In [4]:
import numpy as np
from tqdm import tqdm
import onnxruntime as rt
from mlagents_envs.base_env import ActionTuple
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfigurationChannel

In [5]:
# Load the trained policy ("brain") that was exported to ONNX.
sess = rt.InferenceSession("trained_brains/FootballPlayer.onnx")

# Cache the tensor names needed to feed the model and read its first output.
_brain_inputs = sess.get_inputs()
input_name0 = _brain_inputs[0].name
input_name1 = _brain_inputs[1].name
label_name = sess.get_outputs()[0].name

def get_brain_action(observation):
    """Query the loaded ONNX policy for the action to take on one observation.

    Args:
        observation: NumPy array holding 36 values in total; it is cast to
            float32 and reshaped to a (1, 36) batch before inference.

    Returns:
        A 2-tuple ``(continuous, discrete)``: the model's
        'continuous_actions' output unchanged, and the first row of its
        'discrete_actions' output reshaped to (1, 1).
    """
    # Second model input: a float32 (2, 2) array of ones.
    # NOTE(review): presumably the action mask -- confirm against the model.
    second_input = np.ones((2, 2)).astype(np.float32)
    obs_batch = np.array(observation.astype(np.float32)).reshape(1, 36)

    feeds = {input_name0: obs_batch, input_name1: second_input}
    continuous, discrete = sess.run(['continuous_actions', 'discrete_actions'], feeds)
    return continuous, discrete[0].reshape(1, 1)

In [6]:
# Launch the Unity build; this opens the environment window.
# The engine-configuration side channel is registered at start-up.
channel = EngineConfigurationChannel()
env = UnityEnvironment(
    file_name='./mini_football_windows/Mini Football Environment.exe',
    seed=42,
    side_channels=[channel],
)

In [7]:
# channel.set_configuration_parameters(
#     width= 848,
#     height= 480,
#     quality_level= 1,
#     time_scale= 100,
#     target_frame_rate= -1,
#     capture_frame_rate= 60,
#     )

In [8]:
# Advance the environment once before reading the registered behaviours.
env.step()
# The first registered behaviour is the one driven by this notebook.
behavior_name = list(env.behavior_specs.keys())[0]

def play_episode():
    """Play one episode with the trained brain and return its total reward.

    Resets the environment, then repeatedly feeds the first agent's
    observation to ``get_brain_action``, applies the resulting action and
    steps the simulation until at least one agent shows up in the terminal
    steps.

    Returns:
        The sum of the first agent's decision-step rewards plus the reward
        of every agent reported in the final terminal steps.
    """
    env.reset()

    decision_steps, terminal_steps = env.get_steps(behavior_name)
    done = False
    reward = 0

    while not done:
        # Only act when an agent is actually requesting a decision; with an
        # empty batch there is no observation to read, so just step.
        if len(decision_steps) > 0:
            # NOTE(review): only the first agent is read and controlled --
            # assumes a single-agent behaviour; confirm for this build.
            state = decision_steps.obs[0][0]
            reward += decision_steps.reward[0]

            actions = get_brain_action(state)
            action = ActionTuple(actions[0], actions[1])
            env.set_actions(behavior_name=behavior_name, action=action)

        env.step()
        decision_steps, terminal_steps = env.get_steps(behavior_name)

        # Look each terminated agent up by id so its own final reward is
        # added, rather than the first entry once per terminated agent.
        for agent_id in terminal_steps:
            reward += terminal_steps[agent_id].reward
            done = True

    return reward

In [9]:
# Evaluate the brain over 100 episodes and report the mean episode reward.
env.reset()
all_rewards = [play_episode() for _ in tqdm(range(100))]

print(np.mean(all_rewards))
# Result recorded from an earlier run: 0.9611509997766552

100%|██████████| 100/100 [05:12<00:00,  3.12s/it]

0.9472909977429663



