In [None]:
import gym
import time
import matplotlib.pyplot as plt
from pyswip import Prolog
from utils import create_level, define_reward, process_state, perform_action, show_match

In [None]:
# Level size; forwarded to create_level as `height` / `width`.
H = W = 15
# Number of episodes to play and the per-episode cap on agent steps.
NUM_EPISODES, MAX_STEPS = 100, 200
# Monster names handed to create_level, define_reward and process_state.
MONSTER = ['kobold', 'giant bat']
# Weapons handed to create_level (none for this experiment).
WEAPON = []
# Prolog knowledge-base file consulted by pyswip.
PATH = 'kb.pl'

In [None]:
# Build the level description from the configuration constants and echo it
# so the generated level can be inspected in the cell output.
des_file = create_level(
    width=W,
    height=H,
    monsters=MONSTER,
    weapons=WEAPON,
    potion=True,
    armor=False,
)
print(des_file)

# Reward manager built for the same monster list; it is passed to gym.make
# together with des_file.
reward_manager = define_reward(monsters=MONSTER)

In [None]:
# Create the custom MiniHack environment from the generated level and reward
# manager. 'pixel' and 'message' are read directly in this notebook; the other
# observation keys are presumably consumed by process_state -- confirm in utils.
# NOTE(review): this notebook never imports `minihack` itself, so the
# environment id is presumably registered as a side effect of importing
# `utils` -- confirm, or add an explicit `import minihack`.
env = gym.make(
    'MiniHack-Skill-Custom-v0',
    character="sam-hum-neu-mal",
    # character="bar-hum-neu-mal",
    observation_keys=(
        'screen_descriptions',
        'inv_strs',
        'blstats',
        'message',
        'pixel',
    ),
    des_file=des_file,
    reward_manager=reward_manager,
)

In [None]:
# Reset once to obtain an initial observation (used by the preview cell below)
# and render the freshly generated level.
obs = env.reset()
env.render()

In [None]:
# Show only a sub-rectangle of the rendered frame.
# NOTE(review): the crop bounds look hand-tuned to the map area for this level
# size -- confirm and adjust if H / W change.
crop_rows = slice(20, 300)
crop_cols = slice(480, 775)
plt.imshow(obs['pixel'][crop_rows, crop_cols])

Initialize the knowledge base.

In [None]:
# Create the pyswip Prolog handle and load the rules/facts from the
# knowledge-base file configured in PATH.
KB = Prolog()
KB.consult(PATH)

#### Main code
- Perform `NUM_EPISODES` episodes in the environment.
- Use `Prolog` to define the axioms and choose the action to perform.
- The main goal is to _reach and eat_ the `apple`.

In [None]:

# Debug helper (disabled): list every action index exposed by the environment.
# for i in range(len(env.actions)):
#     print(f'Action performed: {i} {repr(env.actions[i])}')

In [None]:
def _mean(values):
    """Return the arithmetic mean of `values`, or NaN when `values` is empty."""
    return sum(values) / len(values) if values else float('nan')


# Statistics collected over the whole run (one entry per episode).
rewards = []    # cumulative reward of every episode
step = []       # number of steps taken in every episode
step_win = []   # number of steps taken in every winning episode
wins = 0

for episode in range(NUM_EPISODES):
    # count the number of steps of the current episode
    steps = 0
    # store the cumulative reward
    reward = 0.0
    # collect obs['pixel'] to visualize
    ep_states = []
    # `info` is only produced by perform_action. Clear it so an episode that
    # never manages to act cannot reuse the previous episode's end status
    # (or hit a NameError on the very first episode).
    info = None

    obs = env.reset()
    ep_states.append(obs['pixel'])
    done = False

    # Main loop
    while not done and steps < MAX_STEPS:
        # Get the observation from the env and assert the facts in the kb
        process_state(obs, KB, MONSTER)

        # Run the inference; the first answer is the top-priority action.
        # The query is fully consumed with list() before picking the answer.
        try:
            solutions = list(KB.query('action(X)'))
        except Exception as e:
            print(e)
            solutions = []
        action = solutions[0]['X'] if solutions else None

        if not action:
            print("ERROR: No action can be performed")
            break

        # Perform the action in the environment
        obs, rwd, done, info = perform_action(action, env, KB)
        reward += rwd
        ep_states.append(obs['pixel'])
        # env.render()

        steps += 1

    # Record the step count once per episode so the mean below really is
    # "steps per episode" rather than an average of the running counter.
    step.append(steps)

    # No info means no action was ever performed in this episode.
    end_status = info["end_status"].name if info is not None else "UNKNOWN"
    if end_status == "TASK_SUCCESSFUL":
        wins += 1
        step_win.append(steps)

    # Display game with interface
    show_match(ep_states)
    # Print information about the ended episode
    print(f'Episode {episode + 1} - {steps} steps')
    print(f'End status: {end_status}')
    print(f'Final reward: {reward}')

    rewards.append(reward)

    # Reload the knowledge base to drop facts asserted during the episode.
    # The environment itself is reset at the top of the next iteration, so no
    # extra env.reset() is needed here.
    # NOTE(review): pyswip Prolog objects may share one underlying engine;
    # confirm that re-consulting PATH really clears the asserted facts.
    KB = Prolog()
    KB.consult(PATH)

    # time.sleep(1)


# Summary; the means are NaN when there is nothing to average (e.g. no wins).
print(f'After {NUM_EPISODES} episodes, mean return is {_mean(rewards)}')
print(f'and the total number of winning episodes is {wins}')
print(f'the mean number of step per episode is {_mean(step)}')
print(f'the mean number of step per winning episode is {_mean(step_win)}')
print("The rewards of the episodes are:", rewards)