In [1]:
import sys
import os
from pathlib import Path

# The notebook imports the project package (`LMORL`) from the directory two
# levels above the current working directory, so that directory must be on
# the module search path before the project import below runs.
root_dir = Path.cwd()
package_parent = str(root_dir.parents[1])

# Append only once so that re-running the cell does not grow sys.path.
if package_parent not in sys.path:
    sys.path.append(package_parent)

from LMORL.BAN.API.agents.DQNHybrid import DQNHybrid

import mo_gymnasium as mo_gym

# Identifier of the environment used throughout the notebook.
env_id = "mo-lunar-lander-v2"
env = mo_gym.make(env_id)


UnsupportedPythonError: It seems your Julia and PyJulia setup are not supported.

Julia executable:
    julia
Python interpreter and libpython used by PyCall.jl:
    /usr/bin/python3
    /usr/lib/x86_64-linux-gnu/libpython3.10.so.1.0
Python interpreter used to import PyJulia and its libpython.
    /usr/bin/python3
    /usr/lib/x86_64-linux-gnu/libpython3.10.so.1.0

Your Python interpreter "/usr/bin/python3"
is statically linked to libpython.  Currently, PyJulia does not fully
support such Python interpreter.

The easiest workaround is to pass `compiled_modules=False` to `Julia`
constructor.  To do so, first *reboot* your Python REPL (if this happened
inside an interactive session) and then evaluate:

    >>> from julia.api import Julia
    >>> jl = Julia(compiled_modules=False)

Another workaround is to run your Python script with `python-jl`
command bundled in PyJulia.  You can simply do:

    $ python-jl PATH/TO/YOUR/SCRIPT.py

See `python-jl --help` for more information.

For more information, see:

    https://pyjulia.readthedocs.io/en/latest/troubleshooting.html


In [2]:

# Sizes read from the environment's observation and action spaces.
input_size = env.observation_space.shape[0]
num_actions = int(env.action_space.n)
action_space = list(range(num_actions))

# Agent hyperparameters. Every value below is forwarded to the constructor
# by name, so this block is the single place to tune them.
learning_rate = 0.001
epsilon_decay = 0.995
epsilon_min = 0.1
batch_size = 64
# 100 is the value the agent was actually constructed with: the constructor
# call used a literal 100 and ignored this variable, which was set to 64.
train_start = 100
hidden_size = 128
BAN_SIZE = 3
max_memory_size = 2000

agent = DQNHybrid(input_size=input_size, num_actions=num_actions,
                  action_space=action_space, learning_rate=learning_rate,
                  epsilon_decay=epsilon_decay, epsilon_min=epsilon_min,
                  batch_size=batch_size, hidden_size=hidden_size,
                  ban_size=BAN_SIZE, max_memory_size=max_memory_size,
                  train_start=train_start)


In [3]:
# Settings for the training run: how many episodes to request and the name
# handed to the agent through its `mname` argument.
episodes = 350
mname = "fooo.model"

# agent_learning returns three values; keep them under the names the rest of
# the notebook uses.
training_result = agent.agent_learning(env=env, episodes=episodes, mname=mname)
rewards, avg_rewards, timings = training_result


19:21:09	Episode	1	timesteps:	74	Took	1.270426 sec - reward:	[-100.0, -140.27313774824142, -20.0]	| 100AvgReward: [-100.0, -140.27313774824142, -20.0]
experience_replay! took 16.35996 seconds
19:21:26	Episode	2	timesteps:	74	Took	16.524599 sec - reward:	[-100.0, -191.8354700654745, -21.0]	| 100AvgReward: [-100.0, -166.05430390685797, -20.5]
19:21:26	Episode	3	timesteps:	59	Took	0.170663 sec - reward:	[-100.0, 0.33456916362047195, -9.0]	| 100AvgReward: [-100.0, -110.59134621669848, -16.666666666666668]
19:21:26	Episode	4	timesteps:	62	Took	0.235497 sec - reward:	[-100.0, -8.953988822177052, -14.0]	| 100AvgReward: [-100.0, -85.18200686806813, -16.0]
19:21:27	Episode	5	timesteps:	102	Took	0.26388 sec - reward:	[-100.0, -112.15321542322636, -32.0]	| 100AvgReward: [-100.0, -90.57624857909977, -19.2]
19:21:27	Episode	6	timesteps:	99	Took	0.27234 sec - reward:	[-100.0, -114.49040330201387, -24.0]	| 100AvgReward: [-100.0, -94.5619410329188, -20.0]
19:21:27	Episode	7	timesteps:	117	Took	0.33025

In [None]:
from matplotlib import pyplot as plt 

def display_plot(rewards:list, episodes:int, title:str = ""):
    """
    Plot the behaviour of the rewards over the episodes.

    Draws three vertically stacked subplots, one for each of
    ``rewards[0]``, ``rewards[1]`` and ``rewards[2]``, and shows the figure.

    Parameters
    ----------
    rewards : list
        Indexable holding at least three series; series ``i`` is drawn on
        the i-th subplot. Each series needs one value per episode.
    episodes : int
        Number of episodes. It is expanded into the x values
        ``1..episodes``. A ready-made sequence of x values is also
        accepted and is used unchanged.
    title : str
        Title placed above the whole figure.
    """
    # A bare episode count cannot be plotted against a series of values, so
    # turn it into explicit x coordinates first.
    if isinstance(episodes, int):
        x_values = range(1, episodes + 1)
    else:
        x_values = episodes

    fig, axes = plt.subplots(3)
    fig.suptitle(title)
    for index, ax in enumerate(axes):
        ax.plot(x_values, rewards[index])
        ax.set_ylabel(f"reward[{index}]")
    # Only the bottom subplot carries the x label to avoid clutter.
    axes[-1].set_xlabel("episode")

    plt.show()

# NOTE(review): the training log prints one three-component reward vector per
# episode, so `rewards` is presumably a list with one such vector per episode
# - confirm against DQNHybrid.agent_learning. display_plot indexes
# rewards[0], [1], [2] as three separate series, hence the transpose below.
rewards_by_component = list(zip(*rewards))

# Lists have no `.len` attribute; the built-in len() gives the episode count.
display_plot(rewards_by_component, len(rewards), "Rewards behaviour")