# Notebook to experiment with training:

## Code:

In [1]:
import wandb
import numpy as np

In [2]:
from training_environments import prepare, Glioblastoma, Glioblastoma5actions
from training_dqn import DQN
from training_agents import DQNAgent
from training_buffers import ReplayBuffer

# DQN:

Reference: PyTorch DQN tutorial — https://docs.pytorch.org/tutorials/intermediate/reinforcement_q_learning.html

In [3]:
# Descriptive labels for the components used in this run.
ENVI, NET, AGENT, BUFFER = "Glioblastoma", "DQN", "DQNAgent", "ReplayBuffer"

# Identifier of this experiment.
RUN_NAME = "Extended002"

# Optimisation and replay settings.
LR = 1e-4            # value taken from the paper
MEMORY_SIZE = 15000  # value taken from the paper
BATCH_SIZE = 128     # value taken from the paper
GAMMA = 0.99
BURN_IN = 150
DNN_UPD = 1
DNN_SYNC = 20

# Length of the training run.
MAX_EPISODES = 90  # value taken from the paper

# Exploration schedule: start and floor values are taken from the paper.
EPSILON = 1.0
EPSILON_MIN = 0.1
DECAY_TYPE = "subtraction"  # alternative considered: "exponential"

# Step sized so that MAX_EPISODES equal subtractions take EPSILON down to
# EPSILON_MIN. Other values considered: 0.85 (for exponential decay) and 1e-4.
EPSILON_DECAY = (EPSILON - EPSILON_MIN) / MAX_EPISODES

# The message below describes the subtraction schedule.
print(f"Starting at {EPSILON}, decaying {EPSILON_DECAY}, will reach {EPSILON_MIN} after {MAX_EPISODES} episodes")

Starting at 1.0, decaying 0.01, will reach 0.1 after 90 episodes


In [4]:
# Load the training pairs and build an environment from the first one.
train_pairs = prepare()
first_pair = train_pairs[0]
env = Glioblastoma(*first_pair, grid_size=4)

# Sanity check: observation shape, number of actions, and the action indices.
action_space = env.action_space
print(env.observation_space.shape)
print(action_space.n)
print(np.arange(action_space.n))

# Network, created on the CPU.
net = DQN(env, learning_rate=LR, device='cpu')

# NOTE(review): this instance is never handed to the agent below, which
# receives the ReplayBuffer class together with memory_size. Presumably the
# agent builds its own buffer — confirm, and drop this line if it is unused.
buffer = ReplayBuffer(capacity=MEMORY_SIZE)

# Keyword settings for the agent, gathered in one place for readability.
agent_options = dict(
    epsilon=EPSILON,
    eps_decay=EPSILON_DECAY,
    epsilon_min=EPSILON_MIN,
    batch_size=BATCH_SIZE,
    gamma=GAMMA,
    memory_size=MEMORY_SIZE,
    buffer_initial=BURN_IN,
    save_name=RUN_NAME,
)
agent = DQNAgent(env, net, ReplayBuffer, train_pairs, **agent_options)

✅ Found 30 training pairs out of 30 listed in CSV.
(60, 60)
3
[0 1 2]


In [None]:
# Authenticate with Weights & Biases before starting the run.
wandb.login()

# Start the run that records this experiment.
# The quiet setting must be passed to wandb.init through `settings`; building
# a bare wandb.Settings(...) object and discarding it has no effect.
# The run is named and identified by RUN_NAME, and every hyperparameter from
# the configuration cell is stored in the run config.
wandb.init(project="TFG_Glioblastoma",
           name=RUN_NAME,
           id=RUN_NAME,
           settings=wandb.Settings(quiet=True),
           config={
            "environment": ENVI,
            "model": NET,
            "agent": AGENT,
            "buffer": BUFFER,
            "lr": LR,
            "MEMORY_SIZE": MEMORY_SIZE,
            "MAX_EPISODES": MAX_EPISODES,
            "EPSILON": EPSILON,
            "EPSILON_DECAY": EPSILON_DECAY,
            "Decay type": DECAY_TYPE,
            "EPSILON_MIN": EPSILON_MIN,
            "GAMMA": GAMMA,
            "BATCH_SIZE": BATCH_SIZE,
            "BURN_IN": BURN_IN,
            "DNN_UPD": DNN_UPD,
            "DNN_SYNC": DNN_SYNC
})

In [None]:
# Run training and always close the W&B run afterwards.
# If training raises or is interrupted (KeyboardInterrupt is a BaseException),
# the run is closed with a non-zero exit code and the error is re-raised, so
# no run is left open and a failed run is not recorded as a success.
try:
    agent.train(
        train_pairs=train_pairs,
        gamma=GAMMA,
        max_episodes=MAX_EPISODES,
        dnn_update_frequency=DNN_UPD,
        dnn_sync_frequency=DNN_SYNC
    )
except BaseException:
    wandb.finish(exit_code=1)
    raise
else:
    wandb.finish()

In [None]:
# Manual cleanup cell: closes the active W&B run.
# NOTE(review): presumably kept for the case where the training cell stops
# before reaching its own wandb.finish() call — confirm it is still needed.
wandb.finish()