# Notebook to experiment with training:

https://docs.pytorch.org/tutorials/intermediate/reinforcement_q_learning.html

## Code:

In [1]:
import wandb
import numpy as np
from gymnasium import spaces
import random
import torch

In [2]:
SEED = 42
# Python RNG
random.seed(SEED)

# NumPy RNG
np.random.seed(SEED)

# PyTorch RNG (CPU + GPU)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


In [3]:
import sys
sys.path.append("/home/martina/codi2/4year/tfg")  # add parent folder of general.py

from general import prepare, GlioblastomaPositionalEncoding, testing
from training_dqn import DQNPositionalEncoding
from training_agents import DQNAgent
from training_buffers import ReplayBuffer

In [4]:
RUN_NAME = "Extension094"

CURRENT_CONFIG = {
    'grid_size': 12, # 6,
    'rewards': [10.0, -2.0, 2.5, -0.1], # [staying on tumor, staying off tumor, moving into tumor, movement cost] #[3.0, -1.0, -0.2],
    'action_space': spaces.Discrete(5), 
    'max_steps': 0
    # 'stop': False
}
    
ENVIRONMENT = GlioblastomaPositionalEncoding
NET = DQNPositionalEncoding
AGENT = DQNAgent
BUFFER = ReplayBuffer

NOTES = ""

LR = 5e-5
# LR = 0.0001 #From paper
MEMORY_SIZE = 15000 #From paper
MAX_EPISODES = 1000 #From paper

EPSILON = 1.0 #From paper
EPSILON_MIN = 0.3 #From paper
DECAY_TYPE = "exponential"
# DECAY_TYPE = "subtractive"
if DECAY_TYPE == "exponential":
    EPSILON_DECAY = 0.998 #Let's try exponential decay
    print(f"Starting at {EPSILON}, decaying {EPSILON_DECAY} each episode, will reach {EPSILON_MIN} after {int(np.log(EPSILON_MIN/EPSILON)/np.log(EPSILON_DECAY))} episodes")
else:
    EPSILON_DECAY = (EPSILON - EPSILON_MIN) / MAX_EPISODES
    print(f"Starting at {EPSILON}, decaying {EPSILON_DECAY}, will reach {EPSILON_MIN} after {MAX_EPISODES} episodes")


GAMMA = 0.99 #0.99
BATCH_SIZE = 128 #From paper
BURN_IN = 500 # 500
DNN_UPD = 4
DNN_SYNC = 200

Starting at 1.0, decaying 0.998 each episode, will reach 0.3 after 601 episodes


In [5]:
# train_pairs = prepare()
train_pairs = prepare(dataset=200)

Using dataset of 200 samples for training.
âœ… Found 200 pairs out of 200 listed in CSV.


In [8]:
env=ENVIRONMENT(*train_pairs[0], **CURRENT_CONFIG)
print(env.observation_space.shape)
print(env.action_space.n)
print(np.arange(env.action_space.n))
if env.observation_space.shape[1] * CURRENT_CONFIG['grid_size'] == 240:
    print(f"Using correct patch size {env.observation_space.shape[1]} given grid size {CURRENT_CONFIG['grid_size']}")

net = NET(env, learning_rate=LR, device='cpu')
buffer = BUFFER(capacity=MEMORY_SIZE)
agent = AGENT(env_config=CURRENT_CONFIG, dnnetwork=net, buffer_class=BUFFER, train_pairs=train_pairs, env_class=ENVIRONMENT,
                 epsilon=EPSILON, eps_decay=EPSILON_DECAY, eps_decay_type=DECAY_TYPE, epsilon_min=EPSILON_MIN,
                 batch_size=BATCH_SIZE, gamma=GAMMA, 
                 memory_size=MEMORY_SIZE, buffer_initial=BURN_IN,
                 save_name=RUN_NAME)

print(f"Using Glioblastoma class {ENVIRONMENT}, DQN class {NET}, Agent class {AGENT}, Buffer class {BUFFER}")

(3, 20, 20)
5
[0 1 2 3 4]
Using correct patch size 20 given grid size 12
Using Glioblastoma class <class 'general.GlioblastomaPositionalEncoding'>, DQN class <class 'training_dqn.DQNPositionalEncoding'>, Agent class <class 'training_agents.DQNAgent'>, Buffer class <class 'training_buffers.ReplayBuffer'>


In [9]:
wandb.login()
wandb.Settings(quiet=True)

wandb.init(project="TFG_Glioblastoma",
           name=RUN_NAME,
           id=RUN_NAME,
           config={
            "environment": ENVIRONMENT,
            "configuration": CURRENT_CONFIG,
            "model": NET,
            "agent": AGENT,
            "buffer": BUFFER,
            "notes": NOTES,
            "lr": LR,
            "MEMORY_SIZE": MEMORY_SIZE,
            "MAX_EPISODES": MAX_EPISODES,
            "EPSILON": EPSILON,
            "EPSILON_DECAY": EPSILON_DECAY,
            "Decay type": DECAY_TYPE,
            "EPSILON_MIN": EPSILON_MIN,
            "GAMMA": GAMMA,
            "BATCH_SIZE": BATCH_SIZE,
            "BURN_IN": BURN_IN,
            "DNN_UPD": DNN_UPD,
            "DNN_SYNC": DNN_SYNC, 
})

[34m[1mwandb[0m: Currently logged in as: [33mmartinacarrettab[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
agent.train(
    train_pairs=train_pairs,
    gamma=GAMMA,
    max_episodes=MAX_EPISODES,
    dnn_update_frequency=DNN_UPD,
    dnn_sync_frequency=DNN_SYNC
)
wandb.finish()

Filling replay buffer...
Buffer filled with 15000 experiences


Initializing:   0%|          | 0.00/1.00k [00:00<?, ?ep/s]

Training...
Episode 15 | Episode reward -2444.70 | Mean Rewards -1405.18 | Epsilon 0.9724 | Loss 0.1795
      Positive rewards: 120, Negative rewards: 4333
Episode 30 | Episode reward -5027.10 | Mean Rewards -2443.31 | Epsilon 0.9436 | Loss 0.0998
      Positive rewards: 216, Negative rewards: 8884
Episode 45 | Episode reward -10368.90 | Mean Rewards -3993.86 | Epsilon 0.9157 | Loss 0.0675
      Positive rewards: 335, Negative rewards: 18013
Episode 60 | Episode reward -12190.10 | Mean Rewards -5830.91 | Epsilon 0.8886 | Loss 0.0479
      Positive rewards: 402, Negative rewards: 21369
Episode 75 | Episode reward -21377.80 | Mean Rewards -8252.68 | Epsilon 0.8623 | Loss 0.0320
      Positive rewards: 516, Negative rewards: 36811


KeyboardInterrupt: 

Error in callback <bound method _WandbInit._post_run_cell_hook of <wandb.sdk.wandb_init._WandbInit object at 0x754ae0369f90>> (for post_run_cell), with arguments args (<ExecutionResult object at 7549d1608a60, execution_count=10 error_before_exec=None error_in_exec= info=<ExecutionInfo object at 7549d1609720, raw_cell="agent.train(
    train_pairs=train_pairs,
    gamm.." store_history=True silent=False shell_futures=True cell_id=vscode-notebook-cell://wsl%2Bubuntu-22.04/home/martina/codi2/4year/tfg/dqn/training.ipynb#X13sdnNjb2RlLXJlbW90ZQ%3D%3D> result=None>,),kwargs {}:


socket.send() raised exception.


In [None]:
wandb.finish()