# Notebook to experiment with testing:

## Code:

In [None]:
import numpy as np
import random
import os
import torch
from gymnasium import spaces

# Single seed shared by every random number generator used in this notebook.
SEED = 42

# Seed the Python, NumPy and PyTorch generators, in that order.
for _seed_rng in (random.seed, np.random.seed, torch.manual_seed):
    _seed_rng(SEED)

# When a GPU is present, also seed the CUDA generators and configure cuDNN
# (deterministic kernels on, benchmark auto-tuning off).
if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


In [None]:
from training_environments import prepare, Glioblastoma, Glioblastoma2
from training_dqn import DQN, DQN2, DQN3, DQN4
from training_agents import DQNAgent, DQNAgent2
from training_buffers import ReplayBuffer

In [None]:
# Test split produced by `prepare`; later cells index it and unpack each
# entry as an (image path, mask path) pair.
test_pairs = prepare(mode="test")

# TESTING:

In [None]:
def test_agent_current(agent, test_pairs, num_episodes=10, print_results=True, env_config=None,
                       env_class=None):
    """
    Evaluate a trained agent greedily on the test image/mask pairs.

    One environment is built per test pair and the agent acts for exactly
    ``env.max_steps`` steps. The ``terminated``/``truncated`` flags returned by
    ``env.step`` are deliberately ignored (no early termination) and rewards
    are used exactly as the environment returns them.

    Args:
        agent: Object exposing ``dnnetwork`` with ``eval()`` and
            ``get_action(state, epsilon=...)``. The network is switched to
            evaluation mode and is NOT switched back afterwards.
        test_pairs: Indexable sequence of ``(img_path, mask_path)`` entries.
        num_episodes: Number of pairs to evaluate, starting from index 0.
            Values larger than ``len(test_pairs)`` are clamped.
        print_results: When true, print a summary of the collected metrics.
        env_config: Mapping with the keys ``'grid_size'``, ``'rewards'`` and
            ``'action_space'`` used to build every environment. Required.
        env_class: Environment class to instantiate; defaults to
            ``Glioblastoma2`` when omitted.

    Returns:
        dict mapping each metric name to a list with one entry per episode:
        ``success_rate`` and ``final_position_accuracy`` (both: whether the
        final patch overlaps the lesion), ``average_reward`` (total episode
        reward), ``steps_to_find_tumor`` (``env.max_steps`` when the lesion was
        never reached), ``tumor_coverage`` and ``total_tumor_reward``.

    Raises:
        TypeError: If ``env_config`` is not provided.
        KeyError: If ``env_config`` lacks one of the required keys.
    """
    if env_config is None:
        # Fail with an actionable message instead of an opaque
        # "'NoneType' object is not subscriptable".
        raise TypeError(
            "test_agent_current() requires env_config with the keys "
            "'grid_size', 'rewards' and 'action_space'"
        )
    grid_size = env_config['grid_size']
    rewards = env_config['rewards']
    action_space = env_config['action_space']

    if env_class is None:
        env_class = Glioblastoma2

    # Never index past the end of the available test pairs.
    num_episodes = min(num_episodes, len(test_pairs))

    agent.dnnetwork.eval()  # Set to evaluation mode

    metrics = {
        'success_rate': [],
        'final_position_accuracy': [],
        'average_reward': [],
        'steps_to_find_tumor': [],
        'tumor_coverage': [],
        'total_tumor_reward': []
    }

    for i in range(num_episodes):
        img_path, mask_path = test_pairs[i]
        env = env_class(img_path, mask_path, grid_size=grid_size, rewards=rewards, action_space=action_space)

        state, _ = env.reset()
        total_reward = 0
        found_tumor = False
        tumor_positions_visited = set()
        steps_to_find = env.max_steps  # Default: didn't find
        tumor_rewards = 0

        for step in range(env.max_steps):
            with torch.no_grad():
                # epsilon=0 -> no exploration is requested from the network
                action = agent.dnnetwork.get_action(state, epsilon=0.00)

            # Termination flags are intentionally unused: every episode runs
            # for the full env.max_steps steps.
            state, reward, _terminated, _truncated, _ = env.step(action)
            total_reward += reward

            # Track tumor-related metrics
            if env.current_patch_overlap_with_lesion() > 0:
                tumor_positions_visited.add(tuple(env.agent_pos))
                if not found_tumor:
                    found_tumor = True
                    steps_to_find = step + 1

                # Count positive rewards (when on tumor)
                if reward > 0:
                    tumor_rewards += 1

        # Success: the episode ended on a patch overlapping the lesion.
        ended_on_tumor = env.current_patch_overlap_with_lesion() > 0
        metrics['success_rate'].append(ended_on_tumor)
        # Same criterion as success_rate, kept as a separate key for callers.
        metrics['final_position_accuracy'].append(ended_on_tumor)

        # Total reward of the episode (averaged over episodes when printed)
        metrics['average_reward'].append(total_reward)
        metrics['steps_to_find_tumor'].append(steps_to_find)

        # Tumor coverage: fraction of lesion-overlapping patches reached
        total_tumor_patches = count_tumor_patches(env)
        coverage = len(tumor_positions_visited) / total_tumor_patches if total_tumor_patches > 0 else 0
        metrics['tumor_coverage'].append(coverage)

        # Number of steps on the lesion that yielded a positive reward
        metrics['total_tumor_reward'].append(tumor_rewards)

    # Calculate and print final results
    if print_results:
        if not metrics['success_rate']:
            # np.max/np.min raise on empty input, so bail out early.
            print("No test episodes were run.")
            return metrics

        print("\n" + "="*50)
        print("TEST RESULTS (Current Model)")
        print("="*50)
        print(f"Success Rate: {np.mean(metrics['success_rate'])*100:.2f}%")
        print(f"Final Position Accuracy: {np.mean(metrics['final_position_accuracy'])*100:.2f}%")
        print(f"Average Episode Reward: {np.mean(metrics['average_reward']):.2f}")
        print(f"Average Steps to Find Tumor: {np.mean(metrics['steps_to_find_tumor']):.2f}")
        print(f"Average Tumor Coverage: {np.mean(metrics['tumor_coverage'])*100:.2f}%")
        print(f"Average Tumor Rewards per Episode: {np.mean(metrics['total_tumor_reward']):.2f}")

        # Additional detailed statistics
        print("\nDetailed Statistics:")
        print(f"Best Episode Reward: {np.max(metrics['average_reward']):.2f}")
        print(f"Worst Episode Reward: {np.min(metrics['average_reward']):.2f}")
        print(f"Median Steps to Find Tumor: {np.median(metrics['steps_to_find_tumor']):.2f}")

    return metrics

def count_tumor_patches(env):
    """Count the grid cells whose patch overlaps the lesion.

    The agent is temporarily placed on every ``[i, j]`` cell of the
    ``env.grid_size`` x ``env.grid_size`` grid and
    ``env.current_patch_overlap_with_lesion()`` is queried at each one.

    Args:
        env: Environment exposing ``grid_size``, a copyable ``agent_pos`` and
            ``current_patch_overlap_with_lesion()``.

    Returns:
        int: Number of cells for which the reported overlap is greater than 0.
    """
    original_pos = env.agent_pos.copy()  # Save original position
    tumor_patches = 0

    try:
        for i in range(env.grid_size):
            for j in range(env.grid_size):
                env.agent_pos = [i, j]
                if env.current_patch_overlap_with_lesion() > 0:
                    tumor_patches += 1
    finally:
        # Restore the agent even if the overlap query raises, so a failed
        # scan never leaves the environment in a displaced state.
        env.agent_pos = original_pos

    return tumor_patches

def visualize_test_episode(agent, img_path, mask_path, episode_num=0, env_config=None, epsilon=0.01):
    """Run one test episode, rendering it and printing a step-by-step trace.

    The episode always runs for ``env.max_steps`` steps; the
    ``terminated``/``truncated`` flags returned by ``env.step`` are ignored.
    ``env.render()`` is called on the first step, on the last step, and on
    every step from the one where the lesion is first reached onwards.

    Args:
        agent: Object exposing ``dnnetwork.get_action(state, epsilon=...)``.
        img_path: Path of the image passed to the environment.
        mask_path: Path of the matching mask passed to the environment.
        episode_num: Number shown in the printed header only.
        env_config: Optional mapping of keyword arguments for ``Glioblastoma2``
            (e.g. ``grid_size``, ``rewards``, ``action_space``). When omitted
            the environment is built with ``grid_size=4`` and its own defaults.
            Pass the configuration the model was trained with so the
            environment matches the network.
        epsilon: Exploration rate forwarded to ``get_action``.

    Returns:
        None. All output is printed or rendered.
    """
    if env_config is None:
        env = Glioblastoma2(img_path, mask_path, grid_size=4)
    else:
        env = Glioblastoma2(img_path, mask_path, **env_config)
    state, _ = env.reset()

    positions = []
    actions = []
    rewards = []
    tumor_found = False

    print(f"\nVisualizing Test Episode {episode_num}")
    print("Image:", os.path.basename(img_path))

    for step in range(env.max_steps):
        with torch.no_grad():
            action = agent.dnnetwork.get_action(state, epsilon=epsilon)

        state, reward, _terminated, _truncated, _ = env.step(action)

        # Copy the position so later moves do not alter the recorded trace.
        positions.append(env.agent_pos.copy())
        actions.append(action)
        rewards.append(reward)

        # Report the first step on which the current patch overlaps the lesion
        current_overlap = env.current_patch_overlap_with_lesion()
        if current_overlap > 0 and not tumor_found:
            tumor_found = True
            print(f"  Step {step+1}: Found tumor at position {env.agent_pos}")

        # tumor_found stays True once set, so every later step is rendered too
        if step == 0 or tumor_found or step == env.max_steps - 1:
            env.render()

    final_overlap = env.current_patch_overlap_with_lesion()
    print(f"Final position: {env.agent_pos}, On tumor: {final_overlap > 0}")
    print(f"Total reward: {sum(rewards):.2f}")

    print("\nStep-by-step rewards:")
    for idx, (pos, act, rew) in enumerate(zip(positions, actions, rewards)):
        print(f"  Step {idx+1}: Position {pos}, Action {act}, Reward {rew}")


In [None]:
# Load the model to test.
LR = 1e-4  # From paper

# Environment settings used both to build the network and to evaluate it.
CURRENT_CONFIG = {
    'grid_size': 4,
    'rewards': [3.0, -1.5, -0.5],
    'action_space': spaces.Discrete(5)
}

# The network constructor takes an environment instance, so build one from
# the first test pair.
env = Glioblastoma2(*test_pairs[0], **CURRENT_CONFIG)

model = DQN4(env, learning_rate=LR, device='cpu')
# map_location keeps loading working on CPU-only machines even when the
# checkpoint was saved from a GPU run; the model itself lives on the CPU.
model.load_state_dict(torch.load("Extension008.dat", map_location='cpu'))

# The agent is only used for evaluation here, so the test pairs are passed
# where it expects its training pairs.
agent = DQNAgent2(env_config=CURRENT_CONFIG, dnnetwork=model, buffer_class=ReplayBuffer, train_pairs=test_pairs,
                 env_class=Glioblastoma2,
                 epsilon=0.00)  # epsilon of zero for testing


In [None]:
# for name, param in model.named_parameters():
#     if 'fc' in name and 'weight' in name:
#         print(f"Weights of layer {name}:")
#         print(param.data)
#         break  # print only the first fc layer weights

# for name, param in model2.named_parameters():
#     if 'fc' in name and 'weight' in name:
#         print(f"Weights of layer {name}:")
#         print(param.data)
#         break  # print only the first fc layer weights

In [None]:
# Evaluate the loaded model once on every available test pair.
metrics1 = test_agent_current(
    agent,
    test_pairs,
    num_episodes=len(test_pairs),
    env_config=CURRENT_CONFIG,
)

# metrics2 = test_agent_current(agent2, test_pairs, num_episodes=len(test_pairs), env_config=CONFIG2)

- Trial003: accuracy = 51.00%
- 004 - 27%
- 005 - 52%
- 006 - 14%
- 007 - 07%
- 008 - 36%
- 012 - 54%
- 013 - 25%
- 014 - 36%
- 014 - 53% (trial number repeated from the line above — possibly 015; verify)
- 016 - 47%
- 017 - 22%
- 018 - 52%

In [None]:
LR = 1e-4  # From paper

# Test each of the checkpoints stored in the "grid_search" folder.
# Three reward configurations were used; each one covers a contiguous range
# of trial numbers (see TRIAL_GROUPS below).
CONFIG1 = {
    'grid_size': 4,
    'rewards': [10.0, -2.0, -0.5],
    'action_space': spaces.Discrete(3)
}

CONFIG2 = {
    'grid_size': 4,
    'rewards': [5.0, -1.0, -0.2],
    'action_space': spaces.Discrete(3)
}

CONFIG3 = {
    'grid_size': 4,
    'rewards': [8.0, -1.5, -0.3],
    'action_space': spaces.Discrete(3)
}


# One environment / network / agent per configuration. Each network gets its
# own name so that loading a checkpoint always updates the network owned by
# the agent that is about to be evaluated.
env1 = Glioblastoma(*test_pairs[0], **CONFIG1)
model1 = DQN(env1, learning_rate=LR, device='cpu')
agent1 = DQNAgent(env_config=CONFIG1, dnnetwork=model1, buffer_class=ReplayBuffer, train_pairs=test_pairs,
                 env_class=Glioblastoma,
                 epsilon=0.00)  # epsilon of zero for testing


env2 = Glioblastoma(*test_pairs[0], **CONFIG2)
model2 = DQN(env2, learning_rate=LR, device='cpu')
agent2 = DQNAgent(env_config=CONFIG2, dnnetwork=model2, buffer_class=ReplayBuffer, train_pairs=test_pairs,
                 env_class=Glioblastoma,
                 epsilon=0.00)  # epsilon of zero for testing

env3 = Glioblastoma(*test_pairs[0], **CONFIG3)
model3 = DQN(env3, learning_rate=LR, device='cpu')
agent3 = DQNAgent(env_config=CONFIG3, dnnetwork=model3, buffer_class=ReplayBuffer, train_pairs=test_pairs,
                 env_class=Glioblastoma,
                 epsilon=0.00)  # epsilon of zero for testing


# (first trial, last trial, label, agent, network, config); ranges are inclusive.
TRIAL_GROUPS = [
    (19, 114, "CONFIG1", agent1, model1, CONFIG1),
    (115, 210, "CONFIG2", agent2, model2, CONFIG2),
    (211, 306, "CONFIG3", agent3, model3, CONFIG3),
]

max_success_rate = 0  # initialize to zero
best_model_file = None  # stays None when no checkpoint scores above zero

# Sorted so that the evaluation order (and therefore tie-breaking between
# equally good checkpoints) does not depend on the file system.
for model_file in sorted(os.listdir("grid_search")):
    if not model_file.endswith(".dat"):
        continue

    try:
        trial_num = int(model_file.split("Trial")[-1].split(".")[0])
    except ValueError:
        print(f"Skipping {model_file}: cannot parse a trial number from the file name")
        continue

    group = next((g for g in TRIAL_GROUPS if g[0] <= trial_num <= g[1]), None)
    if group is None:
        # Without this guard the previously selected agent would be silently
        # re-evaluated under the wrong file name.
        print(f"Skipping {model_file}: trial {trial_num} does not belong to any known configuration")
        continue
    _, _, label, trial_agent, trial_model, trial_cfg = group
    print(f"Using {label}")

    checkpoint_path = os.path.join("grid_search", model_file)
    trial_model.load_state_dict(torch.load(checkpoint_path, map_location='cpu'))

    # NOTE(review): test_agent_current, as called here, evaluates on
    # Glioblastoma2 environments while these agents were created with
    # Glioblastoma - confirm that this is the intended evaluation setup.
    metrics = test_agent_current(trial_agent, test_pairs, num_episodes=len(test_pairs),
                                 print_results=False, env_config=trial_cfg)

    success_rate = np.mean(metrics['success_rate'])
    if success_rate > max_success_rate:
        max_success_rate = success_rate
        best_model_file = model_file
        print(f"New best model found: {best_model_file} with success rate {max_success_rate*100:.2f}%")

if best_model_file is None:
    print("\nNo model achieved a success rate above 0%")
else:
    print(f"\nBest model overall: {best_model_file} with success rate {max_success_rate*100:.2f}%")

In [None]:
# for i in range(3):  # visualize 3 test episodes
#     visualize_test_episode(agent2, test_pairs[i][0], test_pairs[i][1], episode_num=1)