# Notebook to experiment with testing:

## Code:

In [1]:
import numpy as np
import os
import torch
from gymnasium import spaces


In [2]:
from training_environments import prepare, Glioblastoma
from training_dqn import DQN
from training_agents import DQNAgent
from training_buffers import ReplayBuffer

In [3]:
# Build the list of (image path, mask path) pairs used by the evaluation
# cells below; `mode='test'` presumably selects the held-out split
# (see training_environments.prepare to confirm).
test_pairs = prepare(mode='test')

✅ Found 100 pairs out of 100 listed in CSV.


# TESTING:

In [4]:
def test_agent_current(agent, test_pairs, num_episodes=10, print_results=True):
    """
    Test the trained agent using the current environment setup
    without any modifications to reward system or early termination.

    One episode is run per entry of ``test_pairs`` (first ``num_episodes``
    entries). Every episode runs for the full ``env.max_steps`` steps with a
    greedy policy (``epsilon=0.00``); the ``terminated`` / ``truncated`` flags
    returned by ``env.step`` are deliberately ignored.

    Args:
        agent: Object exposing ``dnnetwork`` with ``eval()`` and
            ``get_action(state, epsilon=...)``.
        test_pairs: Indexable sequence of ``(img_path, mask_path)`` pairs.
        num_episodes: Number of episodes to run. Clamped to
            ``len(test_pairs)`` so that asking for more episodes than there
            are pairs no longer raises ``IndexError``.
        print_results: When true, print a summary of the collected metrics.

    Returns:
        dict mapping each metric name to a list with one entry per episode:
        ``success_rate`` / ``final_position_accuracy`` (bool, last patch
        overlaps the lesion), ``average_reward`` (episode return),
        ``steps_to_find_tumor`` (``env.max_steps`` when never found),
        ``tumor_coverage`` (visited tumor patches / all tumor patches) and
        ``total_tumor_reward`` (number of positive-reward steps on tumor).
    """
    agent.dnnetwork.eval()  # Set to evaluation mode

    # Never index past the end of test_pairs (and treat negatives as zero).
    num_episodes = max(0, min(num_episodes, len(test_pairs)))

    metrics = {
        'success_rate': [],
        'final_position_accuracy': [],
        'average_reward': [],
        'steps_to_find_tumor': [],
        'tumor_coverage': [],
        'total_tumor_reward': []
    }

    for i in range(num_episodes):
        img_path, mask_path = test_pairs[i]
        env = Glioblastoma(img_path, mask_path, grid_size=4)

        state, _ = env.reset()
        total_reward = 0
        found_tumor = False
        tumor_positions_visited = set()
        steps_to_find = env.max_steps  # Default: didn't find
        tumor_rewards = 0

        for step in range(env.max_steps):
            with torch.no_grad():
                action = agent.dnnetwork.get_action(state, epsilon=0.00)

            # terminated / truncated are ignored on purpose: the episode
            # always runs for max_steps (no early termination).
            next_state, reward, _terminated, _truncated, _ = env.step(action)
            state = next_state
            total_reward += reward

            # Track tumor-related metrics
            if env.current_patch_overlap_with_lesion() > 0:
                tumor_positions_visited.add(tuple(env.agent_pos))
                if not found_tumor:
                    found_tumor = True
                    steps_to_find = step + 1

                # Count positive rewards (when on tumor)
                if reward > 0:
                    tumor_rewards += 1

        # Success and final-position accuracy are the same criterion:
        # the episode ended on a patch that overlaps the lesion.
        ended_on_tumor = env.current_patch_overlap_with_lesion() > 0
        metrics['success_rate'].append(ended_on_tumor)
        metrics['final_position_accuracy'].append(ended_on_tumor)

        # Episode return (averaged over episodes when reported)
        metrics['average_reward'].append(total_reward)

        # Steps to find tumor
        metrics['steps_to_find_tumor'].append(steps_to_find)

        # Tumor coverage (fraction of tumor patches visited)
        total_tumor_patches = count_tumor_patches(env)
        coverage = len(tumor_positions_visited) / total_tumor_patches if total_tumor_patches > 0 else 0
        metrics['tumor_coverage'].append(coverage)

        # Total positive rewards from tumor
        metrics['total_tumor_reward'].append(tumor_rewards)

    if print_results:
        _print_test_summary(metrics)

    return metrics


def _print_test_summary(metrics):
    """Print aggregate statistics for the metrics collected by test_agent_current."""
    print("\n" + "="*50)
    print("TEST RESULTS (Current Model)")
    print("="*50)

    # np.max / np.min raise on empty input, so bail out before aggregating.
    if not metrics['success_rate']:
        print("No episodes were evaluated.")
        return

    print(f"Success Rate: {np.mean(metrics['success_rate'])*100:.2f}%")
    print(f"Final Position Accuracy: {np.mean(metrics['final_position_accuracy'])*100:.2f}%")
    print(f"Average Episode Reward: {np.mean(metrics['average_reward']):.2f}")
    print(f"Average Steps to Find Tumor: {np.mean(metrics['steps_to_find_tumor']):.2f}")
    print(f"Average Tumor Coverage: {np.mean(metrics['tumor_coverage'])*100:.2f}%")
    print(f"Average Tumor Rewards per Episode: {np.mean(metrics['total_tumor_reward']):.2f}")

    # Additional detailed statistics
    print("\nDetailed Statistics:")
    print(f"Best Episode Reward: {np.max(metrics['average_reward']):.2f}")
    print(f"Worst Episode Reward: {np.min(metrics['average_reward']):.2f}")
    print(f"Median Steps to Find Tumor: {np.median(metrics['steps_to_find_tumor']):.2f}")

def count_tumor_patches(env):
    """Count total number of patches that contain tumor.

    Temporarily moves ``env.agent_pos`` over every cell of the
    ``env.grid_size`` x ``env.grid_size`` grid and counts the cells for which
    ``env.current_patch_overlap_with_lesion()`` is greater than zero.

    The agent's original position is restored before returning, including
    when the overlap query raises, so the environment is never left pointing
    at a scan cell.

    Args:
        env: Environment exposing ``grid_size``, a copyable ``agent_pos`` and
            ``current_patch_overlap_with_lesion()``.

    Returns:
        int: Number of grid cells whose patch overlaps the lesion.
    """
    original_pos = env.agent_pos.copy()  # Save original position
    tumor_patches = 0

    try:
        for i in range(env.grid_size):
            for j in range(env.grid_size):
                env.agent_pos = [i, j]
                if env.current_patch_overlap_with_lesion() > 0:
                    tumor_patches += 1
    finally:
        # Restore even on error so callers keep a consistent environment.
        env.agent_pos = original_pos

    return tumor_patches

def visualize_test_episode(agent, img_path, mask_path, episode_num=0):
    """Visualize a single test episode.

    Runs one episode of ``env.max_steps`` steps on a fresh ``Glioblastoma``
    environment (``grid_size=4``), printing when the tumor is first reached,
    the final position, the total reward and a step-by-step trace.

    The environment is rendered only at the important moments: the first
    step, the step on which the tumor is first found, and the last step.

    Note: actions are chosen with ``epsilon=0.01``, whereas
    ``test_agent_current`` uses ``epsilon=0.00``.

    Args:
        agent: Object exposing ``dnnetwork.get_action(state, epsilon=...)``.
        img_path: Path of the image passed to the environment.
        mask_path: Path of the lesion mask passed to the environment.
        episode_num: Label printed in the episode header.
    """
    env = Glioblastoma(img_path, mask_path, grid_size=4)
    state, _ = env.reset()

    positions = []
    actions = []
    rewards = []
    tumor_found = False

    print(f"\nVisualizing Test Episode {episode_num}")
    print("Image:", os.path.basename(img_path))

    for step in range(env.max_steps):
        with torch.no_grad():
            action = agent.dnnetwork.get_action(state, epsilon=0.01)

        # terminated / truncated are not used: the episode always runs
        # for max_steps.
        next_state, reward, _terminated, _truncated, _ = env.step(action)

        positions.append(env.agent_pos.copy())
        actions.append(action)
        rewards.append(reward)

        # Check tumor status; `just_found` is true only on the first hit.
        current_overlap = env.current_patch_overlap_with_lesion()
        just_found = current_overlap > 0 and not tumor_found
        if just_found:
            tumor_found = True
            print(f"  Step {step+1}: Found tumor at position {env.agent_pos}")

        state = next_state

        # Render at important moments only. Using the sticky `tumor_found`
        # flag here would render every step after the first hit.
        if step == 0 or just_found or step == env.max_steps - 1:
            env.render()

    final_overlap = env.current_patch_overlap_with_lesion()
    print(f"Final position: {env.agent_pos}, On tumor: {final_overlap > 0}")
    print(f"Total reward: {sum(rewards):.2f}")

    print("\nStep-by-step rewards:")
    for idx, (pos, act, rew) in enumerate(zip(positions, actions, rewards)):
        print(f"  Step {idx+1}: Position {pos}, Action {act}, Reward {rew}")


In [37]:
# Load the two trained checkpoints that are compared in the cells below.
LR = 1e-4  # From paper

# Environment configuration used to build the network/agent for checkpoint 1.
CONFIG1 = {
    'grid_size': 4,
    'rewards': [10.0, -2.0, -0.5],
    'action_space': spaces.Discrete(3)
}

# Environment configuration for checkpoint 2 (currently identical to CONFIG1;
# kept separate so the two models can be evaluated under different settings).
CONFIG2 = {
    'grid_size': 4,
    'rewards': [10.0, -2.0, -0.5], # [10.0, -2.0, -0.5],
    'action_space': spaces.Discrete(3)
}

# The environments are only needed to construct the networks; the first test
# pair is used as a representative sample.
env = Glioblastoma(*test_pairs[0], **CONFIG1)
env2 = Glioblastoma(*test_pairs[0], **CONFIG2)

# map_location='cpu' matches device='cpu' above, so a checkpoint that was
# saved from a GPU run still loads on a machine without CUDA.
model = DQN(env, learning_rate=LR, device='cpu')
model.load_state_dict(torch.load("Glioblastoma020_56.dat", map_location='cpu'))

model2 = DQN(env2, learning_rate=LR, device='cpu')
model2.load_state_dict(torch.load("Trial009.dat", map_location='cpu'))
# model2.load_state_dict(torch.load("Glioblastoma.dat", map_location='cpu'))

agent = DQNAgent(env_config=CONFIG1, dnnetwork=model, buffer_class=ReplayBuffer, train_pairs=test_pairs,
                 env_class=Glioblastoma,
                 epsilon=0.00)  # epsilon = 0: fully greedy policy for testing

agent2 = DQNAgent(env_config=CONFIG2, dnnetwork=model2, buffer_class=ReplayBuffer, train_pairs=test_pairs,
                  env_class=Glioblastoma,
                  epsilon=0.00)  # epsilon = 0: fully greedy policy for testing


In [38]:
# Sanity check that the two checkpoints really hold different parameters:
# print the weights of the first fully connected layer of each network.
for network in (model, model2):
    first_fc = next(
        (
            (layer_name, tensor)
            for layer_name, tensor in network.named_parameters()
            if 'fc' in layer_name and 'weight' in layer_name
        ),
        None,
    )
    # Nothing is printed for a network without a matching layer.
    if first_fc is not None:
        layer_name, tensor = first_fc
        print(f"Weights of layer {layer_name}:")
        print(tensor.data)

Weights of layer fc.0.weight:
tensor([[ 0.0457, -0.0255, -0.0067,  ..., -0.0145, -0.0194,  0.0553],
        [-0.0144,  0.0111, -0.0237,  ...,  0.0092,  0.0182,  0.0530],
        [ 0.0291, -0.0288, -0.0541,  ..., -0.0471, -0.0503, -0.0414],
        ...,
        [ 0.0490,  0.0193, -0.0301,  ..., -0.0445,  0.0345, -0.0869],
        [-0.0245, -0.0035, -0.0140,  ..., -0.0262,  0.0071, -0.0459],
        [-0.0112,  0.0424, -0.0333,  ..., -0.0602,  0.0015, -0.0723]])
Weights of layer fc.0.weight:
tensor([[ 0.0311, -0.0320,  0.0300,  ...,  0.0296,  0.0037,  0.0281],
        [-0.0145, -0.0473, -0.0158,  ..., -0.0107,  0.0320, -0.0575],
        [ 0.0157, -0.0318, -0.0100,  ..., -0.0154,  0.0036,  0.0527],
        ...,
        [-0.0248,  0.0158, -0.0228,  ..., -0.0108, -0.0287, -0.0621],
        [ 0.0176,  0.0514,  0.0318,  ..., -0.0024, -0.0001, -0.0003],
        [ 0.0014,  0.0527,  0.0293,  ...,  0.0138, -0.0045, -0.0708]])


In [39]:
# Evaluate both agents on every test pair (one episode per pair).
n_test_episodes = len(test_pairs)

# Agent built from the first checkpoint.
metrics1 = test_agent_current(agent, test_pairs, num_episodes=n_test_episodes)

# Agent built from the second checkpoint.
metrics2 = test_agent_current(agent2, test_pairs, num_episodes=n_test_episodes)


TEST RESULTS (Current Model)
Success Rate: 54.00%
Final Position Accuracy: 54.00%
Average Episode Reward: -10.03
Average Steps to Find Tumor: 10.53
Average Tumor Coverage: 26.21%
Average Tumor Rewards per Episode: 8.82

Detailed Statistics:
Best Episode Reward: 20.00
Worst Episode Reward: -35.50
Median Steps to Find Tumor: 3.00

TEST RESULTS (Current Model)
Success Rate: 36.00%
Final Position Accuracy: 36.00%
Average Episode Reward: -23.73
Average Steps to Find Tumor: 13.48
Average Tumor Coverage: 11.10%
Average Tumor Rewards per Episode: 4.41

Detailed Statistics:
Best Episode Reward: 20.00
Worst Episode Reward: -38.50
Median Steps to Find Tumor: 20.00


- Trial003: accuracy = 51.00%
- Trial004: accuracy = 27%
- Trial005: accuracy = 52%
- Trial006: accuracy = 14%
- Trial007: accuracy = 7%
- Trial008: accuracy = 36%

In [7]:
# # test each one of the models in the folder "other_models"
# env = Glioblastoma(*test_pairs[0], grid_size=4)
# model = DQN(env, learning_rate=LR, device='cpu')

# max_success_rate = 0  # initialize to zero
# for model_file in os.listdir("other_models"):
#     if model_file.endswith(".dat"):
#         # print(f"\nTesting model: {model_file}")
#         model.load_state_dict(torch.load(os.path.join("other_models", model_file)))
        
#         agent = DQNAgent(env=env, dnnetwork=model, buffer_class=ReplayBuffer, train_pairs=test_pairs,
#                          epsilon=0.00)  # very low epsilon for testing
        
#         metrics = test_agent_current(agent, test_pairs, num_episodes=len(test_pairs), print_results=False)
#         if np.mean(metrics['success_rate']) > max_success_rate:
#             max_success_rate = np.mean(metrics['success_rate'])
#             best_model_file = model_file
#             print(f"New best model found: {best_model_file} with success rate {max_success_rate*100:.2f}%")
            
# print(f"\nBest model overall: {best_model_file} with success rate {max_success_rate*100:.2f}%")

In [8]:
# for i in range(3):  # visualize 3 test episodes
#     visualize_test_episode(agent2, test_pairs[i][0], test_pairs[i][1], episode_num=1)