Real Time Radar Parameter Optimizer
===================================

Carson Anderson & Calvin Henggeler  
ASEN 5264 Decision Making Under Uncertainty - Spring 2025  
Semester Project  

In [1]:
import Pkg
Pkg.activate("$(homedir())/Documents/DMU_Project_Local/RadarEnv")

# # # pin POMDPs back to the 0.9.x series (which still exports initialstate_distribution)
# Pkg.add(Pkg.PackageSpec(name="POMDPs",       version="0.9.6"))

# # install the helper‐packages at the last releases built against POMDPs 0.9.x
# Pkg.add(Pkg.PackageSpec(name="POMDPModelTools", version="0.3.13"))
# Pkg.add(Pkg.PackageSpec(name="BeliefUpdaters",   version="0.2.2"))
# Pkg.add(Pkg.PackageSpec(name="POMDPPolicies",    version="0.4.2"))

# # now rebuild everything
# Pkg.precompile()

[32m[1m  Activating[22m[39m project at `~/Documents/DMU_Project_Local/RadarEnv`


In [2]:
using POMDPs
using POMDPTools: DiscreteUpdater, ImplicitDistribution, RolloutSimulator, EpsGreedyPolicy
using QuickPOMDPs: QuickPOMDP
using POMDPTesting: has_consistent_distributions
using QMDP: QMDPSolver
using Plots
using Statistics: std
using POMDPPolicies: alphavectors, FunctionPolicy
using Random, Distributions
using ParticleFilters
using BasicPOMCP
using POMDPModelTools  # for ParticleCollection
include("radarFunctions.jl")
include("radarSimulator.jl")

using Random

struct RadarState
    x::Float64
    y::Float64
end

struct RadarObservation
    x_belief::Float64
    y_belief::Float64
end

# How to make a random RadarState (normal rand)
Random.rand(rng::AbstractRNG, ::Type{RadarState}) = RadarState(rand(rng)*20000 - 10000, rand(rng)*20000 - 10000)

# Tell Julia how to sample from RadarState
Random.Sampler(::Type{RadarState}) = Random.SamplerTrivial(RadarState)

# Tell Julia how to sample when Random.Sampler is used (POMDPTools uses this!)
Base.rand(rng::AbstractRNG, sampler::Random.SamplerTrivial{RadarState, Any}) = RadarState(rand(rng)*20000 - 10000, rand(rng)*20000 - 10000)

# How to randomly generate a RadarObservation
Random.rand(rng::AbstractRNG, ::Type{RadarObservation}) = RadarObservation(randn(rng)*10000, randn(rng)*10000)

# Tell Julia how to sample
Random.Sampler(::Type{RadarObservation}) = Random.SamplerTrivial(RadarObservation)

# Define the random sampling behavior when used through SamplerTrivial
Base.rand(rng::AbstractRNG, sampler::Random.SamplerTrivial{RadarObservation, Any}) = RadarObservation(randn(rng)*10000, randn(rng)*10000)

## 1. Create Smart Radar POMDP

### Environment Parameters

In [3]:
# ===================
# --- ENVIRONMENT ---
# ===================

# Environment Grid
x_max_size = 10000.0
y_max_size = 10000.0    
divisions  = 500
x = collect(LinRange(-x_max_size, x_max_size, divisions))
y = collect(LinRange(-y_max_size, y_max_size, divisions))
global env = RadarEnvironment(x, y)

# Receiver (same as transmitter)
pos_rx = SVector(0.0, 0.0)
snapped_pos_rx = snap_to_grid(env.grid_x, pos_rx)
rx = Receiver(snapped_pos_rx, Float64[], 3.0)
add_receiver!(env, rx)

1-element Vector{Receiver}:
 Receiver([20.04008016031912, 20.04008016031912], Float64[], 3.0)

### Radar POMDP

In [4]:
dt = 1.0  # sampling interval for radar wave simulation (1 ms step)

# Define state space as sampled states for logging
states_list = [RadarState(x, y) for x in -10000:500:10000, y in -10000:500:10000]

radar = QuickPOMDP(
    states = states_list,
    statetype = RadarState,
    obstype = RadarObservation,

    discount = 0.95,

    actions = [(steering_angle, beamwidth, power) 
                for steering_angle in 0:5:355 
                for beamwidth in 5:5:45 
                for power in [100, 250, 500, 1000]],

        transition = function (s, a)
            return RadarState(s.x, s.y)
        end,

    observation = function (a, s)
        steer_ang, beamwidth, tx_power = a
        println("Observation called with state: ", s)

        xpos = s.x
        ypos = s.y

        # clear previous received powers
        env.receivers[1].received_power = Float64[]

        # create transmitter and add to environment
        tx = PointTransmitter(SVector(0.0, 0.0), 1e9, tx_power, 0.0, 0.6, steer_ang, beamwidth, false)
        add_transmitter!(env, tx)

        println("running simulator")
        for t = 1:95
            step!(env, dt)
        end
        rm_transmitter!(env, tx)

        powers = env.receivers[1].received_power
        noisy_powers = powers .+ abs.(1e-11 .* randn(length(powers)))
        return_power, return_index = findmax(noisy_powers)

        # convert index → arrival time (seconds)
        return_time = return_index * dt

        # calculate range (meters)
        range = return_time * 300.0  # 300 m/us approximation

        println("return delay (index): ", return_index)
        println("return time (s): ", return_time)
        println("range (m): ", range)

        belief_state = boresight_polar_2_cartesian(range, steer_ang + randn() * beamwidth / 2)

        return RadarObservation(belief_state[1], belief_state[2])
    end,

    reward = function (s, a, sp, o)
        xpos = s.x
        ypos = s.y
        steer_ang, beamwidth, tx_power = a

        tracking_error = sqrt((xpos - o.x_belief)^2 + (ypos - o.y_belief)^2)

        if tracking_error < 10000
            println("Tracking SUCCESS: error = ", tracking_error)
            return 100.0
        else
            power_cost = tx_power / 100e3
            norm_tracking_error = tracking_error / sqrt(2 * (x_max_size^2 + y_max_size^2))
            return -100 * (power_cost + norm_tracking_error)
        end
    end,

    initialstate = ImplicitDistribution(rng -> begin
        x = (2rand(rng)-1)*10000
        y = (2rand(rng)-1)*10000

        pos_ref = SVector(x, y)
        snapped_pos_ref = snap_to_grid(env.grid_x, pos_ref)
        rx_ref = Reflector(snapped_pos_ref, 0.9, 1e10, false)
        add_reflector!(env, rx_ref)

        return RadarState(x, y)   # ← no comma
    end),

    isterminal = s -> false
)


QuickPOMDP{Base.UUID("5ed31efa-ea1f-4e8f-9d88-1c9876e7b50b"), RadarState, Tuple{Int64, Int64, Int64}, RadarObservation, @NamedTuple{stateindex::Dict{RadarState, Int64}, isterminal::var"#30#38", states::Matrix{RadarState}, statetype::DataType, discount::Float64, actions::Vector{Tuple{Int64, Int64, Int64}}, obstype::DataType, observation::var"#27#35", actionindex::Dict{Tuple{Int64, Int64, Int64}, Int64}, transition::var"#26#34", reward::var"#28#36", initialstate::ImplicitDistribution{var"#29#37", Tuple{}}}}((stateindex = Dict{RadarState, Int64}(RadarState(-4500.0, 2000.0) => 996, RadarState(8000.0, -7000.0) => 283, RadarState(9500.0, 6500.0) => 1393, RadarState(-2000.0, -7000.0) => 263, RadarState(0.0, -6500.0) => 308, RadarState(-3000.0, -2500.0) => 630, RadarState(1000.0, -8500.0) => 146, RadarState(-3500.0, 3500.0) => 1121, RadarState(-500.0, 10000.0) => 1660, RadarState(-4000.0, 1000.0) => 915…), isterminal = var"#30#38"(), states = RadarState[RadarState(-10000.0, -10000.0) RadarStat

In [12]:
@show discount(radar)
@show actions(radar)[2000]
@show rand(initialstate(radar)) 

discount(radar) = 0.95
(actions(radar))[2000] = (275, 25, 1000)
rand(initialstate(radar)) = RadarState(5828.257691138699, -3563.866658926456)


RadarState(5828.257691138699, -3563.866658926456)

In [13]:
#using BenchmarkTools

policy = FunctionPolicy(o -> POMDPs.actions(radar)[1])
sim = RolloutSimulator(max_steps=1)
simulate(sim, radar, policy)
#@btime simulate(sim, radar, policy)
#bench_result = @benchmark simulate($sim, $radar, $policy)


Observation called with state: RadarState(4041.8235659650145, 1352.625206808656)
running simulator
return delay (index): 7
return time (s): 7.0
range (m): 2100.0


-95.41432842461268

## 2. Create Updater (Particle Filter)

In [15]:
pomdp = radar

# Build rollout policy (random greedy)
rollout_policy = EpsGreedyPolicy(pomdp, 0.1)

function pomcp_solve(m)
    solver = POMCPSolver(tree_queries=3,
                         c=1.0, max_depth=20,
                         default_action=rand(actions(m)),
                         estimate_value=estimate_value=FORollout(FunctionPolicy(s -> begin
                         println("FOROLLOUT CALLED")
                         pos_ref = snap_to_grid(env.grid_x, SVector(s.x, s.y))
                         refl = Reflector(pos_ref, 0.9, 1e10, false)
                         add_reflector!(env, refl)
                     
                         total_reward = 0.0
                         discount = 1.0
                         max_steps = 20  # rollout horizon
                         current_state = s
                     
                         for step in 1:max_steps
                             a = rand(actions(m))
                             sp = transition(m, current_state, a)
                             o = observation(m, a, sp)
                             r = reward(m, current_state, a, sp, o)

                             println("  [FORollout step $(step)] reward = ", r)
                     
                             total_reward += discount * r
                             discount *= m.discount
                     
                             println("  [FORollout step $(step)] reward = ", r, "  total_reward = ", total_reward)
                     
                             if r >= 100.0
                                 println("  [FORollout step $(step)] SUCCESS → terminating rollout.")
                                 #break
                             end
                     
                             current_state = sp
                         end
                     
                         env.reflectors = filter(r -> r != refl, env.reflectors)
                     
                         return total_reward
                     end))
                     
                         )
    return solve(solver, m)
end

# Solve POMDP
env.reflectors = Reflector[]
pomcp_p = pomcp_solve(pomdp)

rng = MersenneTwister(42)  # or no seed
b = ParticleCollection([rand(rng, initialstate(pomdp)) for _ in 1:100])

println("\n=== Getting action ===")
@time a = action(pomcp_p, b)

println("\n=== Chosen action ===")
println("Action chosen: ", a)




=== Getting action ===
Observation called with state: RadarState(-8476.4137400582, -8292.295129672248)
running simulator
return delay (index): 77
return time (s): 77.0
range (m): 23100.0
FOROLLOUT CALLED
Observation called with state: RadarState(-8476.4137400582, -8292.295129672248)
running simulator
return delay (index): 23
return time (s): 23.0
range (m): 6900.0
  [FORollout step 1] reward = -82.73418675384131
  0.607952 seconds (122.71 k allocations: 8.315 MiB, 10.85% compilation time)

=== Chosen action ===
Action chosen: (250, 30, 250)


In [16]:
function deep_rollout(m, s, policy, max_steps=20)
    total_reward = 0.0
    discount = 1.0
    current_state = s
    print("rolloutfn")
    for step in 1:max_steps
        a = rand(actions(m))
        sp = transition(m, current_state, a)
        o = observation(m, a, sp)
        r = reward(m, current_state, a, sp, o)

        println("  [Rollout step $(step)] reward = ", r)

        total_reward += discount * r
        discount *= m.discount

        if r >= 100.0
            println("  [Rollout step $(step)] SUCCESS → terminating.")
            break
        end

        current_state = sp
    end

    env.reflectors = filter(r -> r != refl, env.reflectors)

    return total_reward
end


function pomcp_solve(m)
    solver = POMCPSolver(tree_queries=1000,
                         c=1.0, max_depth=20,
                         default_action=rand(actions(m)),
                         estimate_value = FunctionPolicy(s -> deep_rollout(m, s, rollout_policy))
                         )
    return solve(solver, m)
end

# Solve POMDP
env.reflectors = Reflector[]
pomcp_p = pomcp_solve(pomdp)

# rng = MersenneTwister(42)  # or no seed
# b = ParticleCollection([rand(rng, initialstate(pomdp)) for _ in 1:100])

# println("\n=== Getting action ===")
# @time a = action(pomcp_p, b)

# println("\n=== Chosen action ===")
# println("Action chosen: ", a)


POMCPPlanner{QuickPOMDP{Base.UUID("91cc972e-1ca2-4a9b-8e3a-bcfadbe222e7"), RadarState, Tuple{Int64, Int64, Int64}, RadarObservation, @NamedTuple{stateindex::Dict{RadarState, Int64}, isterminal::var"#58#66", states::Matrix{RadarState}, statetype::DataType, discount::Float64, actions::Vector{Tuple{Int64, Int64, Int64}}, obstype::DataType, observation::var"#55#63", actionindex::Dict{Tuple{Int64, Int64, Int64}, Int64}, transition::var"#54#62", reward::var"#56#64", initialstate::ImplicitDistribution{var"#57#65", Tuple{}}}}, FunctionPolicy{var"#83#84"{QuickPOMDP{Base.UUID("91cc972e-1ca2-4a9b-8e3a-bcfadbe222e7"), RadarState, Tuple{Int64, Int64, Int64}, RadarObservation, @NamedTuple{stateindex::Dict{RadarState, Int64}, isterminal::var"#58#66", states::Matrix{RadarState}, statetype::DataType, discount::Float64, actions::Vector{Tuple{Int64, Int64, Int64}}, obstype::DataType, observation::var"#55#63", actionindex::Dict{Tuple{Int64, Int64, Int64}, Int64}, transition::var"#54#62", reward::var"#56#6

In [17]:
############################################
# === Manual rollout to inspect Q value ===
############################################

function run_manual_rollout(m, s::RadarState)
    pos_ref = snap_to_grid(env.grid_x, SVector(s.x, s.y))
    refl = Reflector(pos_ref, 0.9, 1e10, false)
    add_reflector!(env, refl)

    total_reward = 0.0
    discount_factor = 1.0
    max_steps = 20
    current_state = s

    println("\n=== Manual rollout starting from sampled state ===")

    for step in 1:max_steps
        a = rand(actions(m))
        sp = transition(m, current_state, a)
        o = observation(m, a, sp)
        r = reward(m, current_state, a, sp, o)

        total_reward += discount_factor * r
        discount_factor *= discount(m)  

        println("  [Manual rollout step $(step)] reward = ", r, "  total_reward = ", total_reward)

        if r >= 100.0
            println("  [Manual rollout step $(step)] SUCCESS → terminating rollout.")
            break
        end

        current_state = sp
    end

    env.reflectors = filter(r -> r != refl, env.reflectors)

    return total_reward
end

# Run manual rollout from sampled state
s = rand(b)
rollout_reward = run_manual_rollout(pomdp, s)

println("\nEstimated rollout reward (Q) from sampled state: ", rollout_reward)



=== Manual rollout starting from sampled state ===
Observation called with state: RadarState(-9240.707757690525, 1695.4713698231183)
running simulator
return delay (index): 48
return time (s): 48.0
range (m): 14400.0
  [Manual rollout step 1] reward = -58.92511866478149  total_reward = -58.92511866478149
Observation called with state: RadarState(-9240.707757690525, 1695.4713698231183)
running simulator
return delay (index): 56
return time (s): 56.0
range (m): 16800.0
  [Manual rollout step 2] reward = -131.00447134148683  total_reward = -183.37936643919397
Observation called with state: RadarState(-9240.707757690525, 1695.4713698231183)
running simulator
return delay (index): 79
return time (s): 79.0
range (m): 23700.0
  [Manual rollout step 3] reward = -82.7061706249007  total_reward = -258.02168542816685
Observation called with state: RadarState(-9240.707757690525, 1695.4713698231183)
running simulator
return delay (index): 13
return time (s): 13.0
range (m): 3900.0
  [Manual rollou