In [1]:
using POMDPs
using Random 
using Parameters
using StaticArrays
using Distributions
using MCTS
using POMDPSimulators

In [2]:
const Vec2 = SVector{2, Float64}
const Vec3 = SVector{3, Float64}

struct mystate
    uavPose::Vec3
    uavHeading::Float64 # radius
    targetPose::Vec2
end

struct myaction
    xy_speed::Float64 # m/s
    z_speed::Float64
    angle::Float64 # radius
end

@with_kw mutable struct UAVMDP <: MDP{mystate, myaction}

    target_velocity::Vec2 = SVector(0.5,0.5)
    target_std::Float64 = 0.1

    r_outScene::Float64 = -70
    r_action::Float64 = -10
    r_reach::Float64 = 100

    discount::Float64 = 0.95
    
    boundary::Int = 100
    landing_r::Int = 1

end

UAVMDP

In [3]:
function POMDPs.isterminal(p::UAVMDP,s::mystate,a::myaction)
    condition1 = (sqrt((s.uavPose[1]-s.targetPose[1])^2 + (s.uavPose[2]-s.targetPose[2])^2 + s.uavPose[3]^2) < p.landing_r)
    condition2 = s.uavPose[1] > p.boundary || s.uavPose[2]>p.boundary || s.uavPose[3]>p.boundary
    condition = condition1 || condition2
    return condition
end

In [4]:
function POMDPs.generate_s(p::UAVMDP, s::mystate, a::myaction)
    # calculate target state
    target_dt_distance = p.target_velocity
    sensor_noise = rand(Normal(0, p.target_std), 2)
    curr_targ = s.targetPose + target_dt_distance + sensor_noise # next_target_pos(p, s.targetPose)
    # calculate UAV state
    curr_angle = s.uavHeading + a.angle
    xy_dt_distance = a.xy_speed*SVector(cos(curr_angle), sin(curr_angle)) # careful
    z_dt_distance = a.z_speed
    xyz_dt_distance = SVector(xy_dt_distance[1], xy_dt_distance[2], z_dt_distance)
    curr_pos = s.uavPose + xyz_dt_distance
    return mystate(curr_pos, curr_angle, curr_targ)
end

In [5]:
function myreward(p::UAVMDP, s::mystate, a::myaction)
    
    distance_to_target = sqrt((s.uavPose[1]-s.targetPose[1])^2 + (s.uavPose[2]-s.targetPose[2])^2 + s.uavPose[3]^2)
    
    reward = p.r_action + 1/(distance_to_target+1)
#     out scene punish 
    if s.uavPose[1]>p.boundary || s.uavPose[2]>p.boundary
        reward = reward + p.r_outScene

    end
    
    if distance_to_target < p.landing_r
        reward = reward + p.r_reach

    end
    return reward
end

myreward (generic function with 1 method)

In [6]:
function POMDPs.actions(p::UAVMDP)
    action_space = [myaction(i, j, k) for i=-5.0:5.0,j=-pi:pi,k=-5.0:5.0]
    return action_space
end

In [7]:
function POMDPs.generate_sr(p::UAVMDP, s::mystate, a::myaction, rng::AbstractRNG)
    sp = generate_s(p, s, a)
    r = myreward(p, s, a)
    return sp, r
end

In [8]:
POMDPs.initialstate_distribution(p::UAVMDP) = mystate(SVector(10,10,10),0,SVector(0,0))
POMDPs.discount(p::UAVMDP) = p.discount
solver = MCTSSolver(n_iterations=50, depth=20, exploration_constant=5.0)
UAV = UAVMDP()

UAVMDP
  target_velocity: SArray{Tuple{2},Float64,1,2}
  target_std: Float64 0.1
  r_outScene: Float64 -70.0
  r_action: Float64 -10.0
  r_reach: Float64 100.0
  discount: Float64 0.95
  boundary: Int64 100
  landing_r: Int64 1


In [9]:
policy = solve(solver, UAV)

MCTSPlanner{UAVMDP,mystate,myaction,MCTS.SolvedRolloutEstimator{POMDPPolicies.RandomPolicy{MersenneTwister,UAVMDP,BeliefUpdaters.NothingUpdater},MersenneTwister},MersenneTwister}(MCTSSolver(50, 20, 5.0, MersenneTwister(UInt32[0x83ee6371, 0x174cdc46, 0x7daff883, 0x89f1118a], Random.DSFMT.DSFMT_state(Int32[-905514297, 1072999738, 1230926512, 1073651734, 987221518, 1072774405, 813077998, 1073214496, 461231546, 1073022810  …  -637725736, 1072985778, 960321467, 1072905774, 345256196, 341884571, -1152262157, 1418562127, 382, 0]), [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], UInt128[0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x0000000000000000000000000000000

In [10]:
###trial
a = action(policy,mystate(SVector(10,10,10),0,SVector(0,0)))

myaction(1.0, 0.8584073464102069, -5.0)

In [22]:
i=0
terminate = false
reward_sum = 0

current_state = mystate(SVector(10,10,10),0,SVector(0,0)) ##initialize state

while i<100 && !terminate
    current_action = action(policy,current_state)
    current_reward = myreward(UAV, current_state, current_action)
    current_state = generate_s(UAV, current_state, current_action)
    terminate = isterminal(UAV,current_state,current_action)
    reward_sum = reward_sum + current_reward
    i=i+1
    @show reward_sum
end

reward_sum = -9.945416360950874
reward_sum = -19.892611238537526
reward_sum = -29.839288352942816
reward_sum = -39.784183337634154
reward_sum = -49.728654132570036
reward_sum = -59.67523205830949
reward_sum = -69.62371946392753
reward_sum = -79.57281179026734
reward_sum = -89.52173553044395
reward_sum = -99.47154249664548
reward_sum = -109.42358243645722
reward_sum = -119.37747442365304
reward_sum = -129.33241726863
reward_sum = -139.28822210882902
reward_sum = -149.24527999388985
reward_sum = -159.20394359222382
reward_sum = -169.16403554690172
reward_sum = -179.12530266864397
reward_sum = -189.08761934082995
reward_sum = -199.05103138632967
reward_sum = -209.0155570092085
reward_sum = -218.98118732428375
reward_sum = -228.94790491293293
reward_sum = -238.91566509085757
reward_sum = -248.88429753496632
reward_sum = -258.85371805662163
reward_sum = -268.8239884537259
reward_sum = -278.79525490188644
reward_sum = -288.76741437963716
reward_sum = -298.740256035213
reward_sum = -308.71362