# Left turn scenario - abstraction and high fidelity environment

In [1]:
rng = MersenneTwister(1)

MersenneTwister(UInt32[0x00000001], Base.dSFMT.DSFMT_state(Int32[1749029653, 1072851681, 1610647787, 1072862326, 1841712345, 1073426746, -198061126, 1073322060, -156153802, 1073567984  …  1977574422, 1073209915, 278919868, 1072835605, 1290372147, 18858467, 1815133874, -1716870370, 382, 0]), [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 382)

In [2]:
include("../AutomotivePOMDPs/AutomotivePOMDPs.jl")
using AutomotivePOMDPs



In [3]:
using POMDPs, POMDPToolbox, AutoViz, AutomotiveDrivingModels, Reel

In [4]:
env = IntersectionEnv(TInterParams(nlanes_main=1, stop_line=16.0));

In [5]:
abs_pomdp = SingleOIPOMDP(); # discrete, explicit
pomdp = OIPOMDP(env=env);  # continuous, generative

In [6]:
policy = solve(RandomSolver(rng), pomdp)
up = updater(policy);

# Design a safe policy based on a deterministic model

The policy consists of one decision rule based on the time to collision (TTC)

Let $T_{complete}$ be the minimum time it takes for the ego vehicle to cross the intersection. Let $TTC$ be the time it takes for the other vehicle to reach the intersection. 
Then the deicison rule can be described as follows:

if $0<TTC<T_{complete}$

    WAIT
    
else

    GO
    
Let's compute $T_{complete}$:

In [7]:
# identify "safe" state to reach, the beginning of the exit lane
safe_pos = Frenet(pomdp.env.roadway[pomdp.ego_goal], 0.)

Frenet(RoadIndex({1, 0.000000}, {2, 1}), 0.000, 0.000, 0.000)

In [8]:
# always accelerate
struct AlwaysAccPolicy <: Policy
end

function POMDPs.action(policy::AlwaysAccPolicy, b)
    return OIAction(2.0)
end

policy = AlwaysAccPolicy()

# remove cars
pomdp.max_cars = 0

0

In [9]:
s0 = initial_state(pomdp, rng)
o0 = generate_o(pomdp, s0, rng)
hr = HistoryRecorder(rng=rng)
@time hist = simulate(hr, pomdp, policy, up, o0, s0);

  0.742065 seconds (618.10 k allocations: 30.437 MiB, 1.18% gc time)


In [10]:
step = nothing
state = nothing
for (i,s) in enumerate(hist.state_hist)
    ego = get_ego(s)
    if ego.state.posF.s > 0. && get_lane(pomdp.env.roadway, ego).tag == pomdp.ego_goal
        step = i
        state = s
        break
    end
end
T_complete = step*pomdp.ΔT
println("Ego has reached state $(get_ego(state).state.posF) in $step steps")
println("T_complete = $T_complete")

Ego has reached state Frenet(RoadIndex({1, 0.240044}, {2, 1}), 3.361, 0.000, 0.000) in 9 steps
T_complete = 4.5


** TTC Policy assuming constant velocity **

$$TTC = \frac{(s_{coll} - s)}{v}$$

where $s$ is the longitudinal position of the vehicle and $v$ its longitudinal.

In [11]:
#Assume full observability
pomdp.pos_obs_noise = 0.
pomdp.vel_obs_noise = 0.
up = FastPreviousObservationUpdater{obs_type(pomdp)}()

POMDPToolbox.FastPreviousObservationUpdater{Array{Float64,1}}()

In [12]:
# TTC Policy
struct TTCPolicy <: Policy
    pomdp::OIPOMDP
    T_complete::Float64
    
end

function POMDPs.action(policy::TTCPolicy, o::OIObs)
    o_scene = obs_to_scene(policy.pomdp, o)
    ego = get_ego(o_scene)
    if ego.state.v > 0.
        return OIAction(2.0)
    end
    min_TTC = Inf
    for veh in o_scene
        if veh.id != EGO_ID
            posG = veh.state.posG
            inter_x = policy.pomdp.env.params.inter_x
            # linear TTC
            TTC = (inter_x - posG.x)/(veh.state.v*cos(posG.θ))
            if 0 < TTC < min_TTC
                min_TTC = TTC
            end
        end
    end
    if 0 < min_TTC < policy.T_complete
        return OIAction(-4.0)
    else
        return OIAction(2.0)
    end
end

In [18]:
pomdp.max_cars = 100
pomdp.p_birth = 0.9
policy = TTCPolicy(pomdp, T_complete)
s0 = initial_state(pomdp, rng, 20)
o0 = generate_o(pomdp, s0, rng)
hr = HistoryRecorder(rng=rng)
@time hist = simulate(hr, pomdp, policy, up, o0, s0);

  0.049209 seconds (814.82 k allocations: 20.982 MiB, 12.02% gc time)


In [26]:
action_hist = [a.acc for a in hist.action_hist]
insert!(action_hist,1, 0.)
duration, fps, render_hist = animate_scenes(hist.state_hist, action_hist, env, 
                                            overlays=SceneOverlay[IDOverlay()], 
                                            sim_dt=pomdp.ΔT,
                                            cam=StaticCamera(0.,-10.,22.0))
film = roll(render_hist, fps = fps, duration = duration)

**Evaluate the TTC Policy**

In [15]:
using ProgressMeter

In [24]:
n_episodes = 100
max_steps = 300
crashes = 0
successes = 0
time_outs = 0
steps = zeros(n_episodes)

pomdp.max_cars = 100
pomdp.p_birth = 0.9
policy = TTCPolicy(pomdp, T_complete)
@showprogress for ep=1:n_episodes      
    s0 = initial_state(pomdp, rng, 20)
    o0 = generate_o(pomdp, s0, rng)
    hr = HistoryRecorder(rng=rng, max_steps=max_steps)
    hist = simulate(hr, pomdp, policy, up, o0, s0)
    outcome = nothing
    if is_crash(hist.state_hist[end])
        crashes += 1
        outcome = :crash
        break
    elseif length(hist.state_hist) == max_steps
        time_outs += 1
        outcome = :time_out
    else
        successes += 1
        outcome = :success
    end
    
#     println("Sim $ep : finished in $(length(hist.state_hist)), outcome is $outcome ")
end
@printf("Evaluation Results: %2.2f successes, %2.2f crashes, %2.2f time outs, %2.2f \pm %2.2f", 
         successes/n_episodes,
         crashes/n_episodes,
         time_outs/n_episodes,
         mean(steps),
         std(steps))


[32mProgress:   6%|██                                       |  ETA: 0:00:18[39m

Evaluation Results: 0.06 successes, 0.01 crashes, 0.00 time outs, 0.00 pm 0.00

[32mProgress:   7%|███                                      |  ETA: 0:00:17[39m[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:01[39m


In [18]:
?@printf

```
@printf([io::IOStream], "%Fmt", args...)
```

Print `args` using C `printf()` style format specification string, with some caveats: `Inf` and `NaN` are printed consistently as `Inf` and `NaN` for flags `%a`, `%A`, `%e`, `%E`, `%f`, `%F`, `%g`, and `%G`. Furthermore, if a floating point number is equally close to the numeric values of two possible output strings, the output string further away from zero is chosen.

Optionally, an `IOStream` may be passed as the first argument to redirect output.

# Examples

```jldoctest
julia> @printf("%f %F %f %F\n", Inf, Inf, NaN, NaN)
Inf Inf NaN NaN


julia> @printf "%.0f %.1f %f\n" 0.5 0.025 -0.0078125
1 0.0 -0.007813
```


In [20]:
methods(AutomotivePOMDPs.is_crash)