Here we provide some baseline CVaR algorithms.

In [1]:
using DataFrames
using CSV 

"""
    df2MDP(df; γ = 0.90)

Convert a transition table into a dense tabular MDP.

`df` must expose the columns `idstatefrom`, `idaction`, `idstateto`,
`probability` and `reward`. The ids are used directly as array indices.

Returns a named tuple `(S, A, P, R, lSl, lAl, γ)` where `P` and `R` are
`lSl × lAl × lSl` arrays indexed as `[from, action, to]`. Rows that share the
same `(from, action, to)` triple are accumulated.
"""
function df2MDP(df; γ = 0.90)
    states = unique(vcat(df.idstatefrom, df.idstateto))
    actions = unique(df.idaction)
    nstates = length(states)
    nactions = length(actions)
    dims = (nstates, nactions, nstates)
    transition = zeros(dims)
    reward = zeros(dims)
    for row in eachrow(df)
        from, act, to = row.idstatefrom, row.idaction, row.idstateto
        transition[from, act, to] += row.probability
        reward[from, act, to] += row.reward
    end
    return (S = states, A = actions, P = transition, R = reward,
            lSl = nstates, lAl = nactions, γ = γ)
end

"""
    distribution(X, p)

Build a probability mass function table from values `X` and weights `p`.

Entries whose weight is not strictly positive are dropped, weights of equal
values are summed, and the result is sorted by value. Returns a `DataFrame`
with columns `X` and `p`.
"""
function distribution(X, p)
    raw = DataFrame(X = X, p = p)
    positive = raw[raw.p .> 0, :]
    # Merge duplicated support points into a single row each.
    pmf = combine(groupby(positive, ["X"])) do grp
        DataFrame(p = sum(grp.p))
    end
    return sort!(pmf, ["X"])
end
"""
    search_CVaR(d, α)

Return the CVaR of the discrete distribution `d` at risk level `α`.

`d` must expose the columns `X` (sorted values), `p` (probabilities),
`Psum` (cumulative sum of `p`) and `XTP` (cumulative sum of `X .* p`);
`search_CVaR_Vec` prepares the last two.

With `λ = 1 - α`, the result is the mean of the lowest `λ` probability mass.
When `λ == 0` the smallest value with positive probability is returned,
converted to floating point so both branches return the same kind of number.
"""
function search_CVaR(d, α)
    λ = 1 - α
    if λ == 0
        # Index the column directly instead of copying a filtered table.
        return float(minimum(d.X[d.p .> 0]))
    end
    # First row whose cumulative probability reaches λ, clamped to the last row
    # in case rounding leaves the total mass slightly below λ.
    k = min(searchsortedfirst(d.Psum, λ), length(d.Psum))
    # XTP[k] counts the full mass of row k; the second term removes the part of
    # that mass lying beyond λ (it is zero or negative).
    return (d.XTP[k] + d.X[k] * (λ - d.Psum[k])) / λ
end

"""
    search_CVaR_Vec(d, Alpha)

Evaluate the CVaR of the distribution `d` (columns `X` sorted ascending and
`p`) at every risk level in `Alpha`, returning one value per level.

The cumulative columns `Psum` and `XTP` needed by `search_CVaR` are computed
once on a private working table, so the caller's `d` is left unmodified.
"""
function search_CVaR_Vec(d, Alpha)
    work = DataFrame(
        X = d.X,
        p = d.p,
        Psum = cumsum(d.p),
        XTP = cumsum(d.X .* d.p),
    )
    return [search_CVaR(work, α) for α in Alpha]
end
# Example: a two-component mixture used to exercise the CVaR routines.
# Values taken by each component (component 1 repeats values on purpose,
# so `distribution` has duplicates to merge).
X_1 = [1,1,2,2]
X_2 = [50,60,70,80]
# Conditional pmf of each component; entry j pairs with entry j of X_1 / X_2.
p1j = [0.1,0.2,0.5,0.2]
p2j = [0.5,0.3,0.1,0.1]
# Probability of drawing each component.
p_1 = 0.1
p_2 = 0.9
# Joint pmf: concatenate the values and weight each conditional pmf by its
# component probability.
X = [X_1; X_2]
p = [p_1 * p1j; p_2 * p2j]
# Risk level of interest.
α = 0.7
# Collapse duplicates and sort by value.
d = distribution(X,p)

Row,X,p
Unnamed: 0_level_1,Int64,Float64
1,1,0.03
2,2,0.07
3,50,0.45
4,60,0.27
5,70,0.09
6,80,0.09


# Targeted Value MDP

In this document we focus on CVaR definition of
$$\text{CVaR}^\pi_\alpha (R_N) = \sup_{x \in \mathbb{R}}\{ x + \frac{1}{1-\alpha}\mathbb{E}[(R_N-x)_{-}]\}$$
where $(x)_{-} = \min(0,x)$ represents the negative part of $x$, and the supremum is attained at $x^\star = \text{VaR}_\alpha(R_N)$:
$$\text{CVaR}^\pi_\alpha (R_N) = \text{VaR}_\alpha(R_N) + \frac{1}{1-\alpha}\mathbb{E}[(R_N-\text{VaR}_\alpha(R_N))_{-}]$$
We denote $\text{VaR}_\alpha(R_N) = F_{R_N}^{-1}(1-\alpha) = Q_{R_N}(1-\alpha) = \inf\{r | F_{R_N}(r) \geq 1-\alpha\}$.

We can denote the CVaR objective in MDP similarly as:

$$\max_{\pi \in \Pi_{D}}\text{CVaR}^\pi_\alpha (R_N) = \max_{\pi \in \Pi_{D}}\sup_{x \in \mathbb{R}}\{ x + \frac{1}{1-\alpha}\mathbb{E}[(R_N-x)_{-}]\} = \max_{\pi \in \Pi_{D}}\{ x ^\star + \frac{1}{1-\alpha}\mathbb{E}[(R_N-x^\star)_{-}]\}$$

Note that $x^\star = \text{VaR}_\alpha(R_N) = F_{R_N}^{-1}(1-\alpha)$ is a function of $\alpha$. In this framework, instead of optimizing for a given $\alpha$, the user defines a target value of interest $x^\star$, and we provide a CVaR MDP which optimizes the objective for that target.

In [2]:
"""
    targetMean(d, X)

For every target `x` in `X`, return the expected shortfall below the target,
`sum(p .* min.(0, d.X .- x))`, for the distribution `d` with value column `X`
and probability column `p`.

Only column access is used, so any table exposing `d.X` and `d.p` works. The
scalar `0.0` replaces a freshly allocated zeros vector per target and yields
the same values.
"""
function targetMean(d, X)
    return [transpose(d.p) * min.(0.0, d.X .- x) for x in X]
end
"""
    targetCVaR(d, Alpha, X = d.X)

Compute the CVaR of the distribution `d` for every risk level in `Alpha` by
maximising `x + targetMean(x) / (1 - α)` over the candidate targets `X`.

For `α >= 1` the division is undefined, so the smallest value with positive
probability is returned instead. It is converted to floating point so the
result vector has one element type rather than a mix of integers and floats.
"""
function targetCVaR(d, Alpha, X = d.X)
    shortfall = targetMean(d, X)
    return [
        if α >= 1
            # Evaluated lazily so that it is only required when such an α occurs.
            float(minimum(d.X[d.p .> 0]))
        else
            maximum(X .+ shortfall ./ (1 - α))
        end for α in Alpha
    ]
end
# Grid of 1001 evenly spaced risk levels covering [0, 1].
Alpha = collect(LinRange(0,1,1001));
# CVaR via maximisation over target values.
# NOTE(review): this binds the name `CVaR` to a vector, while CVaR_MDP further
# down calls `CVaR(...)` as a function — confirm which one is intended.
CVaR = targetCVaR(d,Alpha)
# CVaR via the cumulative-sum search.
CVaR2 = search_CVaR_Vec(d,Alpha)
# Largest absolute disagreement between the two methods.
maximum(abs.(CVaR .- CVaR2))

1.865174681370263e-14

Following Bäuerle, we define
$$w_{n\pi}(s,x) = \mathbb{E}^\pi[(R_n(s)-x)_{-}] \qquad, \forall s \in S, x \in \mathbb{R},\pi \in \Pi$$
$$w_{n}(s,x) = \max_{\pi \in \Pi} w_{n\pi}(s,x) \qquad, \forall s \in S, x \in \mathbb{R}$$

In [3]:
# Load the transition table from a machine-specific absolute path.
# NOTE(review): adjust the path when running on another machine.
df = CSV.read("C:/GITHUB/rmdp-jl-2/data/TabMDP/riverswim.csv", DataFrame)
# The file uses zero-based ids; shift the three id columns by one because
# df2MDP uses them directly as (one-based) Julia array indices.
df[:,["idstatefrom","idaction","idstateto"]] = df[:,["idstatefrom","idaction","idstateto"]] .+ 1
# Dense tabular MDP with discount factor 0.90.
mdp = df2MDP(df;γ = 0.90)

(S = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20], A = [1, 2], P = [1.0 0.3911739113271663; 1.0 0.1293625065352247; … ; 0.0 0.0; 0.0 0.0;;; 0.0 0.6088260886728338; 0.0 0.2618114047919416; … ; 0.0 0.0; 0.0 0.0;;; 0.0 0.0; 0.0 0.6088260886728338; … ; 0.0 0.0; 0.0 0.0;;; … ;;; 0.0 0.0; 0.0 0.0; … ; 1.0 0.1293625065352247; 0.0 0.0;;; 0.0 0.0; 0.0 0.0; … ; 0.0 0.2618114047919416; 1.0 0.1293625065352247;;; 0.0 0.0; 0.0 0.0; … ; 0.0 0.6088260886728338; 0.0 0.8706374934647755], R = [5.0 0.0; 5.0 0.0; … ; 0.0 0.0; 0.0 0.0;;; 0.0 0.0; 0.0 0.0; … ; 0.0 0.0; 0.0 0.0;;; 0.0 0.0; 0.0 0.0; … ; 0.0 0.0; 0.0 0.0;;; … ;;; 0.0 0.0; 0.0 0.0; … ; 5.0 0.0; 0.0 0.0;;; 0.0 0.0; 0.0 0.0; … ; 0.0 0.0; 5.0 0.0;;; 0.0 0.0; 0.0 0.0; … ; 0.0 0.0; 0.0 87.06374934647752], lSl = 20, lAl = 2, γ = 0.9)

In [6]:
"""
    target_agg(mdp; digits = 3)

Build the grid of target values for `mdp`.

The grid runs from `minimum(mdp.R) / (1 - mdp.γ)` to
`maximum(mdp.R) / (1 - mdp.γ)`, both rounded to `digits` decimals, in steps
of `10.0^(-digits)`. Every grid point is rounded to `digits` decimals as well.
"""
function target_agg(mdp; digits = 3)
    bounds = map(r -> round(r / (1 - mdp.γ); digits = digits), extrema(mdp.R))
    step = 10.0^(-digits)
    grid = first(bounds):step:last(bounds)
    return round.(grid; digits = digits)
end
"""
    I(xs, X; digits = 3, lXl = length(X))

Map target value(s) `xs` to 1-based positions on the grid `X`, which is
assumed to start at `X[1]` with spacing `10.0^(-digits)`. Positions are
rounded to the nearest grid point and clamped to `1:lXl`. Works for a scalar
or an array of targets.
"""
function I(xs, X; digits = 3, lXl = length(X))
    scale = 10.0^digits
    return min.(lXl, max.(1, round.(Int, ((xs .- X[1]) .* scale) .+ 1)))
end

"""
    L(MDP, v, a, X; vnew = zeros(size(v)), digits = 3)

Apply one step of the target-value operator for action `a`.

For every state `s` and grid target `X[i]`, add to `vnew[s, i]` the sum over
next states `s2` of `MDP.P[s, a, s2] * v[s2, j]`, where `j` is the grid
position of the shifted target `(X[i] - MDP.R[s, a, s2]) / MDP.γ`.

`v` and `vnew` are `states × targets` matrices. `vnew` is updated in place and
returned; pass a view (not a slice copy) to write into an existing array.
"""
function L(MDP, v, a, X; vnew = zeros(size(v)), digits = 3)
    for s in MDP.S, s2 in MDP.S
        p = MDP.P[s, a, s2]
        # Transitions that cannot occur contribute nothing.
        iszero(p) && continue
        # Position of every shifted target; computed once per (s, s2) pair.
        shifted = I((X .- MDP.R[s, a, s2]) ./ MDP.γ, X; digits = digits)
        # Target i reads only its own shifted position, not the whole grid.
        for (i, j) in enumerate(shifted)
            vnew[s, i] += v[s2, j] * p
        end
    end
    return vnew
end

L (generic function with 1 method)

In [7]:
# Grid resolution: 0 decimals, i.e. integer-spaced targets.
digits = 0
# Target grid for the MDP (this rebinds the global `X` used by earlier cells).
X = target_agg(mdp; digits = digits)
lXl = length(X)
# Number of decision stages.
T = 3
# One states × targets value matrix per stage, plus the terminal stage T+1.
# NOTE(review): every stage, including the terminal one, starts at zero. Since
# L only re-indexes and averages v, the recursion would then stay at zero —
# confirm whether the terminal stage should instead hold min.(0, -x).
v = [ zeros(mdp.lSl,lXl) for n in 1:(T+1) ];

In [10]:
# Backward induction: stage t is computed from stage t+1, so the stages are
# visited from the last one down to the first.
for t in T:-1:1
    Q = zeros(mdp.lSl, lXl, mdp.lAl)
    for a in mdp.A
        # Store the returned matrix explicitly: passing `Q[:,:,a]` as `vnew`
        # would hand L a copy and the update would be lost.
        Q[:, :, a] = L(mdp, v[t+1], a, X; digits = digits)
    end
    # Best action per (state, target), discounted.
    v[t] = mdp.γ .* dropdims(maximum(Q; dims = 3); dims = 3)
end

In [11]:
v

4-element Vector{Matrix{Float64}}:
 [0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]
 [0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]
 [0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]
 [0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

In [None]:


# Finite-horizon CVaR dynamic program over T stages on a grid of M risk levels.
# Returns a named tuple with, per stage: Q (state × level × action values),
# π (maximising action index per state and level), V (one table per state)
# and V̂ (maximal value per state and level).
# NOTE(review): `RtγV` and `CVaR2Distribution` are not defined in the visible
# part of the file, and an earlier notebook cell binds `CVaR` to a vector, so
# the call `CVaR(...)` below needs a function of that name from elsewhere —
# confirm before running.
function CVaR_MDP(MDP;T=10,M=10001)
    # M evenly spaced risk levels running from 1 down to 0.
    Alpha = LinRange(1,0,M)
    V = [[DataFrame() for s in MDP.S] for t in 1:(T+1)]
    # Terminal stage: a point mass at 0 for every state.
    V[T+1] = [DataFrame(X = [0], p = [1]) for s in MDP.S]
    V̂ = [zeros(MDP.lSl,M) for t in 1:T]
    # Local variable; shadows the constant π inside this function only.
    π = [zeros(Int64,MDP.lSl,M) for t in 1:T]
    Q = [zeros(MDP.lSl,M,MDP.lAl) for t in 1:T]
    # Backward induction from the last stage to the first.
    for t in T:-1:1
        for s in MDP.S
            for a in MDP.A
                # Values for all risk levels of taking action a in state s,
                # built from rewards, discount, next-stage tables and
                # transition probabilities.
                Q[t][s,:,a] = CVaR(RtγV(MDP.R[s,a,:],MDP.γ,V[t+1],MDP.P[s,a,:]),Alpha)
            end
        end
        # Maximise over the action dimension; opt[1] holds the values and
        # opt[2] the Cartesian indices of the maximisers.
        opt = findmax(Q[t],dims=3)
        V̂[t] = opt[1][:,:,1]
        # Third index component is the position along the action dimension.
        π[t] = [i[3] for i in opt[2][:,:,1]]
        for s in MDP.S
            # Presumably converts the per-level values back into a
            # distribution table — verify against CVaR2Distribution.
            V[t][s] = CVaR2Distribution(V̂[t][s,:],Alpha)
        end
    end
    return (Q=Q,π=π,V=V,V̂=V̂)
end

In [None]:
# NOTE(review): at top level `π` is Julia's built-in constant unless an
# earlier cell rebound it; the `π` inside CVaR_MDP is local to that function.
# Confirm whether a policy returned by CVaR_MDP was meant here.
round( π ,digits=3 )