In [1]:

using POMDPs, QuickPOMDPs, MCTS, DiscreteValueIteration, POMDPSimulators, POMDPModels, POMDPPolicies, POMDPModelTools
using Distributions, Combinatorics, StaticArrays, Statistics
using FileIO, JLD2, TickTock
global Number_level = 10;

## State Functions

In [2]:
function state_cnt(n, S) #n = number of units; S = number of states
    if S==1
        return 1
    end
    return binomial(n+S-1, S-1)
end

function state_index(S,s) #s = state vector
    #S = length(s)
    if S == 1
        return 1
    end
    if s[S]==0
        return state_index(S-1, s[1:(S-1)])
    end
    
    n_prev = sum(s)
    prev = state_cnt(n_prev, S-1) # count of all states with s[S]=0
    inc = prev
    for i in 1:(s[S]-1)
        inc = inc/(n_prev+S-2)*n_prev #count of all states with s[S]=i
        prev = prev + inc
        n_prev = n_prev - 1
    end
    return prev + state_index(S-1, s[1:(S-1)])
end

function state_vec(n, S, ind) # n = number of units; S = number of states; ind = state index
    if ind < 0.5 || ind > state_cnt(n, S) + 0.5
        println("index is out of range!")
        return -1
    end
    if S==1 
        return [n]
    end
    if ind < state_cnt(n, S-1) + 0.5
        return push!(state_vec(n, S-1, ind), 0)
    end
    prev = state_cnt(n, S-1)
    inc = prev
    last_state = 0
    n_prev = n
    while ind > prev + 0.5
        inc = inc/(n_prev+S-2)*n_prev #count of all sta_tes with s[S]=last_state
        prev += inc
        n_prev -= 1
        last_state += 1
    end
    return push!(state_vec(n-last_state, S-1, ind - prev + inc), last_state)
end

#using hueristic rule for rollout
mutable struct nNRollout <: Policy
    n::Int64
    N::Int64
end

mutable struct nmNRollout <: Policy
    n::Int64
    m::Int64
    N::Int64
end

mutable struct mystate
    state::Vector{Int64};
end

In [3]:
function POMDPs.action(p::nNRollout, s::mystate)
        local_a = 0;
        local_s = s.state;
        nN = sample(1:Number_level, 2, replace = true);
        p.N = maximum(nN);
        p.n = minimum(nN);
        if sum(local_s[p.N:Number_level])>=1
        local_a = p.n;
        end
        return local_a; 
end

function POMDPs.action(p::nmNRollout, s::mystate)
    local_a = 0;
    local_s = s.state;
    nmN = sample(1:Number_level, 3, replace = true);
    p.N = maximum(nmN);
    p.n = minimum(nmN);
    p.m = sum(nmN)-p.N-p.n;
    if sum(local_s[p.N:Number_level])>=1 || sum(local_s[p.m:Number_level])>=2
        local_a = p.n;
    end
    return local_a; 
end

In [4]:
function findNn()
    println("Finding best nN policy...")
for N in 2:(Number_level)
    for n in 1:N
       trials=10000;
        simsteps = 100;
        results=zeros(trials)*0.1
        Threads.@threads for h in 1:trials
        s=repeat(1:1,NumberUnits)
        r1=[]
        a1=[]
        nN=[n,N]
        r=repeat(1:1,NumberUnits)*1.0
        failedcnt = 0 # count failed components
         for k in 1:simsteps
            a =decison(nN,s)
                    r2=0.0
            if 1 in a # if any replacement occurs
            for i in 1:NumberUnits
                if s[i] == Number_level
                    failedcnt += 1
                end
                 s[i],r[i]=generative(s[i],a[i]+2,i,234); 
                 # a=2 in generative means component continueing; a=3 in generative means component replaced
                  r2+=r[i]
                    end 
            else # if no replacement 
               for i in 1:NumberUnits
                if s[i] == Number_level
                    failedcnt += 1
                end
                 s[i],r[i]=generative(s[i],1,i,234);

                  r2+=r[i]
                    end              
            end
            if failedcnt >= limit # system fails
                r2 += system_penalty
            end
            s=s;
            append!(a1,a)
            append!(r1,r2)
        end
        rk=0
        for i in 1:simsteps
            rk=0.95*rk+r1[simsteps-i+1]
        end
            results[h]=rk
        end
        rewards_nN[n,N] = mean(results);
            rewards_nN_std[n,N] = var(results)^(0.5);
    end
end
(max_rward,nN) = findmax(rewards_nN);
println("Max rewards of (N, n) policy is ", max_rward, "  n is ", nN[1], "  N is ",nN[2]);
return  max_rward,rewards_nN_std[nN[1],nN[2]],nN;
end


findNn (generic function with 1 method)

In [5]:
# decisions are made given (n, N)
function decison(nN, s,n=0.2)
    a_final=[]
     if length(s[s .>= nN[2]])>0
         for i in 1:NumberUnits
             append!(a_final,Int(s[i]>=nN[1]))
             end
         return a_final
         # a_final is binary vector
     end
     a_0=repeat(0:0,NumberUnits);
      return a_0
 end

decison (generic function with 2 methods)

In [6]:
Number_level=10
T=zeros(Number_level,Number_level,3,100);

for i in 1:100
    #fullname = "/Users/huhan/Library/CloudStorage/GoogleDrive-hhu98@wisc.edu/My Drive/Master's Thesis/Heterogeneous/TM_MATRIX/tm"*string(i);
    fullname = "J:/Master's Thesis/Heterogeneous/TM_MATRIX/tm"*string(i);
    # generate_transition_matrix(Number_level, fullname) 
    Transition_matrix1 = load(fullname*".jld2","transition_matrix");
    
    T[:,:,1,i].=Transition_matrix1;
    T[:,:,2,i].=Transition_matrix1;
    for j in 1:10
        T[j,:,3,i].=Transition_matrix1[1,:];
        end
    T[10,:,1,i].=Transition_matrix1[1,:]
T[10,:,2,i].=Transition_matrix1[1,:]
end

In [7]:
T[:,:,1,1]

10×10 Matrix{Float64}:
 0.232649  0.229122  0.130585  0.0954644  …  0.0507595  0.0416229  0.00861626
 0.0       0.218724  0.163234  0.128353      0.0899794  0.0431818  0.00868773
 0.0       0.0       0.166395  0.182476      0.111027   0.0773355  0.043199
 0.0       0.0       0.0       0.269557      0.125296   0.0981466  0.0533485
 0.0       0.0       0.0       0.0           0.125828   0.109077   0.0611228
 0.0       0.0       0.0       0.0        …  0.215491   0.17866    0.0619123
 0.0       0.0       0.0       0.0           0.352355   0.221208   0.0646198
 0.0       0.0       0.0       0.0           0.471891   0.449917   0.0781922
 0.0       0.0       0.0       0.0           0.0        0.591116   0.408884
 0.232649  0.229122  0.130585  0.0954644     0.0507595  0.0416229  0.00861626

In [8]:
function generative(s, a,k, rng=23)       #s is a vector of number units at each level and a is the number of units we will repair
    #     if s==Number_level # component fails
    #     s=1
    #     crd = Categorical(T[s,:,1,k]);
    #         s = rand(crd);
    #         r=failure_penalty + normal_operation +  setup_cost/NumberUnits;
    #     return (sp=s, r=r)
    #    end
        r=0
     if a==1 # no replacement in system
            crd = Categorical(T[s,:,1,k]);
            s = rand(crd);
            r = normal_operation ;
         return (sp=s, r=r)
        end
    if a==2 # replacement occurs in system, component not replaced
            crd = Categorical(T[s,:,1,k]); # transition same as a=1
            s = rand(crd);
            r = normal_operation +  setup_cost/NumberUnits ;
             return (sp=s, r=r)
        end
    if a==3 # replacement occurs in system, component replaced 
      
        if s==Number_level
        s=1
        crd = Categorical(T[s,:,3,k]);
        s = rand(crd);
       
        r=failure_penalty + normal_operation +  setup_cost/NumberUnits ;
        return (sp=s, r=r)
        else
        s1=deepcopy(s)
        s=1
        r = maintenance_penalty+  setup_cost/NumberUnits
        crd = Categorical(T[s,:,3,k]);
        s = rand(crd);
        r+= normal_operation  ;
#            r*=(s1/10);
        return (sp=s, r=r)
#         end
end
    end
end

generative (generic function with 2 methods)

In [9]:
using DataFrames
df= DataFrame(u=[],K=[],n=[],s=[],m=[],f=[],p=[],lamb=[],mean=[],std=[])

cost=[ [0	-200	-100	-1200 -2000],
    [0	-400	-100	-1200 -2000],
    [0	-600	-100	-1200 -2000],
    [0	-800	-100	-1200 -2000],
    [0	-1000	-100	-1200 -2000],
    [0	-1200	-100	-1200 -2000],
    [0	-1400	-100	-1200 -2000],
    [0	-1600	-100	-1200 -2000],
    [0	-1800	-100	-1200 -2000],
    [0	-2000	-100	-1200 -2000],
    [0  -2400   -100    -1200 -2000],
    [0  -2800   -100    -1200 -2000],
    [0  -3200   -100    -1200 -2000]
] 


for units in [60,50,40,30,25,20]
    for K in 2:2:convert(Int64, floor((units+1)/2))
 
    global NumberUnits=convert(Int64,units)


# global fullname = "tm10.jld2";
# fullname = "tm10";
global limit=convert(Int64,K)

for c in cost
    tick()
   print(units)  
   print("K : ")
   println(K)
   global normal_operation,setup_cost,maintenance_penalty,failure_penalty,system_penalty=c
   println(c)   
   global  simsteps = 100;
global repetition = 10000;
global rewards_nN = zeros(Number_level,Number_level);
global rewards_nN = rewards_nN.+(-100000000.0);
global rewards_nN_std = zeros(Number_level,Number_level);
global rewards_nN_std = rewards_nN_std.+(-100000000.0);
global discount_factor =0.95
global temp_rewards = zeros(repetition,1);

rewards,std, nN=findNn()

trials=10000;
simsteps = 100;
results=zeros(trials)*0.1
Threads.@threads for h in 1:trials
s=repeat(1:1,NumberUnits)
r1=[]
a1=[]   
r=repeat(1:1,NumberUnits)*1.0
 for k in 1:simsteps
    a =decison(nN,s)
            r2=0.0
            failedcnt = 0 
    if 1 in a
    for i in 1:NumberUnits
        if s[i] == Number_level
            failedcnt += 1
        end
         s[i],r[i]=generative(s[i],a[i]+2,i,234);
          r2+=r[i]
            end 
    else
       for i in 1:NumberUnits
        if s[i] == Number_level
            failedcnt += 1
        end
         s[i],r[i]=generative(s[i],1,i,234);
          r2+=r[i]
            end              
    end
    s=s;
    if failedcnt >= limit # system fails
        r2 += system_penalty
    end
    append!(a1,a)
    append!(r1,r2)
end
rk=0
for i in 1:simsteps
    rk=0.95*rk+r1[simsteps-i+1]
end
    results[h]=rk
end
println(units)
        println(var(results)^0.5)
println(mean(results))


pushfirst!(df,[NumberUnits,limit,normal_operation,setup_cost,maintenance_penalty,failure_penalty,system_penalty,nN,mean(results),var(results)^0.5])
        
tock() 
    end
    println("-------------------------")
end
end
end
end

60K : 

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-17T20:23:48.827


2
[0 -200 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -100315.81104375812  n is 6  N is 6
60
5752.830743633014
-76883.81622237063
60K : 2
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3766.4046763s: 1 hour, 2 minutes, 46 seconds, 404 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-17T21:26:35.531


Max rewards of (N, n) policy is -104075.8175516111  n is 6  N is 6
60
5778.422878882232
-80708.13097445577
60K : 2
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3748.4449889s: 1 hour, 2 minutes, 28 seconds, 444 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-17T22:29:04.047


Max rewards of (N, n) policy is -107846.59964169294  n is 6  N is 6
60
5758.896506948104
-84483.48568532671
60K : 2
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3899.5379288s: 1 hour, 4 minutes, 59 seconds, 537 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-17T23:34:03.660


Max rewards of (N, n) policy is -111670.11708236134  n is 6  N is 8
60
5759.480554745403
-88275.99170382285
60K : 2
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3709.9359623s: 1 hour, 1 minute, 49 seconds, 935 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-18T00:35:53.677


Max rewards of (N, n) policy is -115432.40574962935  n is 6  N is 6
60
5699.859147645025
-92106.28016278362
60K : 2
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3702.9992519s: 1 hour, 1 minute, 42 seconds, 999 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-18T01:37:36.746


Max rewards of (N, n) policy is -119173.9645120039  n is 6  N is 7
60
5714.6825024582395
-95843.27804027966
60K : 2
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3838.5395401s: 1 hour, 3 minutes, 58 seconds, 539 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-18T02:41:35.359


Max rewards of (N, n) policy is -122934.01521074596  n is 6  N is 9
60
5811.876218599622
-99695.24570388404
60K : 2
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3663.8787706s: 1 hour, 1 minute, 3 seconds, 878 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-18T03:42:39.309


Max rewards of (N, n) policy is -126323.2768471546  n is 5  N is 10
60
5207.302886010806
-104852.68757283039
60K : 2
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         3661.661467s: 1 hour, 1 minute, 1 second, 661 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-18T04:43:41.051


Max rewards of (N, n) policy is -128681.1911255897  n is 5  N is 10
60
5302.122432063477
-107211.53754489699
60K : 2
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3754.9532568s: 1 hour, 2 minutes, 34 seconds, 953 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-18T05:46:16.076


Max rewards of (N, n) policy is -131066.22479776033  n is 5  N is 10
60
5362.304338385801
-109666.88357421852
60K : 2
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3621.0034276s: 1 hour, 21 seconds, 3 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-18T06:46:37.149


Max rewards of (N, n) policy is -135881.14764689884  n is 5  N is 10
60
5553.313070931014
-114449.15992750025
60K : 2
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3612.7174786s: 1 hour, 12 seconds, 717 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-18T07:46:49.936


Max rewards of (N, n) policy is -140633.0388596771  n is 5  N is 10
60
5747.071829288331
-119252.87717577863
60K : 2
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3696.5378149s: 1 hour, 1 minute, 36 seconds, 537 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-18T08:48:26.546


Max rewards of (N, n) policy is -145532.8911540935  n is 5  N is 10
60
5965.436649459975
-123948.8634321107
-------------------------
60K : 4
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3579.7890502s: 59 minutes, 39 seconds, 789 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-18T09:48:06.403


Max rewards of (N, n) policy is -96672.7001487576  n is 5  N is 6
60
3128.252099548955
-68282.73040218427
60K : 4
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3571.2487097s: 59 minutes, 31 seconds, 248 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-18T10:47:37.720


Max rewards of (N, n) policy is -100435.58310279893  n is 5  N is 5
60
3126.883354111354
-72091.98349112482
60K : 4
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3645.0389756s: 1 hour, 45 seconds, 38 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-18T11:48:22.829


Max rewards of (N, n) policy is -104278.7552797461  n is 5  N is 5
60
3163.84821740778
-75852.44768034274
60K : 4
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3547.4961671s: 59 minutes, 7 seconds, 496 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-18T12:47:30.393


Max rewards of (N, n) policy is -108162.41157182891  n is 5  N is 5
60
3104.9592764745375
-79580.44226045551
60K : 4
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3538.7782033s: 58 minutes, 58 seconds, 778 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-18T13:46:29.250


Max rewards of (N, n) policy is -111804.97170548563  n is 5  N is 8
60
3154.151091956853
-83454.11474311647
60K : 4
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3590.8847224s: 59 minutes, 50 seconds, 884 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-18T14:46:20.203


Max rewards of (N, n) policy is -115564.30418839295  n is 5  N is 5
60
3116.912002418009
-87212.97380147343
60K : 4
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3555.9386594s: 59 minutes, 15 seconds, 938 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-18T15:45:36.209


Max rewards of (N, n) policy is -119401.78090929551  n is 5  N is 7
60
3109.652566119675
-90876.20176004138
60K : 4

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3521.6642674s: 58 minutes, 41 seconds, 664 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-18T16:44:17.951



[0 -1600 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -123111.16636645666  n is 5  N is 9
60
3557.738721490225
-94147.4975358251
60K : 4
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3561.8566368s: 59 minutes, 21 seconds, 856 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-18T17:43:39.874


Max rewards of (N, n) policy is -126432.56234302542  n is 5  N is 10
60
5121.541536865007
-98309.05883459658
60K : 4
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3578.6156838s: 59 minutes, 38 seconds, 615 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-18T18:43:18.557


Max rewards of (N, n) policy is -128786.36068059511  n is 5  N is 10
60
5215.17066491288
-100655.25640348249
60K : 4
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3522.7150063s: 58 minutes, 42 seconds, 715 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-18T19:42:01.350


Max rewards of (N, n) policy is -133583.9583646761  n is 5  N is 10
60
5413.857258957955
-105410.3115757246
60K : 4
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3569.7285726s: 59 minutes, 29 seconds, 728 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-18T20:41:31.145


Max rewards of (N, n) policy is -138387.77376239325  n is 5  N is 10
60
5607.236694722676
-110230.19293715277
60K : 4
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3539.0239399s: 58 minutes, 59 seconds, 23 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-18T21:40:30.236


Max rewards of (N, n) policy is -143252.88959964126  n is 4  N is 10
60
5764.106387801374
-115097.53169202524


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3508.0721845s: 58 minutes, 28 seconds, 72 milliseconds


-------------------------
60K : 6
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-18T22:38:58.397


Max rewards of (N, n) policy is -93012.79844403769  n is 5  N is 5
60
2964.0176703788816
-68078.32462236486
60K : 6
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3571.2052039s: 59 minutes, 31 seconds, 205 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-18T23:38:29.669


Max rewards of (N, n) policy is -96816.70240939067  n is 5  N is 5
60
2934.5840760339347
-71833.16356997383
60K : 6
[0 -600 -100 -1200 -2000]


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3503.1897064s: 58 minutes, 23 seconds, 189 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-19T00:36:52.924


Finding best nN policy...
Max rewards of (N, n) policy is -100603.71777895464  n is 5  N is 7
60
2945.1701390078106
-75639.08306287568
60K : 6
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3507.2956279s: 58 minutes, 27 seconds, 295 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-19T01:35:20.296


Max rewards of (N, n) policy is -104315.45772794557  n is 5  N is 5
60
2941.849927754653
-79356.20055976158
60K : 6
[0 -1000 -100 -1200 -2000]

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3561.8083488s: 59 minutes, 21 seconds, 808 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-19T02:34:42.179



Finding best nN policy...
Max rewards of (N, n) policy is -108150.7985557345  n is 5  N is 5
60
2923.1013016851794
-83140.98008475489
60K : 6
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3487.1683165s: 58 minutes, 7 seconds, 168 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-19T03:32:49.414


Max rewards of (N, n) policy is -112008.54117324186  n is 5  N is 5
60
2947.3043464794814
-86914.96453675901
60K : 6
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3489.7471376s: 58 minutes, 9 seconds, 747 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-19T04:30:59.225


Max rewards of (N, n) policy is -115813.8254052401  n is 5  N is 5
60
2984.520027034483
-90751.5810961326
60K : 6
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         3539.878455s: 58 minutes, 59 seconds, 878 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-19T05:29:59.170


Max rewards of (N, n) policy is -119547.82394993506  n is 5  N is 8
60
2982.456503796411
-94471.06481708762


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3487.5420456s: 58 minutes, 7 seconds, 542 milliseconds


60K : 6
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-19T06:28:06.777


Max rewards of (N, n) policy is -123286.92523157208  n is 5  N is 9
60
3378.7163082435873
-97241.53939921426
60K : 6
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3493.6657795s: 58 minutes, 13 seconds, 665 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-19T07:26:20.508


Max rewards of (N, n) policy is -126655.10920843427  n is 5  N is 10
60
4755.344462791306
-96965.18751269543
60K : 6
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3538.4014667s: 58 minutes, 58 seconds, 401 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-19T08:25:18.986


Max rewards of (N, n) policy is -131527.21555677784  n is 5  N is 10
60
4971.552484041488
-101815.40880667431
60K : 6
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3479.1092717s: 57 minutes, 59 seconds, 109 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-19T09:23:18.161


Max rewards of (N, n) policy is -136195.13385345007  n is 5  N is 10
60
5233.229996593207
-106692.853283247
60K : 6
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3484.4879643s: 58 minutes, 4 seconds, 487 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-19T10:21:22.715


Max rewards of (N, n) policy is -141031.52538380137  n is 4  N is 10
60
5372.223746332436
-111641.31811881917


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         3521.843052s: 58 minutes, 41 seconds, 843 milliseconds


-------------------------
60K : 8
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-19T11:20:04.636


Max rewards of (N, n) policy is -89904.99520157413  n is 5  N is 6
60
2931.962260903818
-68103.1069210333
60K : 8
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3467.4828472s: 57 minutes, 47 seconds, 482 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-19T12:17:52.186


Max rewards of (N, n) policy is -93688.21227553391  n is 5  N is 6
60
2933.200899296793
-71853.4456145814
60K : 8
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3476.0396194s: 57 minutes, 56 seconds, 39 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-19T13:15:48.303


Max rewards of (N, n) policy is -97396.78617569557  n is 5  N is 5
60
2934.8674505910276
-75655.68015513133
60K : 8
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3517.7011781s: 58 minutes, 37 seconds, 701 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-19T14:14:26.070


Max rewards of (N, n) policy is -101153.38927011874  n is 5  N is 8
60
2955.747502095184
-79386.00727427246


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3468.0979272s: 57 minutes, 48 seconds, 97 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-19T15:12:14.234


60K : 8
[0 -1000 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -105050.32492975761  n is 5  N is 7
60
2951.633100356391
-83169.13783223815
60K : 8
[0 

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3484.6590967s: 58 minutes, 4 seconds, 659 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-19T16:10:18.960


-1200 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -108752.88900922546  n is 5  N is 8
60
2966.9380285725797
-86943.2600545325
60K : 8
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3513.3677714s: 58 minutes, 33 seconds, 367 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-19T17:08:52.394


Max rewards of (N, n) policy is -112545.82724706111  n is 5  N is 5
60
2965.279480172258
-90688.00649287565
60K : 8
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3472.6485459s: 57 minutes, 52 seconds, 648 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-19T18:06:45.108


Max rewards of (N, n) policy is -116329.1379943859  n is 5  N is 7
60
2930.0902790005653
-94528.44127463417
60K : 8
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3480.8785749s: 58 minutes, 878 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-19T19:04:46.053


Max rewards of (N, n) policy is -120017.92452658982  n is 5  N is 9
60
3361.6756579709895
-97094.7726486882
60K : 8
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         3524.068336s: 58 minutes, 44 seconds, 68 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-19T20:03:30.187


Max rewards of (N, n) policy is -123863.92044084182  n is 5  N is 9
60
3395.1014641854936
-100636.43436380822
60K : 8
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3461.7766704s: 57 minutes, 41 seconds, 776 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-19T21:01:12.043


Max rewards of (N, n) policy is -129417.73434365113  n is 5  N is 10
60
4684.487605833785
-100848.14107365234
60K : 8


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3483.8772322s: 58 minutes, 3 seconds, 877 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-19T21:59:15.997


[0 -2800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -134194.4997795326  n is 5  N is 10
60
4976.30808996945
-105541.09352270952
60K : 8
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3495.8372781s: 58 minutes, 15 seconds, 837 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-19T22:57:31.899


Max rewards of (N, n) policy is -138832.7545440486  n is 4  N is 10
60
5163.827137225563
-110687.06939587762
-------------------------
60K : 10
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3463.1046702s: 57 minutes, 43 seconds, 104 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-19T23:55:15.069


Max rewards of (N, n) policy is -87162.20861923262  n is 5  N is 5
60
2915.552469601156
-68093.80817474017
60K : 10
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         3479.388599s: 57 minutes, 59 seconds, 388 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-20T00:53:14.533


Max rewards of (N, n) policy is -90836.12128718958  n is 5  N is 7
60
2944.3395993067506
-71926.9232454376
60K : 10
[

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3482.8375358s: 58 minutes, 2 seconds, 837 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-20T01:51:17.435


0 -600 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -94668.1486432997  n is 5  N is 7
60
2933.278141517516
-75598.88083984777


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3473.3076473s: 57 minutes, 53 seconds, 307 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-20T02:49:10.808


60K : 10
[0 -800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -98416.98998764218  n is 5  N is 8
60
2935.9307898147736
-79415.73023082766
60K : 10
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3463.3146741s: 57 minutes, 43 seconds, 314 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-20T03:46:54.199


Max rewards of (N, n) policy is -102154.78085288551  n is 5  N is 7
60
2917.4494189022384
-83205.74103758116
60K : 10
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3481.9657055s: 58 minutes, 1 second, 965 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-20T04:44:56.229


Max rewards of (N, n) policy is -106020.69098131797  n is 5  N is 5
60
2963.1833908673248
-86948.57918244503
60K : 10
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3468.4153234s: 57 minutes, 48 seconds, 415 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-20T05:42:44.711


Max rewards of (N, n) policy is -109759.13639954962  n is 5  N is 7
60
2951.141733771381
-90710.5966612292


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3471.7492311s: 57 minutes, 51 seconds, 749 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-20T06:40:36.537


60K : 10
[0 -1600 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -113532.64533545077  n is 5  N is 6
60
2941.751274492191
-94530.7620399972


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3498.6977817s: 58 minutes, 18 seconds, 697 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-20T07:38:55.300


60K : 10
[0 -1800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -117350.31504984514  n is 5  N is 8
60
2931.3829545289855
-98184.79937527623
60K : 10
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         3488.936362s: 58 minutes, 8 seconds, 936 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-20T08:37:04.314


Max rewards of (N, n) policy is -121096.2103372097  n is 5  N is 9
60
3430.068098254444
-100632.3984748372
60K : 10
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m        3501.6396532s: 58 minutes, 21 seconds, 639 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-20T09:35:26.021


In [None]:
df

In [None]:
using CSV
CSV.write("Hetero (n,N)-policy.csv", df)

In [None]:
# Transition_matrix = mean(T[:,:,1,1:3],dims=3);
# Transition_matrix=(Transition_matrix./sum(Transition_matrix,dims=2))
# global state_number = state_cnt(NumberUnits,Number_level);

# global crd = Array{Categorical}(undef,Number_level)
# for i in 1:Number_level
#     global crd[i] = Categorical(Transition_matrix[i,:]);
# end

# global multiunit2 = QuickMDP(
#     gen = function (s, a, rng)       #s is a vector of number units at each level and a is the number of units we will repair
#         local_s = s.state;
#         # println(local_s)
#         # based on s, create a status vector corresponding to each unit
#         degradation_state = repeat(1:1,NumberUnits);
#         k = 1;
#         for i in 1:Number_level
#             for j in 1:local_s[i]
#                 degradation_state[k]=i;
#                 k = k+1;
#             end
#         end
#         r = 0.0;
#         prevent_repair = false;
#         if a!=0
#         number_reset = sum(local_s[a:Number_level]);
#         else
#         number_reset = local_s[Number_level];
#         end
#         #using a for loop to compute next state for each unit

#         for i in 1:(NumberUnits-number_reset)  #a is the number of units we want to preventively repair
#             #in this loop, all units continues
#             degradation_state[i] = rand(crd[degradation_state[i]]);
#             r = r+normal_operation;
#         end
        
#         for i in (NumberUnits-number_reset+1):NumberUnits
#             if degradation_state[i] == Number_level
#                 r = r + failure_penalty;
#                 if prevent_repair == false
#                     r = r+setup_cost;
#                     prevent_repair = true;
#                 end
#             else
#                 r = r + maintenance_penalty;
#                 if prevent_repair == false
#                     r = r+setup_cost;
#                     prevent_repair = true;
#                 end
#             end
#             degradation_state[i] = rand(crd[1]); #reset status; add additional transition
#             r = r+normal_operation; #add operation benefit
#          end
#         #collect degradation state to form the state
#         sp = repeat(0:0,Number_level);
#         for i in 1:NumberUnits
#             sp[degradation_state[i]] = sp[degradation_state[i]]+1;
#         end
#         return (sp=mystate(sp), r=r)
#     end,
#     actions = 0:(Number_level-1), 
#     actiontype = function()
#         return Int64;
#     end,

#     initialstate = function()
#         POMDPModelTools.ImplicitDistribution() do rng
#             return (mystate(state_vec(NumberUnits, Number_level, 1)))
#         end
#     end, #all u #all units start fresh. Need to change according to unit number and level number. ##For simulation, we need to use ImplicitDistribution
#     discount = 0.95,
#     isterminal = false              # no ending
# )