In [2]:

using POMDPs, QuickPOMDPs, MCTS, DiscreteValueIteration, POMDPSimulators, POMDPModels, POMDPPolicies, POMDPModelTools
using Distributions, Combinatorics, StaticArrays, Statistics
using FileIO, JLD2, TickTock

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mPrecompiling POMDPModelTools [08074719-1b2a-587c-a292-00f91cc44415]
[33m[1m│ [22m[39mThis may mean POMDPTools [7588e00f-9cae-40de-98dc-e0c70c48cdd7] does not support precompilation but is imported by a module that does.
[33m[1m└ [22m[39m[90m@ Base loading.jl:1948[39m
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mSkipping precompilation since __precompile__(false). Importing POMDPModelTools [08074719-1b2a-587c-a292-00f91cc44415].


## State Functions

In [3]:
# Number of distinct ways to distribute `n` indistinguishable units over `S`
# states, i.e. the number of length-`S` non-negative integer vectors summing to `n`.
#   n = number of units; S = number of states
function state_cnt(n, S)
    # A single state admits exactly one configuration.
    S == 1 && return 1
    # Stars-and-bars count for S > 1.
    slots = n + S - 1
    bars = S - 1
    return binomial(slots, bars)
end

# Return the 1-based position of the state vector `s` in the enumeration that
# `state_vec` inverts.
#   S = number of states considered (normally length(s)); s = state vector
#
# Ordering: states are grouped by the value of the last entry s[S] (0 first,
# then 1, ...), and within a group they are ordered recursively by the first
# S-1 entries. The result is always an exact integer: group sizes are taken
# from `state_cnt` instead of a floating-point recurrence, so the index can be
# compared with `==` or used directly for array indexing.
function state_index(S, s) #s = state vector
    if S == 1
        return 1
    end

    total = sum(s)
    # Count every state whose last entry is smaller than s[S]. A group with
    # last entry `taken` leaves `total - taken` units for the first S-1 states.
    offset = 0
    for taken in 0:(s[S] - 1)
        offset += state_cnt(total - taken, S - 1)
    end
    # Add the position of the remaining entries inside their own group.
    return offset + state_index(S - 1, s[1:(S - 1)])
end

# Return the state vector (length `S`, entries summing to `n`) stored at
# position `ind` of the enumeration used by `state_index`.
#   n = number of units; S = number of states; ind = state index (1-based)
#
# `ind` may be an integer or a real number close to an integer (older callers
# passed floats); it is rounded once and all further arithmetic is done with
# exact integers. When `ind` is out of range a message is printed and -1 is
# returned, as before.
function state_vec(n, S, ind)
    total = state_cnt(n, S)
    if ind < 0.5 || ind > total + 0.5
        println("index is out of range!")
        return -1
    end
    if S == 1
        return [n]
    end

    # Snap to the nearest valid integer index; the range check above
    # guarantees the rounded value is at most one step outside 1:total.
    idx = clamp(round(Int, ind), 1, total)

    last_state = 0                    # candidate value for s[S]
    below = 0                         # count of all states with s[S] < last_state
    group = state_cnt(n, S - 1)       # count of all states with s[S] == last_state
    while idx > below + group
        below += group
        last_state += 1
        group = state_cnt(n - last_state, S - 1)
    end
    # idx - below is the position inside the group with s[S] == last_state.
    return push!(state_vec(n - last_state, S - 1, idx - below), last_state)
end

# Heuristic (n, N) rule used as a rollout policy.
# The matching POMDPs.action method in this file redraws both fields on every
# call and returns `n` when at least one unit is at level N or above.
mutable struct nNRollout <: Policy
    n::Int64 # action returned when the rule triggers (smaller of the two sampled levels)
    N::Int64 # trigger level (larger of the two sampled levels)
end

# Heuristic (n, m, N) rule used as a rollout policy.
# The matching POMDPs.action method in this file redraws all three fields on
# every call and returns `n` when at least one unit is at level N or above, or
# at least two units are at level m or above.
mutable struct nmNRollout <: Policy
    n::Int64 # action returned when the rule triggers (smallest sampled level)
    m::Int64 # secondary trigger level (middle sampled level)
    N::Int64 # primary trigger level (largest sampled level)
end

# MDP state wrapper: state[i] is the number of units currently at degradation
# level i (this is how the generative model and the policies in this file read it).
mutable struct mystate
    state::Vector{Int64};
end

In [4]:
# Randomised (n, N) rollout rule.
# Draws two levels with replacement, stores the larger as the trigger level
# `p.N` and the smaller as the action `p.n`, then returns `p.n` if at least one
# unit is at level `p.N` or above and 0 otherwise.
function POMDPs.action(p::nNRollout, s::mystate)
    levels = sample(1:Number_level, 2, replace = true)
    p.N = maximum(levels)
    p.n = minimum(levels)

    # Units counted from the trigger level up to the last level.
    at_or_above_N = sum(s.state[p.N:Number_level])
    return at_or_above_N >= 1 ? p.n : 0
end

# Randomised (n, m, N) rollout rule.
# Draws three levels with replacement: the largest becomes `p.N`, the smallest
# `p.n`, and the remaining one `p.m`. Returns `p.n` when at least one unit is at
# level `p.N` or above, or at least two units are at level `p.m` or above;
# returns 0 otherwise.
function POMDPs.action(p::nmNRollout, s::mystate)
    levels = sample(1:Number_level, 3, replace = true)
    p.N = maximum(levels)
    p.n = minimum(levels)
    # Whatever is left after removing the extremes is the middle draw.
    p.m = sum(levels) - p.N - p.n

    counts = s.state
    if sum(counts[p.N:Number_level]) >= 1
        return p.n
    elseif sum(counts[p.m:Number_level]) >= 2
        return p.n
    end
    return 0
end

In [5]:
# Grid-search the (n, N) threshold policies by simulation.
# For every trigger level N in 2:Number_level and action n in 1:N, runs
# `repetition` simulations of `multiunit2` (each `simsteps` long, starting from
# state index 1) and stores the mean and standard deviation of the discounted
# reward in the global matrices `rewards_nN` / `rewards_nN_std`.
# Returns (best mean reward, its standard deviation, CartesianIndex (n, N)).
function findNn()
    println("Finding best nN policy...")

    for N in 2:Number_level
        for n in 1:N
            # Threshold rule: take action n once any unit is at level N or above.
            policy = function (s)
                return sum(s.state[N:Number_level]) >= 1 ? n : 0
            end

            # Each iteration writes only its own slot of temp_rewards.
            Threads.@threads for j in 1:repetition
                start = mystate(state_vec(NumberUnits, Number_level, 1))
                history = sim(policy, multiunit2; max_steps = simsteps, initialstate = start)
                temp_rewards[j] = discounted_reward(history)
            end

            rewards_nN[n, N] = mean(temp_rewards)
            rewards_nN_std[n, N] = std(temp_rewards)
        end
    end

    best_reward, best_idx = findmax(rewards_nN)
    println("Max rewards of (N, n) policy is ", best_reward, "  n is ", best_idx[1], "  N is ", best_idx[2])
    return best_reward, rewards_nN_std[best_idx], best_idx
end


findNn (generic function with 1 method)

In [6]:
using DataFrames
# Result table: one row per (units, K, cost vector). Rows are inserted at the
# front, so the most recent experiment is always row 1.
df = DataFrame(u=[], K=[], n=[], s=[], m=[], f=[], p=[], mean=[], std=[], nN=[])

# Cost scenarios. Each entry is a 1x5 row that is destructured below, in order,
# into: normal_operation, setup_cost, maintenance_penalty, failure_penalty,
# system_penalty.
cost = [
    [0 -600 -100 -1200 -2000],
    [0 -800 -100 -1200 -2000],
    [0 -1000 -100 -1200 -2000],
    [0 -1400 -100 -1200 -2000],
    [0 -1800 -100 -1200 -2000],
    [0 -2400 -100 -1200 -2000]
]

# ---- Problem constants (identical for every experiment) ---------------------
# They are globals because findNn, the rollout policies and the generative
# model below read them by name.
Number_level = 4
Transition_matrix = [
    0.8571 0.1429 0.0    0.0;
    0.0    0.8571 0.1429 0.0;
    0.0    0.0    0.8    0.2;
    0.0    0.0    0.0    1.0
]
# One categorical next-level distribution per current level (row of the matrix).
crd = [Categorical(Transition_matrix[i, :]) for i in 1:Number_level]

simsteps = 100
repetition = 10000
discount_factor = 0.95

for units in [150, 140, 130, 120, 110, 100, 90, 80, 70, 60, 50, 40, 30, 20]
    # K runs from 2 up to about half the fleet in steps of about a tenth of it.
    for K in 2:((units + 1) ÷ 10):((units + 1) ÷ 2)
        global NumberUnits = Int64(units)
        global limit = Int64(K)
        global state_number = state_cnt(NumberUnits, Number_level)

        global multiunit2 = QuickMDP(
            # s holds the number of units at each level; a is the lowest level
            # that gets reset (0 = no planned reset).
            gen = function (s, a, rng)
                local_s = s.state

                # Expand the per-level counts into one level per unit, sorted
                # ascending so the most degraded units sit at the end.
                degradation_state = fill(1, NumberUnits)
                k = 1
                for i in 1:Number_level
                    for j in 1:local_s[i]
                        degradation_state[k] = i
                        k += 1
                    end
                end

                r = 0.0
                number_reset = a != 0 ? sum(local_s[a:Number_level]) : 0

                # Too many failed units: pay the system penalty, and if no reset
                # was requested force a reset of the failed units.
                if local_s[Number_level] >= limit
                    r += system_penalty
                    if a == 0
                        number_reset = local_s[Number_level]
                    end
                end

                # Units that keep running: non-failed ones degrade one step and
                # earn the operation reward; failed ones stay failed.
                for i in 1:(NumberUnits - number_reset)
                    if degradation_state[i] != Number_level
                        # Sample with the simulator-supplied rng so runs are
                        # reproducible when the caller seeds it.
                        degradation_state[i] = rand(rng, crd[degradation_state[i]])
                        r += normal_operation
                    end
                end

                # Units that are reset: failure or maintenance penalty per unit,
                # the setup cost once per step, then one transition from level 1.
                setup_charged = false
                for i in (NumberUnits - number_reset + 1):NumberUnits
                    if degradation_state[i] == Number_level
                        r += failure_penalty
                    else
                        r += maintenance_penalty
                    end
                    if !setup_charged
                        r += setup_cost
                        setup_charged = true
                    end
                    degradation_state[i] = rand(rng, crd[1]) #reset status; add additional transition
                    r += normal_operation #add operation benefit
                end

                # Collapse the per-unit levels back into per-level counts.
                sp = zeros(Int64, Number_level)
                for level in degradation_state
                    sp[level] += 1
                end
                return (sp = mystate(sp), r = r)
            end,
            # NOTE(review): findNn and the rollout policies can emit action
            # Number_level, which is not in this range — confirm whether the
            # action space should be 0:Number_level.
            actions = 0:(Number_level - 1),
            actiontype = function ()
                return Int64
            end,
            # All units start fresh (state index 1). For simulation this has to
            # be an ImplicitDistribution.
            initialstate = function ()
                POMDPModelTools.ImplicitDistribution() do rng
                    return mystate(state_vec(NumberUnits, Number_level, 1))
                end
            end,
            discount = 0.95,
            isterminal = false              # no ending
        )

        for c in cost
            tick()
            println(units)
            print("K : ")
            println(K)
            global normal_operation, setup_cost, maintenance_penalty, failure_penalty, system_penalty = c
            println(c)

            # Fresh result buffers for findNn; untouched (n, N) cells keep a
            # very low sentinel so findmax never selects them.
            global rewards_nN = fill(-100000000.0, Number_level, Number_level)
            global rewards_nN_std = fill(-100000000.0, Number_level, Number_level)
            global temp_rewards = zeros(repetition, 1)

            # Do not name a local `std`: it would shadow Statistics.std here.
            best_reward, best_std, best_nN = findNn()
            pushfirst!(
                df,
                [
                    units, limit, normal_operation, setup_cost, maintenance_penalty,
                    failure_penalty, system_penalty, best_reward, best_std, best_nN
                ]
            )
            tock()
        end
    end
end

150
K : 2


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T21:55:54.426


[0 -600 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -27799.082500539833  n is 3  N is 3
150
K : 2
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.8733447s: 3 minutes, 15 seconds, 873 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T21:59:10.444


Max rewards of (N, n) policy is -31382.818446455003  n is 3  N is 3
150
K : 2
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         191.9859704s: 3 minutes, 11 seconds, 985 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T22:02:22.431


Max rewards of (N, n) policy is -34963.01542804874  n is 3  N is 3
150
K : 2


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         192.2308591s: 3 minutes, 12 seconds, 230 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T22:05:34.684


[0 -1400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -42106.37908066124  n is 3  N is 3
150
K : 2


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         190.9543744s: 3 minutes, 10 seconds, 954 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T22:08:45.660


[0 -1800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -49260.39367499466  n is 3  N is 3
150
K : 2
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         190.9950033s: 3 minutes, 10 seconds, 995 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T22:11:56.657


Max rewards of (N, n) policy is -56287.15526519325  n is 2  N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         191.2315807s: 3 minutes, 11 seconds, 231 milliseconds


150
K : 17
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T22:15:07.918


Max rewards of (N, n) policy is -27803.18840755681  n is 3  N is 3
150
K : 17
[0 -800 -100 -1200 -2000]

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          191.229874s: 3 minutes, 11 seconds, 229 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T22:18:19.150



Finding best nN policy...
Max rewards of (N, n) policy is -31369.207644108235  n is 3  N is 3
150
K : 17
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         194.4007595s: 3 minutes, 14 seconds, 400 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T22:21:33.553


Max rewards of (N, n) policy is -34966.252779240676  n is 3  N is 3
150
K : 17
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          191.348418s: 3 minutes, 11 seconds, 348 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T22:24:44.919


Max rewards of (N, n) policy is -42103.96881447467  n is 3  N is 3
150

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         191.1023956s: 3 minutes, 11 seconds, 102 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T22:27:56.023



K : 17
[0 -1800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -48942.78667898046  n is 2  N is 4
150
K : 17
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         191.6583794s: 3 minutes, 11 seconds, 658 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T22:31:07.683


Max rewards of (N, n) policy is -51787.17044064259  n is 2  N is 4
150
K : 32
[0 -600 -100 -1200 -2000]
Finding best nN policy...

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         191.2999376s: 3 minutes, 11 seconds, 299 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T22:34:18.993



Max rewards of (N, n) policy is -27795.52990404694  n is 3  N is 3
150
K : 32
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         189.6017701s: 3 minutes, 9 seconds, 601 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T22:37:28.597


Max rewards of (N, n) policy is -31377.231211156624  n is 3  N is 3
150
K : 32
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         190.1265098s: 3 minutes, 10 seconds, 126 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T22:40:38.724


Max rewards of (N, n) policy is -34939.20896782943  n is 3  N is 3
150
K : 32
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         190.3047454s: 3 minutes, 10 seconds, 304 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T22:43:49.031


Max rewards of (N, n) policy is -42116.532119047915  n is 3  N is 3
150
K : 32
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         189.3499163s: 3 minutes, 9 seconds, 349 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T22:46:58.400


Max rewards of (N, n) policy is -48942.83638159016  n is 2  N is 4
150
K : 32
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         189.7266324s: 3 minutes, 9 seconds, 726 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T22:50:08.128


Max rewards of (N, n) policy is -51802.10465766542  n is 2  N is 4
150
K : 47
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         190.3604316s: 3 minutes, 10 seconds, 360 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T22:53:18.496


Max rewards of (N, n) policy is -27800.778987918235  n is 3  N is 3
150
K : 47
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         189.3071656s: 3 minutes, 9 seconds, 307 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T22:56:27.805


Max rewards of (N, n) policy is -31360.360726250827  n is 3  N is 3
150
K : 47
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         189.2500808s: 3 minutes, 9 seconds, 250 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T22:59:37.057


Max rewards of (N, n) policy is -34951.81817065409  n is 3  N is 3
150
K : 47
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          188.584245s: 3 minutes, 8 seconds, 584 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T23:02:45.642


Max rewards of (N, n) policy is -42106.76784698857  n is 3  N is 3
150
K : 47
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         189.6549887s: 3 minutes, 9 seconds, 654 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T23:05:55.299


Max rewards of (N, n) policy is -48902.279208038126  n is 2  N is 4
150
K : 47
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         189.6409061s: 3 minutes, 9 seconds, 640 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T23:09:04.958


Max rewards of (N, n) policy is -51793.9683394732  n is 2  N is 4
150
K : 62
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         189.6390814s: 3 minutes, 9 seconds, 639 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T23:12:14.605


Max rewards of (N, n) policy is -27797.045652911554  n is 3  N is 3
150
K : 62
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         189.6051419s: 3 minutes, 9 seconds, 605 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T23:15:24.226


Max rewards of (N, n) policy is -31389.25364480328  n is 3  N is 3
150
K : 62
[0 -1000 

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         189.1124857s: 3 minutes, 9 seconds, 112 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T23:18:33.351


-100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -34948.4103936849  n is 3  N is 3
150
K : 62
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         190.2309835s: 3 minutes, 10 seconds, 230 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T23:21:43.603


Max rewards of (N, n) policy is -42103.032642802704  n is 3  N is 3
150
K : 62
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         189.2299811s: 3 minutes, 9 seconds, 229 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T23:24:52.834


Max rewards of (N, n) policy is -48920.13725733044  n is 2  N is 4
150
K : 62
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         189.5351205s: 3 minutes, 9 seconds, 535 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T23:28:02.371


Max rewards of (N, n) policy is -51826.386058597214  n is 2  N is 4
140
K : 2
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         188.5613806s: 3 minutes, 8 seconds, 561 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T23:31:10.954


Max rewards of (N, n) policy is -26662.817916945085  n is 3  N is 3
140
K : 2
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         178.2975025s: 2 minutes, 58 seconds, 297 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T23:34:09.253


Max rewards of (N, n) policy is -30238.015469852668  n is 3  N is 3
140
K : 2
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         177.7466273s: 2 minutes, 57 seconds, 746 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T23:37:07.001


Max rewards of (N, n) policy is -33800.850703210876  n is 3  N is 3
140
K : 2
[0 -1400 -100 -1200 -2000

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         180.5961754s: 3 minutes, 596 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T23:40:07.599


]
Finding best nN policy...
Max rewards of (N, n) policy is -40950.889019912094  n is 3  N is 3
140
K : 2
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         177.9161721s: 2 minutes, 57 seconds, 916 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T23:43:05.516


Max rewards of (N, n) policy is -48108.02782460818  n is 3  N is 3
140
K : 2
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         177.6586386s: 2 minutes, 57 seconds, 658 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T23:46:03.177


Max rewards of (N, n) policy is -53486.47769084853  n is 2  N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         177.0849349s: 2 minutes, 57 seconds, 84 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T23:49:00.294


140
K : 16
[0 -600 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -26660.860272276725  n is 3  N is 3


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m           177.58868s: 2 minutes, 57 seconds, 588 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T23:51:57.884


140
K : 16
[0 -800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -30231.773664265012  n is 3  N is 3
140
K : 16
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         177.8396958s: 2 minutes, 57 seconds, 839 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T23:54:55.743


Max rewards of (N, n) policy is -33811.369891728  n is 3  N is 3
140
K : 16
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         177.2944139s: 2 minutes, 57 seconds, 294 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-27T23:57:53.038


Max rewards of (N, n) policy is -40954.27361953544  n is 3  N is 3
140
K : 16
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         177.9583807s: 2 minutes, 57 seconds, 958 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T00:00:51.011


Max rewards of (N, n) policy is -46325.3483190105  n is 2  N is 4
140
K : 16
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         177.7200233s: 2 minutes, 57 seconds, 720 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T00:03:48.747


Max rewards of (N, n) policy is -49163.33365545396  n is 2  N is 4
140
K : 30
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         177.6390274s: 2 minutes, 57 seconds, 639 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T00:06:46.407


Max rewards of (N, n) policy is -26649.816724818713  n is 3  N is 3
140
K : 30
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         177.0109112s: 2 minutes, 57 seconds, 10 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T00:09:43.434


Max rewards of (N, n) policy is -30228.81245067067  n is 3  N is 3


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         177.6310659s: 2 minutes, 57 seconds, 631 milliseconds


140
K : 30
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T00:12:41.066


Max rewards of (N, n) policy is -33810.241463338876  n is 3  N is 3
140

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         177.3766433s: 2 minutes, 57 seconds, 376 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T00:15:38.444



K : 30
[0 -1400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -40972.23317675083  n is 3  N is 3
140
K : 30
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         177.6731236s: 2 minutes, 57 seconds, 673 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T00:18:36.119


Max rewards of (N, n) policy is -46331.236232884636  n is 2  N is 4
140
K : 30
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         178.4765865s: 2 minutes, 58 seconds, 476 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T00:21:34.597


Max rewards of (N, n) policy is -49136.83659357592  n is 2  N is 4
140

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         177.3754215s: 2 minutes, 57 seconds, 375 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T00:24:31.978



K : 44
[0 -600 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -26669.81123506076  n is 3  N is 3
140
K : 44
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         177.5934567s: 2 minutes, 57 seconds, 593 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T00:27:29.573


Max rewards of (N, n) policy is -30223.419521382184  n is 3  N is 3
140
K : 44
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         177.0877374s: 2 minutes, 57 seconds, 87 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T00:30:26.673


Max rewards of (N, n) policy is -33793.621652765876  n is 3  N is 3
140
K : 44
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          178.953657s: 2 minutes, 58 seconds, 953 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T00:33:25.627


Max rewards of (N, n) policy is -40947.24247796733  n is 3  N is 3
140
K : 44
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         177.7107999s: 2 minutes, 57 seconds, 710 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T00:36:23.340


Max rewards of (N, n) policy is -46306.327007888955  n is 2  N is 4
140
K : 44
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         177.5967719s: 2 minutes, 57 seconds, 596 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T00:39:20.949


Max rewards of (N, n) policy is -49198.113680044815  n is 2  N is 4
140
K : 58
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         178.0715338s: 2 minutes, 58 seconds, 71 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T00:42:19.026


Max rewards of (N, n) policy is -26652.77112654732  n is 3  N is 3
140
K : 58
[0

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         177.9084548s: 2 minutes, 57 seconds, 908 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T00:45:16.935


 -800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -30228.443439293624  n is 3  N is 3
140
K : 58
[0 -1000 -100 -1200

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         176.4650832s: 2 minutes, 56 seconds, 465 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T00:48:13.402


 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -33815.71358742294  n is 3  N is 3
140

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         178.0949146s: 2 minutes, 58 seconds, 94 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T00:51:11.498



K : 58
[0 -1400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -40941.502002893736  n is 3  N is 3
140
K : 58
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         177.3909341s: 2 minutes, 57 seconds, 390 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T00:54:08.906


Max rewards of (N, n) policy is -46349.919306139804  n is 2  N is 4
140
K : 58
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         177.6391719s: 2 minutes, 57 seconds, 639 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T00:57:06.546


Max rewards of (N, n) policy is -49177.97246841324  n is 2  N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         177.1887994s: 2 minutes, 57 seconds, 188 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T01:00:03.741


130
K : 2
[0 -600 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -25511.727822452183  n is 3  N is 3


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.5773638s: 2 minutes, 55 seconds, 577 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T01:02:59.319


130
K : 2
[0 -800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -29074.462207089804  n is 3  N is 3
130
K : 2
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         166.0596889s: 2 minutes, 46 seconds, 59 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T01:05:45.395


Max rewards of (N, n) policy is -32649.312495053004  n is 3  N is 3


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         165.5981963s: 2 minutes, 45 seconds, 598 milliseconds


130
K : 2
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T01:08:30.994


Max rewards of (N, n) policy is -39794.9839100501  n is 3  N is 3
130
K : 2
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          165.994864s: 2 minutes, 45 seconds, 994 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T01:11:16.990


Max rewards of (N, n) policy is -46926.19389440799  n is 3  N is 3
130


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         165.0422712s: 2 minutes, 45 seconds, 42 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T01:14:02.034


K : 2
[0 -2400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -50621.076796697846  n is 2  N is 4
130
K : 15
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          165.460535s: 2 minutes, 45 seconds, 460 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T01:16:47.500


Max rewards of (N, n) policy is -25508.017529680084  n is 3  N is 3
130
K : 15
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          166.398564s: 2 minutes, 46 seconds, 398 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T01:19:33.899


Max rewards of (N, n) policy is -29078.807801251256  n is 3  N is 3


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         170.6044597s: 2 minutes, 50 seconds, 604 milliseconds


130
K : 15
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T01:22:24.521


Max rewards of (N, n) policy is -32642.466484488512  n is 3  N is 3
130
K : 15
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         166.6449742s: 2 minutes, 46 seconds, 644 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T01:25:11.167


Max rewards of (N, n) policy is -39786.5417628084  n is 3  N is 3
130
K : 15
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         167.1283291s: 2 minutes, 47 seconds, 128 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T01:27:58.310


Max rewards of (N, n) policy is -43747.78566354227  n is 2  N is 4
130


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         165.4522559s: 2 minutes, 45 seconds, 452 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T01:30:43.778


K : 15
[0 -2400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -46560.44266341902  n is 2  N is 4
130
K : 28
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         165.5929986s: 2 minutes, 45 seconds, 592 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T01:33:29.378


Max rewards of (N, n) policy is -25500.960890085953  n is 3  N is 3
130
K : 28
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m            166.3318s: 2 minutes, 46 seconds, 331 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T01:36:15.710


Max rewards of (N, n) policy is -29084.944805383195  n is 3  N is 3
130
K : 28
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         165.8034224s: 2 minutes, 45 seconds, 803 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T01:39:01.515


Max rewards of (N, n) policy is -32647.1164643663  n is 3  N is 3
130
K : 28
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         166.1239838s: 2 minutes, 46 seconds, 123 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T01:41:47.640


Max rewards of (N, n) policy is -39790.41009674554  n is 3  N is 3
130
K : 28
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         165.4379701s: 2 minutes, 45 seconds, 437 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T01:44:33.079


Max rewards of (N, n) policy is -43709.18457756302  n is 2  N is 4
130
K : 28
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         165.6149047s: 2 minutes, 45 seconds, 614 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T01:47:18.696


Max rewards of (N, n) policy is -46532.43461473734  n is 2  N is 4
130
K : 41
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         165.6514835s: 2 minutes, 45 seconds, 651 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T01:50:04.369


Max rewards of (N, n) policy is -25513.867644720136  n is 3  N is 3


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         164.9835036s: 2 minutes, 44 seconds, 983 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T01:52:49.379


130
K : 41
[0 -800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -29090.806024569632  n is 3  N is 3
130
K : 41
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         165.8059504s: 2 minutes, 45 seconds, 805 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T01:55:35.186


Max rewards of (N, n) policy is -32658.728014542376  n is 3  N is 3
130
K : 41
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         166.4322754s: 2 minutes, 46 seconds, 432 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T01:58:21.632


Max rewards of (N, n) policy is -39789.86091197364  n is 3  N is 3
130

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         165.5622319s: 2 minutes, 45 seconds, 562 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T02:01:07.195



K : 41
[0 -1800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -43744.84696299856  n is 2  N is 4
130
K : 41
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         167.0301117s: 2 minutes, 47 seconds, 30 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T02:03:54.243


Max rewards of (N, n) policy is -46536.549768214834  n is 2  N is 4
130
K : 

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         165.9936107s: 2 minutes, 45 seconds, 993 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T02:06:40.242


54
[0 -600 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -25506.80083900056  n is 3  N is 3
130
K : 54
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         165.7365689s: 2 minutes, 45 seconds, 736 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T02:09:25.995


Max rewards of (N, n) policy is -29086.33763714313  n is 3  N is 3
130
K : 54
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         165.6736828s: 2 minutes, 45 seconds, 673 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T02:12:11.683


Max rewards of (N, n) policy is -32653.230511360758  n is 3  N is 3
130
K : 54
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         165.8158172s: 2 minutes, 45 seconds, 815 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T02:14:57.511


Max rewards of (N, n) policy is -39796.44039393376  n is 3  N is 3
130
K : 54


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         165.7050084s: 2 minutes, 45 seconds, 705 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T02:17:43.217


[0 -1800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -43696.38804751214  n is 2  N is 4
130
K : 54
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         166.8072524s: 2 minutes, 46 seconds, 807 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T02:20:30.043


Max rewards of (N, n) policy is -46554.48762854811  n is 2  N is 4
120

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          165.721876s: 2 minutes, 45 seconds, 721 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T02:23:15.785



K : 2
[0 -600 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -24351.3428900853  n is 3  N is 3


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.6451269s: 2 minutes, 34 seconds, 645 milliseconds


120
K : 2
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T02:25:50.452


Max rewards of (N, n) policy is -27928.963100135697  n is 3  N is 3
120
K : 2
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.5622203s: 2 minutes, 34 seconds, 562 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T02:28:25.031


Max rewards of (N, n) policy is -31495.40277195407  n is 3  N is 3
120
K : 2
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          153.989241s: 2 minutes, 33 seconds, 989 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T02:30:59.021


Max rewards of (N, n) policy is -38628.901962181764  n is 3  N is 3
120
K : 2
[0

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.0741569s: 2 minutes, 34 seconds, 74 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T02:33:33.096


 -1800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -44845.942412242366  n is 2  N is 3
120

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.4058007s: 2 minutes, 34 seconds, 405 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T02:36:07.518



K : 2
[0 -2400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -47829.12600776145  n is 2  N is 4
120
K : 14
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.1047936s: 2 minutes, 34 seconds, 104 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T02:38:41.651


Max rewards of (N, n) policy is -24355.336164282948  n is 3  N is 3
120
K : 14
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          154.177624s: 2 minutes, 34 seconds, 177 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T02:41:15.850


Max rewards of (N, n) policy is -27929.130547236116  n is 3  N is 3
120
K : 14
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          154.621207s: 2 minutes, 34 seconds, 621 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T02:43:50.491


Max rewards of (N, n) policy is -31482.822307601273  n is 3  N is 3
120
K : 14
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.3123018s: 2 minutes, 34 seconds, 312 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T02:46:24.804


Max rewards of (N, n) policy is -38606.16261164397  n is 3  N is 3
120
K : 14
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.4072966s: 2 minutes, 34 seconds, 407 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T02:48:59.213


Max rewards of (N, n) policy is -41131.530137738584  n is 2  N is 4
120
K : 14
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.4173172s: 2 minutes, 34 seconds, 417 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T02:51:33.631


Max rewards of (N, n) policy is -43904.808512086245  n is 2  N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.0373373s: 2 minutes, 34 seconds, 37 milliseconds


120
K : 26
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T02:54:07.686


Max rewards of (N, n) policy is -24360.98405133395  n is 3  N is 3
120
K : 26
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.5394261s: 2 minutes, 34 seconds, 539 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T02:56:42.227


Max rewards of (N, n) policy is -27918.04426508378  n is 3  N is 3


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.6477956s: 2 minutes, 34 seconds, 647 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T02:59:16.875


120
K : 26
[0 -1000 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -31497.164080388604  n is 3  N is 3
120
K : 26
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.9788562s: 2 minutes, 34 seconds, 978 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T03:01:51.855


Max rewards of (N, n) policy is -38633.005005173596  n is 3  N is 3
120
K : 26
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.1659226s: 2 minutes, 34 seconds, 165 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T03:04:26.022


Max rewards of (N, n) policy is -41126.557808324746  n is 2  N is 4
120
K : 26
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.4655142s: 2 minutes, 34 seconds, 465 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T03:07:00.492


Max rewards of (N, n) policy is -43914.39559079201  n is 2  N is 4
120
K : 38
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.5763325s: 2 minutes, 34 seconds, 576 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T03:09:35.079


Max rewards of (N, n) policy is -24354.44495550435  n is 3  N is 3
120
K : 38
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.6624279s: 2 minutes, 34 seconds, 662 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T03:12:09.747


Max rewards of (N, n) policy is -27937.280055076837  n is 3  N is 3
120
K : 38
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         153.9152872s: 2 minutes, 33 seconds, 915 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T03:14:43.689


Max rewards of (N, n) policy is -31484.537657750203  n is 3  N is 3
120
K : 38
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.4797155s: 2 minutes, 34 seconds, 479 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T03:17:18.174


Max rewards of (N, n) policy is -38623.042934574354  n is 3  N is 3
120
K : 38
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         155.6887562s: 2 minutes, 35 seconds, 688 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T03:19:53.868


Max rewards of (N, n) policy is -41148.899553903786  n is 2  N is 4
120
K : 38


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.9477373s: 2 minutes, 34 seconds, 947 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T03:22:28.822


[0 -2400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -43877.86122747338  n is 2  N is 4
120
K : 50
[0 -600 -100 -1200 -2000]
Finding best nN policy...

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.2813345s: 2 minutes, 34 seconds, 281 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T03:25:03.137



Max rewards of (N, n) policy is -24358.058055893096  n is 3  N is 3
120
K : 50
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.4794393s: 2 minutes, 34 seconds, 479 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T03:27:37.621


Max rewards of (N, n) policy is -27918.99586157565  n is 3  N is 3
120
K : 50
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.5641528s: 2 minutes, 34 seconds, 564 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T03:30:12.206


Max rewards of (N, n) policy is -31504.40319422087  n is 3  N is 3
120
K : 50
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.2631522s: 2 minutes, 34 seconds, 263 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T03:32:46.492


Max rewards of (N, n) policy is -38622.18098212663  n is 3  N is 3
120
K : 50
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.5912109s: 2 minutes, 34 seconds, 591 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T03:35:21.103


Max rewards of (N, n) policy is -41130.92687796825  n is 2  N is 4
120
K : 50
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.5233502s: 2 minutes, 34 seconds, 523 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T03:37:55.656


Max rewards of (N, n) policy is -43860.52737756665  n is 2  N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         155.0945015s: 2 minutes, 35 seconds, 94 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T03:40:30.777


110
K : 2
[0 -600 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -23200.216934718872  n is 3  N is 3


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         143.4028678s: 2 minutes, 23 seconds, 402 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T03:42:54.184


110
K : 2
[0 -800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -26763.714037500984  n is 3  N is 3
110
K : 2
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         144.5588402s: 2 minutes, 24 seconds, 558 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T03:45:18.747


Max rewards of (N, n) policy is -30329.74970664942  n is 3  N is 3
110
K : 2
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         142.9688795s: 2 minutes, 22 seconds, 968 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T03:47:41.735


Max rewards of (N, n) policy is -37446.5537886295  n is 3  N is 3
110
K : 2
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         143.6517534s: 2 minutes, 23 seconds, 651 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T03:50:05.390


Max rewards of (N, n) policy is -42218.64033071483  n is 2  N is 3
110
K : 2
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         143.2054816s: 2 minutes, 23 seconds, 205 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T03:52:28.600


Max rewards of (N, n) policy is -44958.24575665922  n is 2  N is 4
110
K : 13
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         143.2201302s: 2 minutes, 23 seconds, 220 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T03:54:51.829


Max rewards of (N, n) policy is -23187.880476457958  n is 3  N is 3
110
K : 13
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         143.3019523s: 2 minutes, 23 seconds, 301 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T03:57:15.157


Max rewards of (N, n) policy is -26763.258082997196  n is 3  N is 3
110
K : 13
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          143.296622s: 2 minutes, 23 seconds, 296 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T03:59:38.457


Max rewards of (N, n) policy is -30334.04306937717  n is 3  N is 3
110
K : 13
[0 -1400 -100 -1200

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         143.2369711s: 2 minutes, 23 seconds, 236 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T04:02:01.709


 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -36684.61439547966  n is 2  N is 4
110
K : 13
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         143.1425321s: 2 minutes, 23 seconds, 142 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T04:04:24.855


Max rewards of (N, n) policy is -38483.28584886385  n is 2  N is 4
110
K : 13
[0 -2400 -100

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         143.9513022s: 2 minutes, 23 seconds, 951 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T04:06:48.811


 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -41229.723300772945  n is 2  N is 4
110
K : 24
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         143.3130102s: 2 minutes, 23 seconds, 313 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T04:09:12.132


Max rewards of (N, n) policy is -23197.76992894804  n is 3  N is 3


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         146.2477996s: 2 minutes, 26 seconds, 247 milliseconds


110
K : 24
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T04:11:38.398


Max rewards of (N, n) policy is -26766.877857704152  n is 3  N is 3
110
K : 24
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         150.5451181s: 2 minutes, 30 seconds, 545 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T04:14:08.947


Max rewards of (N, n) policy is -30327.36063964876  n is 3  N is 3


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         151.2000575s: 2 minutes, 31 seconds, 200 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T04:16:40.170


110
K : 24
[0 -1400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -36670.576895862236  n is 2  N is 4
110
K : 24
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         150.1257372s: 2 minutes, 30 seconds, 125 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T04:19:10.299


Max rewards of (N, n) policy is -38498.7746688282  n is 2  N is 4
110
K : 24
[0 -2400 -100 -1200 -2000

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         143.8613327s: 2 minutes, 23 seconds, 861 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T04:21:34.164


]
Finding best nN policy...
Max rewards of (N, n) policy is -41193.40120408553  n is 2  N is 4
110
K : 35
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         143.4674114s: 2 minutes, 23 seconds, 467 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T04:23:57.655


Max rewards of (N, n) policy is -23202.229892548865  n is 3  N is 3
110
K : 35
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         143.0889669s: 2 minutes, 23 seconds, 88 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T04:26:20.748


Max rewards of (N, n) policy is -26770.49056397326  n is 3  N is 3
110
K : 35
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         143.6065895s: 2 minutes, 23 seconds, 606 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T04:28:44.373


Max rewards of (N, n) policy is -30330.47816179663  n is 3  N is 3
110
K : 35
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         143.7087121s: 2 minutes, 23 seconds, 708 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T04:31:08.109


Max rewards of (N, n) policy is -36673.64417277024  n is 2  N is 4
110
K : 35
[0

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         143.3229935s: 2 minutes, 23 seconds, 322 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T04:33:31.435


 -1800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -38531.36517242651  n is 2  N is 4
110
K : 35
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          143.911492s: 2 minutes, 23 seconds, 911 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T04:35:55.349


Max rewards of (N, n) policy is -41196.1835964369  n is 2  N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         143.8000089s: 2 minutes, 23 seconds, 800 milliseconds


110
K : 46
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T04:38:19.158


Max rewards of (N, n) policy is -23199.33963761964  n is 3  N is 3


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         143.5932655s: 2 minutes, 23 seconds, 593 milliseconds


110
K : 46
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T04:40:42.754


Max rewards of (N, n) policy is -26758.856901652867  n is 3  N is 3
110
K : 46
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         143.6791015s: 2 minutes, 23 seconds, 679 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T04:43:06.436


Max rewards of (N, n) policy is -30334.783311369687  n is 3  N is 3
110
K : 46
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         143.5503185s: 2 minutes, 23 seconds, 550 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T04:45:29.989


Max rewards of (N, n) policy is -36682.65456986402  n is 2  N is 4
110
K : 46
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         144.4672709s: 2 minutes, 24 seconds, 467 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T04:47:54.476


Max rewards of (N, n) policy is -38488.4174213312  n is 2  N is 4
110
K : 46


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         143.7964748s: 2 minutes, 23 seconds, 796 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T04:50:18.275


[0 -2400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -41248.73880364704  n is 2  N is 4
100
K : 2
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         143.4292605s: 2 minutes, 23 seconds, 429 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T04:52:41.713


Max rewards of (N, n) policy is -22034.77347009586  n is 3  N is 3


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         132.4894575s: 2 minutes, 12 seconds, 489 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T04:54:54.230


100
K : 2
[0 -800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -25599.093473155677  n is 3  N is 3
100

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         131.9919906s: 2 minutes, 11 seconds, 991 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T04:57:06.224



K : 2
[0 -1000 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -29140.494336803717  n is 3  N is 3
100
K : 2
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         132.0877784s: 2 minutes, 12 seconds, 87 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T04:59:18.331


Max rewards of (N, n) policy is -36078.56419134449  n is 2  N is 3
100
K : 2
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         132.5619666s: 2 minutes, 12 seconds, 561 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T05:01:30.913


Max rewards of (N, n) policy is -39320.49064573304  n is 2  N is 4
100
K : 2
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         132.2493352s: 2 minutes, 12 seconds, 249 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T05:03:43.181


Max rewards of (N, n) policy is -42004.9614540879  n is 2  N is 4
100
K : 12
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          132.968706s: 2 minutes, 12 seconds, 968 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T05:05:56.170


Max rewards of (N, n) policy is -22035.10577205937  n is 3  N is 3
100
K : 12
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         132.7305095s: 2 minutes, 12 seconds, 730 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T05:08:08.904


Max rewards of (N, n) policy is -25591.58008628383  n is 3  N is 3
100
K : 12
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         131.9839054s: 2 minutes, 11 seconds, 983 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T05:10:20.890


Max rewards of (N, n) policy is -29150.514725530367  n is 3  N is 3
100
K : 12
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         132.1755661s: 2 minutes, 12 seconds, 175 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T05:12:33.081


Max rewards of (N, n) policy is -34044.95936019778  n is 2  N is 4
100
K : 12
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         132.2603332s: 2 minutes, 12 seconds, 260 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T05:14:45.343


Max rewards of (N, n) policy is -35837.877619652536  n is 2  N is 4
100
K : 12
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         131.8924466s: 2 minutes, 11 seconds, 892 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T05:16:57.238


Max rewards of (N, n) policy is -38471.1684752811  n is 2  N is 4
100
K : 22


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         131.8918115s: 2 minutes, 11 seconds, 891 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T05:19:09.155


[0 -600 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -22053.6848577339  n is 3  N is 3
100
K : 22
[0 -800 -100 -1200 -2000]

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         133.5924183s: 2 minutes, 13 seconds, 592 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T05:21:22.749



Finding best nN policy...
Max rewards of (N, n) policy is -25585.063976049103  n is 3  N is 3
100
K : 22
[

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         133.4750095s: 2 minutes, 13 seconds, 475 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T05:23:36.227


0 -1000 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -29142.197739579547  n is 3  N is 3


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         135.0409087s: 2 minutes, 15 seconds, 40 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T05:25:51.271


100
K : 22
[0 -1400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -34093.3563182613  n is 2  N is 4
100
K : 22
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         132.4399621s: 2 minutes, 12 seconds, 439 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T05:28:03.736


Max rewards of (N, n) policy is -35847.463803645325  n is 2  N is 4
100
K : 22
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         132.3941329s: 2 minutes, 12 seconds, 394 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T05:30:16.132


Max rewards of (N, n) policy is -38507.543435933614  n is 2  N is 4
100
K : 32


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         132.1674111s: 2 minutes, 12 seconds, 167 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T05:32:28.306


[0 -600 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -22038.15194020966  n is 3  N is 3
100
K : 32

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         132.9320448s: 2 minutes, 12 seconds, 932 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T05:34:41.240



[0 -800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -25598.424345063384  n is 3  N is 3
100
K : 32
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         133.1176279s: 2 minutes, 13 seconds, 117 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T05:36:54.385


Max rewards of (N, n) policy is -29152.95688425048  n is 3  N is 3
100
K : 32
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         132.9384691s: 2 minutes, 12 seconds, 938 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T05:39:07.325


Max rewards of (N, n) policy is -34071.83052391714  n is 2  N is 4
100

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         132.3278098s: 2 minutes, 12 seconds, 327 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T05:41:19.655



K : 32
[0 -1800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -35841.53137481741  n is 2  N is 4
100

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         132.6199844s: 2 minutes, 12 seconds, 619 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T05:43:32.298



K : 32
[0 -2400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -38555.82052598315  n is 2  N is 4
100
K : 42
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         132.1998435s: 2 minutes, 12 seconds, 199 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T05:45:44.504


Max rewards of (N, n) policy is -22044.57499744802  n is 3  N is 3
100
K : 42
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         132.6211043s: 2 minutes, 12 seconds, 621 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T05:47:57.128


Max rewards of (N, n) policy is -25589.39684108309  n is 3  N is 3
100
K : 42
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         132.4857213s: 2 minutes, 12 seconds, 485 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T05:50:09.615


Max rewards of (N, n) policy is -29155.47278521618  n is 3  N is 3
100
K : 42
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         132.1466615s: 2 minutes, 12 seconds, 146 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T05:52:21.789


Max rewards of (N, n) policy is -34073.41293259152  n is 2  N is 4
100
K : 42
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         133.0324448s: 2 minutes, 13 seconds, 32 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T05:54:34.823


Max rewards of (N, n) policy is -35829.727582601685  n is 2  N is 4
100
K : 

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         132.2155452s: 2 minutes, 12 seconds, 215 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T05:56:47.040


42
[0 -2400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -38490.59758030933  n is 2  N is 4
90
K : 2
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         132.4210253s: 2 minutes, 12 seconds, 421 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T05:58:59.468


Max rewards of (N, n) policy is -20862.162154511625  n is 3  N is 3
90
K : 2
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         121.0916798s: 2 minutes, 1 second, 91 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T06:01:00.578


Max rewards of (N, n) policy is -24401.97753926979  n is 3  N is 3
90
K : 2
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         121.0504107s: 2 minutes, 1 second, 50 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T06:03:01.630


Max rewards of (N, n) policy is -27955.87677800027  n is 3  N is 3
90
K : 2
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         121.7965468s: 2 minutes, 1 second, 796 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T06:05:03.428


Max rewards of (N, n) policy is -33415.934248451136  n is 2  N is 3
90
K : 2
[0 -1800 -100 -1200 

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         120.8233854s: 2 minutes, 823 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T06:07:04.270


-2000]
Finding best nN policy...
Max rewards of (N, n) policy is -36489.148553297164  n is 2  N is 4
90
K : 2
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         120.6728375s: 2 minutes, 672 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T06:09:04.945


Max rewards of (N, n) policy is -39063.57100561278  n is 2  N is 4
90
K : 11
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         121.2135867s: 2 minutes, 1 second, 213 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T06:11:06.184


Max rewards of (N, n) policy is -20868.15832559148  n is 3  N is 3
90
K : 11
[

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         120.9390991s: 2 minutes, 939 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T06:13:07.125


0 -800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -24412.730514944364  n is 3  N is 3
90
K : 11
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          121.888362s: 2 minutes, 1 second, 888 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T06:15:09.026


Max rewards of (N, n) policy is -27949.81167452296  n is 3  N is 3
90
K : 11
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          121.121414s: 2 minutes, 1 second, 121 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T06:17:10.149


Max rewards of (N, n) policy is -31436.29782126304  n is 2  N is 4
90
K : 11
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         121.0504021s: 2 minutes, 1 second, 50 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T06:19:11.217


Max rewards of (N, n) policy is -33170.546153441814  n is 2  N is 4
90
K : 11
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         122.4183746s: 2 minutes, 2 seconds, 418 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T06:21:13.655


Max rewards of (N, n) policy is -35746.03884019281  n is 2  N is 4
90
K : 20
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         122.9256437s: 2 minutes, 2 seconds, 925 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T06:23:16.600


Max rewards of (N, n) policy is -20871.731463802047  n is 3  N is 3
90
K : 20
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         121.3395411s: 2 minutes, 1 second, 339 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T06:25:17.955


Max rewards of (N, n) policy is -24416.909883430137  n is 3  N is 3
90
K : 20
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         120.9789843s: 2 minutes, 978 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T06:27:18.956


Max rewards of (N, n) policy is -27952.926394593407  n is 3  N is 3
90
K : 20
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         121.1770941s: 2 minutes, 1 second, 177 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T06:29:20.135


Max rewards of (N, n) policy is -31415.406388778883  n is 2  N is 4
90
K : 20
[0 -1800 -100 -1200 -2000]
Finding best nN policy...

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         121.1433605s: 2 minutes, 1 second, 143 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T06:31:21.300



Max rewards of (N, n) policy is -33199.17708299922  n is 2  N is 4
90
K : 20

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         121.6617885s: 2 minutes, 1 second, 661 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T06:33:22.978



[0 -2400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -35799.43152266072  n is 2  N is 4
90
K : 29
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         121.3280941s: 2 minutes, 1 second, 328 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T06:35:24.312


Max rewards of (N, n) policy is -20871.122486702057  n is 3  N is 3
90
K : 29
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         121.5201357s: 2 minutes, 1 second, 520 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T06:37:25.855


Max rewards of (N, n) policy is -24410.93145007033  n is 3  N is 3
90

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         122.3131522s: 2 minutes, 2 seconds, 313 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T06:39:28.169



K : 29
[0 -1000 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -27968.539854608607  n is 3  N is 3
90
K : 29
[0 -1400 -100 

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         121.3284705s: 2 minutes, 1 second, 328 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T06:41:29.515


-1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -31428.376949562156  n is 2  N is 4
90
K : 29
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         122.1865455s: 2 minutes, 2 seconds, 186 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T06:43:31.703


Max rewards of (N, n) policy is -33201.35401072427  n is 2  N is 4
90
K : 29
[0 -2400 -100 -1200 -2000]


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         121.0935253s: 2 minutes, 1 second, 93 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T06:45:32.814


Finding best nN policy...
Max rewards of (N, n) policy is -35786.173622720824  n is 2  N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         120.9850742s: 2 minutes, 985 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T06:47:33.807


90
K : 38
[0 -600 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -20866.36140672557  n is 3  N is 3
90
K : 38
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         121.2991538s: 2 minutes, 1 second, 299 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T06:49:35.108


Max rewards of (N, n) policy is -24409.61622346443  n is 3  N is 3
90
K : 38
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         121.4395624s: 2 minutes, 1 second, 439 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T06:51:36.548


Max rewards of (N, n) policy is -27950.85272085545  n is 3  N is 3
90
K : 38
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         122.1050486s: 2 minutes, 2 seconds, 105 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T06:53:38.655


Max rewards of (N, n) policy is -31438.8238319414  n is 2  N is 4
90
K : 38
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         121.0544595s: 2 minutes, 1 second, 54 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T06:55:39.711


Max rewards of (N, n) policy is -33186.97633391205  n is 2  N is 4
90
K : 38
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         121.0493112s: 2 minutes, 1 second, 49 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T06:57:40.761


Max rewards of (N, n) policy is -35782.109229609734  n is 2  N is 4
80
K : 2
[0 -600 -100 -1200 -2000]
Finding best nN policy...

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         120.9421311s: 2 minutes, 942 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T06:59:41.732



Max rewards of (N, n) policy is -19689.346348571937  n is 3  N is 3
80
K : 2
[0 -800 -100 -1200 -2000]
Finding best nN policy...

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         110.5760077s: 1 minute, 50 seconds, 576 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:01:32.322



Max rewards of (N, n) policy is -23200.63894249689  n is 3  N is 3
80
K : 2
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         109.8308258s: 1 minute, 49 seconds, 830 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:03:22.153


Max rewards of (N, n) policy is -26731.7670175533  n is 3  N is 3
80
K : 2


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         109.6238674s: 1 minute, 49 seconds, 623 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:05:11.795


[0 -1400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -30706.734979597437  n is 2  N is 3
80
K : 2
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         109.6292195s: 1 minute, 49 seconds, 629 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:07:01.425


Max rewards of (N, n) policy is -33508.90434425577  n is 2  N is 4
80
K : 2
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         110.6823514s: 1 minute, 50 seconds, 682 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:08:52.108


Max rewards of (N, n) policy is -36040.71943638798  n is 2  N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         110.0164759s: 1 minute, 50 seconds, 16 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:10:42.131


80
K : 10
[0 -600 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -19687.212093834703  n is 3  N is 3
80
K : 10
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          109.854847s: 1 minute, 49 seconds, 854 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:12:31.987


Max rewards of (N, n) policy is -23209.425014340646  n is 3  N is 3
80
K : 10
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         110.3755632s: 1 minute, 50 seconds, 375 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:14:22.379


Max rewards of (N, n) policy is -26724.34294318478  n is 3  N is 3
80
K : 10
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         109.5333576s: 1 minute, 49 seconds, 533 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:16:11.913


Max rewards of (N, n) policy is -28759.694647935154  n is 2  N is 4
80
K : 10
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         109.4497974s: 1 minute, 49 seconds, 449 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:18:01.364


Max rewards of (N, n) policy is -30457.16229270352  n is 2  N is 4
80
K : 10
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         109.4061203s: 1 minute, 49 seconds, 406 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:19:50.771


Max rewards of (N, n) policy is -33028.37106876485  n is 2  N is 4
80
K : 18
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         111.2097741s: 1 minute, 51 seconds, 209 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:21:42.011


Max rewards of (N, n) policy is -19683.03313883677  n is 3  N is 3
80
K : 18
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         109.7479571s: 1 minute, 49 seconds, 747 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:23:31.774


Max rewards of (N, n) policy is -23214.496441816995  n is 3  N is 3
80
K : 18
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         109.7882239s: 1 minute, 49 seconds, 788 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:25:21.588


Max rewards of (N, n) policy is -26741.826791284748  n is 3  N is 3
80
K : 18
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         110.9211662s: 1 minute, 50 seconds, 921 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:27:12.510


Max rewards of (N, n) policy is -28760.52748413087  n is 2  N is 4
80
K : 18
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         109.6258153s: 1 minute, 49 seconds, 625 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:29:02.152


Max rewards of (N, n) policy is -30466.534807395543  n is 2  N is 4
80
K : 18
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         109.7971707s: 1 minute, 49 seconds, 797 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:30:51.950


Max rewards of (N, n) policy is -32988.215603014454  n is 2  N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         109.9539758s: 1 minute, 49 seconds, 953 milliseconds


80
K : 26
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:32:41.925


Max rewards of (N, n) policy is -19684.160511117796  n is 3  N is 3


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         110.8928461s: 1 minute, 50 seconds, 892 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:34:32.844


80
K : 26
[0 -800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -23213.15701752436  n is 3  N is 3
80
K : 26
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         109.6398929s: 1 minute, 49 seconds, 639 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:36:22.484


Max rewards of (N, n) policy is -26734.275877247368  n is 3  N is 3
80
K : 26
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          109.779733s: 1 minute, 49 seconds, 779 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:38:12.281


Max rewards of (N, n) policy is -28761.424610366146  n is 2  N is 4
80
K : 26
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         110.3103045s: 1 minute, 50 seconds, 310 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:40:02.592


Max rewards of (N, n) policy is -30457.753746625604  n is 2  N is 4
80
K : 26

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          110.689063s: 1 minute, 50 seconds, 689 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:41:53.298



[0 -2400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -33001.37632853629  n is 2  N is 4
80
K : 34
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         109.7986763s: 1 minute, 49 seconds, 798 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:43:43.117


Max rewards of (N, n) policy is -19678.93760955299  n is 3  N is 3
80
K : 34
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         109.5898381s: 1 minute, 49 seconds, 589 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:45:32.709


Max rewards of (N, n) policy is -23201.91172398358  n is 3  N is 3
80
K : 34
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         110.3583354s: 1 minute, 50 seconds, 358 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:47:23.068


Max rewards of (N, n) policy is -26730.866196175302  n is 3  N is 3
80
K : 34
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         109.7777447s: 1 minute, 49 seconds, 777 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:49:12.867


Max rewards of (N, n) policy is -28772.923671398723  n is 2  N is 4
80
K : 34
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         109.7934752s: 1 minute, 49 seconds, 793 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:51:02.661


Max rewards of (N, n) policy is -30471.421658403386  n is 2  N is 4
80
K : 34
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         110.3689036s: 1 minute, 50 seconds, 368 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:52:53.056


Max rewards of (N, n) policy is -33029.917290728044  n is 2  N is 4
70
K : 2
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         109.5401785s: 1 minute, 49 seconds, 540 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:54:42.602


Max rewards of (N, n) policy is -18473.781237047257  n is 3  N is 3
70
K : 2
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          98.3497366s: 1 minute, 38 seconds, 349 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:56:20.952


Max rewards of (N, n) policy is -21970.248698955453  n is 3  N is 3
70
K : 2
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          99.1175871s: 1 minute, 39 seconds, 117 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:58:00.071


Max rewards of (N, n) policy is -24703.059688725378  n is 2  N is 3
70
K : 2
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m           98.642373s: 1 minute, 38 seconds, 642 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T07:59:38.715


Max rewards of (N, n) policy is -27941.003085267002  n is 2  N is 3


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m           98.247304s: 1 minute, 38 seconds, 247 milliseconds


70
K : 2
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:01:16.980


Max rewards of (N, n) policy is -30513.699257945234  n is 2  N is 4
70
K : 

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          98.9296219s: 1 minute, 38 seconds, 929 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:02:55.921


2
[0 -2400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -32993.75306311834  n is 2  N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          98.6910376s: 1 minute, 38 seconds, 691 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:04:34.616


70
K : 9
[0 -600 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -18473.31123625884  n is 3  N is 3
70
K : 9
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          98.4273421s: 1 minute, 38 seconds, 427 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:06:13.045


Max rewards of (N, n) policy is -21964.489444238297  n is 3  N is 3


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          98.7629437s: 1 minute, 38 seconds, 762 milliseconds


70
K : 9
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:07:51.828


Max rewards of (N, n) policy is -24436.44771557994  n is 2  N is 4
70
K : 9
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          98.2947875s: 1 minute, 38 seconds, 294 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:09:30.123


Max rewards of (N, n) policy is -26090.798021607538  n is 2  N is 4
70
K : 9
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          98.6731374s: 1 minute, 38 seconds, 673 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:11:08.812


Max rewards of (N, n) policy is -27708.56465018441  n is 2  N is 4
70
K : 9
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          98.9379867s: 1 minute, 38 seconds, 937 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:12:47.769


Max rewards of (N, n) policy is -30218.856594610177  n is 2  N is 4
70
K : 16
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          98.5829759s: 1 minute, 38 seconds, 582 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:14:26.359


Max rewards of (N, n) policy is -18464.047660558288  n is 3  N is 3
70
K : 16
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          98.9292362s: 1 minute, 38 seconds, 929 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:16:05.301


Max rewards of (N, n) policy is -21971.80274107406  n is 3  N is 3
70
K : 16
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          98.4348585s: 1 minute, 38 seconds, 434 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:17:43.755


Max rewards of (N, n) policy is -24412.945092990183  n is 2  N is 4
70
K : 16
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          98.4204437s: 1 minute, 38 seconds, 420 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:19:22.191


Max rewards of (N, n) policy is -26060.05814398321  n is 2  N is 4
70
K : 16
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          99.9078407s: 1 minute, 39 seconds, 907 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:21:02.100


Max rewards of (N, n) policy is -27696.7549119771  n is 2  N is 4
70
K : 16
[0 -2400 -100 -1200 -2000

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          98.1085213s: 1 minute, 38 seconds, 108 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:22:40.225


]
Finding best nN policy...
Max rewards of (N, n) policy is -30211.790102590396  n is 2  N is 4
70
K : 23
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          98.2788256s: 1 minute, 38 seconds, 278 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:24:18.509


Max rewards of (N, n) policy is -18464.018999088614  n is 3  N is 3
70
K : 23
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          99.3262709s: 1 minute, 39 seconds, 326 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:25:57.836


Max rewards of (N, n) policy is -21961.83287722465  n is 3  N is 3
70
K : 23
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          98.4298645s: 1 minute, 38 seconds, 429 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:27:36.291


Max rewards of (N, n) policy is -24404.24220902658  n is 2  N is 4
70

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          98.2964197s: 1 minute, 38 seconds, 296 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:29:14.605



K : 23
[0 -1400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -26062.08363995077  n is 2  N is 4
70
K : 23
[0

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          98.9835935s: 1 minute, 38 seconds, 983 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:30:53.589


 -1800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -27704.851475822223  n is 2  N is 4
70
K : 23
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          98.2649198s: 1 minute, 38 seconds, 264 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:32:31.854


Max rewards of (N, n) policy is -30172.28231118244  n is 2  N is 4
70
K : 30
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          98.4093225s: 1 minute, 38 seconds, 409 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:34:10.283


Max rewards of (N, n) policy is -18464.779211201923  n is 3  N is 3
70
K : 30
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         104.5400049s: 1 minute, 44 seconds, 540 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:35:54.839


Max rewards of (N, n) policy is -21965.12440919351  n is 3  N is 3
70
K : 30
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         103.8735911s: 1 minute, 43 seconds, 873 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:37:38.714


Max rewards of (N, n) policy is -24413.98578436705  n is 2  N is 4
70
K : 30
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m           104.73559s: 1 minute, 44 seconds, 735 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:39:23.451


Max rewards of (N, n) policy is -26104.28542081225  n is 2  N is 4
70
K : 30
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         104.2515088s: 1 minute, 44 seconds, 251 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:41:07.703


Max rewards of (N, n) policy is -27753.61418874588  n is 2  N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         103.6558165s: 1 minute, 43 seconds, 655 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:42:51.360


70
K : 30
[0 -2400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -30218.852344985415  n is 2  N is 4
60
K : 2
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         103.1809522s: 1 minute, 43 seconds, 180 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:44:34.567


Max rewards of (N, n) policy is -17200.53227036466  n is 3  N is 3
60
K : 2
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          94.0027554s: 1 minute, 34 seconds, 2 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:46:08.571


Max rewards of (N, n) policy is -20391.972049577682  n is 2  N is 3
60
K : 2
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          97.0502331s: 1 minute, 37 seconds, 50 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:47:45.643


Max rewards of (N, n) policy is -21968.740715312822  n is 2  N is 3
60

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          96.3500802s: 1 minute, 36 seconds, 350 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:49:22.008



K : 2
[0 -1400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -25134.741516732  n is 2  N is 3
60
K : 2
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m           96.951709s: 1 minute, 36 seconds, 951 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:50:58.962


Max rewards of (N, n) policy is -27448.943770209342  n is 2  N is 4
60
K : 2
[0 -2400 -100 -1200 -2000]
Finding best nN policy...

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          92.7268148s: 1 minute, 32 seconds, 726 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:52:31.714



Max rewards of (N, n) policy is -29851.376989476317  n is 2  N is 4
60
K : 8


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          92.3917433s: 1 minute, 32 seconds, 391 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:54:04.115


[0 -600 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -17201.968040584605  n is 3  N is 3
60
K : 8
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          92.8326909s: 1 minute, 32 seconds, 832 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:55:36.950


Max rewards of (N, n) policy is -20383.57874973322  n is 2  N is 3
60
K : 8
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          92.7231582s: 1 minute, 32 seconds, 723 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:57:09.675


Max rewards of (N, n) policy is -21747.78030711641  n is 2  N is 4
60
K : 8
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          92.1820562s: 1 minute, 32 seconds, 182 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T08:58:41.879


Max rewards of (N, n) policy is -23353.278368652434  n is 2  N is 4
60
K : 8
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          93.0291017s: 1 minute, 33 seconds, 29 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:00:14.910


Max rewards of (N, n) policy is -24928.181514147567  n is 2  N is 4
60
K : 8
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          92.5265048s: 1 minute, 32 seconds, 526 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:01:47.464


Max rewards of (N, n) policy is -27300.714885130692  n is 2  N is 4
60
K : 14
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          91.4468571s: 1 minute, 31 seconds, 446 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:03:18.917


Max rewards of (N, n) policy is -17207.158806737774  n is 3  N is 3


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          93.6923096s: 1 minute, 33 seconds, 692 milliseconds


60
K : 14
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:04:52.638


Max rewards of (N, n) policy is -20393.76299915849  n is 2  N is 3
60
K : 14
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          91.5808436s: 1 minute, 31 seconds, 580 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:06:24.220


Max rewards of (N, n) policy is -21729.07673774649  n is 2  N is 4
60
K : 14
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          93.6329105s: 1 minute, 33 seconds, 632 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:07:57.855


Max rewards of (N, n) policy is -23314.347910343087  n is 2  N is 4
60
K : 14
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          92.9895035s: 1 minute, 32 seconds, 989 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:09:30.848


Max rewards of (N, n) policy is -24918.91899727269  n is 2  N is 4
60
K : 14
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          92.9551071s: 1 minute, 32 seconds, 955 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:11:03.821


Max rewards of (N, n) policy is -27314.926989123713  n is 2  N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          93.9212135s: 1 minute, 33 seconds, 921 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:12:37.751


60
K : 20
[0 -600 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -17199.58237966972  n is 3  N is 3
60
K : 20
[0 -800 -100 -1200 -2000]
Finding best nN policy...

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          92.8491577s: 1 minute, 32 seconds, 849 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:14:10.622



Max rewards of (N, n) policy is -20418.401118487203  n is 2  N is 3


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          93.1749708s: 1 minute, 33 seconds, 174 milliseconds


60
K : 20
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:15:43.811


Max rewards of (N, n) policy is -21734.72547745807  n is 2  N is 4
60
K : 20
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          92.2507256s: 1 minute, 32 seconds, 250 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:17:16.063


Max rewards of (N, n) policy is -23323.329787471415  n is 2  N is 4
60
K : 20


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          92.4528863s: 1 minute, 32 seconds, 452 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:18:48.534


[0 -1800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -24900.749004565754  n is 2  N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m           92.890743s: 1 minute, 32 seconds, 890 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:20:21.442


60
K : 20
[0 -2400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -27300.853877037353  n is 2  N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          93.3593255s: 1 minute, 33 seconds, 359 milliseconds


60
K : 26
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:21:54.810


Max rewards of (N, n) policy is -17207.396851769357  n is 3  N is 3
60
K : 26


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          93.8301645s: 1 minute, 33 seconds, 830 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:23:28.642


[0 -800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -20403.474133192183  n is 2  N is 3
60
K : 26
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          92.1885453s: 1 minute, 32 seconds, 188 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:25:00.833


Max rewards of (N, n) policy is -21739.418779680003  n is 2  N is 4
60
K : 26
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          93.0692845s: 1 minute, 33 seconds, 69 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:26:33.920


Max rewards of (N, n) policy is -23349.49248738502  n is 2  N is 4
60
K : 26
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          94.7036511s: 1 minute, 34 seconds, 703 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:28:08.626


Max rewards of (N, n) policy is -24904.978496158215  n is 2  N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          93.0840058s: 1 minute, 33 seconds, 84 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:29:41.736


60
K : 26
[0 -2400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -27273.054704580893  n is 2  N is 4
50
K : 2
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          93.4835628s: 1 minute, 33 seconds, 483 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:31:15.242


Max rewards of (N, n) policy is -15834.329161577663  n is 3  N is 3
50
K : 2
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          80.9760801s: 1 minute, 20 seconds, 976 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:32:36.238


Max rewards of (N, n) policy is -17686.33384941631  n is 2  N is 3
50
K : 2
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          81.7858926s: 1 minute, 21 seconds, 785 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:33:58.026


Max rewards of (N, n) policy is -19166.10327871887  n is 2  N is 3
50

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          81.2144555s: 1 minute, 21 seconds, 214 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:35:19.242



K : 2
[0 -1400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -22201.076921980835  n is 2  N is 3
50
K : 2
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          81.3093729s: 1 minute, 21 seconds, 309 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:36:40.572


Max rewards of (N, n) policy is -24280.595637521415  n is 2  N is 4
50

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          80.5450174s: 1 minute, 20 seconds, 545 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:38:01.119



K : 2
[0 -2400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -26594.871056141015  n is 2  N is 4
50
K : 7
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          81.2794382s: 1 minute, 21 seconds, 279 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:39:22.421


Max rewards of (N, n) policy is -15845.000984915725  n is 3  N is 3
50
K : 7


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          80.7226404s: 1 minute, 20 seconds, 722 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:40:43.145


[0 -800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -17666.850751799433  n is 2  N is 3
50
K : 7
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          82.0213534s: 1 minute, 22 seconds, 21 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:42:05.168


Max rewards of (N, n) policy is -19000.354639844056  n is 2  N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          80.8324508s: 1 minute, 20 seconds, 832 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:43:26.003


50
K : 7
[0 -1400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -20529.45275775629  n is 2  N is 4
50
K : 7
[0 -1800 -100 -1200 -2000]
Finding best nN policy...

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          80.8798715s: 1 minute, 20 seconds, 879 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:44:46.909



Max rewards of (N, n) policy is -22041.954232537584  n is 2  N is 4
50
K : 7
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          81.7304204s: 1 minute, 21 seconds, 730 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:46:08.642


Max rewards of (N, n) policy is -24284.944968623142  n is 2  N is 4
50
K : 12
[0 -600 -100 -1200 -2000]
Finding best nN policy...

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          80.0707293s: 1 minute, 20 seconds, 70 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:47:28.732



Max rewards of (N, n) policy is -15853.193431023983  n is 3  N is 3
50
K : 12
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          81.4496952s: 1 minute, 21 seconds, 449 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:48:50.184


Max rewards of (N, n) policy is -17667.59584116318  n is 2  N is 3
50
K : 12
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m            80.46607s: 1 minute, 20 seconds, 466 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:50:10.667


Max rewards of (N, n) policy is -18995.100135717017  n is 2  N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          81.9178005s: 1 minute, 21 seconds, 917 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:51:32.586


50
K : 12
[0 -1400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -20510.574005429502  n is 2  N is 4
50

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          80.7852216s: 1 minute, 20 seconds, 785 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:52:53.388



K : 12
[0 -1800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -22065.24897995966  n is 2  N is 4
50
K : 12
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          81.5584005s: 1 minute, 21 seconds, 558 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:54:14.963


Max rewards of (N, n) policy is -24314.724938024534  n is 2  N is 4
50
K : 17
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          80.4289499s: 1 minute, 20 seconds, 428 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:55:35.398


Max rewards of (N, n) policy is -15844.723344427037  n is 3  N is 3
50
K : 17
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          82.1794455s: 1 minute, 22 seconds, 179 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:56:57.594


Max rewards of (N, n) policy is -17657.894217464953  n is 2  N is 3
50
K : 17

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          80.8871558s: 1 minute, 20 seconds, 887 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:58:18.483



[0 -1000 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -18998.036412041438  n is 2  N is 4
50
K : 17
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          81.6214874s: 1 minute, 21 seconds, 621 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T09:59:40.106


Max rewards of (N, n) policy is -20541.428102117887  n is 2  N is 4
50
K : 17
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          81.0323607s: 1 minute, 21 seconds, 32 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:01:01.162


Max rewards of (N, n) policy is -22038.927174883414  n is 2  N is 4
50
K : 17
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          82.1219827s: 1 minute, 22 seconds, 121 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:02:23.304


Max rewards of (N, n) policy is -24305.430143335747  n is 2  N is 4
50
K : 22
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          81.1300885s: 1 minute, 21 seconds, 130 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:03:44.464


Max rewards of (N, n) policy is -15848.054674198305  n is 3  N is 3
50
K : 22
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          81.5132021s: 1 minute, 21 seconds, 513 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:05:05.980


Max rewards of (N, n) policy is -17665.55644807798  n is 2  N is 3
50
K : 22
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          80.4582665s: 1 minute, 20 seconds, 458 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:06:26.439


Max rewards of (N, n) policy is -19036.38610761897  n is 2  N is 4
50
K : 22
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          81.1519512s: 1 minute, 21 seconds, 151 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:07:47.592


Max rewards of (N, n) policy is -20548.196841983205  n is 2  N is 4
50

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          80.5381759s: 1 minute, 20 seconds, 538 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:09:08.132



K : 22
[0 -1800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -22027.044014931012  n is 2  N is 4
50
K : 22
[0 -2400 -100 

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m           81.342466s: 1 minute, 21 seconds, 342 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:10:29.501


-1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -24297.452416426884  n is 2  N is 4
40
K : 2
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          80.2169964s: 1 minute, 20 seconds, 216 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:11:49.724


Max rewards of (N, n) policy is -13432.268394889683  n is 2  N is 3
40
K : 2
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          70.2389232s: 1 minute, 10 seconds, 238 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:12:59.964


Max rewards of (N, n) policy is -14873.89999285963  n is 2  N is 3
40
K : 2
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          69.3320617s: 1 minute, 9 seconds, 332 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:14:09.315


Max rewards of (N, n) policy is -16297.62252410773  n is 2  N is 3


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          69.6570669s: 1 minute, 9 seconds, 657 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:15:18.989


40
K : 2
[0 -1400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -19152.356649846486  n is 2  N is 3
40
K : 2
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          68.8429722s: 1 minute, 8 seconds, 842 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:16:27.833


Max rewards of (N, n) policy is -21016.08688774993  n is 2  N is 4
40
K : 2
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          69.9688705s: 1 minute, 9 seconds, 968 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:17:37.803


Max rewards of (N, n) policy is -23114.798499651784  n is 2  N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          69.6075515s: 1 minute, 9 seconds, 607 milliseconds


40
K : 6
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:18:47.435


Max rewards of (N, n) policy is -13453.567401939881  n is 2  N is 3


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          69.6624614s: 1 minute, 9 seconds, 662 milliseconds


40
K : 6
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:19:57.115


Max rewards of (N, n) policy is -14876.248808342787  n is 2  N is 3
40
K : 6
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          69.9363458s: 1 minute, 9 seconds, 936 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:21:07.069


Max rewards of (N, n) policy is -16207.292035684803  n is 2  N is 4
40
K : 6
[0 -1400 -100 -1200 

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          69.3160405s: 1 minute, 9 seconds, 316 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:22:16.401


-2000]
Finding best nN policy...
Max rewards of (N, n) policy is -17644.887700754163  n is 2  N is 4
40

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          71.4740159s: 1 minute, 11 seconds, 474 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:23:27.876



K : 6
[0 -1800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -19053.475423516385  n is 2  N is 4
40
K : 6
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          69.1720183s: 1 minute, 9 seconds, 172 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:24:37.049


Max rewards of (N, n) policy is -21204.912514037056  n is 2  N is 4
40
K : 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          69.8633265s: 1 minute, 9 seconds, 863 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:25:46.941


[0 -600 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -13449.27307679919  n is 2  N is 3
40
K : 10
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          69.1198564s: 1 minute, 9 seconds, 119 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:26:56.062


Max rewards of (N, n) policy is -14870.598625509718  n is 2  N is 3
40
K : 10
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          69.6993839s: 1 minute, 9 seconds, 699 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:28:05.784


Max rewards of (N, n) policy is -16192.32536255905  n is 2  N is 4
40
K : 10
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          69.2331075s: 1 minute, 9 seconds, 233 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:29:15.018


Max rewards of (N, n) policy is -17634.873323555672  n is 2  N is 4
40
K : 10
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          69.5274742s: 1 minute, 9 seconds, 527 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:30:24.546


Max rewards of (N, n) policy is -19057.19492318474  n is 2  N is 4
40
K : 10

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          69.9331413s: 1 minute, 9 seconds, 933 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:31:34.481



[0 -2400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -21200.404792386762  n is 2  N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          69.0692728s: 1 minute, 9 seconds, 69 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:32:43.556


40
K : 14
[0 -600 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -13440.909597751697  n is 2  N is 3
40
K : 14
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          70.1699556s: 1 minute, 10 seconds, 169 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:33:53.727


Max rewards of (N, n) policy is -14874.614666492154  n is 2  N is 3
40
K : 14
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          69.0944219s: 1 minute, 9 seconds, 94 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:35:02.837


Max rewards of (N, n) policy is -16216.650694304732  n is 2  N is 4
40
K : 14
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m           69.976125s: 1 minute, 9 seconds, 976 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:36:12.832


Max rewards of (N, n) policy is -17640.011098754592  n is 2  N is 4
40
K : 14
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          68.7686064s: 1 minute, 8 seconds, 768 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:37:21.602


Max rewards of (N, n) policy is -19045.90262344839  n is 2  N is 4
40
K : 14
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          69.8660079s: 1 minute, 9 seconds, 866 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:38:31.469


Max rewards of (N, n) policy is -21266.288737222952  n is 2  N is 4
40
K : 18
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          69.1875886s: 1 minute, 9 seconds, 187 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:39:40.662


Max rewards of (N, n) policy is -13436.687523065746  n is 2  N is 3
40
K : 18
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          69.8465863s: 1 minute, 9 seconds, 846 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:40:50.510


Max rewards of (N, n) policy is -14853.60397671615  n is 2  N is 3
40
K : 18
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          70.2171269s: 1 minute, 10 seconds, 217 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:42:00.728


Max rewards of (N, n) policy is -16229.635386677757  n is 2  N is 4
40
K : 18
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          68.8983308s: 1 minute, 8 seconds, 898 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:43:09.639


Max rewards of (N, n) policy is -17651.209348685017  n is 2  N is 4
40
K : 18
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          69.9530062s: 1 minute, 9 seconds, 953 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:44:19.593


Max rewards of (N, n) policy is -19061.34873709715  n is 2  N is 4
40

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          68.9280134s: 1 minute, 8 seconds, 928 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:45:28.522



K : 18
[0 -2400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -21197.871249960463  n is 2  N is 4
30
K : 2
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          69.9277465s: 1 minute, 9 seconds, 927 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:46:38.456


Max rewards of (N, n) policy is -10651.626969502497  n is 2  N is 3
30
K : 2
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          57.4979184s: 57 seconds, 497 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:47:35.971


Max rewards of (N, n) policy is -11961.716918485647  n is 2  N is 3
30
K : 2
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          58.4495957s: 58 seconds, 449 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:48:34.421


Max rewards of (N, n) policy is -13281.302593866381  n is 2  N is 3
30
K : 2
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          58.6152193s: 58 seconds, 615 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:49:33.037


Max rewards of (N, n) policy is -15916.31727162482  n is 2  N is 3
30
K : 2
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          57.6979655s: 57 seconds, 697 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:50:30.736


Max rewards of (N, n) policy is -17478.4093998942  n is 2  N is 4
30
K : 2
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          58.5438687s: 58 seconds, 543 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:51:29.281


Max rewards of (N, n) policy is -19465.215592819382  n is 2  N is 4
30
K : 5
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          58.3635172s: 58 seconds, 363 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:52:27.666


Max rewards of (N, n) policy is -10666.76912356302  n is 2  N is 3
30
K : 5
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          58.6416268s: 58 seconds, 641 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:53:26.309


Max rewards of (N, n) policy is -11957.799972898933  n is 2  N is 3
30
K : 5
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          58.3994229s: 58 seconds, 399 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:54:24.710


Max rewards of (N, n) policy is -13280.43297926168  n is 2  N is 3
30
K : 5
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m           57.265354s: 57 seconds, 265 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:55:21.992


Max rewards of (N, n) policy is -14618.843170022214  n is 2  N is 4
30
K : 5
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          58.2006831s: 58 seconds, 200 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:56:20.193


Max rewards of (N, n) policy is -15923.327527152156  n is 2  N is 4
30
K : 5
[0 -2400 -100 -1200 -2000]
Finding best nN policy...

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          58.2697895s: 58 seconds, 269 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:57:18.464



Max rewards of (N, n) policy is -17939.48156347615  n is 2  N is 4
30
K : 8
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          57.2665738s: 57 seconds, 266 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:58:15.735


Max rewards of (N, n) policy is -10651.357499837459  n is 2  N is 3
30
K : 8
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          58.5280229s: 58 seconds, 528 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T10:59:14.265


Max rewards of (N, n) policy is -11961.552298576316  n is 2  N is 3
30
K : 8
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          58.6047432s: 58 seconds, 604 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:00:12.870


Max rewards of (N, n) policy is -13271.100338495597  n is 2  N is 3
30

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          57.2679048s: 57 seconds, 267 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:01:10.154



K : 8
[0 -1400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -14621.548484865754  n is 2  N is 4
30
K : 8
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          58.4508664s: 58 seconds, 450 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:02:08.606


Max rewards of (N, n) policy is -15970.175177986532  n is 2  N is 4
30
K : 8
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          57.8807148s: 57 seconds, 880 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:03:06.488


Max rewards of (N, n) policy is -17879.78088117489  n is 2  N is 4
30
K : 11
[0 -600 -100 -1200 -2000]

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          58.0571762s: 58 seconds, 57 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:04:04.552



Finding best nN policy...
Max rewards of (N, n) policy is -10648.080250233734  n is 2  N is 3
30
K : 11
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m           58.490301s: 58 seconds, 490 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:05:03.044


Max rewards of (N, n) policy is -11971.35457483044  n is 2  N is 3
30
K : 11
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          57.4399508s: 57 seconds, 439 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:06:00.485


Max rewards of (N, n) policy is -13261.5035110625  n is 2  N is 3
30
K : 11
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          58.2081289s: 58 seconds, 208 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:06:58.706


Max rewards of (N, n) policy is -14600.433114089346  n is 2  N is 4
30
K : 11
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          58.1402538s: 58 seconds, 140 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:07:56.847


Max rewards of (N, n) policy is -15929.091080403692  n is 2  N is 4
30
K : 11
[0 -2400 -100

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          57.3777105s: 57 seconds, 377 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:08:54.226


 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -17937.178037127822  n is 2  N is 4
30
K : 14
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          58.3481339s: 58 seconds, 348 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:09:52.580


Max rewards of (N, n) policy is -10649.460070316472  n is 2  N is 3
30
K : 14
[0 -800 -100 -1200 -2000

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          58.3611718s: 58 seconds, 361 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:10:50.941


]
Finding best nN policy...
Max rewards of (N, n) policy is -11967.42538942913  n is 2  N is 3
30
K : 14
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m           57.630267s: 57 seconds, 630 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:11:48.583


Max rewards of (N, n) policy is -13282.47466180126  n is 2  N is 3
30
K : 14
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          58.3050453s: 58 seconds, 305 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:12:46.889


Max rewards of (N, n) policy is -14606.15887209218  n is 2  N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          57.5072905s: 57 seconds, 507 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:13:44.413


30
K : 14
[0 -1800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -15944.919645943219  n is 2  N is 4
30
K : 14
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          58.5613308s: 58 seconds, 561 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:14:42.975


Max rewards of (N, n) policy is -17906.992940140655  n is 2  N is 4
20
K : 2

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          58.7083314s: 58 seconds, 708 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:15:41.689



[0 -600 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -7732.024864811136  n is 2  N is 3
20
K : 2
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          46.4452592s: 46 seconds, 445 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:16:28.134


Max rewards of (N, n) policy is -8895.512949010585  n is 2  N is 3


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          46.8419463s: 46 seconds, 841 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:17:14.993


20
K : 2
[0 -1000 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -10033.652011398202  n is 2  N is 3
20
K : 2

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          47.0903902s: 47 seconds, 90 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:18:02.100



[0 -1400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -12322.664019484746  n is 2  N is 3
20
K : 2
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          45.8632372s: 45 seconds, 863 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:18:47.979


Max rewards of (N, n) policy is -13649.62072593429  n is 2  N is 4
20
K : 2
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          47.0158862s: 47 seconds, 15 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:19:34.996


Max rewards of (N, n) policy is -15390.117654377287  n is 2  N is 4
20
K : 4
[0 -600 -100 -1200 -2000]


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          47.0158606s: 47 seconds, 15 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:20:22.032


Finding best nN policy...
Max rewards of (N, n) policy is -7755.381582725236  n is 2  N is 3
20
K : 4
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          47.0260156s: 47 seconds, 26 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:21:09.059


Max rewards of (N, n) policy is -8891.305629351848  n is 2  N is 3
20
K : 4
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          46.4632776s: 46 seconds, 463 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:21:55.522


Max rewards of (N, n) policy is -10022.864466767716  n is 2  N is 3
20
K : 4
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          46.7387558s: 46 seconds, 738 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:22:42.262


Max rewards of (N, n) policy is -11385.96231563367  n is 2  N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          46.7575321s: 46 seconds, 757 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:23:29.021


20
K : 4
[0 -1800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -12532.582117159438  n is 2  N is 4
20

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          46.0485937s: 46 seconds, 48 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:24:15.092



K : 4
[0 -2400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -14316.193121888184  n is 2  N is 4
20
K : 6
[0 -600

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          46.8738879s: 46 seconds, 873 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:25:01.989


 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -7747.00329004487  n is 2  N is 3
20
K : 6
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          47.2139986s: 47 seconds, 213 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:25:49.203


Max rewards of (N, n) policy is -8889.763086191451  n is 2  N is 3
20
K : 6


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m           46.591818s: 46 seconds, 591 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:26:35.812


[0 -1000 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -10060.204702119934  n is 2  N is 3
20
K : 6
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          46.0534699s: 46 seconds, 53 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:27:21.866


Max rewards of (N, n) policy is -11358.220756132905  n is 2  N is 4
20
K : 6
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          46.6989788s: 46 seconds, 698 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:28:08.566


Max rewards of (N, n) policy is -12509.610871310186  n is 2  N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          46.8804628s: 46 seconds, 880 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:28:55.446


20
K : 6
[0 -2400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -14259.78009995701  n is 2  N is 4
20
K : 8
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m           46.146842s: 46 seconds, 146 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:29:41.619


Max rewards of (N, n) policy is -7750.502011346373  n is 2  N is 3
20
K : 8


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          46.9537151s: 46 seconds, 953 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:30:28.595


[0 -800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -8896.37376061644  n is 2  N is 3
20
K : 8
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          47.0729019s: 47 seconds, 72 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:31:15.668


Max rewards of (N, n) policy is -10040.677765475413  n is 2  N is 3
20
K : 8
[0 -1400

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          45.8651423s: 45 seconds, 865 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:32:01.550


 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -11386.23556189665  n is 2  N is 4
20
K : 8
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          46.8294579s: 46 seconds, 829 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:32:48.380


Max rewards of (N, n) policy is -12541.043951835358  n is 2  N is 4
20
K : 8
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          46.8048525s: 46 seconds, 804 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:33:35.185


Max rewards of (N, n) policy is -14288.273537096062  n is 2  N is 4
20
K : 10
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          46.8085357s: 46 seconds, 808 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:34:21.999


Max rewards of (N, n) policy is -7740.552203023195  n is 2  N is 3
20
K : 10
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          46.0684473s: 46 seconds, 68 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:35:08.068


Max rewards of (N, n) policy is -8878.175201205942  n is 2  N is 3
20
K : 10
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          47.2407327s: 47 seconds, 240 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:35:55.335


Max rewards of (N, n) policy is -10027.617855020597  n is 2  N is 3
20
K : 10
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          46.9639384s: 46 seconds, 963 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:36:42.300


Max rewards of (N, n) policy is -11355.291053500227  n is 2  N is 4
20
K : 10
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m            46.18179s: 46 seconds, 181 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:37:28.507


Max rewards of (N, n) policy is -12520.118643904092  n is 2  N is 4
20
K : 10
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          46.9309805s: 46 seconds, 930 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-28T11:38:15.461


Max rewards of (N, n) policy is -14284.614387567794  n is 2  N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          47.0243495s: 47 seconds, 24 milliseconds


In [7]:
# Show the results table: one row per parameter combination (columns u, K, n, s, m, f, p)
# together with the reward mean/std and the selected nN CartesianIndex.
df

Row,u,K,n,s,m,f,p,mean,std,nN
Unnamed: 0_level_1,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any
1,20,10,0,-2400,-100,-1200,-2000,-14284.6,1678.47,"CartesianIndex(2, 4)"
2,20,10,0,-1800,-100,-1200,-2000,-12520.1,1471.75,"CartesianIndex(2, 4)"
3,20,10,0,-1400,-100,-1200,-2000,-11355.3,1320.66,"CartesianIndex(2, 4)"
4,20,10,0,-1000,-100,-1200,-2000,-10027.6,935.315,"CartesianIndex(2, 3)"
5,20,10,0,-800,-100,-1200,-2000,-8878.18,808.533,"CartesianIndex(2, 3)"
6,20,10,0,-600,-100,-1200,-2000,-7740.55,693.263,"CartesianIndex(2, 3)"
7,20,8,0,-2400,-100,-1200,-2000,-14288.3,1686.6,"CartesianIndex(2, 4)"
8,20,8,0,-1800,-100,-1200,-2000,-12541.0,1477.94,"CartesianIndex(2, 4)"
9,20,8,0,-1400,-100,-1200,-2000,-11386.2,1354.53,"CartesianIndex(2, 4)"
10,20,8,0,-1000,-100,-1200,-2000,-10040.7,920.823,"CartesianIndex(2, 3)"


In [9]:
using CSV

# Persist the results table `df` as a CSV file; the relative path resolves against the
# current working directory. The piped form returns the file path, just like the
# two-argument call.
df |> CSV.write("nN_simple(case study).csv")

"nN_simple(case study).csv"

In [None]:
#= using Plots
 =#

In [None]:
#= plot(df[df.s.==-250,:].u,df[df.s.==-250,:].mean) =#

In [None]:
#= convert(Int64,df.mean[1]) =#

In [None]:
#= plot(df.u,df.mean.+1100.0,yticks=df.mean,xticks=df.u,ytickfontrotation=10.0)
scatter!(df.u,df.mean.+1100.0)
plot!(xscale= :identity
    
    ,yscale=:log10,ytickfontrotation=10.0)
 =#

In [None]:
#= plot(df[df.s.==-300,:].u,df[df.s.==-300,:].mean,label="nN") =#