In [1]:

using POMDPs, QuickPOMDPs, MCTS, DiscreteValueIteration, POMDPSimulators, POMDPModels, POMDPPolicies, POMDPModelTools
using Distributions, Combinatorics, StaticArrays, Statistics
using FileIO, JLD2, TickTock

## State Functions

In [2]:
"""
    state_cnt(n, S)

Count the vectors of `S` non-negative integers that sum to `n`, i.e. the number of
ways `n` units can be spread over `S` states.

# Arguments
- `n`: number of units.
- `S`: number of states.

Returns `1` when `S == 1`, otherwise `binomial(n + S - 1, S - 1)`.
"""
state_cnt(n, S) = S == 1 ? 1 : binomial(n + S - 1, S - 1)

"""
    state_index(S, s)

Return the 1-based position of the count vector `s` in the enumeration of all
vectors of `S` non-negative integers with the same total `sum(s)`.

Vectors are ordered by their last entry first: every vector with `s[S] == 0` comes
before every vector with `s[S] == 1`, and so on; ties are broken recursively on the
leading `S - 1` entries.

# Arguments
- `S`: number of states; `s` is expected to have length `S`.
- `s`: state vector, `s[i]` being the number of units in state `i`.

The result is computed with exact integer arithmetic, so it is an integer on every
path and can be used directly as an index.
"""
function state_index(S, s) #s = state vector
    S == 1 && return 1

    head = s[1:(S - 1)]
    last_count = s[S]
    last_count == 0 && return state_index(S - 1, head)

    total = sum(s)
    # Number of vectors that precede the whole group sharing this last entry:
    # for each smaller last entry k, the remaining total - k units are spread over
    # the first S - 1 states.
    offset = sum(state_cnt(total - k, S - 1) for k in 0:(last_count - 1))
    return offset + state_index(S - 1, head)
end

"""
    state_vec(n, S, ind)

Build the count vector of length `S` (entries summing to `n`) that sits at position
`ind` of the enumeration in which vectors are grouped by increasing last entry.

# Arguments
- `n`: number of units.
- `S`: number of states.
- `ind`: 1-based state index; compared with a tolerance of 0.5 because the running
  group sizes below are floating-point values.

Prints `"index is out of range!"` and returns `-1` when `ind` lies outside
`1:state_cnt(n, S)` (with the same 0.5 tolerance).
"""
function state_vec(n, S, ind) # n = number of units; S = number of states; ind = state index
    if ind < 0.5 || ind > state_cnt(n, S) + 0.5
        println("index is out of range!")
        return -1
    end
    S == 1 && return [n]

    # Size of the first group: all vectors whose last entry is 0.
    zero_group = state_cnt(n, S - 1)
    if ind < zero_group + 0.5
        return push!(state_vec(n, S - 1, ind), 0)
    end

    covered = zero_group   # indices covered by the groups visited so far
    group = zero_group     # size of the group visited most recently
    tail = 0               # last entry shared by that group
    remaining = n
    while ind > covered + 0.5
        group = group / (remaining + S - 2) * remaining # count of all states with s[S] = tail + 1
        covered += group
        remaining -= 1
        tail += 1
    end
    # ind - covered + group is the position of ind inside the group it falls into.
    return push!(state_vec(n - tail, S - 1, ind - covered + group), tail)
end

# Heuristic rules used as rollout policies
# Randomized two-threshold rollout policy.
# Mutable because POMDPs.action(::nNRollout, ::mystate) overwrites both fields on every call.
mutable struct nNRollout <: Policy
    # Smaller of the two sampled levels; returned as the action when the rule fires.
    n::Int64
    # Larger of the two sampled levels; the rule fires when any unit is at level N or above.
    N::Int64
end

# Randomized three-threshold rollout policy.
# Mutable because POMDPs.action(::nmNRollout, ::mystate) overwrites all fields on every call.
mutable struct nmNRollout <: Policy
    # Smallest of the three sampled levels; returned as the action when the rule fires.
    n::Int64
    # Middle sampled level; the rule also fires when at least two units are at level m or above.
    m::Int64
    # Largest sampled level; the rule fires when any unit is at level N or above.
    N::Int64
end

# MDP state wrapper: state[i] is the number of units currently at level i.
mutable struct mystate
    state::Vector{Int64};
end

In [3]:
"""
    POMDPs.action(p::nNRollout, s::mystate)

Pick an action with a freshly randomized (n, N) threshold rule.

Two levels are drawn with replacement from `1:Number_level`; the larger is stored in
`p.N` and the smaller in `p.n`. Returns `p.n` when at least one unit of `s.state` is
at level `p.N` or above, and `0` otherwise.
"""
function POMDPs.action(p::nNRollout, s::mystate)
    counts = s.state
    draws = sample(1:Number_level, 2, replace = true)
    p.n, p.N = extrema(draws)
    return sum(counts[p.N:Number_level]) >= 1 ? p.n : 0
end

"""
    POMDPs.action(p::nmNRollout, s::mystate)

Pick an action with a freshly randomized (n, m, N) threshold rule.

Three levels are drawn with replacement from `1:Number_level`; the smallest, middle
and largest are stored in `p.n`, `p.m` and `p.N`. Returns `p.n` when at least one
unit of `s.state` is at level `p.N` or above, or at least two units are at level
`p.m` or above; returns `0` otherwise.
"""
function POMDPs.action(p::nmNRollout, s::mystate)
    counts = s.state
    draws = sample(1:Number_level, 3, replace = true)
    # Sorting three draws yields (minimum, median, maximum).
    p.n, p.m, p.N = sort(draws)
    fires = sum(counts[p.N:Number_level]) >= 1 || sum(counts[p.m:Number_level]) >= 2
    return fires ? p.n : 0
end

In [4]:
"""
    findNn()

Grid-search the deterministic (n, N) threshold policies by simulation.

For every `N in 2:Number_level` and `n in 1:N`, the policy "return action `n` when at
least one unit is at level `N` or above, else `0`" is simulated `repetition` times on
`multiunit2` for `simsteps` steps, starting from `state_vec(NumberUnits, Number_level, 1)`.
The mean and standard deviation of the discounted rewards are written into the global
matrices `rewards_nN[n, N]` and `rewards_nN_std[n, N]`; the global buffer
`temp_rewards` holds the per-run rewards.

Returns `(best_mean, std_at_best, index)`, where `index` is the `findmax` index into
`rewards_nN` (`index[1]` is `n`, `index[2]` is `N`).
"""
function findNn()
    println("Finding best nN policy...")

    for N in 2:Number_level, n in 1:N
        threshold_rule = s -> sum(s.state[N:Number_level]) >= 1 ? n : 0
        # Each repetition writes its own slot of temp_rewards.
        Threads.@threads for j in 1:repetition
            start = mystate(state_vec(NumberUnits, Number_level, 1))
            history = sim(threshold_rule, multiunit2, max_steps=simsteps, initialstate=start)
            temp_rewards[j] = discounted_reward(history)
        end
        rewards_nN[n, N] = mean(temp_rewards)
        rewards_nN_std[n, N] = std(temp_rewards)
    end

    best_reward, best_idx = findmax(rewards_nN)
    println("Max rewards of (N, n) policy is ", best_reward, "  n is ", best_idx[1], "  N is ", best_idx[2])
    return best_reward, rewards_nN_std[best_idx], best_idx
end


findNn (generic function with 1 method)

In [5]:
using DataFrames
# Result table; every column starts empty and untyped. Rows are added by the experiment loop.
df = DataFrame([col => [] for col in (:u, :K, :n, :s, :m, :f, :p, :mean, :std, :nN)])

# Cost scenarios, one 1×5 row each. The experiment loop unpacks a row by position as
# [normal_operation setup_cost maintenance_penalty failure_penalty system_penalty].
# Only the second entry changes between scenarios: -200 down to -2000 in steps of 200,
# then -2400, -2800 and -3200.
cost = [[0 sc -100 -1200 -2000] for sc in [-200:-200:-2000; -2400:-400:-3200]]



# ---------------------------------------------------------------------------
# Experiment driver: for every fleet size, system-failure threshold K and cost
# scenario, search for the best (n, N) threshold policy and record it in `df`.
#
# `findNn` and the generative model below communicate through globals, so every
# quantity they read is kept as a global here.
# ---------------------------------------------------------------------------

# Model constants. None of them depends on the fleet size, on K or on the cost
# scenario, so they are set once instead of on every pass through the loops.
Number_level = 10
fullname = "tm10.jld2"
Transition_matrix = load(fullname, "transition_matrix")
# crd[i] is the one-step transition distribution out of level i (row i of the matrix).
crd = [Categorical(Transition_matrix[i, :]) for i in 1:Number_level]
simsteps = 100
repetition = 10000
discount_factor = 0.95

# Generative MDP. The state space is not enumerated; only `gen` and `initialstate` are
# supplied. The closures read NumberUnits, limit and the cost globals when they are
# called, so one model object serves every configuration of the loops below.
multiunit2 = QuickMDP(
    # s.state[i] = number of units at level i; a = repair threshold level (0 = no repair).
    gen = function (s, a, rng)
        local_s = s.state

        # Expand the counts into one level per unit, in ascending order, so that the
        # units to be reset are always the last `number_reset` entries.
        degradation_state = ones(Int, NumberUnits)
        k = 1
        for i in 1:Number_level
            for _ in 1:local_s[i]
                degradation_state[k] = i
                k += 1
            end
        end

        r = 0.0
        # A non-zero action resets every unit at level a or above.
        number_reset = a != 0 ? sum(local_s[a:Number_level]) : 0
        if local_s[Number_level] >= limit
            # At least `limit` units are at the last level: charge the system penalty.
            r += system_penalty
            if a == 0
                # Without a repair action, the units at the last level are reset anyway.
                number_reset = local_s[Number_level]
            end
        end

        # Units that keep running: they transition and earn the operation reward,
        # except units already at the last level, which stay there and earn nothing.
        first_reset = NumberUnits - number_reset + 1
        for i in 1:(first_reset - 1)
            if degradation_state[i] != Number_level
                # Sample with the simulator-supplied rng so seeded runs are reproducible.
                degradation_state[i] = rand(rng, crd[degradation_state[i]])
                r += normal_operation
            end
        end

        # Units that are reset: failure or maintenance penalty per unit, and the
        # setup cost once per step if at least one unit is reset.
        setup_charged = false
        for i in first_reset:NumberUnits
            r += degradation_state[i] == Number_level ? failure_penalty : maintenance_penalty
            if !setup_charged
                r += setup_cost
                setup_charged = true
            end
            degradation_state[i] = rand(rng, crd[1]) # reset to level 1, then one transition
            r += normal_operation                    # operation reward of the reset unit
        end

        # Collapse the per-unit levels back into per-level counts.
        sp = zeros(Int, Number_level)
        for level in degradation_state
            sp[level] += 1
        end
        return (sp=mystate(sp), r=r)
    end,
    actions = 0:(Number_level-1),
    actiontype = function()
        return Int64;
    end,
    # Every simulation starts from state index 1 of the enumeration used by state_vec.
    # For simulation the initial state has to be given as an ImplicitDistribution.
    initialstate = function()
        POMDPModelTools.ImplicitDistribution() do rng
            return (mystate(state_vec(NumberUnits, Number_level, 1)))
        end
    end,
    discount = discount_factor,
    isterminal = false              # no ending
    )

for units in [60, 50, 40, 30, 25, 20]
    for K in 2:2:((units + 1) ÷ 2)
        global NumberUnits = Int64(units)
        global limit = Int64(K)
        global state_number = state_cnt(NumberUnits, Number_level)

        for c in cost
            tick()
            println(units)
            print("K : ")
            println(K)
            global normal_operation,setup_cost,maintenance_penalty,failure_penalty,system_penalty=c
            println(c)

            # Fresh result buffers for findNn. Cells it never visits keep the large
            # negative sentinel so that findmax cannot select them.
            global rewards_nN = fill(-100000000.0, Number_level, Number_level)
            global rewards_nN_std = fill(-100000000.0, Number_level, Number_level)
            global temp_rewards = zeros(repetition, 1)

            # The second result is deliberately not called `std`: that name would
            # shadow Statistics.std, which findNn itself calls.
            best_reward, reward_std, nN = findNn()
            pushfirst!(df, [units, limit, normal_operation, setup_cost, maintenance_penalty,
                            failure_penalty, system_penalty, best_reward, reward_std, nN])
            tock()
        end
    end
end

60
K : 2

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T09:40:32.042



[0 -200 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -50460.11309353139  n is 7  N is 7
60
K : 2
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         335.5007896s: 5 minutes, 35 seconds, 500 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T09:46:07.690


Max rewards of (N, n) policy is -54242.808237289035  n is 7  N is 8
60
K : 2
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         332.6027159s: 5 minutes, 32 seconds, 602 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T09:51:40.299


Max rewards of (N, n) policy is -58064.80888574195  n is 7  N is 8
60
K : 2
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         332.8951362s: 5 minutes, 32 seconds, 895 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T09:57:13.201


Max rewards of (N, n) policy is -61862.69440961362  n is 7  N is 8
60
K : 2
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         334.1861103s: 5 minutes, 34 seconds, 186 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T10:02:47.395


Max rewards of (N, n) policy is -65594.63063971937  n is 7  N is 8
60
K : 2
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         333.3786323s: 5 minutes, 33 seconds, 378 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T10:08:20.780


Max rewards of (N, n) policy is -69406.45770298988  n is 7  N is 7
60
K : 2
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         331.2144557s: 5 minutes, 31 seconds, 214 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T10:13:52.001


Max rewards of (N, n) policy is -73195.78600578444  n is 7  N is 7
60
K : 2
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          333.002236s: 5 minutes, 33 seconds, 2 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T10:19:25.010


Max rewards of (N, n) policy is -76898.03274022693  n is 7  N is 8
60
K : 2
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         331.3282163s: 5 minutes, 31 seconds, 328 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T10:24:56.345


Max rewards of (N, n) policy is -80694.19068889678  n is 7  N is 8
60
K : 2
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         332.3382604s: 5 minutes, 32 seconds, 338 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T10:30:28.690


Max rewards of (N, n) policy is -84245.35134854092  n is 7  N is 9
60
K : 2
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         334.0108857s: 5 minutes, 34 seconds, 10 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T10:36:02.708


Max rewards of (N, n) policy is -91307.75056411793  n is 7  N is 9
60
K : 2
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         333.5780985s: 5 minutes, 33 seconds, 578 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T10:41:36.293


Max rewards of (N, n) policy is -98325.19117014672  n is 7  N is 9
60
K : 2
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         332.1537632s: 5 minutes, 32 seconds, 153 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T10:47:08.453


Max rewards of (N, n) policy is -104110.13744333263  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         332.1274683s: 5 minutes, 32 seconds, 127 milliseconds


60
K : 4
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T10:52:43.975


Max rewards of (N, n) policy is -47118.558631135485  n is 7  N is 7
60
K : 4
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         332.1131683s: 5 minutes, 32 seconds, 113 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T10:58:16.106


Max rewards of (N, n) policy is -50900.08982217614  n is 7  N is 7
60
K : 4
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         332.2682214s: 5 minutes, 32 seconds, 268 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T11:03:48.380


Max rewards of (N, n) policy is -54696.74065524194  n is 7  N is 8
60
K : 4
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         332.8932375s: 5 minutes, 32 seconds, 893 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T11:09:21.280


Max rewards of (N, n) policy is -58449.662639261056  n is 7  N is 7
60
K : 4
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         331.9495814s: 5 minutes, 31 seconds, 949 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T11:14:53.237


Max rewards of (N, n) policy is -62234.60553818021  n is 7  N is 8
60
K : 4
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          332.125331s: 5 minutes, 32 seconds, 125 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T11:20:25.368


Max rewards of (N, n) policy is -65998.05487398429  n is 7  N is 9
60
K : 4
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         332.5332548s: 5 minutes, 32 seconds, 533 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T11:25:57.909


Max rewards of (N, n) policy is -69590.73174068832  n is 7  N is 9
60
K : 4
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         332.7152923s: 5 minutes, 32 seconds, 715 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T11:31:30.630


Max rewards of (N, n) policy is -73005.53684972365  n is 7  N is 9
60
K : 4
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         333.9895048s: 5 minutes, 33 seconds, 989 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T11:37:04.627


Max rewards of (N, n) policy is -76584.19966412835  n is 7  N is 9
60
K : 4
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         331.8918198s: 5 minutes, 31 seconds, 891 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T11:42:36.525


Max rewards of (N, n) policy is -80024.95492891222  n is 7  N is 9
60
K : 4
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         333.3290746s: 5 minutes, 33 seconds, 329 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T11:48:09.861


Max rewards of (N, n) policy is -87149.30774989323  n is 7  N is 9
60
K : 4
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         332.7048909s: 5 minutes, 32 seconds, 704 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T11:53:42.584


Max rewards of (N, n) policy is -92377.96381541967  n is 5  N is 10
60
K : 4
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         333.4971916s: 5 minutes, 33 seconds, 497 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T11:59:16.088


Max rewards of (N, n) policy is -96353.02560407305  n is 5  N is 10
60
K : 6
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         333.8099702s: 5 minutes, 33 seconds, 809 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T12:04:53.634


Max rewards of (N, n) policy is -47067.27558779857  n is 7  N is 8
60
K : 6
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         321.8265728s: 5 minutes, 21 seconds, 826 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T12:10:15.467


Max rewards of (N, n) policy is -50852.70986759571  n is 7  N is 8


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         295.7307801s: 4 minutes, 55 seconds, 730 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T12:15:11.204


60
K : 6
[0 -600 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -54626.74019849715  n is 7  N is 7
60
K : 6
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         295.5024527s: 4 minutes, 55 seconds, 502 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T12:20:06.712


Max rewards of (N, n) policy is -58381.90629478707  n is 7  N is 8
60
K : 6
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         295.2004622s: 4 minutes, 55 seconds, 200 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T12:25:01.930


Max rewards of (N, n) policy is -62159.500316358135  n is 7  N is 8
60
K : 6
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         295.1247523s: 4 minutes, 55 seconds, 124 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T12:29:57.072


Max rewards of (N, n) policy is -65709.09966332628  n is 7  N is 9
60
K : 6
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          296.091989s: 4 minutes, 56 seconds, 91 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T12:34:53.170


Max rewards of (N, n) policy is -69190.08571133783  n is 7  N is 9
60
K : 6
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         296.1244593s: 4 minutes, 56 seconds, 124 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T12:39:49.300


Max rewards of (N, n) policy is -72757.55773912703  n is 7  N is 9
60
K : 6
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         294.5376548s: 4 minutes, 54 seconds, 537 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T12:44:43.843


Max rewards of (N, n) policy is -76194.42758778606  n is 7  N is 9
60
K : 6
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         294.9159783s: 4 minutes, 54 seconds, 915 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T12:49:38.777


Max rewards of (N, n) policy is -79685.98076265218  n is 7  N is 9
60
K : 6
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         295.3072666s: 4 minutes, 55 seconds, 307 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T12:54:34.089


Max rewards of (N, n) policy is -85585.24445415128  n is 6  N is 10
60
K : 6
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         294.7647122s: 4 minutes, 54 seconds, 764 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T12:59:28.861


Max rewards of (N, n) policy is -89796.46926423826  n is 6  N is 10
60
K : 6
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         295.9495375s: 4 minutes, 55 seconds, 949 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T13:04:24.816


Max rewards of (N, n) policy is -93999.75630297515  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         297.4575069s: 4 minutes, 57 seconds, 457 milliseconds


60
K : 8
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T13:09:26.014


Max rewards of (N, n) policy is -47052.218950426686  n is 7  N is 8
60
K : 8
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         295.5395514s: 4 minutes, 55 seconds, 539 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T13:14:21.560


Max rewards of (N, n) policy is -50805.16787992858  n is 7  N is 8
60
K : 8
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         296.8037595s: 4 minutes, 56 seconds, 803 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T13:19:18.369


Max rewards of (N, n) policy is -54643.34768787501  n is 7  N is 7
60
K : 8
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         295.9514925s: 4 minutes, 55 seconds, 951 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T13:24:14.326


Max rewards of (N, n) policy is -58356.41386073494  n is 7  N is 7
60
K : 8
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         295.8376667s: 4 minutes, 55 seconds, 837 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T13:29:10.171


Max rewards of (N, n) policy is -62150.26172182089  n is 7  N is 7
60
K : 8
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         295.5598658s: 4 minutes, 55 seconds, 559 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T13:34:05.736


Max rewards of (N, n) policy is -65642.22036407086  n is 7  N is 9
60
K : 8
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          295.850876s: 4 minutes, 55 seconds, 850 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T13:39:01.604


Max rewards of (N, n) policy is -69165.23389724312  n is 7  N is 9


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         295.9922446s: 4 minutes, 55 seconds, 992 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T13:43:57.602


60
K : 8
[0 -1600 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -72677.70556592346  n is 7  N is 9
60
K : 8
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         295.6266916s: 4 minutes, 55 seconds, 626 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T13:48:53.234


Max rewards of (N, n) policy is -76201.31022470216  n is 7  N is 9
60
K : 8
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         296.0757285s: 4 minutes, 56 seconds, 75 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T13:53:49.317


Max rewards of (N, n) policy is -79606.32149842074  n is 7  N is 9
60
K : 8
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         296.3825011s: 4 minutes, 56 seconds, 382 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T13:58:45.705


Max rewards of (N, n) policy is -84815.21308184885  n is 6  N is 10
60
K : 8
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         296.3227708s: 4 minutes, 56 seconds, 322 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T14:03:42.033


Max rewards of (N, n) policy is -89103.22932765972  n is 6  N is 10
60
K : 8
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         296.4265474s: 4 minutes, 56 seconds, 426 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T14:08:38.465


Max rewards of (N, n) policy is -93280.37922913743  n is 6  N is 10
60
K : 10
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         295.6263233s: 4 minutes, 55 seconds, 626 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T14:13:37.346


Max rewards of (N, n) policy is -47063.93890478802  n is 7  N is 8
60
K : 10
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         296.0421787s: 4 minutes, 56 seconds, 42 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T14:18:33.406


Max rewards of (N, n) policy is -50820.98576657064  n is 7  N is 8
60
K : 10
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         295.4083192s: 4 minutes, 55 seconds, 408 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T14:23:28.820


Max rewards of (N, n) policy is -54646.75377507429  n is 7  N is 8
60
K : 

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         296.8192636s: 4 minutes, 56 seconds, 819 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T14:28:25.656


10
[0 -800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -58376.85356857796  n is 7  N is 8
60
K : 10
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         296.3859186s: 4 minutes, 56 seconds, 385 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T14:33:22.047


Max rewards of (N, n) policy is -62108.36722455506  n is 7  N is 9
60
K : 10
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         296.7326496s: 4 minutes, 56 seconds, 732 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T14:38:18.786


Max rewards of (N, n) policy is -65685.91467760879  n is 7  N is 9
60
K : 10
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         296.1021433s: 4 minutes, 56 seconds, 102 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T14:43:14.894


Max rewards of (N, n) policy is -69154.38330998682  n is 7  N is 9
60
K : 10
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         296.5252056s: 4 minutes, 56 seconds, 525 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T14:48:11.425


Max rewards of (N, n) policy is -72676.41211860398  n is 7  N is 9
60
K : 10
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         296.7469173s: 4 minutes, 56 seconds, 746 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T14:53:08.190


Max rewards of (N, n) policy is -76134.96409266371  n is 7  N is 9
60
K : 10
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         296.8062966s: 4 minutes, 56 seconds, 806 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T14:58:05.001


Max rewards of (N, n) policy is -79642.19534575519  n is 7  N is 9
60
K : 10
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         296.2895972s: 4 minutes, 56 seconds, 289 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T15:03:01.297


Max rewards of (N, n) policy is -84713.45205733807  n is 6  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         296.9981946s: 4 minutes, 56 seconds, 998 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T15:07:58.302


60
K : 10
[0 -2800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -88876.98442324827  n is 6  N is 10
60
K : 10
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         296.1594297s: 4 minutes, 56 seconds, 159 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T15:12:54.477


Max rewards of (N, n) policy is -93013.72394238069  n is 6  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         295.7005817s: 4 minutes, 55 seconds, 700 milliseconds


60
K : 12
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T15:17:53.536


Max rewards of (N, n) policy is -47066.846314455805  n is 7  N is 7
60
K : 12
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         296.5738487s: 4 minutes, 56 seconds, 573 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T15:22:50.116


Max rewards of (N, n) policy is -50827.32056286035  n is 7  N is 8
60
K : 12
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         296.0410221s: 4 minutes, 56 seconds, 41 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T15:27:46.163


Max rewards of (N, n) policy is -54601.03694158105  n is 7  N is 8


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m           296.27026s: 4 minutes, 56 seconds, 270 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T15:32:42.439


60
K : 12
[0 -800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -58384.794323570466  n is 7  N is 8
60
K : 12
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         296.3054597s: 4 minutes, 56 seconds, 305 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T15:37:38.751


Max rewards of (N, n) policy is -62135.609931298015  n is 7  N is 8
60
K : 12
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         297.0128074s: 4 minutes, 57 seconds, 12 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T15:42:35.770


Max rewards of (N, n) policy is -65648.90616799655  n is 7  N is 9
60
K : 12
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         295.9379641s: 4 minutes, 55 seconds, 937 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T15:47:31.714


Max rewards of (N, n) policy is -69143.65105448483  n is 7  N is 9
60
K : 12
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         296.7863144s: 4 minutes, 56 seconds, 786 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T15:52:28.516


Max rewards of (N, n) policy is -72574.52089241688  n is 7  N is 9
60
K : 12
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         296.3665732s: 4 minutes, 56 seconds, 366 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T15:57:24.888


Max rewards of (N, n) policy is -76165.81916096325  n is 7  N is 9
60
K : 12
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         296.3856019s: 4 minutes, 56 seconds, 385 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T16:02:21.292


Max rewards of (N, n) policy is -79652.98414018228  n is 7  N is 9
60
K : 12
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          296.883431s: 4 minutes, 56 seconds, 883 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T16:07:18.181


Max rewards of (N, n) policy is -84665.44517686631  n is 6  N is 10
60
K : 12
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         296.1317179s: 4 minutes, 56 seconds, 131 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T16:12:14.319


Max rewards of (N, n) policy is -88878.68082988671  n is 6  N is 10
60
K : 12
[0

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         297.0948394s: 4 minutes, 57 seconds, 94 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T16:17:11.419


 -3200 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -92972.65132658827  n is 6  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         297.1530581s: 4 minutes, 57 seconds, 153 milliseconds


60
K : 14
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T16:22:12.318


Max rewards of (N, n) policy is -47040.60030677999  n is 7  N is 7
60
K : 14
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         295.8900177s: 4 minutes, 55 seconds, 890 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T16:27:08.213


Max rewards of (N, n) policy is -50823.545315253636  n is 7  N is 8
60
K : 14
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         295.2340104s: 4 minutes, 55 seconds, 234 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T16:32:03.454


Max rewards of (N, n) policy is -54632.832507704916  n is 7  N is 7
60
K : 14
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         296.4318883s: 4 minutes, 56 seconds, 431 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T16:36:59.892


Max rewards of (N, n) policy is -58379.85448735714  n is 7  N is 7
60
K : 14
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         296.1172821s: 4 minutes, 56 seconds, 117 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T16:41:56.015


Max rewards of (N, n) policy is -62144.719336766444  n is 7  N is 7
60
K : 14
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         290.8397069s: 4 minutes, 50 seconds, 839 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T16:46:46.860


Max rewards of (N, n) policy is -65647.16608664293  n is 7  N is 9
60
K : 14
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         277.0072942s: 4 minutes, 37 seconds, 7 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T16:51:23.874


Max rewards of (N, n) policy is -69112.5144904243  n is 7  N is 9
60
K : 14
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         276.8570464s: 4 minutes, 36 seconds, 857 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T16:56:00.736


Max rewards of (N, n) policy is -72663.89004956566  n is 7  N is 9
60
K : 14
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         276.6524889s: 4 minutes, 36 seconds, 652 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T17:00:37.395


Max rewards of (N, n) policy is -76169.93515517715  n is 7  N is 9
60
K : 14
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         276.9013292s: 4 minutes, 36 seconds, 901 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T17:05:14.301


Max rewards of (N, n) policy is -79638.6703437546  n is 7  N is 9
60
K : 14
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         277.0936058s: 4 minutes, 37 seconds, 93 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T17:09:51.400


Max rewards of (N, n) policy is -84619.59422673371  n is 6  N is 10
60
K : 14
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         276.9037072s: 4 minutes, 36 seconds, 903 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T17:14:28.310


Max rewards of (N, n) policy is -88825.97292986682  n is 6  N is 10
60
K : 14
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         277.7068919s: 4 minutes, 37 seconds, 706 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T17:19:06.023


Max rewards of (N, n) policy is -93055.24430241756  n is 6  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         277.5494622s: 4 minutes, 37 seconds, 549 milliseconds


60
K : 16
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T17:23:46.787


Max rewards of (N, n) policy is -47064.41676733624  n is 7  N is 7
60
K : 16
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         278.0520889s: 4 minutes, 38 seconds, 52 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T17:28:24.845


Max rewards of (N, n) policy is -50861.81132477862  n is 7  N is 8
60
K : 16
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         278.0080677s: 4 minutes, 38 seconds, 8 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T17:33:02.860


Max rewards of (N, n) policy is -54614.57057236898  n is 7  N is 7
60
K : 16
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          278.971841s: 4 minutes, 38 seconds, 971 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T17:37:41.837


Max rewards of (N, n) policy is -58378.00306369893  n is 7  N is 7
60
K : 16
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         277.7800294s: 4 minutes, 37 seconds, 780 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T17:42:19.623


Max rewards of (N, n) policy is -62169.00460434742  n is 7  N is 8
60
K : 16
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         278.5518601s: 4 minutes, 38 seconds, 551 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T17:46:58.180


Max rewards of (N, n) policy is -65743.63172243454  n is 7  N is 9
60
K : 16
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         278.4683353s: 4 minutes, 38 seconds, 468 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T17:51:36.655


Max rewards of (N, n) policy is -69152.47627563596  n is 7  N is 9
60
K : 16
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         278.6873592s: 4 minutes, 38 seconds, 687 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T17:56:15.347


Max rewards of (N, n) policy is -72636.70831893575  n is 7  N is 9
60
K : 16
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         278.5813919s: 4 minutes, 38 seconds, 581 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T18:00:53.934


Max rewards of (N, n) policy is -76222.98070842895  n is 7  N is 9
60
K : 16
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         279.1963145s: 4 minutes, 39 seconds, 196 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T18:05:33.136


Max rewards of (N, n) policy is -79673.985214779  n is 7  N is 9
60
K : 16
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         279.2448025s: 4 minutes, 39 seconds, 244 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T18:10:12.388


Max rewards of (N, n) policy is -84659.60216718144  n is 6  N is 10
60
K : 16
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         279.1367798s: 4 minutes, 39 seconds, 136 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T18:14:51.541


Max rewards of (N, n) policy is -88863.29211856425  n is 6  N is 10
60
K : 16
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         278.9526616s: 4 minutes, 38 seconds, 952 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T18:19:30.498


Max rewards of (N, n) policy is -93062.01938429954  n is 6  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         279.1509033s: 4 minutes, 39 seconds, 150 milliseconds


60
K : 18
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T18:24:13.223


Max rewards of (N, n) policy is -47075.793306969616  n is 7  N is 8
60
K : 18
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         279.1339672s: 4 minutes, 39 seconds, 133 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T18:28:52.363


Max rewards of (N, n) policy is -50805.80455283313  n is 7  N is 8
60
K : 18
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         278.3276401s: 4 minutes, 38 seconds, 327 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T18:33:30.696


Max rewards of (N, n) policy is -54629.94259104011  n is 7  N is 7
60
K : 18
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         279.4135372s: 4 minutes, 39 seconds, 413 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T18:38:10.116


Max rewards of (N, n) policy is -58405.97452837808  n is 7  N is 8
60
K : 18
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         279.3412099s: 4 minutes, 39 seconds, 341 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T18:42:49.463


Max rewards of (N, n) policy is -62090.91795004665  n is 7  N is 8
60
K : 18
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         278.8848827s: 4 minutes, 38 seconds, 884 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T18:47:28.353


Max rewards of (N, n) policy is -65688.92868101096  n is 7  N is 9
60
K : 18
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          279.259249s: 4 minutes, 39 seconds, 259 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T18:52:07.618


Max rewards of (N, n) policy is -69086.52300292857  n is 7  N is 9
60
K : 18
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         278.9280685s: 4 minutes, 38 seconds, 928 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T18:56:46.552


Max rewards of (N, n) policy is -72693.18559491723  n is 7  N is 9
60
K : 18
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         279.1763513s: 4 minutes, 39 seconds, 176 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T19:01:25.734


Max rewards of (N, n) policy is -76200.62967966481  n is 7  N is 9
60
K : 18
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         279.3959389s: 4 minutes, 39 seconds, 395 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T19:06:05.147


Max rewards of (N, n) policy is -79661.31047575474  n is 7  N is 9
60
K : 18
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         279.6902301s: 4 minutes, 39 seconds, 690 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T19:10:44.854


Max rewards of (N, n) policy is -84570.68900529068  n is 6  N is 10
60
K : 18
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         279.4005915s: 4 minutes, 39 seconds, 400 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T19:15:24.259


Max rewards of (N, n) policy is -88834.39513590878  n is 6  N is 10
60
K : 18
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          279.563308s: 4 minutes, 39 seconds, 563 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T19:20:03.829


Max rewards of (N, n) policy is -93019.59941136325  n is 6  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         280.2844463s: 4 minutes, 40 seconds, 284 milliseconds


60
K : 20
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T19:24:47.328


Max rewards of (N, n) policy is -47040.305132517526  n is 7  N is 7
60
K : 20
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         279.9820376s: 4 minutes, 39 seconds, 982 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T19:29:27.316


Max rewards of (N, n) policy is -50825.122761811755  n is 7  N is 8
60
K : 20
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         280.5433067s: 4 minutes, 40 seconds, 543 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T19:34:07.865


Max rewards of (N, n) policy is -54592.53868270643  n is 7  N is 8
60
K : 20
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         280.1128788s: 4 minutes, 40 seconds, 112 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T19:38:47.994


Max rewards of (N, n) policy is -58383.07518389089  n is 7  N is 8
60
K : 20
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         279.7793562s: 4 minutes, 39 seconds, 779 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T19:43:27.779


Max rewards of (N, n) policy is -62132.31613415272  n is 7  N is 9
60
K : 20
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         280.0118501s: 4 minutes, 40 seconds, 11 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T19:48:07.796


Max rewards of (N, n) policy is -65674.42075905815  n is 7  N is 9
60
K : 20
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         280.5553414s: 4 minutes, 40 seconds, 555 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T19:52:48.358


Max rewards of (N, n) policy is -69109.42799341612  n is 7  N is 9
60
K : 20
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         280.7410229s: 4 minutes, 40 seconds, 741 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T19:57:29.104


Max rewards of (N, n) policy is -72641.0255414707  n is 7  N is 9
60
K : 20
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         280.6821187s: 4 minutes, 40 seconds, 682 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T20:02:09.792


Max rewards of (N, n) policy is -76153.04909860819  n is 7  N is 9
60
K : 20
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          280.986736s: 4 minutes, 40 seconds, 986 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T20:06:50.785


Max rewards of (N, n) policy is -79654.98984474296  n is 7  N is 9
60
K : 20
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         280.9515756s: 4 minutes, 40 seconds, 951 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T20:11:31.742


Max rewards of (N, n) policy is -84613.8654764131  n is 6  N is 10
60
K : 20
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         280.7262688s: 4 minutes, 40 seconds, 726 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T20:16:12.484


Max rewards of (N, n) policy is -88889.35343374335  n is 6  N is 10
60
K : 20
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         280.8384799s: 4 minutes, 40 seconds, 838 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T20:20:53.328


Max rewards of (N, n) policy is -92956.17090214742  n is 6  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         281.7406435s: 4 minutes, 41 seconds, 740 milliseconds


60
K : 22
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T20:25:38.273


Max rewards of (N, n) policy is -47062.931280995166  n is 7  N is 8
60
K : 22
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          281.418447s: 4 minutes, 41 seconds, 418 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T20:30:19.698


Max rewards of (N, n) policy is -50804.53243168115  n is 7  N is 7
60
K : 22
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         281.6046143s: 4 minutes, 41 seconds, 604 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T20:35:01.308


Max rewards of (N, n) policy is -54604.69777561495  n is 7  N is 8
60
K : 22
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         281.9104358s: 4 minutes, 41 seconds, 910 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T20:39:43.225


Max rewards of (N, n) policy is -58388.95437712798  n is 7  N is 8
60
K : 22
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         281.5947771s: 4 minutes, 41 seconds, 594 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T20:44:24.825


Max rewards of (N, n) policy is -62138.85933486115  n is 7  N is 9
60
K : 22
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         282.0299303s: 4 minutes, 42 seconds, 29 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T20:49:06.861


Max rewards of (N, n) policy is -65649.45555197909  n is 7  N is 9
60
K : 22
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         281.7833501s: 4 minutes, 41 seconds, 783 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T20:53:48.650


Max rewards of (N, n) policy is -69167.428290225  n is 7  N is 9
60
K : 22
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         281.8692356s: 4 minutes, 41 seconds, 869 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T20:58:30.525


Max rewards of (N, n) policy is -72627.56032046721  n is 7  N is 9
60
K : 22
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          282.141887s: 4 minutes, 42 seconds, 141 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T21:03:12.672


Max rewards of (N, n) policy is -76149.43548111792  n is 7  N is 9
60
K : 22
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          282.102129s: 4 minutes, 42 seconds, 102 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T21:07:54.780


Max rewards of (N, n) policy is -79636.9599930937  n is 7  N is 9
60
K : 22
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         282.0744198s: 4 minutes, 42 seconds, 74 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T21:12:36.872


Max rewards of (N, n) policy is -84614.89877207705  n is 6  N is 10
60
K : 22
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         282.0656296s: 4 minutes, 42 seconds, 65 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T21:17:18.954


Max rewards of (N, n) policy is -88799.16702442121  n is 6  N is 10
60
K : 22
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         282.3431664s: 4 minutes, 42 seconds, 343 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T21:22:01.302


Max rewards of (N, n) policy is -92969.33547100736  n is 6  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         282.6662308s: 4 minutes, 42 seconds, 666 milliseconds


60
K : 24
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T21:26:47.243


Max rewards of (N, n) policy is -47089.824719286305  n is 7  N is 7
60
K : 24
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         282.4000997s: 4 minutes, 42 seconds, 400 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T21:31:29.649


Max rewards of (N, n) policy is -50821.63641104391  n is 7  N is 7
60
K : 24
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         282.3962304s: 4 minutes, 42 seconds, 396 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T21:36:12.051


Max rewards of (N, n) policy is -54647.34505061697  n is 7  N is 7
60
K : 24
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         282.5714841s: 4 minutes, 42 seconds, 571 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T21:40:54.629


Max rewards of (N, n) policy is -58371.84715387652  n is 7  N is 8
60
K : 24
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         282.5983025s: 4 minutes, 42 seconds, 598 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T21:45:37.233


Max rewards of (N, n) policy is -62155.86771449391  n is 7  N is 8
60
K : 24
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         282.5238523s: 4 minutes, 42 seconds, 523 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T21:50:19.762


Max rewards of (N, n) policy is -65683.15390897031  n is 7  N is 9
60
K : 24
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         282.9723804s: 4 minutes, 42 seconds, 972 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T21:55:02.740


Max rewards of (N, n) policy is -69138.61198654502  n is 7  N is 9
60
K : 24
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         282.6466344s: 4 minutes, 42 seconds, 646 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T21:59:45.393


Max rewards of (N, n) policy is -72701.58199810085  n is 7  N is 9
60
K : 24
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         283.4176707s: 4 minutes, 43 seconds, 417 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T22:04:28.816


Max rewards of (N, n) policy is -76141.1540564584  n is 7  N is 9
60
K : 24
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         283.3371435s: 4 minutes, 43 seconds, 337 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T22:09:12.160


Max rewards of (N, n) policy is -79604.40283380069  n is 7  N is 9
60
K : 24
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         283.3269358s: 4 minutes, 43 seconds, 326 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T22:13:55.492


Max rewards of (N, n) policy is -84643.22492095553  n is 6  N is 10
60
K : 24
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         283.4455623s: 4 minutes, 43 seconds, 445 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T22:18:38.954


Max rewards of (N, n) policy is -88790.38600286373  n is 6  N is 10
60
K : 24
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          283.849618s: 4 minutes, 43 seconds, 849 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T22:23:22.809


Max rewards of (N, n) policy is -93003.59271728728  n is 6  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         283.6973116s: 4 minutes, 43 seconds, 697 milliseconds


60
K : 26
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T22:28:09.873


Max rewards of (N, n) policy is -47029.528445295706  n is 7  N is 7
60
K : 26
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         284.0024817s: 4 minutes, 44 seconds, 2 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T22:32:53.882


Max rewards of (N, n) policy is -50837.42677134537  n is 7  N is 8
60
K : 26
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         283.5169196s: 4 minutes, 43 seconds, 516 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T22:37:37.405


Max rewards of (N, n) policy is -54553.58894584595  n is 7  N is 8
60
K : 26
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         285.8309278s: 4 minutes, 45 seconds, 830 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T22:42:23.240


Max rewards of (N, n) policy is -58402.91598718923  n is 7  N is 7
60
K : 26
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         284.0308636s: 4 minutes, 44 seconds, 30 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T22:47:07.278


Max rewards of (N, n) policy is -62125.19540923813  n is 7  N is 9
60
K : 26
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         283.9267817s: 4 minutes, 43 seconds, 926 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T22:51:51.210


Max rewards of (N, n) policy is -65653.97512867584  n is 7  N is 9
60
K : 26
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         284.5289608s: 4 minutes, 44 seconds, 528 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T22:56:35.745


Max rewards of (N, n) policy is -69191.99103001888  n is 7  N is 9


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         284.0868493s: 4 minutes, 44 seconds, 86 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T23:01:19.849


60
K : 26
[0 -1600 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -72607.85579046873  n is 7  N is 9
60
K : 26
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         284.1945342s: 4 minutes, 44 seconds, 194 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T23:06:04.060


Max rewards of (N, n) policy is -76164.83810063908  n is 7  N is 9
60
K : 26
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         285.1368154s: 4 minutes, 45 seconds, 136 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T23:10:49.202


Max rewards of (N, n) policy is -79642.23637327133  n is 7  N is 9
60
K : 26
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         284.1957633s: 4 minutes, 44 seconds, 195 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T23:15:33.403


Max rewards of (N, n) policy is -84696.48683596775  n is 6  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         284.1094232s: 4 minutes, 44 seconds, 109 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T23:20:17.519


60
K : 26
[0 -2800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -88756.26752710016  n is 6  N is 10
60
K : 26
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         285.3974788s: 4 minutes, 45 seconds, 397 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T23:25:02.923


Max rewards of (N, n) policy is -92974.81356331088  n is 6  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         284.6997571s: 4 minutes, 44 seconds, 699 milliseconds


60
K : 28
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T23:29:50.744


Max rewards of (N, n) policy is -47059.69050441137  n is 7  N is 7
60
K : 28
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         284.8669782s: 4 minutes, 44 seconds, 866 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T23:34:35.617


Max rewards of (N, n) policy is -50826.29648683538  n is 7  N is 8
60
K : 28


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         285.7174039s: 4 minutes, 45 seconds, 717 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T23:39:21.340


[0 -600 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -54593.39396938396  n is 7  N is 8
60
K : 28
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         285.2769997s: 4 minutes, 45 seconds, 276 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T23:44:06.623


Max rewards of (N, n) policy is -58406.54784998157  n is 7  N is 8
60
K : 28
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         285.5118762s: 4 minutes, 45 seconds, 511 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T23:48:52.140


Max rewards of (N, n) policy is -62163.23667492523  n is 7  N is 9
60
K : 28
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         285.0126761s: 4 minutes, 45 seconds, 12 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T23:53:37.159


Max rewards of (N, n) policy is -65600.85947531219  n is 7  N is 9
60
K : 28
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         286.2807571s: 4 minutes, 46 seconds, 280 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-13T23:58:23.446


Max rewards of (N, n) policy is -69192.61443255613  n is 7  N is 9
60
K : 28
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         285.8265258s: 4 minutes, 45 seconds, 826 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T00:03:09.278


Max rewards of (N, n) policy is -72657.45836481923  n is 7  N is 9
60
K : 28
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         285.5297071s: 4 minutes, 45 seconds, 529 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T00:07:54.814


Max rewards of (N, n) policy is -76171.66035657004  n is 7  N is 9
60
K : 28
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         286.4821772s: 4 minutes, 46 seconds, 482 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T00:12:41.301


Max rewards of (N, n) policy is -79638.73164883807  n is 7  N is 9
60
K : 28
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         285.8613328s: 4 minutes, 45 seconds, 861 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T00:17:27.168


Max rewards of (N, n) policy is -84597.8903565529  n is 6  N is 10
60
K : 28
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         285.4620408s: 4 minutes, 45 seconds, 462 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T00:22:12.637


Max rewards of (N, n) policy is -88832.09830768805  n is 6  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         286.4095342s: 4 minutes, 46 seconds, 409 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T00:26:59.052


60
K : 28
[0 -3200 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -93081.71944965748  n is 6  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         286.2310477s: 4 minutes, 46 seconds, 231 milliseconds


60
K : 30
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T00:31:48.671


Max rewards of (N, n) policy is -47091.78142717083  n is 7  N is 8
60
K : 30
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         286.1621252s: 4 minutes, 46 seconds, 162 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T00:36:34.839


Max rewards of (N, n) policy is -50815.37037551805  n is 7  N is 7
60
K : 30
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         286.9039749s: 4 minutes, 46 seconds, 903 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T00:41:21.748


Max rewards of (N, n) policy is -54569.49361402825  n is 7  N is 8
60
K : 30
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         285.1774629s: 4 minutes, 45 seconds, 177 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T00:46:06.932


Max rewards of (N, n) policy is -58364.82772390647  n is 7  N is 7
60
K : 30
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         285.7664986s: 4 minutes, 45 seconds, 766 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T00:50:52.704


Max rewards of (N, n) policy is -62151.90370362177  n is 7  N is 7
60
K : 30
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         286.7335095s: 4 minutes, 46 seconds, 733 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T00:55:39.444


Max rewards of (N, n) policy is -65686.26130367292  n is 7  N is 9
60
K : 30
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         286.3160809s: 4 minutes, 46 seconds, 316 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T01:00:25.766


Max rewards of (N, n) policy is -69096.29423142075  n is 7  N is 9
60
K : 30
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         286.2302584s: 4 minutes, 46 seconds, 230 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T01:05:12.002


Max rewards of (N, n) policy is -72705.77125551426  n is 7  N is 9
60
K : 30
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         286.4277731s: 4 minutes, 46 seconds, 427 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T01:09:58.436


Max rewards of (N, n) policy is -76139.31403856378  n is 7  N is 9
60
K : 30
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         287.0003474s: 4 minutes, 47 seconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T01:14:45.442


Max rewards of (N, n) policy is -79610.08226186647  n is 7  N is 9
60
K : 30
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         286.2978743s: 4 minutes, 46 seconds, 297 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T01:19:31.747


Max rewards of (N, n) policy is -84568.79157637022  n is 6  N is 10
60
K : 30
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         286.1305998s: 4 minutes, 46 seconds, 130 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T01:24:17.894


Max rewards of (N, n) policy is -88878.93425833798  n is 6  N is 10
60
K : 30
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         287.1039218s: 4 minutes, 47 seconds, 103 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T01:29:05.003


Max rewards of (N, n) policy is -93033.00376649662  n is 6  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         286.5298811s: 4 minutes, 46 seconds, 529 milliseconds


50
K : 2
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T01:33:54.714


Max rewards of (N, n) policy is -42325.51754300238  n is 7  N is 7
50
K : 2
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         251.3643775s: 4 minutes, 11 seconds, 364 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T01:38:06.095


Max rewards of (N, n) policy is -46135.3768262899  n is 7  N is 8
50
K : 2
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         252.1356394s: 4 minutes, 12 seconds, 135 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T01:42:18.235


Max rewards of (N, n) policy is -49969.88141573602  n is 7  N is 8
50
K : 2
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         251.7119404s: 4 minutes, 11 seconds, 711 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T01:46:29.963


Max rewards of (N, n) policy is -53634.5142222858  n is 7  N is 8
50
K : 2
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         251.6673839s: 4 minutes, 11 seconds, 667 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T01:50:41.636


Max rewards of (N, n) policy is -57482.68446924057  n is 7  N is 8
50
K : 2
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         251.2208213s: 4 minutes, 11 seconds, 220 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T01:54:52.861


Max rewards of (N, n) policy is -61180.030961698736  n is 7  N is 8
50
K : 2
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         252.6820509s: 4 minutes, 12 seconds, 682 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T01:59:05.559


Max rewards of (N, n) policy is -64970.82787028985  n is 7  N is 8
50
K : 2
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         251.8982388s: 4 minutes, 11 seconds, 898 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T02:03:17.462


Max rewards of (N, n) policy is -68730.04339228883  n is 7  N is 7
50
K : 2
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         251.4678744s: 4 minutes, 11 seconds, 467 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T02:07:28.936


Max rewards of (N, n) policy is -72217.35074159219  n is 7  N is 9
50
K : 2
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         252.3142772s: 4 minutes, 12 seconds, 314 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T02:11:41.255


Max rewards of (N, n) policy is -75615.87106464611  n is 7  N is 9
50
K : 2
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         252.3319423s: 4 minutes, 12 seconds, 331 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T02:15:53.592


Max rewards of (N, n) policy is -82339.31438654836  n is 7  N is 9
50
K : 2
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          251.999331s: 4 minutes, 11 seconds, 999 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T02:20:05.607


Max rewards of (N, n) policy is -88123.19923670564  n is 5  N is 9
50
K : 2
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         251.9788024s: 4 minutes, 11 seconds, 978 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T02:24:17.591


Max rewards of (N, n) policy is -92361.6645329409  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         253.6509792s: 4 minutes, 13 seconds, 650 milliseconds


50
K : 4
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T02:28:34.493


Max rewards of (N, n) policy is -39894.839785249445  n is 7  N is 8
50
K : 4
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         252.3796371s: 4 minutes, 12 seconds, 379 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T02:32:46.878


Max rewards of (N, n) policy is -43645.5569623596  n is 7  N is 8


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         251.8750608s: 4 minutes, 11 seconds, 875 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T02:36:58.758


50
K : 4
[0 -600 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -47420.80595926339  n is 7  N is 7
50
K : 4
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         253.1151544s: 4 minutes, 13 seconds, 115 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T02:41:11.879


Max rewards of (N, n) policy is -51144.516022278185  n is 7  N is 8
50
K : 4
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         252.4547288s: 4 minutes, 12 seconds, 454 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T02:45:24.338


Max rewards of (N, n) policy is -54959.67176949922  n is 7  N is 8
50
K : 4
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         252.1455622s: 4 minutes, 12 seconds, 145 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T02:49:36.489


Max rewards of (N, n) policy is -58404.82116557772  n is 7  N is 9
50
K : 4
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         252.3421803s: 4 minutes, 12 seconds, 342 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T02:53:48.836


Max rewards of (N, n) policy is -61784.136182230664  n is 7  N is 9
50
K : 4
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         252.9926187s: 4 minutes, 12 seconds, 992 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T02:58:01.834


Max rewards of (N, n) policy is -65131.11691936802  n is 7  N is 9
50
K : 4
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         252.5977308s: 4 minutes, 12 seconds, 597 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T03:02:14.437


Max rewards of (N, n) policy is -68533.6413396647  n is 7  N is 9
50
K : 4
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         252.9192733s: 4 minutes, 12 seconds, 919 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T03:06:27.362


Max rewards of (N, n) policy is -71933.18874881892  n is 7  N is 9


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         252.8439815s: 4 minutes, 12 seconds, 843 milliseconds


50
K : 4
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T03:10:40.221


Max rewards of (N, n) policy is -77587.20489207272  n is 5  N is 10
50
K : 4
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         253.3247046s: 4 minutes, 13 seconds, 324 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T03:14:53.551


Max rewards of (N, n) policy is -81265.68299367065  n is 5  N is 10
50
K : 4
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         252.9723652s: 4 minutes, 12 seconds, 972 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T03:19:06.529


Max rewards of (N, n) policy is -85150.96189109216  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         253.0795147s: 4 minutes, 13 seconds, 79 milliseconds


50
K : 6
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T03:23:22.792


Max rewards of (N, n) policy is -39871.2163210086  n is 7  N is 7
50
K : 6
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         253.9232664s: 4 minutes, 13 seconds, 923 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T03:27:36.721


Max rewards of (N, n) policy is -43638.03858866535  n is 7  N is 8
50
K : 6
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          253.625508s: 4 minutes, 13 seconds, 625 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T03:31:50.351


Max rewards of (N, n) policy is -47393.31900628798  n is 7  N is 7
50
K : 6
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m           253.82796s: 4 minutes, 13 seconds, 827 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T03:36:04.185


Max rewards of (N, n) policy is -51141.65837108309  n is 7  N is 8
50
K : 6
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          253.110415s: 4 minutes, 13 seconds, 110 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T03:40:17.301


Max rewards of (N, n) policy is -54725.74026887552  n is 7  N is 9
50
K : 6
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         254.8900363s: 4 minutes, 14 seconds, 890 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T03:44:32.196


Max rewards of (N, n) policy is -58100.923393920384  n is 7  N is 9
50
K : 6
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         253.6153527s: 4 minutes, 13 seconds, 615 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T03:48:45.816


Max rewards of (N, n) policy is -61480.05580658891  n is 7  N is 9
50
K : 6
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         253.6433857s: 4 minutes, 13 seconds, 643 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T03:52:59.475


Max rewards of (N, n) policy is -64835.74378141785  n is 7  N is 9
50
K : 6
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         254.4006119s: 4 minutes, 14 seconds, 400 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T03:57:13.881


Max rewards of (N, n) policy is -68265.82381771748  n is 7  N is 9
50
K : 6
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         253.8988601s: 4 minutes, 13 seconds, 898 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T04:01:27.785


Max rewards of (N, n) policy is -71448.14900752736  n is 6  N is 10
50
K : 6
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         254.1277162s: 4 minutes, 14 seconds, 127 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T04:05:41.918


Max rewards of (N, n) policy is -75390.21178866315  n is 6  N is 10
50
K : 6
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         253.9437844s: 4 minutes, 13 seconds, 943 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T04:09:55.866


Max rewards of (N, n) policy is -79362.29566139032  n is 5  N is 10
50
K : 6
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         254.7478125s: 4 minutes, 14 seconds, 747 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T04:14:10.620


Max rewards of (N, n) policy is -83233.80925538635  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         254.3364107s: 4 minutes, 14 seconds, 336 milliseconds


50
K : 8
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T04:18:28.203


Max rewards of (N, n) policy is -39850.065192545546  n is 7  N is 7
50
K : 8
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.8034567s: 3 minutes, 57 seconds, 803 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T04:22:26.011


Max rewards of (N, n) policy is -43625.59763495609  n is 7  N is 8
50
K : 8
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.6458246s: 3 minutes, 57 seconds, 645 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T04:26:23.662


Max rewards of (N, n) policy is -47365.68738584659  n is 7  N is 8
50
K : 8
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.4440973s: 3 minutes, 58 seconds, 444 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T04:30:22.111


Max rewards of (N, n) policy is -51137.94455793545  n is 7  N is 8
50
K : 8
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.7333706s: 3 minutes, 57 seconds, 733 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T04:34:19.849


Max rewards of (N, n) policy is -54704.55681496396  n is 7  N is 9
50
K : 8
[0 -1200 -100 -1200

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.2618856s: 3 minutes, 58 seconds, 261 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T04:38:18.117


 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -58067.79157101678  n is 7  N is 9
50
K : 8
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.0302898s: 3 minutes, 58 seconds, 30 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T04:42:16.151


Max rewards of (N, n) policy is -61474.445913319636  n is 7  N is 9
50
K : 8
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.7336473s: 3 minutes, 57 seconds, 733 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T04:46:13.891


Max rewards of (N, n) policy is -64831.70092698637  n is 7  N is 9
50
K : 8
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.9518634s: 3 minutes, 57 seconds, 951 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T04:50:11.858


Max rewards of (N, n) policy is -68182.83052597726  n is 7  N is 9
50
K : 8
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.4457751s: 3 minutes, 57 seconds, 445 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T04:54:09.309


Max rewards of (N, n) policy is -70874.37655648119  n is 6  N is 10
50
K : 8
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         236.8674929s: 3 minutes, 56 seconds, 867 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T04:58:06.181


Max rewards of (N, n) policy is -74811.4079565003  n is 6  N is 10
50
K : 8
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.7945692s: 3 minutes, 57 seconds, 794 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T05:02:03.981


Max rewards of (N, n) policy is -78802.28425888029  n is 5  N is 10
50
K : 8
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.7981683s: 3 minutes, 57 seconds, 798 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T05:06:01.784


Max rewards of (N, n) policy is -82541.72676092232  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.3756945s: 3 minutes, 57 seconds, 375 milliseconds


50
K : 10
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T05:10:02.636


Max rewards of (N, n) policy is -39841.05748562963  n is 7  N is 8
50
K : 10
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.0743314s: 3 minutes, 58 seconds, 74 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T05:14:00.715


Max rewards of (N, n) policy is -43603.80906658408  n is 7  N is 7
50
K : 10
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.1897916s: 3 minutes, 58 seconds, 189 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T05:17:58.911


Max rewards of (N, n) policy is -47357.11587048817  n is 7  N is 8
50
K : 10
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.0873961s: 3 minutes, 57 seconds, 87 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T05:21:56.003


Max rewards of (N, n) policy is -51175.08090144811  n is 7  N is 8
50
K : 10
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.7407592s: 3 minutes, 57 seconds, 740 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T05:25:53.749


Max rewards of (N, n) policy is -54694.501243321785  n is 7  N is 9


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.9998603s: 3 minutes, 57 seconds, 999 milliseconds


50
K : 10
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T05:29:51.765


Max rewards of (N, n) policy is -58164.03693053528  n is 7  N is 9
50
K : 10
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.7236683s: 3 minutes, 57 seconds, 723 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T05:33:49.493


Max rewards of (N, n) policy is -61502.19278204198  n is 7  N is 9
50
K : 10
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.4357383s: 3 minutes, 57 seconds, 435 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T05:37:46.934


Max rewards of (N, n) policy is -64800.57593321858  n is 7  N is 9
50
K : 10
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.5004323s: 3 minutes, 57 seconds, 500 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T05:41:44.439


Max rewards of (N, n) policy is -68223.81722400931  n is 7  N is 9
50
K : 10
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.4078136s: 3 minutes, 58 seconds, 407 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T05:45:42.852


Max rewards of (N, n) policy is -70670.57821103749  n is 6  N is 10
50
K : 10
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.2552425s: 3 minutes, 57 seconds, 255 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T05:49:40.113


Max rewards of (N, n) policy is -74788.82575858393  n is 6  N is 10
50
K : 10
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.7462952s: 3 minutes, 57 seconds, 746 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T05:53:37.864


Max rewards of (N, n) policy is -78683.76335603186  n is 6  N is 10
50
K : 10
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.7557305s: 3 minutes, 57 seconds, 755 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T05:57:35.624


Max rewards of (N, n) policy is -82444.694818263  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.9292712s: 3 minutes, 57 seconds, 929 milliseconds


50
K : 12
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T06:01:37.032


Max rewards of (N, n) policy is -39873.353219335084  n is 7  N is 8
50
K : 12
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m           237.51736s: 3 minutes, 57 seconds, 517 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T06:05:34.555


Max rewards of (N, n) policy is -43618.616198352516  n is 7  N is 7
50
K : 12
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          237.689547s: 3 minutes, 57 seconds, 689 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T06:09:32.249


Max rewards of (N, n) policy is -47397.67729471708  n is 7  N is 8
50
K : 12
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.9377456s: 3 minutes, 57 seconds, 937 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T06:13:30.192


Max rewards of (N, n) policy is -51149.94653793583  n is 7  N is 8
50
K : 12
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.0559268s: 3 minutes, 57 seconds, 55 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T06:17:27.253


Max rewards of (N, n) policy is -54731.54002568688  n is 7  N is 9
50
K : 12
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.9730181s: 3 minutes, 57 seconds, 973 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T06:21:25.231


Max rewards of (N, n) policy is -58019.93974217887  n is 7  N is 9
50
K : 12
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          237.004179s: 3 minutes, 57 seconds, 4 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T06:25:22.240


Max rewards of (N, n) policy is -61425.039204739616  n is 7  N is 9


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.6293437s: 3 minutes, 57 seconds, 629 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T06:29:19.875


50
K : 12
[0 -1600 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -64806.28087064407  n is 7  N is 9
50
K : 12
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.7035025s: 3 minutes, 57 seconds, 703 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T06:33:17.583


Max rewards of (N, n) policy is -68230.00290263408  n is 7  N is 9
50
K : 12
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.5182878s: 3 minutes, 57 seconds, 518 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T06:37:15.106


Max rewards of (N, n) policy is -70703.22615471589  n is 6  N is 10
50
K : 12
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.4206504s: 3 minutes, 58 seconds, 420 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T06:41:13.532


Max rewards of (N, n) policy is -74660.41172897964  n is 6  N is 10
50
K : 12
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         236.8604094s: 3 minutes, 56 seconds, 860 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T06:45:10.397


Max rewards of (N, n) policy is -78592.48009992258  n is 5  N is 10
50
K : 12
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.2359851s: 3 minutes, 57 seconds, 235 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T06:49:07.638


Max rewards of (N, n) policy is -82415.16456884047  n is 5  N is 10
50
K : 14
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.8063358s: 3 minutes, 57 seconds, 806 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T06:53:08.805


Max rewards of (N, n) policy is -39828.22601674146  n is 7  N is 7
50
K : 14
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.0178211s: 3 minutes, 57 seconds, 17 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T06:57:05.838


Max rewards of (N, n) policy is -43631.74800753762  n is 7  N is 7
50
K : 14
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.7058266s: 3 minutes, 57 seconds, 705 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T07:01:03.548


Max rewards of (N, n) policy is -47412.0137638866  n is 7  N is 7
50
K : 14
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         236.8398367s: 3 minutes, 56 seconds, 839 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T07:05:00.394


Max rewards of (N, n) policy is -51187.77006874844  n is 7  N is 8
50
K : 14
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.5851601s: 3 minutes, 57 seconds, 585 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T07:08:57.983


Max rewards of (N, n) policy is -54643.41389697844  n is 7  N is 9
50
K : 14
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.3223558s: 3 minutes, 57 seconds, 322 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T07:12:55.322


Max rewards of (N, n) policy is -58026.65682914566  n is 7  N is 9


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.1252478s: 3 minutes, 57 seconds, 125 milliseconds


50
K : 14
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T07:16:52.463


Max rewards of (N, n) policy is -61431.76008427309  n is 7  N is 9
50
K : 14
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.3008152s: 3 minutes, 57 seconds, 300 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T07:20:49.768


Max rewards of (N, n) policy is -64823.35659809338  n is 7  N is 9
50
K : 14
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.3815274s: 3 minutes, 57 seconds, 381 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T07:24:47.155


Max rewards of (N, n) policy is -68193.75314273855  n is 7  N is 9
50
K : 14
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         236.9283175s: 3 minutes, 56 seconds, 928 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T07:28:44.099


Max rewards of (N, n) policy is -70744.79263587957  n is 6  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         250.9262927s: 4 minutes, 10 seconds, 926 milliseconds


50
K : 14
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T07:32:55.041


Max rewards of (N, n) policy is -74642.41486797229  n is 6  N is 10
50
K : 14
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         345.1645219s: 5 minutes, 45 seconds, 164 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T07:38:40.213


Max rewards of (N, n) policy is -78604.60822605663  n is 5  N is 10
50
K : 14
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         345.4663805s: 5 minutes, 45 seconds, 466 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T07:44:25.686


Max rewards of (N, n) policy is -82456.31167186325  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         342.8800524s: 5 minutes, 42 seconds, 880 milliseconds


50
K : 16
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T07:50:11.793


Max rewards of (N, n) policy is -39867.30557853431  n is 7  N is 8
50
K : 16
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         323.8604496s: 5 minutes, 23 seconds, 860 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T07:55:35.671


Max rewards of (N, n) policy is -43625.19404219421  n is 7  N is 8
50
K : 16
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         322.2204997s: 5 minutes, 22 seconds, 220 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T08:00:57.908


Max rewards of (N, n) policy is -47398.374357727196  n is 7  N is 8
50
K : 16
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         322.7977298s: 5 minutes, 22 seconds, 797 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T08:06:20.712


Max rewards of (N, n) policy is -51151.97062208364  n is 7  N is 8
50
K : 16
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         323.9063065s: 5 minutes, 23 seconds, 906 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T08:11:44.626


Max rewards of (N, n) policy is -54721.97767067927  n is 7  N is 9
50
K : 16
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         323.0466491s: 5 minutes, 23 seconds, 46 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T08:17:07.678


Max rewards of (N, n) policy is -58047.46310601835  n is 7  N is 9
50
K : 16
[0 -1400 -100 -1200 -2000]


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         321.9045668s: 5 minutes, 21 seconds, 904 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T08:22:29.601


Finding best nN policy...
Max rewards of (N, n) policy is -61434.88265877802  n is 7  N is 9
50
K : 16
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         322.2338401s: 5 minutes, 22 seconds, 233 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T08:27:51.841


Max rewards of (N, n) policy is -64785.59716456994  n is 7  N is 9
50
K : 16
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         322.6955593s: 5 minutes, 22 seconds, 695 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T08:33:14.543


Max rewards of (N, n) policy is -68205.28160450619  n is 7  N is 9
50
K : 16
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         322.4510617s: 5 minutes, 22 seconds, 451 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T08:38:37.001


Max rewards of (N, n) policy is -70692.434087274  n is 6  N is 10
50
K : 16
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         323.2095393s: 5 minutes, 23 seconds, 209 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T08:44:00.228


Max rewards of (N, n) policy is -74715.11351404453  n is 6  N is 10
50
K : 16
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         323.0298653s: 5 minutes, 23 seconds, 29 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T08:49:23.276


Max rewards of (N, n) policy is -78671.83813765425  n is 6  N is 10
50
K : 16
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         322.4810054s: 5 minutes, 22 seconds, 481 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T08:54:45.763


Max rewards of (N, n) policy is -82430.48154534478  n is 5  N is 10
50
K : 18
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          322.391698s: 5 minutes, 22 seconds, 391 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T09:00:11.471


Max rewards of (N, n) policy is -39870.19096841056  n is 7  N is 8
50
K : 18
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         322.3448169s: 5 minutes, 22 seconds, 344 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T09:05:33.833


Max rewards of (N, n) policy is -43644.5580295923  n is 7  N is 8
50
K : 18
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          321.933197s: 5 minutes, 21 seconds, 933 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T09:10:55.784


Max rewards of (N, n) policy is -47381.598661018426  n is 7  N is 8
50
K : 18
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         325.8152848s: 5 minutes, 25 seconds, 815 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T09:16:21.606


Max rewards of (N, n) policy is -51154.4027390796  n is 7  N is 8
50
K : 18
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         319.3861586s: 5 minutes, 19 seconds, 386 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T09:21:40.999


Max rewards of (N, n) policy is -54702.50539729352  n is 7  N is 9
50
K : 18
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         322.1170283s: 5 minutes, 22 seconds, 117 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T09:27:03.122


Max rewards of (N, n) policy is -58087.64719891305  n is 7  N is 9
50
K : 18
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         323.1083947s: 5 minutes, 23 seconds, 108 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T09:32:26.237


Max rewards of (N, n) policy is -61449.00353628522  n is 7  N is 9


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          321.318322s: 5 minutes, 21 seconds, 318 milliseconds


50
K : 18
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T09:37:47.573


Max rewards of (N, n) policy is -64813.272952543295  n is 7  N is 9
50
K : 18
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         322.9057925s: 5 minutes, 22 seconds, 905 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T09:43:10.484


Max rewards of (N, n) policy is -68198.54031805843  n is 7  N is 9
50
K : 18
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         319.1206119s: 5 minutes, 19 seconds, 120 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T09:48:29.612


Max rewards of (N, n) policy is -70710.84069359636  n is 6  N is 10
50
K : 18
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         317.6016435s: 5 minutes, 17 seconds, 601 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T09:53:47.231


Max rewards of (N, n) policy is -74669.05062927605  n is 6  N is 10
50
K : 18
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         321.9546375s: 5 minutes, 21 seconds, 954 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T09:59:09.192


Max rewards of (N, n) policy is -78601.95219035463  n is 5  N is 10
50
K : 18
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         319.8125291s: 5 minutes, 19 seconds, 812 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T10:04:29.011


Max rewards of (N, n) policy is -82410.22437071991  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         299.0729128s: 4 minutes, 59 seconds, 72 milliseconds


50
K : 20
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T10:09:31.383


Max rewards of (N, n) policy is -39828.42958479724  n is 7  N is 8
50
K : 20
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         281.9260086s: 4 minutes, 41 seconds, 926 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T10:14:13.325


Max rewards of (N, n) policy is -43635.1983937969  n is 7  N is 7
50
K : 20
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         280.5112212s: 4 minutes, 40 seconds, 511 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T10:18:53.841


Max rewards of (N, n) policy is -47375.82804216827  n is 7  N is 8
50
K : 20
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         281.5774603s: 4 minutes, 41 seconds, 577 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T10:23:35.436


Max rewards of (N, n) policy is -51144.84900724419  n is 7  N is 8
50
K : 20
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         281.5362059s: 4 minutes, 41 seconds, 536 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T10:28:16.977


Max rewards of (N, n) policy is -54673.600490782555  n is 7  N is 9
50
K : 20
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         282.1219928s: 4 minutes, 42 seconds, 121 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T10:32:59.106


Max rewards of (N, n) policy is -58072.41540862097  n is 7  N is 9
50
K : 20
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         281.0740381s: 4 minutes, 41 seconds, 74 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T10:37:40.186


Max rewards of (N, n) policy is -61444.968969166875  n is 7  N is 9
50
K : 20
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          281.178852s: 4 minutes, 41 seconds, 178 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T10:42:21.381


Max rewards of (N, n) policy is -64836.9455242205  n is 7  N is 9
50
K : 20
[0 -1800 -100 -1200 -2000]
Finding best nN policy...

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          281.779308s: 4 minutes, 41 seconds, 779 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T10:47:03.167



Max rewards of (N, n) policy is -68235.62808194115  n is 7  N is 9
50
K : 20
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         281.1885048s: 4 minutes, 41 seconds, 188 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T10:51:44.361


Max rewards of (N, n) policy is -70728.27618291185  n is 6  N is 10
50
K : 20
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         280.6605144s: 4 minutes, 40 seconds, 660 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T10:56:25.038


Max rewards of (N, n) policy is -74733.74832068403  n is 6  N is 10
50
K : 20
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         280.5611622s: 4 minutes, 40 seconds, 561 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T11:01:05.605


Max rewards of (N, n) policy is -78604.21594383361  n is 5  N is 10
50
K : 20
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         282.7680976s: 4 minutes, 42 seconds, 768 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T11:05:48.379


Max rewards of (N, n) policy is -82458.32364935018  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         281.2348673s: 4 minutes, 41 seconds, 234 milliseconds


50
K : 22
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T11:10:33.211


Max rewards of (N, n) policy is -39858.15269251125  n is 7  N is 7
50
K : 22
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          279.222525s: 4 minutes, 39 seconds, 222 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T11:15:12.439


Max rewards of (N, n) policy is -43594.23769188137  n is 7  N is 7
50
K : 22
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         279.6484867s: 4 minutes, 39 seconds, 648 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T11:19:52.094


Max rewards of (N, n) policy is -47401.987714720424  n is 7  N is 8
50
K : 22
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         279.3121947s: 4 minutes, 39 seconds, 312 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T11:24:31.412


Max rewards of (N, n) policy is -51141.42482038358  n is 7  N is 8
50
K : 22
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         280.3649252s: 4 minutes, 40 seconds, 364 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T11:29:11.793


Max rewards of (N, n) policy is -54675.51961737089  n is 7  N is 9
50
K : 22
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         280.6069257s: 4 minutes, 40 seconds, 606 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T11:33:52.405


Max rewards of (N, n) policy is -58074.29674782599  n is 7  N is 9
50
K : 22
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         280.1911553s: 4 minutes, 40 seconds, 191 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T11:38:32.603


Max rewards of (N, n) policy is -61435.09916222  n is 7  N is 9
50
K : 22
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         279.1935585s: 4 minutes, 39 seconds, 193 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T11:43:11.801


Max rewards of (N, n) policy is -64800.05268364415  n is 7  N is 9
50
K : 22
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         280.6466796s: 4 minutes, 40 seconds, 646 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T11:47:52.454


Max rewards of (N, n) policy is -68242.60880539176  n is 7  N is 9


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         280.9716733s: 4 minutes, 40 seconds, 971 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T11:52:33.431


50
K : 22
[0 -2000 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -70750.35989784198  n is 6  N is 10
50
K : 22
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         279.8622657s: 4 minutes, 39 seconds, 862 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T11:57:13.300


Max rewards of (N, n) policy is -74618.45805442684  n is 6  N is 10
50
K : 22
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         280.0718342s: 4 minutes, 40 seconds, 71 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T12:01:53.378


Max rewards of (N, n) policy is -78671.84655974223  n is 6  N is 10
50
K : 22
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         279.7642233s: 4 minutes, 39 seconds, 764 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T12:06:33.147


Max rewards of (N, n) policy is -82420.13297348647  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         280.9869998s: 4 minutes, 40 seconds, 986 milliseconds


50
K : 24
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T12:11:17.400


Max rewards of (N, n) policy is -39849.138588724396  n is 7  N is 8
50
K : 24
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         280.5590116s: 4 minutes, 40 seconds, 559 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T12:15:57.975


Max rewards of (N, n) policy is -43587.68965432371  n is 7  N is 8
50
K : 24
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          281.412275s: 4 minutes, 41 seconds, 412 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T12:20:39.393


Max rewards of (N, n) policy is -47396.22910074707  n is 7  N is 8
50
K : 24
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         281.0748172s: 4 minutes, 41 seconds, 74 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T12:25:20.474


Max rewards of (N, n) policy is -51150.34524651489  n is 7  N is 8
50
K : 24
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         281.9265986s: 4 minutes, 41 seconds, 926 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T12:30:02.406


Max rewards of (N, n) policy is -54708.72714540572  n is 7  N is 9
50
K : 24
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         279.9198028s: 4 minutes, 39 seconds, 919 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T12:34:42.332


Max rewards of (N, n) policy is -58055.807480307  n is 7  N is 9
50
K : 24
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         280.7022266s: 4 minutes, 40 seconds, 702 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T12:39:23.040


Max rewards of (N, n) policy is -61443.661979443554  n is 7  N is 9
50
K : 24
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         280.1447119s: 4 minutes, 40 seconds, 144 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T12:44:03.201


Max rewards of (N, n) policy is -64817.82594460092  n is 7  N is 9
50
K : 24
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         280.0456687s: 4 minutes, 40 seconds, 45 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T12:48:43.252


Max rewards of (N, n) policy is -68248.58317918115  n is 7  N is 9


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          280.326468s: 4 minutes, 40 seconds, 326 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T12:53:23.585


50
K : 24
[0 -2000 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -70756.0468957696  n is 6  N is 10
50
K : 24
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          280.233563s: 4 minutes, 40 seconds, 233 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T12:58:03.824


Max rewards of (N, n) policy is -74694.2564650951  n is 6  N is 10
50
K : 24
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         280.6063779s: 4 minutes, 40 seconds, 606 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T13:02:44.437


Max rewards of (N, n) policy is -78677.85983312933  n is 6  N is 10
50
K : 24
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         279.6447042s: 4 minutes, 39 seconds, 644 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T13:07:24.087


Max rewards of (N, n) policy is -82460.6175576423  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         280.3235078s: 4 minutes, 40 seconds, 323 milliseconds


40
K : 2
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T13:12:08.088


Max rewards of (N, n) policy is -34344.266671314566  n is 7  N is 8
40
K : 2
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.1618286s: 3 minutes, 59 seconds, 161 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T13:16:07.254


Max rewards of (N, n) policy is -38092.01949331392  n is 7  N is 7
40
K : 2
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         240.4988905s: 4 minutes, 498 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T13:20:07.759


Max rewards of (N, n) policy is -41853.15824574214  n is 7  N is 7
40
K : 2
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         240.2407705s: 4 minutes, 240 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T13:24:08.004


Max rewards of (N, n) policy is -45622.758242970755  n is 7  N is 8


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.7422461s: 3 minutes, 59 seconds, 742 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T13:28:07.751


40
K : 2
[0 -1000 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -49417.099649686425  n is 7  N is 8
40
K : 2
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         241.4989795s: 4 minutes, 1 second, 498 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T13:32:09.256


Max rewards of (N, n) policy is -53146.871071072215  n is 7  N is 8
40
K : 2
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         240.3867627s: 4 minutes, 386 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T13:36:09.648


Max rewards of (N, n) policy is -56823.978239096294  n is 7  N is 8
40
K : 2
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         243.0029654s: 4 minutes, 3 seconds, 2 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T13:40:12.656


Max rewards of (N, n) policy is -60004.734068185804  n is 7  N is 9


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         240.6599008s: 4 minutes, 659 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T13:44:13.321


40
K : 2
[0 -1800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -62994.55637487618  n is 6  N is 9
40
K : 2
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          239.392608s: 3 minutes, 59 seconds, 392 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T13:48:12.718


Max rewards of (N, n) policy is -65749.83111300519  n is 5  N is 9
40
K : 2
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          240.436857s: 4 minutes, 436 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T13:52:13.160


Max rewards of (N, n) policy is -70786.67358998567  n is 5  N is 9
40
K : 2
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          238.927485s: 3 minutes, 58 seconds, 927 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T13:56:12.092


Max rewards of (N, n) policy is -75887.3923587014  n is 5  N is 9
40
K : 2
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.9487346s: 3 minutes, 58 seconds, 948 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T14:00:11.057


Max rewards of (N, n) policy is -80105.66639469902  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         241.2458909s: 4 minutes, 1 second, 245 milliseconds


40
K : 4
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T14:04:15.628


Max rewards of (N, n) policy is -32640.39877195029  n is 7  N is 8
40
K : 4
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.7608906s: 3 minutes, 59 seconds, 760 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T14:08:15.405


Max rewards of (N, n) policy is -36400.59060827333  n is 7  N is 7
40
K : 4
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         240.0848781s: 4 minutes, 84 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T14:12:15.495


Max rewards of (N, n) policy is -40141.32388750361  n is 7  N is 8
40
K : 4
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.1809298s: 3 minutes, 59 seconds, 180 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T14:16:14.681


Max rewards of (N, n) policy is -43945.29623551352  n is 7  N is 8
40
K : 4
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          240.553076s: 4 minutes, 553 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T14:20:15.239


Max rewards of (N, n) policy is -47174.088822115686  n is 7  N is 9
40
K : 4
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.7670808s: 3 minutes, 59 seconds, 767 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T14:24:15.011


Max rewards of (N, n) policy is -50374.216344598426  n is 7  N is 9
40
K : 4
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.3511652s: 3 minutes, 59 seconds, 351 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T14:28:14.367


Max rewards of (N, n) policy is -53560.54279953064  n is 7  N is 9
40
K : 4
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.5454668s: 3 minutes, 58 seconds, 545 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T14:32:12.917


Max rewards of (N, n) policy is -56776.305522032824  n is 7  N is 9
40
K : 4
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          239.790453s: 3 minutes, 59 seconds, 790 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T14:36:12.713


Max rewards of (N, n) policy is -59775.93403129381  n is 6  N is 9


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         241.4267027s: 4 minutes, 1 second, 426 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T14:40:14.156


40
K : 4
[0 -2000 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -62462.152756664014  n is 5  N is 9
40
K : 4
[

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.7675775s: 3 minutes, 58 seconds, 767 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T14:44:12.928


0 -2400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -66373.28220775063  n is 5  N is 10
40
K : 4
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.6620813s: 3 minutes, 59 seconds, 662 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T14:48:12.606


Max rewards of (N, n) policy is -69986.2629621582  n is 5  N is 10
40
K : 4
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.8575377s: 3 minutes, 58 seconds, 857 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T14:52:11.480


Max rewards of (N, n) policy is -73532.13265682902  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.6801092s: 3 minutes, 59 seconds, 680 milliseconds


40
K : 6
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T14:56:14.417


Max rewards of (N, n) policy is -32652.352618985868  n is 7  N is 8
40
K : 6
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         240.1508738s: 4 minutes, 150 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T15:00:14.573


Max rewards of (N, n) policy is -36423.58736262756  n is 7  N is 7
40
K : 6
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         240.0416716s: 4 minutes, 41 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T15:04:14.620


Max rewards of (N, n) policy is -40165.61406829147  n is 7  N is 8
40
K : 6
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.0310124s: 3 minutes, 59 seconds, 31 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T15:08:13.656


Max rewards of (N, n) policy is -43699.05451938784  n is 7  N is 9
40
K : 6
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         240.5439106s: 4 minutes, 543 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T15:12:14.204


Max rewards of (N, n) policy is -46952.65762992362  n is 7  N is 9
40
K : 6
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.3774357s: 3 minutes, 58 seconds, 377 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T15:16:12.587


Max rewards of (N, n) policy is -50168.381322506946  n is 7  N is 9
40
K : 6
[0

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         240.6343073s: 4 minutes, 634 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T15:20:13.227


 -1400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -53347.640790849415  n is 7  N is 9
40
K : 6
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          239.269531s: 3 minutes, 59 seconds, 269 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T15:24:12.501


Max rewards of (N, n) policy is -56557.251321776916  n is 7  N is 9
40
K : 6
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         240.2832023s: 4 minutes, 283 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T15:28:12.790


Max rewards of (N, n) policy is -59315.98213882257  n is 5  N is 10
40
K : 6
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         240.5685891s: 4 minutes, 568 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T15:32:13.362


Max rewards of (N, n) policy is -61192.46094675407  n is 5  N is 10
40
K : 6
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.7995201s: 3 minutes, 58 seconds, 799 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T15:36:12.178


Max rewards of (N, n) policy is -64705.77472141247  n is 5  N is 10
40
K : 6
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         241.1532339s: 4 minutes, 1 second, 153 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T15:40:13.336


Max rewards of (N, n) policy is -68269.67748400205  n is 5  N is 10
40
K : 6
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.0577027s: 3 minutes, 59 seconds, 57 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T15:44:12.409


Max rewards of (N, n) policy is -71788.03902055256  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.4137388s: 3 minutes, 59 seconds, 413 milliseconds


40
K : 8
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T15:48:14.946


Max rewards of (N, n) policy is -32630.723950367003  n is 7  N is 7
40
K : 8
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.0612755s: 3 minutes, 59 seconds, 61 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T15:52:14.012


Max rewards of (N, n) policy is -36348.81579813722  n is 7  N is 8
40
K : 8
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         240.6134961s: 4 minutes, 613 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T15:56:14.630


Max rewards of (N, n) policy is -40158.36990203327  n is 7  N is 8
40
K : 8
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         240.1491006s: 4 minutes, 149 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T16:00:14.784


Max rewards of (N, n) policy is -43713.132370567655  n is 7  N is 9
40
K : 8
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          239.774083s: 3 minutes, 59 seconds, 774 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T16:04:14.563


Max rewards of (N, n) policy is -46912.53176532005  n is 7  N is 9
40
K : 8
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         240.0345384s: 4 minutes, 34 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T16:08:14.614


Max rewards of (N, n) policy is -50114.54920049567  n is 7  N is 9


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.7762172s: 3 minutes, 58 seconds, 776 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T16:12:13.406


40
K : 8
[0 -1400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -53276.668112649895  n is 7  N is 9
40
K : 8
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.7022552s: 3 minutes, 59 seconds, 702 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T16:16:13.112


Max rewards of (N, n) policy is -56481.85448769758  n is 7  N is 9
40
K : 8
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.9500477s: 3 minutes, 58 seconds, 950 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T16:20:12.078


Max rewards of (N, n) policy is -58898.03666093099  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.2365088s: 3 minutes, 58 seconds, 236 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T16:24:10.320


40
K : 8
[0 -2000 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -60790.79754654652  n is 5  N is 10
40
K : 8
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.9619277s: 3 minutes, 58 seconds, 961 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T16:28:09.286


Max rewards of (N, n) policy is -64269.934618358064  n is 5  N is 10
40
K : 8
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         240.0527458s: 4 minutes, 52 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T16:32:09.344


Max rewards of (N, n) policy is -67908.73972986096  n is 5  N is 10
40
K : 8
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         240.9143158s: 4 minutes, 914 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T16:36:10.263


Max rewards of (N, n) policy is -71406.5363916605  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.1296996s: 3 minutes, 59 seconds, 129 milliseconds


40
K : 10
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T16:40:12.568


Max rewards of (N, n) policy is -32626.569109503947  n is 7  N is 8
40
K : 10
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         240.5592477s: 4 minutes, 559 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T16:44:13.143


Max rewards of (N, n) policy is -36412.1424226032  n is 7  N is 8
40
K : 10
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.0952003s: 3 minutes, 59 seconds, 95 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T16:48:12.242


Max rewards of (N, n) policy is -40159.992328289474  n is 7  N is 8
40
K : 10
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.8503059s: 3 minutes, 59 seconds, 850 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T16:52:12.098


Max rewards of (N, n) policy is -43688.98527391261  n is 7  N is 9
40
K : 10
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.3276677s: 3 minutes, 57 seconds, 327 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T16:56:09.431


Max rewards of (N, n) policy is -46888.156464706946  n is 7  N is 9
40
K : 10
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.7812886s: 3 minutes, 59 seconds, 781 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T17:00:09.218


Max rewards of (N, n) policy is -50111.72825897465  n is 7  N is 9
40
K : 10
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.6411191s: 3 minutes, 59 seconds, 641 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T17:04:08.863


Max rewards of (N, n) policy is -53323.203638319705  n is 7  N is 9
40
K : 10
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         241.0177797s: 4 minutes, 1 second, 17 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T17:08:09.886


Max rewards of (N, n) policy is -56490.87438156021  n is 7  N is 9
40
K : 10
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.7032763s: 3 minutes, 58 seconds, 703 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T17:12:08.595


Max rewards of (N, n) policy is -58839.94884089659  n is 6  N is 10
40
K : 10
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.8862109s: 3 minutes, 58 seconds, 886 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T17:16:07.485


Max rewards of (N, n) policy is -60667.14468051331  n is 5  N is 10
40
K : 10
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          239.194282s: 3 minutes, 59 seconds, 194 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T17:20:06.696


Max rewards of (N, n) policy is -64251.91375556495  n is 5  N is 10
40
K : 10
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.9791271s: 3 minutes, 58 seconds, 979 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T17:24:05.679


Max rewards of (N, n) policy is -67799.80267658774  n is 5  N is 10
40
K : 10
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.6210746s: 3 minutes, 59 seconds, 621 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T17:28:05.306


Max rewards of (N, n) policy is -71353.8142576428  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.5304147s: 3 minutes, 59 seconds, 530 milliseconds


40
K : 12
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T17:32:08.149


Max rewards of (N, n) policy is -32621.70638008634  n is 7  N is 8
40
K : 12
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.3473054s: 3 minutes, 58 seconds, 347 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T17:36:06.501


Max rewards of (N, n) policy is -36386.80379687224  n is 7  N is 8
40
K : 12
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.7656218s: 3 minutes, 59 seconds, 765 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T17:40:06.283


Max rewards of (N, n) policy is -40137.169892317426  n is 7  N is 8
40
K : 12
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.9724419s: 3 minutes, 57 seconds, 972 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T17:44:04.260


Max rewards of (N, n) policy is -43696.121108448555  n is 7  N is 9
40
K : 12
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.6144755s: 3 minutes, 59 seconds, 614 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T17:48:03.880


Max rewards of (N, n) policy is -46967.390931042224  n is 7  N is 9
40
K : 12
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          238.156443s: 3 minutes, 58 seconds, 156 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T17:52:02.041


Max rewards of (N, n) policy is -50142.56783746375  n is 7  N is 9
40
K : 12
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.1165808s: 3 minutes, 58 seconds, 116 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T17:56:00.162


Max rewards of (N, n) policy is -53307.25737185817  n is 7  N is 9
40
K : 12
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.8316611s: 3 minutes, 59 seconds, 831 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T17:59:59.999


Max rewards of (N, n) policy is -56573.535215575954  n is 7  N is 9
40
K : 12
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.8562323s: 3 minutes, 58 seconds, 856 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T18:03:58.872


Max rewards of (N, n) policy is -58778.188522311175  n is 6  N is 10
40
K : 12
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.6556572s: 3 minutes, 58 seconds, 655 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T18:07:57.532


Max rewards of (N, n) policy is -60621.7477628823  n is 5  N is 10
40
K : 12
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.8194388s: 3 minutes, 58 seconds, 819 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T18:11:56.356


Max rewards of (N, n) policy is -64210.540670286086  n is 5  N is 10
40
K : 12
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          237.583951s: 3 minutes, 57 seconds, 583 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T18:15:53.957


Max rewards of (N, n) policy is -67814.62074939079  n is 5  N is 10
40
K : 12
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          239.277858s: 3 minutes, 59 seconds, 277 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T18:19:53.239


Max rewards of (N, n) policy is -71357.90861706421  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.3028086s: 3 minutes, 59 seconds, 302 milliseconds


40
K : 14
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T18:23:56.006


Max rewards of (N, n) policy is -32620.515968370426  n is 7  N is 7
40
K : 14
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.0552845s: 3 minutes, 57 seconds, 55 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T18:27:53.067


Max rewards of (N, n) policy is -36387.28413883082  n is 7  N is 8
40
K : 14
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.7539783s: 3 minutes, 58 seconds, 753 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T18:31:51.826


Max rewards of (N, n) policy is -40130.09061508972  n is 7  N is 8
40
K : 14
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.2720273s: 3 minutes, 57 seconds, 272 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T18:35:49.102


Max rewards of (N, n) policy is -43739.4901684564  n is 7  N is 9
40
K : 14
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.9273794s: 3 minutes, 59 seconds, 927 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T18:39:49.046


Max rewards of (N, n) policy is -46937.84828457206  n is 7  N is 9
40
K : 14
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.3554107s: 3 minutes, 59 seconds, 355 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T18:43:48.417


Max rewards of (N, n) policy is -50111.897629955994  n is 7  N is 9
40
K : 14
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          239.460038s: 3 minutes, 59 seconds, 460 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T18:47:47.881


Max rewards of (N, n) policy is -53310.246657128424  n is 7  N is 9
40
K : 14
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.8473413s: 3 minutes, 58 seconds, 847 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T18:51:46.733


Max rewards of (N, n) policy is -56455.31725019243  n is 7  N is 9
40
K : 14
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.1642841s: 3 minutes, 59 seconds, 164 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T18:55:45.903


Max rewards of (N, n) policy is -58813.38801940844  n is 6  N is 10
40
K : 14
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.7697798s: 3 minutes, 58 seconds, 769 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T18:59:44.678


Max rewards of (N, n) policy is -60653.08297876709  n is 6  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.0754352s: 3 minutes, 59 seconds, 75 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T19:03:43.769


40
K : 14
[0 -2400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -64215.06731797825  n is 5  N is 10
40

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.5606317s: 3 minutes, 58 seconds, 560 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T19:07:42.345



K : 14
[0 -2800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -67797.13450274778  n is 5  N is 10
40
K : 14
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.6321235s: 3 minutes, 58 seconds, 632 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T19:11:40.981


Max rewards of (N, n) policy is -71356.13212436122  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.4823287s: 3 minutes, 59 seconds, 482 milliseconds


40
K : 16
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T19:15:43.815


Max rewards of (N, n) policy is -32671.71077405849  n is 7  N is 8
40
K : 16
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.9565061s: 3 minutes, 58 seconds, 956 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T19:19:42.788


Max rewards of (N, n) policy is -36405.210155389825  n is 7  N is 8
40
K : 16
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.7393246s: 3 minutes, 58 seconds, 739 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T19:23:41.532


Max rewards of (N, n) policy is -40180.72399594667  n is 7  N is 8
40
K : 16
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.2205557s: 3 minutes, 59 seconds, 220 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T19:27:40.758


Max rewards of (N, n) policy is -43737.36314179369  n is 7  N is 9
40
K : 16
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.7918937s: 3 minutes, 58 seconds, 791 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T19:31:39.554


Max rewards of (N, n) policy is -46897.06383155321  n is 7  N is 9
40
K : 16
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.0061503s: 3 minutes, 58 seconds, 6 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T19:35:37.565


Max rewards of (N, n) policy is -50092.498242436595  n is 7  N is 9


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.6609646s: 3 minutes, 58 seconds, 660 milliseconds


40
K : 16
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T19:39:36.231


Max rewards of (N, n) policy is -53279.422997055684  n is 7  N is 9


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          239.249145s: 3 minutes, 59 seconds, 249 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T19:43:35.485


40
K : 16
[0 -1600 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -56471.16951540189  n is 7  N is 9
40
K : 16
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.1503939s: 3 minutes, 58 seconds, 150 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T19:47:33.641


Max rewards of (N, n) policy is -58797.208340829384  n is 6  N is 10
40
K : 16
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.9691744s: 3 minutes, 59 seconds, 969 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T19:51:33.626


Max rewards of (N, n) policy is -60666.607868893145  n is 5  N is 10
40
K : 16
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.1939946s: 3 minutes, 59 seconds, 193 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T19:55:32.836


Max rewards of (N, n) policy is -64230.577633232846  n is 5  N is 10
40
K : 16
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.7123304s: 3 minutes, 57 seconds, 712 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T19:59:30.564


Max rewards of (N, n) policy is -67803.0431943774  n is 5  N is 10
40
K : 16
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          238.280084s: 3 minutes, 58 seconds, 280 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T20:03:28.848


Max rewards of (N, n) policy is -71367.1674427036  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.2317524s: 3 minutes, 58 seconds, 231 milliseconds


40
K : 18
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T20:07:30.695


Max rewards of (N, n) policy is -32587.399145096435  n is 7  N is 8
40
K : 18
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.3570784s: 3 minutes, 59 seconds, 357 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T20:11:30.067


Max rewards of (N, n) policy is -36367.515787201715  n is 7  N is 7
40
K : 18
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.6830189s: 3 minutes, 58 seconds, 683 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T20:15:28.755


Max rewards of (N, n) policy is -40155.21867228906  n is 7  N is 8
40
K : 18
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          237.714215s: 3 minutes, 57 seconds, 714 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T20:19:26.474


Max rewards of (N, n) policy is -43720.74829613117  n is 7  N is 9
40
K : 18
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.1528926s: 3 minutes, 58 seconds, 152 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T20:23:24.632


Max rewards of (N, n) policy is -46922.61582414283  n is 7  N is 9
40
K : 18
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.7823631s: 3 minutes, 57 seconds, 782 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T20:27:22.420


Max rewards of (N, n) policy is -50112.29889663456  n is 7  N is 9
40
K : 18
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          238.597198s: 3 minutes, 58 seconds, 597 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T20:31:21.021


Max rewards of (N, n) policy is -53334.84799593936  n is 7  N is 9
40
K : 18
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.1064307s: 3 minutes, 58 seconds, 106 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T20:35:19.133


Max rewards of (N, n) policy is -56535.213121857574  n is 7  N is 9
40
K : 18
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.0275943s: 3 minutes, 59 seconds, 27 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T20:39:18.166


Max rewards of (N, n) policy is -58805.88543760214  n is 6  N is 10
40
K : 18
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m           238.47861s: 3 minutes, 58 seconds, 478 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T20:43:16.649


Max rewards of (N, n) policy is -60667.02421947453  n is 6  N is 10
40
K : 18
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.0658328s: 3 minutes, 58 seconds, 65 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T20:47:14.720


Max rewards of (N, n) policy is -64210.151627048814  n is 5  N is 10
40
K : 18
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         236.3105618s: 3 minutes, 56 seconds, 310 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T20:51:11.035


Max rewards of (N, n) policy is -67795.40326093858  n is 5  N is 10
40
K : 18
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          238.266643s: 3 minutes, 58 seconds, 266 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T20:55:09.307


Max rewards of (N, n) policy is -71375.02837714429  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.5961144s: 3 minutes, 57 seconds, 596 milliseconds


40
K : 20
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T20:59:10.325


Max rewards of (N, n) policy is -32617.511283555483  n is 7  N is 7
40
K : 20
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         236.0855344s: 3 minutes, 56 seconds, 85 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T21:03:06.415


Max rewards of (N, n) policy is -36380.24855632574  n is 7  N is 7
40
K : 20
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.9324729s: 3 minutes, 58 seconds, 932 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T21:07:05.352


Max rewards of (N, n) policy is -40160.32172375032  n is 7  N is 8
40
K : 20
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.3245529s: 3 minutes, 57 seconds, 324 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T21:11:02.682


Max rewards of (N, n) policy is -43703.49546296523  n is 7  N is 9
40
K : 20
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.2143374s: 3 minutes, 58 seconds, 214 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T21:15:00.901


Max rewards of (N, n) policy is -46901.7721959803  n is 7  N is 9
40
K : 20
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.0895406s: 3 minutes, 58 seconds, 89 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T21:18:59.007


Max rewards of (N, n) policy is -50150.9171041109  n is 7  N is 9
40
K : 20
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          238.468215s: 3 minutes, 58 seconds, 468 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T21:22:57.480


Max rewards of (N, n) policy is -53304.43808347964  n is 7  N is 9
40
K : 20
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.8770237s: 3 minutes, 58 seconds, 877 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T21:26:56.362


Max rewards of (N, n) policy is -56476.571758818754  n is 7  N is 9
40
K : 20
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.0643238s: 3 minutes, 59 seconds, 64 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T21:30:55.432


Max rewards of (N, n) policy is -58833.7894763402  n is 6  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         239.4633103s: 3 minutes, 59 seconds, 463 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T21:34:54.900


40
K : 20
[0 -2000 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -60637.19456250241  n is 5  N is 10
40
K : 20
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         240.6484536s: 4 minutes, 648 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T21:38:55.554


Max rewards of (N, n) policy is -64230.374702460584  n is 5  N is 10
40
K : 20
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.8421608s: 3 minutes, 57 seconds, 842 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T21:42:53.411


Max rewards of (N, n) policy is -67802.29372018528  n is 5  N is 10
40
K : 20
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         237.2621446s: 3 minutes, 57 seconds, 262 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T21:46:50.679


Max rewards of (N, n) policy is -71334.7844294136  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         238.5434681s: 3 minutes, 58 seconds, 543 milliseconds


30
K : 2
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T21:50:52.598


Max rewards of (N, n) policy is -26406.67577740783  n is 7  N is 7
30
K : 2
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         196.8214298s: 3 minutes, 16 seconds, 821 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T21:54:09.423


Max rewards of (N, n) policy is -30148.482054449833  n is 7  N is 7
30
K : 2
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         196.5065766s: 3 minutes, 16 seconds, 506 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T21:57:25.934


Max rewards of (N, n) policy is -33908.92787876763  n is 7  N is 8
30
K : 2
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.3646267s: 3 minutes, 15 seconds, 364 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T22:00:41.314


Max rewards of (N, n) policy is -37656.223098759205  n is 7  N is 8
30
K : 2
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         197.3856196s: 3 minutes, 17 seconds, 385 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T22:03:58.703


Max rewards of (N, n) policy is -41365.25047613538  n is 7  N is 8
30
K : 2
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         196.8297936s: 3 minutes, 16 seconds, 829 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T22:07:15.537


Max rewards of (N, n) policy is -44177.22746229188  n is 6  N is 9
30
K : 2
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         197.0651937s: 3 minutes, 17 seconds, 65 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T22:10:32.606


Max rewards of (N, n) policy is -46682.93701598024  n is 5  N is 9
30
K : 2
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         196.2934378s: 3 minutes, 16 seconds, 293 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T22:13:48.915


Max rewards of (N, n) policy is -49101.5256433985  n is 5  N is 9
30
K : 2
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         197.0310038s: 3 minutes, 17 seconds, 31 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T22:17:05.950


Max rewards of (N, n) policy is -51388.47558926655  n is 5  N is 9
30
K : 2
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         196.3079286s: 3 minutes, 16 seconds, 307 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T22:20:22.262


Max rewards of (N, n) policy is -53792.47387030218  n is 5  N is 9
30
K : 2
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.7732436s: 3 minutes, 15 seconds, 773 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T22:23:38.040


Max rewards of (N, n) policy is -58497.642694071124  n is 5  N is 9
30
K : 2
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         196.3034757s: 3 minutes, 16 seconds, 303 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T22:26:54.347


Max rewards of (N, n) policy is -62876.197496935056  n is 4  N is 9
30
K : 2
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          195.850215s: 3 minutes, 15 seconds, 850 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T22:30:10.202


Max rewards of (N, n) policy is -67169.52425554104  n is 4  N is 9


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         196.1875411s: 3 minutes, 16 seconds, 187 milliseconds


30
K : 4
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T22:33:29.721


Max rewards of (N, n) policy is -25434.206406503876  n is 7  N is 7
30
K : 4
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         196.3677435s: 3 minutes, 16 seconds, 367 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T22:36:46.092


Max rewards of (N, n) policy is -29149.67866510048  n is 7  N is 8
30
K : 4
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         196.4763667s: 3 minutes, 16 seconds, 476 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T22:40:02.573


Max rewards of (N, n) policy is -32876.70404635532  n is 7  N is 8


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m           195.01577s: 3 minutes, 15 seconds, 15 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T22:43:17.593


30
K : 4
[0 -800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -35866.9394663416  n is 7  N is 9
30
K : 4
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         196.6253819s: 3 minutes, 16 seconds, 625 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T22:46:34.222


Max rewards of (N, n) policy is -38775.382291295675  n is 7  N is 9
30
K : 4
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.6886777s: 3 minutes, 15 seconds, 688 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T22:49:49.915


Max rewards of (N, n) policy is -41553.35623027121  n is 6  N is 9
30
K : 4
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.8705318s: 3 minutes, 15 seconds, 870 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T22:53:05.801


Max rewards of (N, n) policy is -44133.266185611115  n is 6  N is 9
30
K : 4
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          194.987225s: 3 minutes, 14 seconds, 987 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T22:56:20.803


Max rewards of (N, n) policy is -46527.983645144  n is 5  N is 9
30
K : 4
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          196.141671s: 3 minutes, 16 seconds, 141 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T22:59:36.949


Max rewards of (N, n) policy is -48808.85114333018  n is 5  N is 9
30
K : 4
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         196.3709007s: 3 minutes, 16 seconds, 370 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T23:02:53.324


Max rewards of (N, n) policy is -51172.47853390225  n is 5  N is 9
30
K : 4
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         197.3158624s: 3 minutes, 17 seconds, 315 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T23:06:10.655


Max rewards of (N, n) policy is -54671.60149714899  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         196.7556963s: 3 minutes, 16 seconds, 755 milliseconds


30
K : 4
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T23:09:27.425


Max rewards of (N, n) policy is -58011.89440593762  n is 5  N is 10
30
K : 4
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.4237131s: 3 minutes, 15 seconds, 423 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T23:12:42.864


Max rewards of (N, n) policy is -61282.21736670999  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         197.8294597s: 3 minutes, 17 seconds, 829 milliseconds


30
K : 6
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T23:16:03.851


Max rewards of (N, n) policy is -25403.39887348761  n is 7  N is 7


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.8907843s: 3 minutes, 15 seconds, 890 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T23:19:19.746


30
K : 6
[0 -400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -29145.530500590026  n is 7  N is 8
30
K : 6
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          196.100822s: 3 minutes, 16 seconds, 100 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T23:22:35.851


Max rewards of (N, n) policy is -32781.93408709535  n is 7  N is 9
30
K : 6
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         196.0690209s: 3 minutes, 16 seconds, 69 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T23:25:51.925


Max rewards of (N, n) policy is -35697.04488222463  n is 7  N is 9
30
K : 6
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         196.3130821s: 3 minutes, 16 seconds, 313 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T23:29:08.241


Max rewards of (N, n) policy is -38642.627199102506  n is 7  N is 9
30
K : 6
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         196.2421015s: 3 minutes, 16 seconds, 242 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T23:32:24.488


Max rewards of (N, n) policy is -41441.1730976947  n is 6  N is 9
30
K : 6
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.4417609s: 3 minutes, 15 seconds, 441 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T23:35:39.945


Max rewards of (N, n) policy is -44064.005560342324  n is 5  N is 9
30
K : 6
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.5601648s: 3 minutes, 15 seconds, 560 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T23:38:55.520


Max rewards of (N, n) policy is -46400.1919083907  n is 5  N is 9
30
K : 6
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.3978204s: 3 minutes, 15 seconds, 397 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T23:42:10.921


Max rewards of (N, n) policy is -48530.72140031476  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         194.9653448s: 3 minutes, 14 seconds, 965 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T23:45:25.901


30
K : 6
[0 -2000 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -50118.5638341716  n is 5  N is 10
30
K : 6
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.0218826s: 3 minutes, 15 seconds, 21 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T23:48:40.927


Max rewards of (N, n) policy is -53344.52615077172  n is 5  N is 10
30
K : 6
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         194.5823727s: 3 minutes, 14 seconds, 582 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T23:51:55.514


Max rewards of (N, n) policy is -56660.39811609945  n is 5  N is 10
30
K : 6
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.3004989s: 3 minutes, 15 seconds, 300 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T23:55:10.829


Max rewards of (N, n) policy is -59916.137841459444  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         194.8629986s: 3 minutes, 14 seconds, 862 milliseconds


30
K : 8
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-14T23:58:28.974


Max rewards of (N, n) policy is -25413.615394014232  n is 7  N is 7
30
K : 8
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.6709084s: 3 minutes, 15 seconds, 670 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T00:01:44.650


Max rewards of (N, n) policy is -29141.414677005672  n is 7  N is 8
30
K : 8
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         194.9940495s: 3 minutes, 14 seconds, 994 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T00:04:59.648


Max rewards of (N, n) policy is -32750.792498926992  n is 7  N is 9
30
K : 8
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         196.1026332s: 3 minutes, 16 seconds, 102 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T00:08:15.754


Max rewards of (N, n) policy is -35715.48362432861  n is 7  N is 9
30
K : 8
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.5712247s: 3 minutes, 15 seconds, 571 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T00:11:31.330


Max rewards of (N, n) policy is -38597.20054112141  n is 7  N is 9
30
K : 8
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.8685964s: 3 minutes, 15 seconds, 868 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T00:14:47.203


Max rewards of (N, n) policy is -41419.57804163542  n is 6  N is 9
30
K : 8
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         196.2788282s: 3 minutes, 16 seconds, 278 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T00:18:03.486


Max rewards of (N, n) policy is -44049.82595524403  n is 5  N is 9
30
K : 8
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         194.5838669s: 3 minutes, 14 seconds, 583 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T00:21:18.085


Max rewards of (N, n) policy is -46346.19257374136  n is 5  N is 9
30
K : 8
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          195.733004s: 3 minutes, 15 seconds, 733 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T00:24:33.832


Max rewards of (N, n) policy is -48222.58027743137  n is 5  N is 10
30
K : 8
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          196.428779s: 3 minutes, 16 seconds, 428 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T00:27:50.265


Max rewards of (N, n) policy is -49893.629651288924  n is 5  N is 10
30
K : 8
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         196.1100215s: 3 minutes, 16 seconds, 110 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T00:31:06.379


Max rewards of (N, n) policy is -53193.068431242726  n is 5  N is 10
30
K : 8
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         196.0900327s: 3 minutes, 16 seconds, 90 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T00:34:22.484


Max rewards of (N, n) policy is -56419.73958364546  n is 5  N is 10
30
K : 8
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         196.0097222s: 3 minutes, 16 seconds, 9 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T00:37:38.498


Max rewards of (N, n) policy is -59739.20110716103  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         196.0026588s: 3 minutes, 16 seconds, 2 milliseconds


30
K : 10
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T00:40:57.693


Max rewards of (N, n) policy is -25404.677323278414  n is 7  N is 7
30
K : 10
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         196.2247983s: 3 minutes, 16 seconds, 224 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T00:44:13.922


Max rewards of (N, n) policy is -29199.40885434687  n is 7  N is 7
30
K : 10
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          195.674684s: 3 minutes, 15 seconds, 674 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T00:47:29.601


Max rewards of (N, n) policy is -32716.435566424392  n is 7  N is 9
30
K : 10
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         194.7838206s: 3 minutes, 14 seconds, 783 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T00:50:44.389


Max rewards of (N, n) policy is -35695.179726925686  n is 7  N is 9
30
K : 10
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.4601129s: 3 minutes, 15 seconds, 460 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T00:53:59.853


Max rewards of (N, n) policy is -38609.18581689432  n is 7  N is 9
30
K : 10
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.9133033s: 3 minutes, 15 seconds, 913 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T00:57:15.771


Max rewards of (N, n) policy is -41466.69326489967  n is 6  N is 9
30
K : 10
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.8215645s: 3 minutes, 15 seconds, 821 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T01:00:31.597


Max rewards of (N, n) policy is -44005.21202535387  n is 6  N is 9
30
K : 10
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         196.6039124s: 3 minutes, 16 seconds, 603 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T01:03:48.204


Max rewards of (N, n) policy is -46405.52643381332  n is 5  N is 9
30
K : 10
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.4119097s: 3 minutes, 15 seconds, 411 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T01:07:03.621


Max rewards of (N, n) policy is -48251.61196138844  n is 5  N is 10
30
K : 10
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         196.2728754s: 3 minutes, 16 seconds, 272 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T01:10:19.897


Max rewards of (N, n) policy is -49814.95404885612  n is 5  N is 10
30
K : 10
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         194.8160307s: 3 minutes, 14 seconds, 816 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T01:13:34.718


Max rewards of (N, n) policy is -53120.430437299576  n is 5  N is 10
30
K : 10
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.9360092s: 3 minutes, 15 seconds, 936 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T01:16:50.669


Max rewards of (N, n) policy is -56416.23148285854  n is 5  N is 10
30
K : 10
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.3919866s: 3 minutes, 15 seconds, 391 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T01:20:06.065


Max rewards of (N, n) policy is -59725.96090566776  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         196.0570921s: 3 minutes, 16 seconds, 57 milliseconds


30
K : 12
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T01:23:25.519


Max rewards of (N, n) policy is -25416.53915426201  n is 7  N is 8
30
K : 12
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.7036827s: 3 minutes, 15 seconds, 703 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T01:26:41.226


Max rewards of (N, n) policy is -29157.42295822718  n is 7  N is 8
30
K : 12
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.9910905s: 3 minutes, 15 seconds, 991 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T01:29:57.221


Max rewards of (N, n) policy is -32737.537550219608  n is 7  N is 9
30
K : 12
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.5447338s: 3 minutes, 15 seconds, 544 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T01:33:12.771


Max rewards of (N, n) policy is -35691.164129687706  n is 7  N is 9
30
K : 12
[0 -1000 -100 -1200

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         196.5657745s: 3 minutes, 16 seconds, 565 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T01:36:29.341


 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -38644.915004413466  n is 7  N is 9
30
K : 12
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.2818953s: 3 minutes, 15 seconds, 281 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T01:39:44.626


Max rewards of (N, n) policy is -41427.21701868563  n is 6  N is 9
30
K : 12
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.4751676s: 3 minutes, 15 seconds, 475 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T01:43:00.105


Max rewards of (N, n) policy is -44020.951944874614  n is 5  N is 9
30
K : 12
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.5586922s: 3 minutes, 15 seconds, 558 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T01:46:15.680


Max rewards of (N, n) policy is -46413.01869531983  n is 5  N is 9
30
K : 12
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         196.3805421s: 3 minutes, 16 seconds, 380 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T01:49:32.064


Max rewards of (N, n) policy is -48231.937126576435  n is 5  N is 10
30
K : 12
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         196.2241767s: 3 minutes, 16 seconds, 224 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T01:52:48.293


Max rewards of (N, n) policy is -49858.76554284629  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.5984324s: 3 minutes, 15 seconds, 598 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T01:56:03.906


30
K : 12
[0 -2400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -53106.389955561855  n is 5  N is 10
30
K : 12
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         196.4703727s: 3 minutes, 16 seconds, 470 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T01:59:20.380


Max rewards of (N, n) policy is -56461.39857545803  n is 5  N is 10
30
K : 12
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m           195.37465s: 3 minutes, 15 seconds, 374 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T02:02:35.759


Max rewards of (N, n) policy is -59728.21039353332  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.2355869s: 3 minutes, 15 seconds, 235 milliseconds


30
K : 14
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T02:05:54.307


Max rewards of (N, n) policy is -25412.609234912496  n is 7  N is 8
30
K : 14
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.7584914s: 3 minutes, 15 seconds, 758 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T02:09:10.069


Max rewards of (N, n) policy is -29143.86517503642  n is 7  N is 8
30
K : 14
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.9036858s: 3 minutes, 15 seconds, 903 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T02:12:25.977


Max rewards of (N, n) policy is -32757.11327497369  n is 7  N is 9
30
K : 14
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         196.9430534s: 3 minutes, 16 seconds, 943 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T02:15:42.924


Max rewards of (N, n) policy is -35708.11856474042  n is 7  N is 9
30
K : 14
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          196.085486s: 3 minutes, 16 seconds, 85 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T02:18:59.015


Max rewards of (N, n) policy is -38590.680298826526  n is 7  N is 9


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          196.476934s: 3 minutes, 16 seconds, 476 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T02:22:15.496


30
K : 14
[0 -1200 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -41400.63418030126  n is 6  N is 9
30
K : 14
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         194.6196296s: 3 minutes, 14 seconds, 619 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T02:25:30.119


Max rewards of (N, n) policy is -44033.98255820669  n is 6  N is 9
30
K : 14
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         194.4953158s: 3 minutes, 14 seconds, 495 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T02:28:44.619


Max rewards of (N, n) policy is -46368.773903040084  n is 5  N is 9
30
K : 14
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         196.2265078s: 3 minutes, 16 seconds, 226 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T02:32:00.849


Max rewards of (N, n) policy is -48204.99212075004  n is 5  N is 10
30
K : 14
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.3028218s: 3 minutes, 15 seconds, 302 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T02:35:16.157


Max rewards of (N, n) policy is -49881.209392591765  n is 5  N is 10
30
K : 14
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.1371402s: 3 minutes, 15 seconds, 137 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T02:38:31.297


Max rewards of (N, n) policy is -53146.61396128037  n is 5  N is 10
30
K : 14
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.7953403s: 3 minutes, 15 seconds, 795 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T02:41:47.097


Max rewards of (N, n) policy is -56317.41945313597  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         196.0508734s: 3 minutes, 16 seconds, 50 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T02:45:03.152


30
K : 14
[0 -3200 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -59735.50767255124  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         195.9244664s: 3 minutes, 15 seconds, 924 milliseconds


25
K : 2
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T02:48:22.230


Max rewards of (N, n) policy is -22482.45790841344  n is 7  N is 7
25

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         174.2653001s: 2 minutes, 54 seconds, 265 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T02:51:16.498



K : 2
[0 -400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -26240.07467693349  n is 7  N is 8
25
K : 2
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.3465663s: 2 minutes, 55 seconds, 346 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T02:54:11.849


Max rewards of (N, n) policy is -29913.87435270115  n is 7  N is 8
25
K : 2
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.9668137s: 2 minutes, 55 seconds, 966 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T02:57:07.820


Max rewards of (N, n) policy is -33631.1265749404  n is 7  N is 8
25
K : 2
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.1972552s: 2 minutes, 55 seconds, 197 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T03:00:03.032


Max rewards of (N, n) policy is -36304.364619024185  n is 6  N is 9
25
K : 2
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         177.0868974s: 2 minutes, 57 seconds, 86 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T03:03:00.122


Max rewards of (N, n) policy is -38680.86891064004  n is 5  N is 9
25
K : 2
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         176.2864973s: 2 minutes, 56 seconds, 286 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T03:05:56.413


Max rewards of (N, n) policy is -40896.49338603338  n is 5  N is 9
25
K : 2
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         176.1245662s: 2 minutes, 56 seconds, 124 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T03:08:52.540


Max rewards of (N, n) policy is -43149.04421574291  n is 5  N is 9
25
K : 2
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          175.598571s: 2 minutes, 55 seconds, 598 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T03:11:48.143


Max rewards of (N, n) policy is -45323.28025317733  n is 5  N is 9
25
K : 2
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.2068676s: 2 minutes, 55 seconds, 206 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T03:14:43.354


Max rewards of (N, n) policy is -47542.03762930517  n is 5  N is 9
25
K : 2
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.2119508s: 2 minutes, 55 seconds, 211 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T03:17:38.580


Max rewards of (N, n) policy is -51827.542142015256  n is 4  N is 9
25
K : 2
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.4005101s: 2 minutes, 55 seconds, 400 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T03:20:33.984


Max rewards of (N, n) policy is -55923.905455877706  n is 4  N is 9


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         176.1362808s: 2 minutes, 56 seconds, 136 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T03:23:30.125


25
K : 2
[0 -3200 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -60094.54357470839  n is 4  N is 9
25
K : 4
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m           175.89994s: 2 minutes, 55 seconds, 899 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T03:26:29.209


Max rewards of (N, n) policy is -21800.343692493327  n is 7  N is 7
25
K : 4
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.1906297s: 2 minutes, 55 seconds, 190 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T03:29:24.403


Max rewards of (N, n) policy is -25480.990076284193  n is 7  N is 8
25
K : 4
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.5636492s: 2 minutes, 55 seconds, 563 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T03:32:19.970


Max rewards of (N, n) policy is -28777.113348770763  n is 7  N is 9
25
K : 4
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.3317102s: 2 minutes, 55 seconds, 331 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T03:35:15.306


Max rewards of (N, n) policy is -31564.497542121415  n is 7  N is 9
25
K : 4
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.0944656s: 2 minutes, 55 seconds, 94 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T03:38:10.405


Max rewards of (N, n) policy is -34096.34774190574  n is 6  N is 9
25
K : 4
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.9192898s: 2 minutes, 55 seconds, 919 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T03:41:06.328


Max rewards of (N, n) policy is -36503.16402992784  n is 5  N is 9
25
K : 4
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.2914723s: 2 minutes, 55 seconds, 291 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T03:44:01.622


Max rewards of (N, n) policy is -38783.68136330686  n is 5  N is 9
25
K : 4
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         174.7057352s: 2 minutes, 54 seconds, 705 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T03:46:56.332


Max rewards of (N, n) policy is -40988.37432907059  n is 5  N is 9
25
K : 4
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         174.3844403s: 2 minutes, 54 seconds, 384 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T03:49:50.720


Max rewards of (N, n) policy is -43212.3303433151  n is 5  N is 9
25
K : 4
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         176.5449156s: 2 minutes, 56 seconds, 544 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T03:52:47.269


Max rewards of (N, n) policy is -45418.35579996602  n is 5  N is 9
25
K : 4
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m           175.56246s: 2 minutes, 55 seconds, 562 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T03:55:42.835


Max rewards of (N, n) policy is -48567.13508341977  n is 5  N is 10
25
K : 4
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.0415399s: 2 minutes, 55 seconds, 41 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T03:58:37.881


Max rewards of (N, n) policy is -51648.16456517701  n is 5  N is 10
25
K : 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          174.501703s: 2 minutes, 54 seconds, 501 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T04:01:32.397


[0 -3200 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -54823.71957279036  n is 5  N is 10
25
K : 6
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         174.9525563s: 2 minutes, 54 seconds, 952 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T04:04:30.696


Max rewards of (N, n) policy is -21782.02947524542  n is 7  N is 7
25
K : 6
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         174.6020046s: 2 minutes, 54 seconds, 602 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T04:07:25.313


Max rewards of (N, n) policy is -25515.621944530893  n is 7  N is 8
25
K : 6
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         174.7153919s: 2 minutes, 54 seconds, 715 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T04:10:20.032


Max rewards of (N, n) policy is -28696.53445831046  n is 7  N is 9
25
K : 6
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         174.9417566s: 2 minutes, 54 seconds, 941 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T04:13:14.978


Max rewards of (N, n) policy is -31454.60149493374  n is 7  N is 9
25
K : 6
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.4955874s: 2 minutes, 55 seconds, 495 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T04:16:10.477


Max rewards of (N, n) policy is -34009.79783807394  n is 6  N is 9
25
K : 6
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         174.8897073s: 2 minutes, 54 seconds, 889 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T04:19:05.371


Max rewards of (N, n) policy is -36463.431912046515  n is 5  N is 9
25
K : 6
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         174.8552065s: 2 minutes, 54 seconds, 855 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T04:22:00.229


Max rewards of (N, n) policy is -38685.75275445683  n is 5  N is 9
25
K : 6
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         174.8383063s: 2 minutes, 54 seconds, 838 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T04:24:55.071


Max rewards of (N, n) policy is -40846.393036719746  n is 5  N is 9
25
K : 6
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         176.7065678s: 2 minutes, 56 seconds, 706 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T04:27:51.782


Max rewards of (N, n) policy is -42899.21853200703  n is 5  N is 10
25
K : 6
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m           175.11158s: 2 minutes, 55 seconds, 111 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T04:30:46.897


Max rewards of (N, n) policy is -44382.943847629496  n is 5  N is 10
25
K : 6
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         174.4072038s: 2 minutes, 54 seconds, 407 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T04:33:41.308


Max rewards of (N, n) policy is -47493.4828966142  n is 5  N is 10
25
K : 6
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         174.6291913s: 2 minutes, 54 seconds, 629 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T04:36:35.952


Max rewards of (N, n) policy is -50656.76456782578  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.0301906s: 2 minutes, 55 seconds, 30 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T04:39:30.986


25
K : 6
[0 -3200 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -53737.42102100854  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         176.0735881s: 2 minutes, 56 seconds, 73 milliseconds


25
K : 8
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T04:42:30.266


Max rewards of (N, n) policy is -21803.206544569563  n is 7  N is 7
25
K : 8
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.0454544s: 2 minutes, 55 seconds, 45 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T04:45:25.316


Max rewards of (N, n) policy is -25514.515108888543  n is 7  N is 8
25
K : 8
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.6216308s: 2 minutes, 55 seconds, 621 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T04:48:20.941


Max rewards of (N, n) policy is -28698.41435807159  n is 7  N is 9
25
K : 8
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.1972264s: 2 minutes, 55 seconds, 197 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T04:51:16.142


Max rewards of (N, n) policy is -31463.91172858447  n is 7  N is 9
25
K : 8
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         174.4572059s: 2 minutes, 54 seconds, 457 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T04:54:10.613


Max rewards of (N, n) policy is -33978.24845219496  n is 6  N is 9
25
K : 8
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.5192826s: 2 minutes, 55 seconds, 519 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T04:57:06.136


Max rewards of (N, n) policy is -36421.470002455  n is 6  N is 9
25
K : 8
[0 -1400

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          174.364584s: 2 minutes, 54 seconds, 364 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T05:00:00.515


 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -38668.125218595276  n is 5  N is 9
25
K : 8
[0

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         174.8202988s: 2 minutes, 54 seconds, 820 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T05:02:55.338


 -1600 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -40898.054034428395  n is 5  N is 9
25
K : 8
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.6383118s: 2 minutes, 55 seconds, 638 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T05:05:50.980


Max rewards of (N, n) policy is -42719.813592510465  n is 5  N is 10
25
K : 8
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         174.8188625s: 2 minutes, 54 seconds, 818 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T05:08:45.803


Max rewards of (N, n) policy is -44249.0486460652  n is 5  N is 10
25
K : 8
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         172.6293467s: 2 minutes, 52 seconds, 629 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T05:11:38.436


Max rewards of (N, n) policy is -47338.58986003728  n is 5  N is 10
25
K : 8
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         170.9348086s: 2 minutes, 50 seconds, 934 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T05:14:29.375


Max rewards of (N, n) policy is -50485.66456281296  n is 5  N is 10
25
K : 8
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         173.0954106s: 2 minutes, 53 seconds, 95 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T05:17:22.474


Max rewards of (N, n) policy is -53538.78481388652  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         173.8937223s: 2 minutes, 53 seconds, 893 milliseconds


25
K : 10
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T05:20:19.715


Max rewards of (N, n) policy is -21798.493672588436  n is 7  N is 7
25
K : 10
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         173.4186787s: 2 minutes, 53 seconds, 418 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T05:23:13.137


Max rewards of (N, n) policy is -25448.795400591094  n is 7  N is 8
25
K : 10
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         172.7406207s: 2 minutes, 52 seconds, 740 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T05:26:05.882


Max rewards of (N, n) policy is -28667.975338615805  n is 7  N is 9
25
K : 10
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         174.9693108s: 2 minutes, 54 seconds, 969 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T05:29:00.865


Max rewards of (N, n) policy is -31418.6364712482  n is 7  N is 9
25
K : 10
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.7121593s: 2 minutes, 55 seconds, 712 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T05:31:56.581


Max rewards of (N, n) policy is -34038.869889574715  n is 6  N is 9
25
K : 10
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         174.6612098s: 2 minutes, 54 seconds, 661 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T05:34:51.246


Max rewards of (N, n) policy is -36441.04568342078  n is 6  N is 9
25
K : 10
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         174.4211079s: 2 minutes, 54 seconds, 421 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T05:37:45.670


Max rewards of (N, n) policy is -38662.26988652756  n is 5  N is 9
25
K : 10
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.0127873s: 2 minutes, 55 seconds, 12 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T05:40:40.687


Max rewards of (N, n) policy is -40904.09784962524  n is 5  N is 9
25
K : 10
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.0071883s: 2 minutes, 55 seconds, 7 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T05:43:35.710


Max rewards of (N, n) policy is -42741.743566355835  n is 5  N is 10
25
K : 

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.4565419s: 2 minutes, 55 seconds, 456 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T05:46:31.181


10
[0 -2000 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -44224.37224682901  n is 5  N is 10
25
K : 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.1516694s: 2 minutes, 55 seconds, 151 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T05:49:26.335


[0 -2400 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -47353.23450933497  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         174.7367818s: 2 minutes, 54 seconds, 736 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T05:52:21.076


25
K : 10
[0 -2800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -50429.262978204824  n is 5  N is 10
25
K : 10
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         174.3992617s: 2 minutes, 54 seconds, 399 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T05:55:15.490


Max rewards of (N, n) policy is -53598.153570258364  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.4819478s: 2 minutes, 55 seconds, 481 milliseconds


25
K : 12
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T05:58:14.377


Max rewards of (N, n) policy is -21799.737066423764  n is 7  N is 7
25
K : 12
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.1003035s: 2 minutes, 55 seconds, 100 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T06:01:09.481


Max rewards of (N, n) policy is -25515.362372006053  n is 7  N is 8
25
K : 12
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.0291856s: 2 minutes, 55 seconds, 29 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T06:04:04.515


Max rewards of (N, n) policy is -28688.42113006409  n is 7  N is 9
25
K : 12
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.7768754s: 2 minutes, 55 seconds, 776 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T06:07:00.295


Max rewards of (N, n) policy is -31456.31090010681  n is 7  N is 9


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m           175.17863s: 2 minutes, 55 seconds, 178 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T06:09:55.477


25
K : 12
[0 -1000 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -34031.75183891094  n is 6  N is 9
25
K : 12
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         174.4817764s: 2 minutes, 54 seconds, 481 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T06:12:49.963


Max rewards of (N, n) policy is -36450.1882785538  n is 5  N is 9
25
K : 12
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         174.4785747s: 2 minutes, 54 seconds, 478 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T06:15:44.445


Max rewards of (N, n) policy is -38704.61013507692  n is 5  N is 9
25
K : 12
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         174.8448472s: 2 minutes, 54 seconds, 844 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T06:18:39.305


Max rewards of (N, n) policy is -40890.4913730531  n is 5  N is 9
25
K : 12
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         174.4852627s: 2 minutes, 54 seconds, 485 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T06:21:33.794


Max rewards of (N, n) policy is -42701.72920850394  n is 5  N is 10
25


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         173.7653876s: 2 minutes, 53 seconds, 765 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T06:24:27.574


K : 12
[0 -2000 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -44312.161249510034  n is 5  N is 10
25
K : 12
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.3903264s: 2 minutes, 55 seconds, 390 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T06:27:22.968


Max rewards of (N, n) policy is -47363.55815258411  n is 5  N is 10
25
K : 12
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.2361074s: 2 minutes, 55 seconds, 236 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T06:30:18.207


Max rewards of (N, n) policy is -50528.16875157463  n is 5  N is 10
25
K : 12


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         175.7058942s: 2 minutes, 55 seconds, 705 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T06:33:13.928


[0 -3200 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -53553.83009765123  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         174.4458056s: 2 minutes, 54 seconds, 445 milliseconds


20
K : 2
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T06:36:11.719


Max rewards of (N, n) policy is -18648.067992566987  n is 7  N is 7
20
K : 2
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         155.4725736s: 2 minutes, 35 seconds, 472 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T06:38:47.195


Max rewards of (N, n) policy is -22288.180559599048  n is 7  N is 8
20
K : 2
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         156.3687202s: 2 minutes, 36 seconds, 368 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T06:41:23.567


Max rewards of (N, n) policy is -25899.501188174814  n is 7  N is 8
20
K : 2
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         153.8137144s: 2 minutes, 33 seconds, 813 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T06:43:57.384


Max rewards of (N, n) policy is -28659.41997070379  n is 6  N is 9
20
K : 2
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          154.575077s: 2 minutes, 34 seconds, 575 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T06:46:31.963


Max rewards of (N, n) policy is -30883.247270018095  n is 5  N is 9


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         153.9250256s: 2 minutes, 33 seconds, 925 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T06:49:05.891


20
K : 2
[0 -1200 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -32871.93381660745  n is 5  N is 9
20
K : 2
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.9900191s: 2 minutes, 34 seconds, 990 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T06:51:40.884


Max rewards of (N, n) policy is -34888.69057236121  n is 5  N is 9
20
K : 2
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         155.1165718s: 2 minutes, 35 seconds, 116 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T06:54:16.005


Max rewards of (N, n) policy is -36971.659073678304  n is 5  N is 9
20

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         155.2969616s: 2 minutes, 35 seconds, 296 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T06:56:51.315



K : 2
[0 -1800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -39033.7909202744  n is 5  N is 9
20
K : 2
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.8036857s: 2 minutes, 34 seconds, 803 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T06:59:26.133


Max rewards of (N, n) policy is -41084.94974309268  n is 4  N is 9
20
K : 2
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.2205322s: 2 minutes, 34 seconds, 220 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T07:02:00.356


Max rewards of (N, n) policy is -45039.28656433213  n is 4  N is 9
20
K : 2


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         155.3001027s: 2 minutes, 35 seconds, 300 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T07:04:35.660


[0 -2800 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -48931.974171354275  n is 4  N is 9
20
K : 2
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         155.1621701s: 2 minutes, 35 seconds, 162 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T07:07:10.825


Max rewards of (N, n) policy is -52789.73636452031  n is 4  N is 9


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.1863162s: 2 minutes, 34 seconds, 186 milliseconds


20
K : 4
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T07:09:48.223


Max rewards of (N, n) policy is -18164.016553814483  n is 7  N is 8
20
K : 4
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         153.8406669s: 2 minutes, 33 seconds, 840 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T07:12:22.066


Max rewards of (N, n) policy is -21760.127135417843  n is 7  N is 8
20
K : 4
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.8395411s: 2 minutes, 34 seconds, 839 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T07:14:56.909


Max rewards of (N, n) policy is -24466.93056312572  n is 7  N is 9
20
K : 4
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.0221413s: 2 minutes, 34 seconds, 22 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T07:17:30.935


Max rewards of (N, n) policy is -26815.559630839904  n is 6  N is 9
20
K : 4
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.8940216s: 2 minutes, 34 seconds, 894 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T07:20:05.843


Max rewards of (N, n) policy is -29064.180603782308  n is 6  N is 9
20
K : 4
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.4943207s: 2 minutes, 34 seconds, 494 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T07:22:40.340


Max rewards of (N, n) policy is -31116.554777647954  n is 5  N is 9
20
K : 4
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         155.8281557s: 2 minutes, 35 seconds, 828 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T07:25:16.171


Max rewards of (N, n) policy is -33211.24420407791  n is 5  N is 9
20
K : 4
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         155.1597763s: 2 minutes, 35 seconds, 159 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T07:27:51.335


Max rewards of (N, n) policy is -35280.25034844281  n is 5  N is 9
20
K : 4
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         153.8502776s: 2 minutes, 33 seconds, 850 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T07:30:25.188


Max rewards of (N, n) policy is -37337.968279293105  n is 5  N is 9
20
K : 4
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.1725052s: 2 minutes, 34 seconds, 172 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T07:32:59.364


Max rewards of (N, n) policy is -39292.897482685  n is 5  N is 10
20
K : 4
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.5895949s: 2 minutes, 34 seconds, 589 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T07:35:33.968


Max rewards of (N, n) policy is -42165.9318385816  n is 5  N is 10
20
K : 4
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.4957408s: 2 minutes, 34 seconds, 495 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T07:38:08.466


Max rewards of (N, n) policy is -45046.53293702048  n is 4  N is 10
20
K : 4
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          154.412765s: 2 minutes, 34 seconds, 412 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T07:40:42.883


Max rewards of (N, n) policy is -47934.5341846962  n is 4  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.5684783s: 2 minutes, 34 seconds, 568 milliseconds


20
K : 6
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T07:43:20.897


Max rewards of (N, n) policy is -18169.84747590174  n is 7  N is 8
20
K : 6
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         155.2156457s: 2 minutes, 35 seconds, 215 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T07:45:56.116


Max rewards of (N, n) policy is -21756.4547241193  n is 7  N is 8
20
K : 6
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.2567283s: 2 minutes, 34 seconds, 256 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T07:48:30.376


Max rewards of (N, n) policy is -24395.68245690891  n is 7  N is 9
20
K : 6
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.8558953s: 2 minutes, 34 seconds, 855 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T07:51:05.235


Max rewards of (N, n) policy is -26736.737411703987  n is 6  N is 9
20
K : 6
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         153.1272581s: 2 minutes, 33 seconds, 127 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T07:53:38.366


Max rewards of (N, n) policy is -28987.1083789022  n is 6  N is 9
20
K : 6
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          152.222092s: 2 minutes, 32 seconds, 222 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T07:56:10.592


Max rewards of (N, n) policy is -31079.283050906808  n is 5  N is 9
20
K : 6
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         153.3566125s: 2 minutes, 33 seconds, 356 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T07:58:43.951


Max rewards of (N, n) policy is -33174.13739146561  n is 5  N is 9
20
K : 6
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         152.8778833s: 2 minutes, 32 seconds, 877 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T08:01:16.832


Max rewards of (N, n) policy is -35258.1645110704  n is 5  N is 9
20
K : 6
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         153.2198876s: 2 minutes, 33 seconds, 219 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T08:03:50.055


Max rewards of (N, n) policy is -37058.88075884941  n is 5  N is 10
20
K : 6
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         152.9575593s: 2 minutes, 32 seconds, 957 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T08:06:23.027


Max rewards of (N, n) policy is -38582.53648990843  n is 5  N is 10
20
K : 6
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         151.7792354s: 2 minutes, 31 seconds, 779 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T08:08:54.809


Max rewards of (N, n) policy is -41456.2449964592  n is 5  N is 10
20
K : 6
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.8685578s: 2 minutes, 34 seconds, 868 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T08:11:29.681


Max rewards of (N, n) policy is -44244.305548097174  n is 5  N is 10
20
K : 6
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.4141425s: 2 minutes, 34 seconds, 414 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T08:14:04.099


Max rewards of (N, n) policy is -47173.33125224573  n is 4  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.6884547s: 2 minutes, 34 seconds, 688 milliseconds


20
K : 8
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T08:16:42.117


Max rewards of (N, n) policy is -18132.791129827194  n is 7  N is 8
20
K : 8
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         155.1836638s: 2 minutes, 35 seconds, 183 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T08:19:17.304


Max rewards of (N, n) policy is -21761.689688808267  n is 7  N is 8
20
K : 8
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.4003996s: 2 minutes, 34 seconds, 400 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T08:21:51.708


Max rewards of (N, n) policy is -24414.524421255242  n is 7  N is 9
20
K : 8
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.2275119s: 2 minutes, 34 seconds, 227 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T08:24:25.939


Max rewards of (N, n) policy is -26727.130898179723  n is 6  N is 9
20
K : 8
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         155.9083272s: 2 minutes, 35 seconds, 908 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T08:27:01.851


Max rewards of (N, n) policy is -29032.821140965192  n is 5  N is 9
20
K : 8
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          153.820353s: 2 minutes, 33 seconds, 820 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T08:29:35.674


Max rewards of (N, n) policy is -31104.109041646312  n is 5  N is 9
20
K : 8
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         155.3923444s: 2 minutes, 35 seconds, 392 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T08:32:11.070


Max rewards of (N, n) policy is -33136.66863134408  n is 5  N is 9
20
K : 8
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.0776683s: 2 minutes, 34 seconds, 77 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T08:34:45.151


Max rewards of (N, n) policy is -35179.55388373219  n is 5  N is 9
20
K : 8
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.7851455s: 2 minutes, 34 seconds, 785 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T08:37:19.940


Max rewards of (N, n) policy is -37009.77371467826  n is 5  N is 10
20
K : 8
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         153.8416111s: 2 minutes, 33 seconds, 841 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T08:39:53.785


Max rewards of (N, n) policy is -38385.16747782368  n is 5  N is 10
20
K : 8
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.5550355s: 2 minutes, 34 seconds, 555 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T08:42:28.354


Max rewards of (N, n) policy is -41283.20976318501  n is 5  N is 10
20
K : 8
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.5049357s: 2 minutes, 34 seconds, 504 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T08:45:02.862


Max rewards of (N, n) policy is -44240.96592828842  n is 4  N is 10
20

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.8763783s: 2 minutes, 34 seconds, 876 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T08:47:37.752



K : 8
[0 -3200 -100 -1200 -2000]
Finding best nN policy...
Max rewards of (N, n) policy is -47023.502152523724  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         153.6365871s: 2 minutes, 33 seconds, 636 milliseconds


20
K : 10
[0 -200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T08:50:14.637


Max rewards of (N, n) policy is -18164.3924849513  n is 7  N is 8
20
K : 10
[0 -400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         155.1807617s: 2 minutes, 35 seconds, 180 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T08:52:49.832


Max rewards of (N, n) policy is -21769.095915640242  n is 7  N is 8
20
K : 10
[0 -600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.3156532s: 2 minutes, 34 seconds, 315 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T08:55:24.151


Max rewards of (N, n) policy is -24414.80402891482  n is 7  N is 9
20
K : 10
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.7913043s: 2 minutes, 34 seconds, 791 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T08:57:58.956


Max rewards of (N, n) policy is -26752.21615853955  n is 6  N is 9
20
K : 10
[0 -1000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m          154.383816s: 2 minutes, 34 seconds, 383 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T09:00:33.342


Max rewards of (N, n) policy is -29055.69712427997  n is 6  N is 9
20
K : 10
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.4517329s: 2 minutes, 34 seconds, 451 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T09:03:07.798


Max rewards of (N, n) policy is -31112.906177153218  n is 5  N is 9
20
K : 10
[0 -1400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.6671771s: 2 minutes, 34 seconds, 667 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T09:05:42.468


Max rewards of (N, n) policy is -33105.4910077359  n is 5  N is 9
20
K : 10
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.3849427s: 2 minutes, 34 seconds, 384 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T09:08:16.857


Max rewards of (N, n) policy is -35248.32725475781  n is 5  N is 9
20
K : 10
[0 -1800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         153.3987449s: 2 minutes, 33 seconds, 398 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T09:10:50.269


Max rewards of (N, n) policy is -36950.63777920223  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         155.1954692s: 2 minutes, 35 seconds, 195 milliseconds


20
K : 10
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T09:13:25.478


Max rewards of (N, n) policy is -38441.16725158525  n is 5  N is 10
20
K : 10
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         153.7085209s: 2 minutes, 33 seconds, 708 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T09:15:59.190


Max rewards of (N, n) policy is -41314.950426335825  n is 5  N is 10
20
K : 10
[0 -2800 -100 -1200 -2000]

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.8198343s: 2 minutes, 34 seconds, 819 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T09:18:34.013



Finding best nN policy...
Max rewards of (N, n) policy is -44214.748514916195  n is 5  N is 10
20
K : 10
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.2479893s: 2 minutes, 34 seconds, 247 milliseconds
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2024-03-15T09:21:08.264


Max rewards of (N, n) policy is -47095.952517670026  n is 5  N is 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m         154.7143205s: 2 minutes, 34 seconds, 714 milliseconds


In [6]:
# Display the results table `df` as this cell's output.
df

Row,u,K,n,s,m,f,p,mean,std,nN
Unnamed: 0_level_1,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any
1,20,10,0,-3200,-100,-1200,-2000,-47096.0,3413.02,"CartesianIndex(5, 10)"
2,20,10,0,-2800,-100,-1200,-2000,-44214.7,3170.08,"CartesianIndex(5, 10)"
3,20,10,0,-2400,-100,-1200,-2000,-41315.0,2922.69,"CartesianIndex(5, 10)"
4,20,10,0,-2000,-100,-1200,-2000,-38441.2,2725.02,"CartesianIndex(5, 10)"
5,20,10,0,-1800,-100,-1200,-2000,-36950.6,2623.11,"CartesianIndex(5, 10)"
6,20,10,0,-1600,-100,-1200,-2000,-35248.3,2479.3,"CartesianIndex(5, 9)"
7,20,10,0,-1400,-100,-1200,-2000,-33105.5,2364.91,"CartesianIndex(5, 9)"
8,20,10,0,-1200,-100,-1200,-2000,-31112.9,2287.95,"CartesianIndex(5, 9)"
9,20,10,0,-1000,-100,-1200,-2000,-29055.7,2160.89,"CartesianIndex(6, 9)"
10,20,10,0,-800,-100,-1200,-2000,-26752.2,2048.19,"CartesianIndex(6, 9)"


In [7]:
# Persist the results table to disk as CSV; the cell evaluates to the written file's name.
using CSV
df |> CSV.write("Homogeneous nN_simple.csv")

"Homogeneous nN_simple.csv"

In [8]:
#= using Plots
 =#

In [9]:
#= plot(df[df.s.==-250,:].u,df[df.s.==-250,:].mean) =#

In [10]:
#= convert(Int64,df.mean[1]) =#

In [11]:
#= plot(df.u,df.mean.+1100.0,yticks=df.mean,xticks=df.u,ytickfontrotation=10.0)
scatter!(df.u,df.mean.+1100.0)
plot!(xscale= :identity
    
    ,yscale=:log10,ytickfontrotation=10.0)
 =#

In [12]:
#= plot(df[df.s.==-300,:].u,df[df.s.==-300,:].mean,label="nN") =#