In [73]:
# import Pkg; Pkg.add("POMDPPolicies")
# Pkg.add("POMDPModelTools")
# Pkg.add("Distributions")
# Pkg.add("Combinatorics")
# Pkg.add("StaticArrays")
# Pkg.add("FileIO")
# Pkg.add("JLD2")
# Pkg.add("TickTock")
# Pkg.add("POMDPTools")
# Pkg.add("D3Trees")
# Pkg.add("Random")
# Pkg.add("DataFrames")
# Pkg.add("CSV")

In [74]:
using POMDPs, POMDPTools, QuickPOMDPs, MCTS, DiscreteValueIteration, POMDPSimulators, POMDPModels, POMDPPolicies
using Distributions, Combinatorics, StaticArrays, D3Trees, Random
using FileIO, JLD2, TickTock

In [75]:
# Model dimensions and discount factor shared by the cells below.
Number_level = 4    # size of the per-component state and observation spaces
Action_num = 4      # NOTE(review): not referenced in the visible part of the notebook
gamma = 0.95        # discount applied when accumulating simulated rewards

# Both spaces are the integer labels 1:Number_level.
States = collect(1:Number_level)
Observations = collect(1:Number_level)

4-element Vector{Int64}:
 1
 2
 3
 4

In [76]:
# Observation model: entry [s, o] is the probability of observing level `o`
# when the true level is `s`, so every row is a distribution over observations.
Observation_matrix = Float64[
    0.8  0.2  0.0  0.0
    0.1  0.8  0.1  0.0
    0.0  0.2  0.8  0.0
    0.0  0.0  0.0  1.0
]

4×4 Matrix{Float64}:
 0.8  0.2  0.0  0.0
 0.1  0.8  0.1  0.0
 0.0  0.2  0.8  0.0
 0.0  0.0  0.0  1.0

In [77]:
# Per-component transition matrices: T[s, sp, i] is the probability that
# component i moves from level s to level sp in one step.
T = zeros(Number_level, Number_level, 100)

# Fill one slice per component from the files tm1.jld2 ... tm100.jld2; the third
# index of T is the component index inside the heterogeneous system.
for i in 1:100
    fullname = string("C:/Users/hhu98/OneDrive - UW-Madison/POMDP/code/Heterogeneous system/hetero_tm/tm", i)
    Transition_matrix = load(string(fullname, ".jld2"), "transition_matrix")
    T[:, :, i] .= Transition_matrix
end

In [78]:
# Default problem size and per-step rewards (costs are expressed as negative
# rewards). The experiment cell further down overwrites these globals per run.
NumberUnits = 20                # number of components simulated
limit = 12                      # failed-unit count at which system_penalty is charged
failure_penalty = -1200.0       # charged for a maintained unit that is at the last level
maintenance_penalty = -100.0    # charged for a maintained unit below the last level
setup_cost = -300.0             # charged once per epoch in which maintenance happens
normal_operation = 0.0          # added once per unit per epoch
system_penalty = -2000

-2000

In [79]:
# Point mass on level 1, expressed as a probability vector over States.
Initialstates = [level == 1 ? 1.0 : 0.0 for level in 1:Number_level]
Initialstate = SparseCat(States, Initialstates)

# Observation distribution of a component whose true level is 1
# (first row of the observation model).
Initialobs = SparseCat(Observations, Observation_matrix[1, :])

               [97;1mSparseCat distribution[0m           
     [38;5;8m┌                                        ┐[0m 
   1 [38;5;8m┤[0m[38;5;2m■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■[0m 0.8 [38;5;8m [0m [38;5;8m[0m
   2 [38;5;8m┤[0m[38;5;2m■■■■■■■■■[0m 0.2                           [38;5;8m [0m [38;5;8m[0m
   3 [38;5;8m┤[0m[38;5;2m[0m 0.0                                    [38;5;8m [0m [38;5;8m[0m
   4 [38;5;8m┤[0m[38;5;2m[0m 0.0                                    [38;5;8m [0m [38;5;8m[0m
     [38;5;8m└                                        ┘[0m 

## Belief updater

In [80]:
"""
    update_belief(b, s_b, a, o, i)

Return the updated belief vector (length `Number_level`) of component `i`.

# Arguments
- `b`: belief of the component from the previous epoch (before the action).
- `s_b`: believed level of the component used when the action was applied.
- `a`: maintenance threshold that was applied; the component counts as
  maintained when `s_b >= a`.
- `o`: observation received after the action.
- `i`: component index, selecting the slice `T[:, :, i]`.

Reads the globals `Number_level`, `Observation_matrix` and `T`.
"""
function update_belief(b, s_b, a, o, i)
    posterior = zeros(Number_level)

    # An observation equal to the last level pins the belief on that level.
    if o == Number_level
        posterior[Number_level] = 1.0
        return posterior
    end

    # Maintenance is assumed perfect and known to the belief updater, so a
    # maintained component is predicted from level 1 regardless of `b`.
    renewed = s_b >= a

    for sp in 1:Number_level
        if renewed
            predicted = T[1, sp, i]
        else
            # Prediction step: push the previous belief through the transition model.
            predicted = 0.0
            for s in 1:Number_level
                predicted += T[s, sp, i] * b[s]
            end
        end
        # Correction step: weight by the likelihood of the observation.
        posterior[sp] = Observation_matrix[sp, o] * predicted
    end

    # Normalize so the belief sums to 1.
    posterior ./= sum(posterior)
    return posterior
end

update_belief (generic function with 1 method)

## Define POMDP

In [81]:
"""
    approximate(s, s_belief, a, rng=234)

Simulate one epoch in which the believed levels `s_belief` are treated as if
they were the true levels: they decide which units are maintained *and* they
are used to score the reward.

# Arguments
- `s`: system-level vector of true component levels; it is overwritten in place
  with the sampled next levels.
- `s_belief`: believed level of every component.
- `a`: maintenance threshold; unit `i` is maintained when `s_belief[i] >= a`.
- `rng`: accepted for call compatibility but not used; sampling goes through
  the default random number generator.

# Returns
A named tuple `(s, r, o, a)` with the updated levels, the epoch reward, the
sampled observations and the threshold that was passed in.

Typical call: `s, r, o, true_a = approximate(s, s_belief, local_a, 234)`.
"""
function approximate(s, s_belief, a, rng=234)
    r = 0.0
    setup_charged = false

    # System-level penalty, judged on the believed number of failed units.
    if count(==(Number_level), s_belief) >= limit
        r += system_penalty
    end

    for i in 1:NumberUnits
        if s_belief[i] >= a
            # The decision is driven by the belief, since the true level is
            # unknown when the action is chosen.
            r += s_belief[i] == Number_level ? failure_penalty : maintenance_penalty

            # The setup cost is paid at most once per epoch.
            if !setup_charged
                r += setup_cost
                setup_charged = true
            end

            # A maintained unit restarts from level 1.
            s[i] = rand(Categorical(T[1, :, i]))
        else
            s[i] = rand(Categorical(T[s[i], :, i]))
        end
        r += normal_operation
    end

    # Sample one observation per unit from the row of its new true level.
    obs = fill(1, NumberUnits)
    for i in 1:NumberUnits
        obs[i] = rand(SparseCat(Observations, Observation_matrix[s[i], :]))
    end

    return (s=s, r=r, o=obs, a=a)
end

approximate (generic function with 2 methods)

In [82]:
"""
    findnN()

Search all threshold pairs `(n, N)` with `2 <= N <= Number_level` and
`1 <= n <= N` and return the index of the pair with the highest mean discounted
reward, as the `CartesianIndex` produced by `findmax` (so `nN[1]` is `n` and
`nN[2]` is `N`).

Under an `(n, N)` policy, as soon as any unit is believed to be at level `N` or
above, every unit believed to be at level `n` or above is maintained.

Each pair is scored with `repetition` independent runs of `simsteps` epochs
using `approximate`, i.e. rewards are computed from the believed levels.
NOTE: because of that, the maximum found here is not the true POMDP value of the
policy; a faithful score would have to be computed from the true states.

Reads the globals `Number_level`, `NumberUnits`, `repetition`, `simsteps`,
`gamma`, `Initialobs` and `Observation_matrix`.
"""
function findnN()
    println("Finding best nN policy...")

    # Pairs that are never simulated keep this sentinel and therefore cannot win.
    rewards_nN = fill(-100000000.0, Number_level, Number_level)

    # One slot per repetition; every slot is overwritten for each (n, N) pair,
    # and each thread only writes its own index.
    temp_rewards = zeros(repetition)

    for N in 2:Number_level
        for n in 1:N
            Threads.@threads for j in 1:repetition
                s = repeat(1:1, NumberUnits)
                # Drawn as in the original code; the value is replaced by the
                # first simulated observation before it is ever read.
                o = rand(Initialobs, NumberUnits)
                belief = [Observation_matrix[1, :] for _ in 1:NumberUnits]
                R_sequence = Float64[]

                for k in 1:simsteps
                    s_belief = argmax.(belief)

                    # A threshold of Number_level + 1 can never be reached, so it
                    # means "no maintenance in this epoch".
                    local_a = any(>=(N), s_belief) ? n : Number_level + 1

                    # s is the system-level state vector.
                    s, r, o, true_a = approximate(s, s_belief, local_a, 234)
                    for i in 1:NumberUnits
                        belief[i] = update_belief(belief[i], s_belief[i], true_a, o[i], i)
                    end
                    push!(R_sequence, r)
                end

                # Discounted return, accumulated backwards from the last epoch.
                R_discounted = 0.0
                for k in simsteps:-1:1
                    R_discounted = gamma * R_discounted + R_sequence[k]
                end
                temp_rewards[j] = R_discounted
            end
            rewards_nN[n, N] = mean(temp_rewards)
        end
    end

    _, nN = findmax(rewards_nN)
    return nN
end

findnN (generic function with 1 method)

In [83]:
"""
    generative(s, s_belief, a, rng=234)

Simulate one epoch of the system: maintenance is decided from the believed
levels `s_belief`, while rewards are scored against the true levels `s`.

# Arguments
- `s`: vector of true component levels; overwritten in place with the sampled
  next levels.
- `s_belief`: believed level of every component.
- `a`: maintenance threshold; unit `i` is maintained when `s_belief[i] >= a`.
  Any `a <= Number_level` incurs the setup cost.
- `rng`: accepted for call compatibility but not used; sampling goes through
  the default random number generator.

# Returns
A named tuple `(s, r, o)` with the updated levels, the epoch reward and the
sampled observations.
"""
function generative(s, s_belief, a, rng=234)
    r = 0.0

    # Setup cost is paid whenever the threshold is inside the level range.
    if a <= Number_level
        r += setup_cost
    end

    # System-level penalty, judged on the true number of failed units.
    if count(==(Number_level), s) >= limit
        r += system_penalty
    end

    for i in 1:NumberUnits
        if s_belief[i] >= a
            # Cost depends on whether the unit had truly failed.
            r += s[i] == Number_level ? failure_penalty : maintenance_penalty
            # A maintained unit restarts from level 1.
            s[i] = rand(Categorical(T[1, :, i]))
        else
            s[i] = rand(Categorical(T[s[i], :, i]))
        end
        r += normal_operation
    end

    # Sample one observation per unit from the row of its new true level.
    obs = fill(1, NumberUnits)
    for i in 1:NumberUnits
        obs[i] = rand(SparseCat(Observations, Observation_matrix[s[i], :]))
    end

    return (s=s, r=r, o=obs)
end

generative (generic function with 2 methods)

In [84]:
using DataFrames
# Results table: one row per (units, K, cost vector) configuration. Rows are
# inserted at the front, so the most recent configuration is always row 1.
df = DataFrame(u=[], K=[], n=[], s=[], m=[], f=[], p=[], mean=[], std=[], nN=[])

# Each entry is a 1x5 row
#   [normal_operation setup_cost maintenance_penalty failure_penalty system_penalty]
# Only the setup cost differs between the scenarios.
cost = [
    [0  -800 -100 -1200 -2000],
    [0 -1200 -100 -1200 -2000],
    [0 -1600 -100 -1200 -2000],
    [0 -2000 -100 -1200 -2000],
    [0 -2400 -100 -1200 -2000],
    [0 -2800 -100 -1200 -2000],
    [0 -3200 -100 -1200 -2000],
]

for units in [20, 25, 30, 40, 50, 60]
    # K is the number of failed units that triggers the system penalty.
    for K in 2:2:div(units + 1, 2)
        global NumberUnits = units
        global limit = K

        for c in cost
            tick()
            println(units)
            print("K : ")
            println(K)
            global normal_operation, setup_cost, maintenance_penalty, failure_penalty, system_penalty = c
            println(c)

            # Simulation settings read by findnN() through globals.
            global simsteps = 100
            global repetition = 10000
            # The globals below are not read by the visible functions (findnN
            # keeps its own locals); they are still reset here so that any
            # later cell relying on them sees the same values as before.
            global rewards_nN = fill(-100000000.0, Number_level, Number_level)
            global rewards_nN_std = fill(-100000000.0, Number_level, Number_level)
            global discount_factor = gamma
            global temp_rewards = zeros(repetition, 1)

            # Look for the best (n, N) thresholds under the approximate model.
            nN = findnN()
            n = nN[1]
            N = nN[2]

            # Evaluate the selected policy against the true-state simulator.
            trials = 10000
            results = zeros(trials)
            Threads.@threads for h in 1:trials
                s = repeat(1:1, NumberUnits)
                # Drawn as in the original code; the value is replaced by the
                # first simulated observation before it is ever read.
                o = rand(Initialobs, NumberUnits)
                belief = [Observation_matrix[1, :] for _ in 1:NumberUnits]
                R_sequence = Float64[]

                for k in 1:simsteps
                    s_belief = argmax.(belief)

                    # If any unit is believed to have reached threshold N,
                    # maintain every unit believed to be at n or above;
                    # otherwise use an unreachable threshold (no maintenance).
                    # A unit believed to be at the failure level always
                    # satisfies N, since N <= Number_level.
                    local_a = any(>=(N), s_belief) ? n : Number_level + 1

                    s, r, o = generative(s, s_belief, local_a, 234)
                    for i in 1:NumberUnits
                        belief[i] = update_belief(belief[i], s_belief[i], local_a, o[i], i)
                    end
                    push!(R_sequence, r)
                end

                # Discounted return, accumulated backwards from the last epoch.
                R_discounted = 0.0
                for k in simsteps:-1:1
                    R_discounted = gamma * R_discounted + R_sequence[k]
                end
                results[h] = R_discounted
            end

            rewards = mean(results)
            rewards_std = std(results)
            println("Rewards of (n, N)-policy is ", rewards, ", n is ", n, ", N is ", N)
            pushfirst!(df, [units, limit, normal_operation, setup_cost, maintenance_penalty, failure_penalty, system_penalty, rewards, rewards_std, nN])

            # Close the timer opened by tick() so the elapsed time of this
            # configuration is reported and timers do not pile up.
            tock()
        end
    end
end

20
K : 2
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T16:24:39.048


Rewards of (n, N)-policy is-20568.72618558247n is 2N is 3
20
K : 2
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T16:27:55.460


Rewards of (n, N)-policy is-25089.074923473425n is 2N is 3
20
K : 2
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T16:31:10.413


Rewards of (n, N)-policy is-29486.334559329516n is 2N is 4
20
K : 2
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T16:34:26.280


Rewards of (n, N)-policy is-31818.32652345618n is 2N is 4
20
K : 2
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T16:37:41.615


Rewards of (n, N)-policy is-34182.37395367943n is 2N is 4
20
K : 2
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T16:40:57.197


Rewards of (n, N)-policy is-36412.64177523607n is 2N is 4
20
K : 2
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T16:44:14.870


Rewards of (n, N)-policy is-38730.50605332659n is 2N is 4
20
K : 4
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T16:47:29.501


Rewards of (n, N)-policy is-20378.655600652735n is 2N is 3
20
K : 4
[0 -1200 -100 

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T16:50:44.385


-1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-23476.197167749466n is 2N is 4
20
K : 4
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T16:53:59.761


Rewards of (n, N)-policy is-25829.842561674435n is 2N is 4
20
K : 4
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T16:57:14.961


Rewards of (n, N)-policy is-28072.667544584n is 2N is 4
20
K : 4
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T17:00:31.031


Rewards of (n, N)-policy is-30427.467222927236n is 2N is 4
20
K : 4
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T17:03:46.260


Rewards of (n, N)-policy is-32657.77044143175n is 2N is 4
20
K : 4
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T17:07:01.948


Rewards of (n, N)-policy is-34965.73751312737n is 2N is 4
20

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T17:10:19.545



K : 6
[0 -800 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-20363.869732420422n is 2N is 3
20
K : 6
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T17:13:34.364


Rewards of (n, N)-policy is-23159.847010258025n is 2N is 4
20
K : 6
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T17:16:50.363


Rewards of (n, N)-policy is-25410.047796827n is 2N is 4
20
K : 6
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T17:20:06.400


Rewards of (n, N)-policy is-27724.483167666644n is 2N is 4
20
K : 6
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T17:23:22.158


Rewards of (n, N)-policy is-30030.14002408262n is 2N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T17:26:39.503


20
K : 6
[0 -2800 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-32306.600519167197n is 2N is 4
20
K : 6
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T17:29:55.041


Rewards of (n, N)-policy is-34623.49644321254n is 2N is 4
20
K : 8
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T17:33:10.270


Rewards of (n, N)-policy is-20348.462572137683n is 2N is 3
20
K : 8
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T17:36:25.592


Rewards of (n, N)-policy is-23129.40916959461n is 2N is 4
20
K : 8
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T17:39:42.165


Rewards of (n, N)-policy is-25428.612677217923n is 2N is 4
20
K : 8
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T17:42:58.056


Rewards of (n, N)-policy is-27714.948031695945n is 2N is 4
20
K : 8
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T17:46:14.644


Rewards of (n, N)-policy is-30048.961411009954n is 2N is 4
20
K : 8
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T17:49:30.388


Rewards of (n, N)-policy is-32365.21542562548n is 2N is 4
20
K : 8
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T17:52:48.131


Rewards of (n, N)-policy is-34606.75850111269n is 2N is 4
20
K : 10
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T17:56:02.704


Rewards of (n, N)-policy is-20338.314877664532n is 2N is 3
20
K : 10
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T17:59:17.740


Rewards of (n, N)-policy is-23162.120093532452n is 2N is 4
20
K : 10
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T18:02:33.929


Rewards of (n, N)-policy is-25441.206840591334n is 2N is 4
20
K : 10
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T18:05:50.484


Rewards of (n, N)-policy is-27718.727498528944n is 2N is 4
20
K : 10
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T18:09:08.909


Rewards of (n, N)-policy is-30050.08631272526n is 2N is 4
20
K : 10
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T18:12:25.032


Rewards of (n, N)-policy is-32337.307459391308n is 2N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T18:15:41.034


20
K : 10
[0 -3200 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-34629.09363890449n is 2N is 4
25
K : 2
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T18:18:57.366


Rewards of (n, N)-policy is-25477.42647941915n is 2N is 3
25
K : 2
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T18:22:57.231


Rewards of (n, N)-policy is-30295.753063142944n is 2N is 3
25
K : 2
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T18:26:56.919


Rewards of (n, N)-policy is-35126.005776150305n is 2N is 3
25
K : 2
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T18:30:57.352


Rewards of (n, N)-policy is-38003.333020289065n is 2N is 4
25
K : 2
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T18:34:58.617


Rewards of (n, N)-policy is-40502.85716993727n is 2N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T18:39:00.074


25
K : 2
[0 -2800 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-43023.71168180835n is 2N is 4
25
K : 2
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T18:43:01.204


Rewards of (n, N)-policy is-45497.82607556937n is 2N is 4
25
K : 4
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T18:47:01.960


Rewards of (n, N)-policy is-25170.521392779796n is 2N is 3
25
K : 4
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T18:51:02.475


Rewards of (n, N)-policy is-28562.34713513267n is 2N is 4
25
K : 4
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T18:55:01.792


Rewards of (n, N)-policy is-31060.118958221577n is 2N is 4
25
K : 4
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T18:59:03.306


Rewards of (n, N)-policy is-33510.48325575591n is 2N is 4
25
K : 4
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T19:03:05.580


Rewards of (n, N)-policy is-36050.149681245086n is 2N is 4
25
K : 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T19:07:08.279


[0 -2800 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-38546.490740952584n is 2N is 4
25
K : 4
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T19:11:17.825


Rewards of (n, N)-policy is-41003.00918314346n is 2N is 4
25
K : 6
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T19:15:19.937


Rewards of (n, N)-policy is-25175.837662493217n is 2N is 3
25
K : 6
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T19:19:22.219


Rewards of (n, N)-policy is-28005.634813720197n is 2N is 4
25
K : 6
[0 -1600 -100 

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T19:23:24.102


-1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-30503.5482792262n is 2N is 4
25

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T19:27:26.465



K : 6
[0 -2000 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-33008.21279335301n is 2N is 4
25
K : 6
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T19:31:28.608


Rewards of (n, N)-policy is-35461.918562002866n is 2N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T19:35:31.003


25
K : 6
[0 -2800 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-37973.1578835337n is 2N is 4
25
K : 6
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T19:39:33.109


Rewards of (n, N)-policy is-40473.08091013875n is 2N is 4
25
K : 8
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T19:43:35.540


Rewards of (n, N)-policy is-25201.850577699155n is 2N is 3
25
K : 8
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T19:47:36.186


Rewards of (n, N)-policy is-27967.32806142375n is 2N is 4
25
K : 8
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T19:51:38.077


Rewards of (n, N)-policy is-30463.316011992396n is 2N is 4
25
K : 8
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T19:55:40.230


Rewards of (n, N)-policy is-32952.49724708364n is 2N is 4
25
K : 8
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T19:59:41.864


Rewards of (n, N)-policy is-35435.445669816996n is 2N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T20:03:43.472


25
K : 8
[0 -2800 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-37945.768080412076n is 2N is 4
25
K : 8
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T20:07:45.659


Rewards of (n, N)-policy is-40360.33198660497n is 2N is 4
25
K : 10
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T20:11:46.054


Rewards of (n, N)-policy is-25199.612236928962n is 2N is 3
25
K : 10
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T20:15:47.466


Rewards of (n, N)-policy is-27970.703072414053n is 2N is 4
25
K : 10
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T20:19:50.333


Rewards of (n, N)-policy is-30457.561640412467n is 2N is 4
25
K : 10
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T20:23:52.363


Rewards of (n, N)-policy is-32965.91029986963n is 2N is 4
25
K : 10
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T20:27:55.372


Rewards of (n, N)-policy is-35442.968345628n is 2N is 4
25
K : 10
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T20:31:57.896


Rewards of (n, N)-policy is-37950.394191848594n is 2N is 4
25
K : 10
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T20:35:57.754


Rewards of (n, N)-policy is-40447.22508581784n is 2N is 4
25
K : 12
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T20:39:57.271


Rewards of (n, N)-policy is-25160.274125600452n is 2N is 3
25
K : 12
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T20:43:56.529


Rewards of (n, N)-policy is-27984.846661525673n is 2N is 4
25
K : 12
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T20:47:55.335


Rewards of (n, N)-policy is-30453.9696364205n is 2N is 4
25
K : 12
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T20:51:55.069


Rewards of (n, N)-policy is-32930.01021581158n is 2N is 4
25
K : 12
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T20:55:55.061


Rewards of (n, N)-policy is-35437.25612654173n is 2N is 4
25
K : 12
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T20:59:54.814


Rewards of (n, N)-policy is-37903.5257983884n is 2N is 4
25
K : 12
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T21:03:54.233


Rewards of (n, N)-policy is-40373.60969132615n is 2N is 4
30
K : 2
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T21:07:53.837


Rewards of (n, N)-policy is-31017.939098123377n is 2N is 3
30
K : 2
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T21:12:36.404


Rewards of (n, N)-policy is-36523.0270407203n is 2N is 3


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T21:17:18.873


30
K : 2
[0 -1600 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-40961.48874693005n is 2N is 4
30
K : 2
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T21:22:00.077


Rewards of (n, N)-policy is-43657.50821595202n is 2N is 4
30
K : 2
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T21:26:44.087


Rewards of (n, N)-policy is-46322.550491218164n is 2N is 4
30
K : 2
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T21:31:28.960


Rewards of (n, N)-policy is-49000.27380321771n is 2N is 4
30
K : 2
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T21:36:12.421


Rewards of (n, N)-policy is-51642.25581983469n is 2N is 4
30
K : 4
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T21:40:56.982


Rewards of (n, N)-policy is-30616.129442978978n is 2N is 4
30
K : 4
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T21:45:41.500


Rewards of (n, N)-policy is-33321.67400952529n is 2N is 4
30
K : 4
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T21:50:25.903


Rewards of (n, N)-policy is-36022.75465382525n is 2N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T21:55:10.364


30
K : 4
[0 -2000 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-38651.32049108285n is 2N is 4
30
K : 4
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T21:59:54.029


Rewards of (n, N)-policy is-41382.41006475316n is 2N is 4
30
K : 4
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T22:04:37.496


Rewards of (n, N)-policy is-44091.71690476563n is 2N is 4
30
K : 4
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T22:09:22.982


Rewards of (n, N)-policy is-46740.162116662344n is 2N is 4
30
K : 6
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T22:14:05.520


Rewards of (n, N)-policy is-29871.99403967106n is 2N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T22:18:49.932


30
K : 6
[0 -1200 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-32553.779894320905n is 2N is 4
30
K : 6
[0

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T22:23:34.309


 -1600 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-35215.36367064004n is 2N is 4
30
K : 6
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T22:28:18.954


Rewards of (n, N)-policy is-37958.736421038026n is 2N is 4
30
K : 6
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T22:33:03.648


Rewards of (n, N)-policy is-40616.99873751786n is 2N is 4
30
K : 6
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T22:37:49.026


Rewards of (n, N)-policy is-43294.11952189711n is 2N is 4
30
K : 6
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T22:42:32.277


Rewards of (n, N)-policy is-45957.90188937705n is 2N is 4
30
K : 8
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T22:47:17.143


Rewards of (n, N)-policy is-29838.571476643927n is 2N is 4
30
K : 8
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T22:52:01.429


Rewards of (n, N)-policy is-32488.46940055058n is 2N is 4
30
K : 8
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T22:56:44.440


Rewards of (n, N)-policy is-35213.29798197325n is 2N is 4
30
K : 8
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T23:01:28.977


Rewards of (n, N)-policy is-37870.39508415927n is 2N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T23:06:14.120


30
K : 8
[0 -2400 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-40497.090131714445n is 2N is 4
30
K : 8
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T23:11:08.247


Rewards of (n, N)-policy is-43214.22070191311n is 2N is 4
30
K : 8
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T23:15:53.353


Rewards of (n, N)-policy is-45881.119788299766n is 2N is 4
30
K : 10
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T23:20:40.983


Rewards of (n, N)-policy is-29843.29538451729n is 2N is 4
30
K : 10
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T23:25:23.818


Rewards of (n, N)-policy is-32488.102720477033n is 2N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T23:30:09.217


30
K : 10
[0 -1600 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-35201.403522187386n is 2N is 4
30
K : 10
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T23:34:53.397


Rewards of (n, N)-policy is-37810.6434646125n is 2N is 4
30
K : 10
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T23:39:36.396


Rewards of (n, N)-policy is-40532.13437429863n is 2N is 4
30
K : 10
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T23:44:22.035


Rewards of (n, N)-policy is-43236.725593744915n is 2N is 4
30
K : 10
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T23:49:07.960


Rewards of (n, N)-policy is-45855.52463000178n is 2N is 4
30
K : 12
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T23:53:53.456


Rewards of (n, N)-policy is-29832.26397233888n is 2N is 4
30
K : 12
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-08T23:58:38.261


Rewards of (n, N)-policy is-32510.295869649708n is 2N is 4
30
K : 12
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T00:03:21.041


Rewards of (n, N)-policy is-35151.16818027082n is 2N is 4
30
K : 12
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T00:08:06.258


Rewards of (n, N)-policy is-37856.6778054947n is 2N is 4
30
K : 12
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T00:12:51.972


Rewards of (n, N)-policy is-40572.49702762911n is 2N is 4
30
K : 12
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T00:17:35.062


Rewards of (n, N)-policy is-43203.80729870434n is 2N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T00:22:20.017


30
K : 12
[0 -3200 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-45889.100209437915n is 2N is 4
30
K : 14
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T00:27:06.036


Rewards of (n, N)-policy is-29865.271059266044n is 2N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T00:31:51.959


30
K : 14
[0 -1200 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-32504.92503726241n is 2N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T00:36:37.458


30
K : 14
[0 -1600 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-35172.644929840826n is 2N is 4
30
K : 14
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T00:41:20.659


Rewards of (n, N)-policy is-37853.78387610246n is 2N is 4
30


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T00:46:06.562


K : 14
[0 -2400 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-40538.08097201397n is 2N is 4
30
K : 14
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T00:50:52.439


Rewards of (n, N)-policy is-43211.6438976448n is 2N is 4
30
K : 14
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T00:55:38.532


Rewards of (n, N)-policy is-45938.30742107073n is 2N is 4
40
K : 2
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T01:00:21.661


Rewards of (n, N)-policy is-37556.914113587096n is 2N is 3
40
K : 2
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T01:06:42.103


Rewards of (n, N)-policy is-43573.11576724708n is 2N is 3
40
K : 2
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T01:13:00.804


Rewards of (n, N)-policy is-47614.16643060471n is 2N is 4
40
K : 2
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T01:19:20.762


Rewards of (n, N)-policy is-50460.42814083896n is 2N is 4
40
K : 2
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T01:25:42.536


Rewards of (n, N)-policy is-53344.616364828544n is 2N is 4
40
K : 2
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T01:32:02.961


Rewards of (n, N)-policy is-56151.69917693689n is 2N is 4
40
K : 2
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T01:38:22.893


Rewards of (n, N)-policy is-59036.33847883932n is 2N is 4
40
K : 4
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T01:44:45.305


Rewards of (n, N)-policy is-36462.38632370168n is 2N is 4
40
K : 4
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T01:51:05.429


Rewards of (n, N)-policy is-39248.0804299821n is 2N is 4
40
K : 4
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T01:57:25.758


Rewards of (n, N)-policy is-42167.12474583819n is 2N is 4
40
K : 4
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T03:03:48.177


Rewards of (n, N)-policy is-45014.23715441063n is 2N is 4
40
K : 4
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T03:10:08.626


Rewards of (n, N)-policy is-47810.30076428989n is 2N is 4
40
K : 4
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T03:16:29.072


Rewards of (n, N)-policy is-50774.76767364759n is 2N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T03:22:51.592


40
K : 4
[0 -3200 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-53515.56070400681n is 2N is 4
40
K : 6
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T03:29:12.245


Rewards of (n, N)-policy is-35389.79123761059n is 2N is 4
40
K : 6
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T03:35:33.078


Rewards of (n, N)-policy is-38261.39368690625n is 2N is 4
40
K : 6
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T03:41:55.327


Rewards of (n, N)-policy is-41088.21821631671n is 2N is 4
40
K : 6
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T03:48:16.713


Rewards of (n, N)-policy is-43972.434341316104n is 2N is 4
40
K : 6
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T03:54:38.497


Rewards of (n, N)-policy is-46801.9111587544n is 2N is 4
40
K : 6
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T04:01:00.423


Rewards of (n, N)-policy is-49657.914578632626n is 2N is 4
40
K : 6
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T04:07:23.801


Rewards of (n, N)-policy is-52537.314841691405n is 2N is 4
40
K : 8
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T04:13:46.683


Rewards of (n, N)-policy is-35231.483912500895n is 2N is 4
40
K : 8
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T04:20:09.733


Rewards of (n, N)-policy is-38131.07677120719n is 2N is 4
40
K : 8
[0 -1600 -100 -1200 -2000]

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T04:26:33.379



Finding best nN policy...
Rewards of (n, N)-policy is-40976.391618938185n is 2N is 4
40
K : 8
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T04:32:55.205


Rewards of (n, N)-policy is-43792.39944187335n is 2N is 4
40
K : 8
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T04:39:17.413


Rewards of (n, N)-policy is-46693.40221970355n is 2N is 4
40
K : 8
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T04:45:37.511


Rewards of (n, N)-policy is-49504.92192684716n is 2N is 4
40
K : 8
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T04:51:59.286


Rewards of (n, N)-policy is-52420.84665287589n is 2N is 4
40
K : 10
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T04:58:21.695


Rewards of (n, N)-policy is-35235.99637389476n is 2N is 4
40
K : 10
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T05:04:43.403


Rewards of (n, N)-policy is-38086.70812139358n is 2N is 4
40
K : 10
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T05:11:06.860


Rewards of (n, N)-policy is-40945.36461236578n is 2N is 4
40
K : 10
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T05:17:31.879


Rewards of (n, N)-policy is-43814.99198322247n is 2N is 4
40
K : 10
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T05:23:58.271


Rewards of (n, N)-policy is-46725.480059341964n is 2N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T05:30:21.270


40
K : 10
[0 -2800 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-49566.18757465589n is 2N is 4
40
K : 10
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T05:36:43.161


Rewards of (n, N)-policy is-52376.02122916458n is 2N is 4
40
K : 12
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T05:43:05.636


Rewards of (n, N)-policy is-35267.50743281643n is 2N is 4
40
K : 12
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T05:49:29.606


Rewards of (n, N)-policy is-38125.65392782211n is 2N is 4
40
K : 12
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T05:55:49.882


Rewards of (n, N)-policy is-40978.84391594776n is 2N is 4
40
K : 12
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T06:02:11.856


Rewards of (n, N)-policy is-43789.5723125143n is 2N is 4
40
K : 12
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T06:08:36.635


Rewards of (n, N)-policy is-46697.72015669393n is 2N is 4
40
K : 12
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T06:14:58.392


Rewards of (n, N)-policy is-49553.22170195042n is 2N is 4
40
K : 12
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T06:21:20.380


Rewards of (n, N)-policy is-52412.51565554525n is 2N is 4
40
K : 14
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T06:27:45.193


Rewards of (n, N)-policy is-35220.02735041083n is 2N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T06:34:06.154


40
K : 14
[0 -1200 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-38101.51854391168n is 2N is 4
40
K : 14
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T06:40:28.750


Rewards of (n, N)-policy is-40973.1224596733n is 2N is 4
40
K : 14
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T06:46:52.510


Rewards of (n, N)-policy is-43851.20992140075n is 2N is 4
40
K : 14
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T06:53:15.945


Rewards of (n, N)-policy is-46642.604852837256n is 2N is 4
40
K : 14
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T06:59:40.725


Rewards of (n, N)-policy is-49531.80980200062n is 2N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T07:06:05.237


40
K : 14
[0 -3200 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-52356.88169757736n is 2N is 4
40
K : 16
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T07:12:26.844


Rewards of (n, N)-policy is-35275.71444585458n is 2N is 4
40
K : 16
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T07:18:49.054


Rewards of (n, N)-policy is-38070.75976544917n is 2N is 4
40
K : 16
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T07:25:13.112


Rewards of (n, N)-policy is-40938.334280689916n is 2N is 4
40
K : 16
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T07:31:35.322


Rewards of (n, N)-policy is-43856.839504365766n is 2N is 4
40
K : 16
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T07:37:58.947


Rewards of (n, N)-policy is-46620.87853761434n is 2N is 4
40
K : 16
[

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T07:44:23.777


0 -2800 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-49567.12422518574n is 2N is 4
40
K : 16
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T07:50:46.102


Rewards of (n, N)-policy is-52370.15582529411n is 2N is 4
40
K : 18
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T07:57:08.285


Rewards of (n, N)-policy is-35250.62119114084n is 2N is 4
40
K : 18
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T08:03:33.952


Rewards of (n, N)-policy is-38095.4039665465n is 2N is 4
40
K : 18
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T08:09:54.186


Rewards of (n, N)-policy is-40950.2155307486n is 2N is 4
40
K : 18

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T08:16:11.775



[0 -2000 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-43770.05357948082n is 2N is 4
40
K : 18
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T08:22:31.644


Rewards of (n, N)-policy is-46738.18506143616n is 2N is 4
40
K : 18
[0

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T08:28:49.294


 -2800 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-49538.314901121244n is 2N is 4
40
K : 18
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T08:35:08.071


Rewards of (n, N)-policy is-52475.153383171404n is 2N is 4
40
K : 20
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T08:41:23.802


Rewards of (n, N)-policy is-35268.548488960114n is 2N is 4
40
K : 20
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T08:47:41.796


Rewards of (n, N)-policy is-38068.11633702921n is 2N is 4
40
K : 20
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T08:53:58.598


Rewards of (n, N)-policy is-40978.31864505618n is 2N is 4
40
K : 20
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T09:00:17.956


Rewards of (n, N)-policy is-43832.71447499001n is 2N is 4
40
K : 20
[0 -2400 

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T09:06:34.771


-100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-46617.42204262789n is 2N is 4
40
K : 20
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T09:12:54.133


Rewards of (n, N)-policy is-49527.78115267038n is 2N is 4
40
K : 20
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T09:19:11.504


Rewards of (n, N)-policy is-52423.10417778653n is 2N is 4
50
K : 2
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T09:25:39.053


Rewards of (n, N)-policy is-46055.49401713187n is 2N is 3
50
K : 2
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T09:33:24.714


Rewards of (n, N)-policy is-52456.05370161173n is 2N is 3
50
K : 2
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T09:41:11.153


Rewards of (n, N)-policy is-56370.239203890036n is 2N is 4
50
K : 2
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T09:48:58.920


Rewards of (n, N)-policy is-59383.27693986118n is 2N is 4
50
K : 2
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T09:56:47.069


Rewards of (n, N)-policy is-62394.15450918624n is 2N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T10:04:35.506


50
K : 2
[0 -2800 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-65425.107272092464n is 2N is 4
50
K : 2
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T10:12:25.416


Rewards of (n, N)-policy is-68447.38786255216n is 2N is 4
50
K : 4
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T10:20:13.651


Rewards of (n, N)-policy is-44178.54760366492n is 3N is 3
50
K : 4
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T10:28:04.823


Rewards of (n, N)-policy is-47425.28829749701n is 2N is 4
50
K : 4
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T10:35:53.988


Rewards of (n, N)-policy is-50501.06341468963n is 2N is 4
50

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T10:43:52.032



K : 4
[0 -2000 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-53590.945521333015n is 2N is 4
50
K : 4
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T10:51:41.414


Rewards of (n, N)-policy is-56529.44544173359n is 2N is 4
50
K : 4
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T10:59:29.491


Rewards of (n, N)-policy is-59555.902689697n is 2N is 4
50
K : 4
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T11:07:17.586


Rewards of (n, N)-policy is-62597.66782237768n is 2N is 4
50
K : 6
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T11:15:09.858


Rewards of (n, N)-policy is-42931.06241330407n is 2N is 4
50
K : 6
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T11:23:02.412


Rewards of (n, N)-policy is-45939.70219277066n is 2N is 4
50


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T11:30:50.519


K : 6
[0 -1600 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-48965.95826439059n is 2N is 4
50
K : 6
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T11:38:39.433


Rewards of (n, N)-policy is-52002.620960611115n is 2N is 4
50
K : 6
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T11:46:28.754


Rewards of (n, N)-policy is-55046.67477133472n is 2N is 4
50
K : 6
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T11:54:18.127


Rewards of (n, N)-policy is-58037.24159198854n is 2N is 4
50
K : 6
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T12:02:04.921


Rewards of (n, N)-policy is-61064.35210688038n is 2N is 4
50
K : 8
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T12:09:56.542


Rewards of (n, N)-policy is-42717.564409228195n is 2N is 4
50
K : 8
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T12:17:46.598


Rewards of (n, N)-policy is-45725.97067522045n is 2N is 4
50
K : 8

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T12:25:41.094



[0 -1600 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-48770.99109334835n is 2N is 4
50
K : 8
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T12:33:29.092


Rewards of (n, N)-policy is-51774.70391492598n is 2N is 4
50
K : 8
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T12:41:19.958


Rewards of (n, N)-policy is-54811.940285275814n is 2N is 4
50
K : 8
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T12:49:10.133


Rewards of (n, N)-policy is-57782.63983514192n is 2N is 4
50
K : 8
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T12:57:00.872


Rewards of (n, N)-policy is-60801.76658176968n is 2N is 4
50
K : 10
[

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T13:05:00.966


0 -800 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-42679.72010684291n is 2N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T13:12:50.772


50
K : 10
[0 -1200 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-45712.647380623246n is 2N is 4
50
K : 10
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T13:20:41.034


Rewards of (n, N)-policy is-48756.875203175594n is 2N is 4
50
K : 10
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T13:28:30.025


Rewards of (n, N)-policy is-51720.303586595895n is 2N is 4
50
K : 10
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T13:36:18.958


Rewards of (n, N)-policy is-54714.652036094965n is 2N is 4
50
K : 10
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T13:44:09.416


Rewards of (n, N)-policy is-57832.505605520106n is 2N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T13:51:58.997


50
K : 10
[0 -3200 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-60823.73041396954n is 2N is 4
50
K : 12
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T13:59:48.973


Rewards of (n, N)-policy is-42678.98866601176n is 2N is 4
50
K : 12
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T14:07:39.547


Rewards of (n, N)-policy is-45694.496452050545n is 2N is 4
50
K : 12
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T14:15:29.850


Rewards of (n, N)-policy is-48802.41394379245n is 2N is 4
50
K : 12
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T14:23:20.039


Rewards of (n, N)-policy is-51748.632846312656n is 2N is 4
50
K : 12
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T14:31:10.893


Rewards of (n, N)-policy is-54742.63748348353n is 2N is 4
50
K : 12
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T14:39:06.513


Rewards of (n, N)-policy is-57769.130659673276n is 2N is 4
50
K : 12
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T14:46:57.910


Rewards of (n, N)-policy is-60731.43522590065n is 2N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T14:54:46.224


50
K : 14
[0 -800 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-42698.74741724173n is 2N is 4
50
K : 14
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T15:02:36.107


Rewards of (n, N)-policy is-45700.75206975512n is 2N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T15:10:25.836


50
K : 14
[0 -1600 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-48748.36746430757n is 2N is 4
50
K : 14
[

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T15:18:15.062


0 -2000 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-51744.10408394193n is 2N is 4
50
K : 14
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T15:26:06.043


Rewards of (n, N)-policy is-54753.22638009317n is 2N is 4
50
K : 14
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T15:33:57.872


Rewards of (n, N)-policy is-57777.804274851704n is 2N is 4
50
K : 14
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T15:41:49.110


Rewards of (n, N)-policy is-60776.31201581386n is 2N is 4
50
K : 16


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T15:49:38.481


[0 -800 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-42698.66260886712n is 2N is 4
50
K : 16
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T15:57:30.756


Rewards of (n, N)-policy is-45646.46055448337n is 2N is 4
50
K : 16
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T16:05:22.217


Rewards of (n, N)-policy is-48690.471603638885n is 2N is 4
50
K : 16
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T16:13:14.847


Rewards of (n, N)-policy is-51738.31161687499n is 2N is 4
50
K : 16
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T16:21:07.038


Rewards of (n, N)-policy is-54778.893056194545n is 2N is 4
50
K : 16
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T16:29:01.784


Rewards of (n, N)-policy is-57759.477517370186n is 2N is 4
50
K : 16
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T16:36:54.556


Rewards of (n, N)-policy is-60708.41774261713n is 2N is 4
50
K : 18
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T16:44:46.141


Rewards of (n, N)-policy is-42658.87772728358n is 2N is 4
50
K : 18
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T16:52:37.118


Rewards of (n, N)-policy is-45713.09115025732n is 2N is 4
50
K : 18
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T17:00:28.917


Rewards of (n, N)-policy is-48707.001606767335n is 2N is 4
50
K : 18
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T17:08:21.462


Rewards of (n, N)-policy is-51728.67875574198n is 2N is 4
50
K : 18
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T17:16:13.785


Rewards of (n, N)-policy is-54720.62911680856n is 2N is 4
50
K : 18
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T17:24:03.854


Rewards of (n, N)-policy is-57729.103626212476n is 2N is 4
50
K : 18
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T17:31:55.446


Rewards of (n, N)-policy is-60753.51015289426n is 2N is 4
50
K : 20
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T17:39:46.236


Rewards of (n, N)-policy is-42660.3305139461n is 2N is 4
50
K : 20
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T17:47:38.586


Rewards of (n, N)-policy is-45711.32351271289n is 2N is 4
50

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T17:55:29.905



K : 20
[0 -1600 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-48769.81864549036n is 2N is 4
50
K : 20
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T18:03:22.565


Rewards of (n, N)-policy is-51686.23733094986n is 2N is 4
50
K : 20
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T18:11:15.275


Rewards of (n, N)-policy is-54788.81220268284n is 2N is 4
50
K : 20
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T18:19:07.262


Rewards of (n, N)-policy is-57787.34387331531n is 2N is 4
50
K : 20
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T18:26:59.750


Rewards of (n, N)-policy is-60788.54202675194n is 2N is 4
50
K : 22
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T18:34:51.935


Rewards of (n, N)-policy is-42733.12580565775n is 2N is 4
50
K : 22
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T18:42:45.347


Rewards of (n, N)-policy is-45704.41866566759n is 2N is 4
50
K : 22
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T18:50:38.513


Rewards of (n, N)-policy is-48704.45674623376n is 2N is 4
50
K : 22
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T18:58:31.470


Rewards of (n, N)-policy is-51717.14180736941n is 2N is 4
50
K : 22
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T19:06:23.809


Rewards of (n, N)-policy is-54739.751792812756n is 2N is 4
50
K : 22
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T19:14:18.542


Rewards of (n, N)-policy is-57781.820660178426n is 2N is 4
50
K : 22
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T19:22:10.850


Rewards of (n, N)-policy is-60789.672562419226n is 2N is 4
50
K : 24
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T19:30:03.805


Rewards of (n, N)-policy is-42667.26347668985n is 2N is 4
50
K : 24
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T19:37:58.184


Rewards of (n, N)-policy is-45717.65060523212n is 2N is 4
50
K : 24
[0 -1600 

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T19:45:50.470


-100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-48711.475691356434n is 2N is 4
50
K : 24
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T19:53:43.105


Rewards of (n, N)-policy is-51720.13953490012n is 2N is 4
50
K : 24


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T20:01:36.265


[0 -2400 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-54751.16210498941n is 2N is 4
50
K : 24
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T20:09:32.221


Rewards of (n, N)-policy is-57779.8668248286n is 2N is 4
50
K : 24
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T20:17:25.938


Rewards of (n, N)-policy is-60804.36335185049n is 2N is 4
60
K : 2
[0 -800 -100 -1200 -2000]


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T20:25:19.117


Finding best nN policy...
Rewards of (n, N)-policy is-51603.05296795347n is 2N is 3
60
K : 2
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T20:34:48.645


Rewards of (n, N)-policy is-58114.20865518967n is 2N is 3
60
K : 2
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T20:44:16.720


Rewards of (n, N)-policy is-61728.62256138428n is 2N is 4
60
K : 2
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T20:53:46.736


Rewards of (n, N)-policy is-64877.74241882918n is 2N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T21:03:17.236


60
K : 2
[0 -2400 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-67918.78824893899n is 2N is 4
60
K : 2
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T21:12:48.171


Rewards of (n, N)-policy is-70989.24158024088n is 2N is 4
60
K : 2
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T21:22:19.432


Rewards of (n, N)-policy is-74099.71929157373n is 2N is 4
60

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T21:31:47.695



K : 4
[0 -800 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-48424.66380309424n is 3N is 3
60
K : 4
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T21:41:29.395


Rewards of (n, N)-policy is-52785.52071422358n is 2N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T21:50:57.627


60
K : 4
[0 -1600 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-55824.664646242716n is 2N is 4
60
K : 4
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T22:00:25.721


Rewards of (n, N)-policy is-58909.56545025532n is 2N is 4
60
K : 4
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T22:09:56.500


Rewards of (n, N)-policy is-61992.52862539618n is 2N is 4
60
K : 4
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T22:19:26.661


Rewards of (n, N)-policy is-65053.03216576126n is 2N is 4
60
K : 4
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T22:28:51.691


Rewards of (n, N)-policy is-68162.82360315716n is 2N is 4
60
K : 6
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T22:38:18.969


Rewards of (n, N)-policy is-47879.011635426985n is 3N is 3
60
K : 6
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T22:47:45.739


Rewards of (n, N)-policy is-51012.600969362524n is 2N is 4
60
K : 6
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T22:57:09.011


Rewards of (n, N)-policy is-54086.109871094n is 2N is 4
60
K : 6
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T23:06:31.392


Rewards of (n, N)-policy is-57201.63863780434n is 2N is 4
60
K : 6
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T23:15:55.239


Rewards of (n, N)-policy is-60272.966623510765n is 2N is 4
60

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T23:25:19.136



K : 6
[0 -2800 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-63361.495186044696n is 2N is 4
60
K : 6
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T23:34:42.937


Rewards of (n, N)-policy is-66489.77097754658n is 2N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T23:44:04.670


60
K : 8
[0 -800 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-47592.196503705476n is 2N is 4
60
K : 8
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-09T23:53:27.519


Rewards of (n, N)-policy is-50676.0654143163n is 2N is 4
60
K : 8
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T00:02:49.679


Rewards of (n, N)-policy is-53733.50730014921n is 2N is 4
60
K : 

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T00:12:11.420


8
[0 -2000 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-56839.42935067094n is 2N is 4
60
K : 8
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T00:21:32.959


Rewards of (n, N)-policy is-59932.907365299296n is 2N is 4
60
K : 8


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T00:30:57.869


[0 -2800 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-63031.802270361484n is 2N is 4
60
K : 8
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T00:40:17.732


Rewards of (n, N)-policy is-66107.61413133299n is 2N is 4
60
K : 10
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T00:49:39.356


Rewards of (n, N)-policy is-47518.38216124221n is 2N is 4
60


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T00:59:01.777


K : 10
[0 -1200 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-50643.30057669203n is 2N is 4
60
K : 10
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T01:08:22.764


Rewards of (n, N)-policy is-53701.605103645656n is 2N is 4
60
K : 10


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T01:17:44.534


[0 -2000 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-56786.720483905396n is 2N is 4
60
K : 10
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T01:27:05.900


Rewards of (n, N)-policy is-59894.72880967363n is 2N is 4
60
K : 10
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T01:36:27.913


Rewards of (n, N)-policy is-62907.80482486408n is 2N is 4
60
K : 10
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T01:45:50.596


Rewards of (n, N)-policy is-66116.74448599428n is 2N is 4
60
K : 12
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T01:55:07.653


Rewards of (n, N)-policy is-47569.15583202507n is 2N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T02:04:30.634


60
K : 12
[0 -1200 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-50584.69890255607n is 2N is 4
60
K : 12
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T02:13:52.783


Rewards of (n, N)-policy is-53710.08559794011n is 2N is 4
60
K : 12
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T02:23:14.861


Rewards of (n, N)-policy is-56841.02102335279n is 2N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T02:32:36.831


60
K : 12
[0 -2400 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-59885.42579640202n is 2N is 4
60
K : 12
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T02:41:59.642


Rewards of (n, N)-policy is-62966.790570261845n is 2N is 4
60
K : 12
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T02:51:22.160


Rewards of (n, N)-policy is-66073.2201208162n is 2N is 4
60
K : 14
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T03:00:43.353


Rewards of (n, N)-policy is-47518.381448502514n is 2N is 4
60
K : 14
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T03:10:05.765


Rewards of (n, N)-policy is-50573.15229334654n is 2N is 4
60
K : 14
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T03:19:28.348


Rewards of (n, N)-policy is-53704.35332834915n is 2N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T03:28:50.402


60
K : 14
[0 -2000 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-56817.45151930538n is 2N is 4
60
K : 14
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T03:38:14.291


Rewards of (n, N)-policy is-59832.27678899083n is 2N is 4
60
K : 14
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T03:47:39.086


Rewards of (n, N)-policy is-62969.31300513489n is 2N is 4
60
K : 14
[0 

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T03:57:00.579


-3200 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-66057.66807431342n is 2N is 4
60
K : 16
[0 -800 -100 -1200 -2000]
Finding best nN policy...

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T04:06:23.435



Rewards of (n, N)-policy is-47520.65259604906n is 2N is 4
60
K : 16
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T04:15:44.872


Rewards of (n, N)-policy is-50650.30237128614n is 2N is 4
60


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T04:25:08.693


K : 16
[0 -1600 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-53697.16091901975n is 2N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T04:34:32.345


60
K : 16
[0 -2000 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-56783.428781160066n is 2N is 4
60
K : 16
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T04:43:54.160


Rewards of (n, N)-policy is-59814.57793556809n is 2N is 4
60
K : 16
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T04:53:12.991


Rewards of (n, N)-policy is-63000.98345181241n is 2N is 4
60
K : 16
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T05:02:36.120


Rewards of (n, N)-policy is-66080.99036949196n is 2N is 4
60
K : 18
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T05:11:58.399


Rewards of (n, N)-policy is-47482.57254058331n is 2N is 4
60
K : 18
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T05:21:21.017


Rewards of (n, N)-policy is-50640.020628450075n is 2N is 4
60
K : 18
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T05:30:44.525


Rewards of (n, N)-policy is-53671.18016678259n is 2N is 4
60
K : 18
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T05:40:07.984


Rewards of (n, N)-policy is-56749.731997339106n is 2N is 4
60
K : 18
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T05:49:30.735


Rewards of (n, N)-policy is-59889.61450730474n is 2N is 4
60
K : 18
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T05:59:01.915


Rewards of (n, N)-policy is-62971.419945723486n is 2N is 4
60
K : 18
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T06:08:26.026


Rewards of (n, N)-policy is-66071.98868310258n is 2N is 4
60
K : 20
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T06:17:48.617


Rewards of (n, N)-policy is-47530.68400193292n is 2N is 4
60
K : 20
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T06:27:12.683


Rewards of (n, N)-policy is-50634.33454019018n is 2N is 4
60
K : 20

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T06:36:34.912



[0 -1600 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-53700.52560511668n is 2N is 4
60
K : 20
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T06:45:56.508


Rewards of (n, N)-policy is-56797.42650698521n is 2N is 4
60
K : 20
[0 -2400 -100 -1200 -2000]


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T06:55:21.141


Finding best nN policy...
Rewards of (n, N)-policy is-59893.289544324114n is 2N is 4
60
K : 20
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T07:04:44.304


Rewards of (n, N)-policy is-62979.32572208823n is 2N is 4
60

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T07:14:06.186



K : 20
[0 -3200 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-66115.64028407037n is 2N is 4
60
K : 22
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T07:23:25.899


Rewards of (n, N)-policy is-47509.66773387049n is 2N is 4
60
K : 22
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T07:32:48.153


Rewards of (n, N)-policy is-50600.05385374089n is 2N is 4
60
K : 22
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T07:42:11.185


Rewards of (n, N)-policy is-53697.78045756221n is 2N is 4
60
K : 22
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T07:51:33.160


Rewards of (n, N)-policy is-56791.80768854896n is 2N is 4
60
K : 22
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T08:00:52.775


Rewards of (n, N)-policy is-59855.94118459934n is 2N is 4
60
K : 22
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T08:10:23.926


Rewards of (n, N)-policy is-62931.120604812095n is 2N is 4
60
K : 22
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T08:19:45.215


Rewards of (n, N)-policy is-66105.7638073853n is 2N is 4
60
K : 24
[0 -800 -100 -1200 -2000]


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T08:29:06.966


Finding best nN policy...
Rewards of (n, N)-policy is-47520.07870335885n is 2N is 4
60
K : 24
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T08:38:29.910


Rewards of (n, N)-policy is-50602.33406249429n is 2N is 4
60
K : 24
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T08:47:52.025


Rewards of (n, N)-policy is-53721.136809747935n is 2N is 4
60
K : 24
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T08:57:14.484


Rewards of (n, N)-policy is-56798.29061629709n is 2N is 4
60
K : 24
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T09:06:34.928


Rewards of (n, N)-policy is-59870.240111100575n is 2N is 4


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T09:15:56.066


60
K : 24
[0 -2800 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-62979.04107405667n is 2N is 4
60
K : 24
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T09:25:17.791


Rewards of (n, N)-policy is-66069.50041758988n is 2N is 4
60
K : 26
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T09:34:48.001


Rewards of (n, N)-policy is-47525.15709749799n is 2N is 4
60
K : 26
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T09:44:07.592


Rewards of (n, N)-policy is-50586.45607179509n is 2N is 4
60
K : 26
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T09:53:27.640


Rewards of (n, N)-policy is-53678.72926071495n is 2N is 4
60
K : 26
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T10:02:46.313


Rewards of (n, N)-policy is-56833.43419033358n is 2N is 4
60
K : 26


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T10:12:06.584


[0 -2400 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-59846.27435060768n is 2N is 4
60
K : 26
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T10:21:25.903


Rewards of (n, N)-policy is-62968.76860204716n is 2N is 4
60
K : 26
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T10:30:46.552


Rewards of (n, N)-policy is-66061.08521506873n is 2N is 4
60
K : 28
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T10:40:07.414


Rewards of (n, N)-policy is-47492.017577205355n is 2N is 4
60
K : 28
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T10:49:28.672


Rewards of (n, N)-policy is-50627.47911215162n is 2N is 4
60
K : 28
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T10:58:47.161


Rewards of (n, N)-policy is-53681.90869400804n is 2N is 4
60
K : 28
[0 -2000 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T11:08:07.597


Rewards of (n, N)-policy is-56836.24374653869n is 2N is 4
60
K : 28
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T11:17:27.647


Rewards of (n, N)-policy is-59867.8699334303n is 2N is 4
60
K : 28
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T11:26:46.689


Rewards of (n, N)-policy is-62966.1499754782n is 2N is 4
60
K : 28
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T11:36:06.344


Rewards of (n, N)-policy is-66061.63944361261n is 2N is 4
60
K : 30
[0 -800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T11:45:27.395


Rewards of (n, N)-policy is-47514.10269395505n is 2N is 4
60
K : 30
[0 -1200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T11:54:47.758


Rewards of (n, N)-policy is-50680.890571287855n is 2N is 4
60
K : 30
[0 -1600 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T12:04:08.922


Rewards of (n, N)-policy is-53703.835572793745n is 2N is 4
60
K : 30


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T12:13:28.666


[0 -2000 -100 -1200 -2000]
Finding best nN policy...
Rewards of (n, N)-policy is-56798.62774525418n is 2N is 4
60
K : 30
[0 -2400 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T12:22:50.824


Rewards of (n, N)-policy is-59879.03215076481n is 2N is 4
60
K : 30
[0 -2800 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T12:32:10.038


Rewards of (n, N)-policy is-62968.265890700626n is 2N is 4
60
K : 30
[0 -3200 -100 -1200 -2000]
Finding best nN policy...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m started timer at: 2025-03-10T12:41:30.275


Rewards of (n, N)-policy is-66054.62431604795n is 2N is 4


In [85]:
# Display the results table `df` (built in an earlier cell, not shown here).
df

Row,u,K,n,s,m,f,p,mean,std,nN
Unnamed: 0_level_1,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any
1,60,30,0,-3200,-100,-1200,-2000,-66054.6,3258.32,"CartesianIndex(2, 4)"
2,60,30,0,-2800,-100,-1200,-2000,-62968.3,3134.34,"CartesianIndex(2, 4)"
3,60,30,0,-2400,-100,-1200,-2000,-59879.0,2990.57,"CartesianIndex(2, 4)"
4,60,30,0,-2000,-100,-1200,-2000,-56798.6,2908.56,"CartesianIndex(2, 4)"
5,60,30,0,-1600,-100,-1200,-2000,-53703.8,2808.01,"CartesianIndex(2, 4)"
6,60,30,0,-1200,-100,-1200,-2000,-50680.9,2677.21,"CartesianIndex(2, 4)"
7,60,30,0,-800,-100,-1200,-2000,-47514.1,2592.19,"CartesianIndex(2, 4)"
8,60,28,0,-3200,-100,-1200,-2000,-66061.6,3340.46,"CartesianIndex(2, 4)"
9,60,28,0,-2800,-100,-1200,-2000,-62966.1,3169.47,"CartesianIndex(2, 4)"
10,60,28,0,-2400,-100,-1200,-2000,-59867.9,2993.96,"CartesianIndex(2, 4)"


In [86]:
# Export the results table `df` to a CSV file (relative path, so it lands in
# the current working directory). The expression evaluates to the file name.
using CSV
df |> CSV.write("Hetero (n,N)-policy.csv")

"Hetero (n,N)-policy.csv"