This notebook tunes the discount factor used for per-decision option discounting in DDO-MCTS. It parses the file named by the `FILE_NAME` variable and takes a base gamma factor (`GAMMA`, default 0.9).

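It outputs a new gamma factor for each option individually, based on the average distance (number of steps) recorded for each option: the averages are L2-normalized and each option's gamma is computed as `gamma + (1 - gamma) * normalized_average`, rounded to two decimals.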

In [38]:
import re
import numpy as np

# Path of the text file parsed for per-option step counts.
FILE_NAME = 'more_options_durations.txt'
# Base discount factor; per-option gammas are computed from it by calc_gamma().
GAMMA = .9

In [39]:
def read_options_count_file(file):
    """Accumulate per-option step totals from an options log file.

    A line is counted when it mentions one of the four known option names.
    Its step count is the first run of digits on the line that is preceded
    by a space.

    Args:
        file: Path of the text file to parse.

    Returns:
        dict mapping option name -> (cumulative_steps, occurrence_count),
        in order of first appearance in the file.
    """
    op_patt = re.compile(
        r'(GoToNearestSpriteOfItypeOption)|(GoToMovableOption)|(GoToPositionOption)|(GoNearMovableOption)'
    )
    step_patt = re.compile(r' [0-9]+')
    options_count = {}
    # Open the path that was passed in rather than the module-level FILE_NAME,
    # so the function honours its argument.
    with open(file) as fd:
        for line in fd:
            option_match = op_patt.search(line)
            if option_match is None:
                continue
            step_match = step_patt.search(line)
            if step_match is None:
                # An option name without a step count carries no duration
                # information; skip it instead of failing on None.group().
                continue
            option_name = option_match.group(0)
            # int() tolerates the leading space captured by the pattern.
            steps = int(step_match.group(0))
            cum_steps, count = options_count.get(option_name, (0, 0))
            options_count[option_name] = (cum_steps + steps, count + 1)
    return options_count

def sort_options_by_avg(file):
    """Return (option_name, average_steps) pairs in ascending order of average.

    The per-option totals come from read_options_count_file(file); each
    average is the cumulative step count divided by the number of occurrences.
    """
    totals = read_options_count_file(file)
    averages = [
        (name, step_sum / occurrences)
        for name, (step_sum, occurrences) in totals.items()
    ]
    # Stable sort, so options with equal averages keep their parsed order.
    averages.sort(key=lambda pair: pair[1])
    return averages

def normalized(a, axis=-1, order=2):
    """Scale `a` to unit norm along `axis`.

    Args:
        a: Array to normalize.
        axis: Axis along which the norm is taken (default: last axis).
        order: Norm order passed to np.linalg.norm (default: 2).

    Returns:
        `a` divided by its norm along `axis`. Slices whose norm is zero are
        divided by 1 and so returned unchanged. A 1-D input yields an array
        of shape (1, n), because the norm is promoted to at least 1-D before
        being expanded along `axis`.
    """
    norms = np.atleast_1d(np.linalg.norm(a, ord=order, axis=axis))
    # Replace zero norms with 1 to avoid dividing by zero.
    zero_mask = norms == 0
    norms[zero_mask] = 1
    divisor = np.expand_dims(norms, axis)
    return a / divisor

def calc_gamma(norm_vals, gamma):
    """Stretch a base discount factor toward 1 in proportion to `norm_vals`.

    Each output is gamma + (1 - gamma) * norm_val, so a value of 0 maps to
    `gamma` and a value of 1 maps to 1.0.

    Args:
        norm_vals: Array-like of normalized values (any shape).
        gamma: Base discount factor.

    Returns:
        NumPy array with the same shape as `norm_vals`, rounded to 2 decimals.
    """
    values = np.asarray(norm_vals)
    # Scalar broadcasting replaces the former np.full(len(...)) helper arrays
    # and works for any input shape.
    final_gammas = gamma + (1 - gamma) * values
    # np.round_ was removed in NumPy 2.0; np.round is the supported spelling.
    return np.round(final_gammas, decimals=2)

def print_gammas(gammas, sorted_avg):
    """Print one "<option name>: <gamma>" line per entry of `sorted_avg`.

    `sorted_avg` is a sequence of (option_name, average) pairs; `gammas` is
    indexed by position, so its i-th element belongs to the i-th pair.
    """
    for position, (option_name, _average) in enumerate(sorted_avg):
        line = option_name + ': ' + str(gammas[position])
        print(line)

def run(file, gamma):
    """Compute and print a per-option discount factor.

    Options are ranked by average step count, the averages are normalized,
    and each option's gamma is derived from the base `gamma` via calc_gamma().

    Args:
        file: Path of the options file to parse.
        gamma: Base discount factor.
    """
    ranked = sort_options_by_avg(file)
    avg_steps = np.array([avg for _, avg in ranked])
    per_option = calc_gamma(normalized(avg_steps), gamma)
    # normalized() returns a (1, n) array for 1-D input; take its single row.
    print_gammas(per_option[0], ranked)
    

In [40]:
# Per-option decision-step discounts computed from the base discount factor GAMMA (0.9).
run(FILE_NAME, GAMMA)

GoToNearestSpriteOfItypeOption: 0.91
GoToPositionOption: 0.94
GoToMovableOption: 0.95
GoNearMovableOption: 0.98


In [41]:
# Per-option decision-step discounts computed from a base discount factor of 0.95.
run(FILE_NAME, 0.95)

GoToNearestSpriteOfItypeOption: 0.95
GoToPositionOption: 0.97
GoToMovableOption: 0.97
GoNearMovableOption: 0.99
