In [None]:
import os
import sys
import numpy as np
import time
import argparse
import logging
import function_board as fb
import function_tool as ft
import cProfile
import function_get_aiming_grid
from function_solve_dp_withturn import solve_dp_withturn_valueiteration, setup_logging, log_system_info, \
    solve_dp_withturn_policyiteration, solve_dp_withturn_valueiteration_gpu
# Array display settings for the whole notebook: 4 digits of float precision,
# lines up to 300 characters wide, and summarised output for arrays with
# more than 300 elements.
np.set_printoptions(precision=4, linewidth=300, threshold=300)
# Solver dispatch table keyed by (method, use_gpu).
# 'value' has separate GPU and non-GPU solvers; both 'policy' keys resolve to
# the same solver function.
function_map = {
    ('value', True): solve_dp_withturn_valueiteration_gpu,
    ('value', False): solve_dp_withturn_valueiteration,
    **{('policy', use_gpu): solve_dp_withturn_policyiteration for use_gpu in (True, False)},
}
# Output directory: passed to setup_logging and used as the target of np.save
# in the solver cells below.
result_dir = '../HW_result'
# exist_ok=True makes the cell safely re-runnable and avoids the race between
# a separate "is it there?" check and the creation call.
os.makedirs(result_dir, exist_ok=True)

In [None]:
# Settings shared by the solver cells below.
# iter_limit / iter_error are forwarded to the solvers as keyword arguments
# (presumably the iteration cap and convergence tolerance -- confirm in
# function_solve_dp_withturn).
iter_limit = 1_000
iter_error = 1e-4

# Player IDs to solve for; each ID is turned into the name 'player<ID>' when
# loading that player's aiming grid.
playerID_list = [7, 11]

# Value Iteration

In [None]:
# Solve the with-turn DP by value iteration for every player in playerID_list,
# then store the value array and the optimal action indices as .npy files.
method = 'value'
gpu = False
solver = function_map[(method, gpu)]  # pick the solver for this method / GPU flag

for playerID in playerID_list:
    # One logger per (method, player) run.
    logger = setup_logging(method, result_dir, playerID)
    log_system_info(logger)

    # load_aiming_grid returns six arrays; the solver only takes four of them.
    name_pa = 'player{}'.format(playerID)
    (
        aiming_grid,
        prob_grid_normalscore,
        prob_grid_singlescore,
        prob_grid_doublescore,
        prob_grid_triplescore,
        prob_grid_bullscore,
    ) = function_get_aiming_grid.load_aiming_grid(name_pa)

    # Time only the solver call.
    t1 = time.time()
    V, optimal_action_index = solver(
        aiming_grid,
        prob_grid_normalscore,
        prob_grid_doublescore,
        prob_grid_bullscore,
        iter_limit=iter_limit,
        iter_error=iter_error,
        logger=logger,
    )
    t2 = time.time()
    elapsed = t2 - t1

    print(f'\nsolve dp_withturn in {elapsed} seconds for player{playerID}')

    value_path = os.path.join(result_dir, f'{name_pa}_valueiter_V.npy')
    action_path = os.path.join(result_dir, f'{name_pa}_valueiter_Mu.npy')
    np.save(value_path, V)
    np.save(action_path, optimal_action_index)

    logger.info(f'Saved V matrix and optimal action index for player {playerID}')
    logger.info(f'Total time for value iteration: {elapsed:.4f} seconds.')
    print('\n')

# Policy Iteration

In [None]:
# Solve the with-turn DP by policy iteration for every player in playerID_list,
# then store the value array and the optimal action indices as .npy files.
method = 'policy'
# function_map registers the same policy-iteration solver under both GPU flags,
# so the lookup does not depend on a `gpu` variable left over from another cell.
# An unknown method fails here with a KeyError instead of reaching a broken fallback.
solver = function_map[(method, False)]

for playerID in playerID_list:
    # One logger per (method, player) run.
    logger = setup_logging(method, result_dir, playerID)
    log_system_info(logger)

    # load_aiming_grid returns six arrays; the solver only takes four of them.
    name_pa = 'player{}'.format(playerID)
    [aiming_grid, prob_grid_normalscore, prob_grid_singlescore, prob_grid_doublescore, prob_grid_triplescore,
     prob_grid_bullscore] = function_get_aiming_grid.load_aiming_grid(name_pa)

    # Time only the solver call.
    t1 = time.time()
    V, optimal_action_index = solver(aiming_grid, prob_grid_normalscore,
                                     prob_grid_doublescore,
                                     prob_grid_bullscore, iter_limit=iter_limit,
                                     iter_error=iter_error, logger=logger)
    t2 = time.time()

    print(f'\nsolve dp_withturn in {t2 - t1} seconds for player{playerID}')
    # Use policy-specific file names: writing to the '*_valueiter_*' names would
    # overwrite the results saved by the value-iteration cell.
    np.save(os.path.join(result_dir, f'player{playerID}_policyiter_V.npy'), V)
    np.save(os.path.join(result_dir, f'player{playerID}_policyiter_Mu.npy'), optimal_action_index)
    logger.info(f'Saved V matrix and optimal action index for player {playerID}')
    logger.info(f'Total time for policy iteration: {t2 - t1:.4f} seconds.')
    print('\n')