In [42]:
import pandas as pd
import datetime
import sys
import keras as ks
import os
import numpy as np
try:
    from .game_v2 import *
except (ModuleNotFoundError if sys.version_info >= (3, 6) else SystemError) as e:
    from game_v2 import *


In [43]:
# Load the recorded decision log for the 1000-round run from a local pickle file.
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.
df = pd.read_pickle("local_result/greedy_playables_1000rounds_20190305110518.pkl")

In [44]:
def dqn_get_play(model, classmap, playable_cards, **info):
    """Pick a play by scoring an encoded game state with a Q-network.

    The keyword arguments in ``info`` are flattened into one feature vector
    of length ``state_space_dim``; the model scores every action and the
    arg-max action is translated back into one of the offered plays.

    Args:
        model: object exposing ``predict(batch)``; the first row of its
            output is treated as the per-action scores.
        classmap: mapping from action id (the arg-max index) to a card short
            name or ``None``.
        playable_cards: sequence of ``(index, card)`` pairs; every card must
            expose ``short_name``.
        **info: reads ``play_state`` (mapping with ``to_draw``, ``color``,
            ``value`` and ``type``), ``clockwise``, ``current_player`` (has
            ``cards``), ``num_cards_left`` and ``next_player`` (has
            ``num_cards``).

    Returns:
        The ``(index, card)`` pair whose short name equals the predicted
        action name, or ``None`` when no offered play matches. When several
        pairs match, the last one wins.
    """
    features = np.zeros(state_space_dim)

    # --- table state: slot 0 plus one-hot groups (author's layout: #1-#4, #5-#15, #16-#21) ---
    table = info.get("play_state")
    features[0] = table["to_draw"]
    features[table["color"].value] = 1
    features[table["value"] + 6] = 1
    features[table["type"].value + 16] = 1

    # --- direction of play: slots 22-23, left at zero when not supplied ---
    direction = info.get("clockwise")
    if direction is not None:
        features[int(direction) + 22] = 1

    # --- acting player: hand size in slot 24, per-card counts of the hand from slot 25 ---
    actor = info.get("current_player")
    features[24] = info.get("num_cards_left")
    for held in actor.cards:
        features[25 + action_invmap[held.short_name]] += 1

    # --- offered plays: per-card counts from slot 79; slot 133 flags "nothing playable" ---
    if len(playable_cards) > 0:
        for _, option in playable_cards:
            features[79 + action_invmap[option.short_name]] += 1
    else:
        features[133] = 1

    # --- next player: number of cards held, slot 134 ---
    features[134] = info.get("next_player").num_cards

    # The model is fed a batch containing this single state.
    batch = features.reshape(1, -1)
    scores = model.predict(batch)
    wanted_name = classmap[np.argmax(scores[0])]

    # Translate the predicted action name back into one of the offered pairs.
    matches = [
        (idx, option)
        for idx, option in playable_cards
        if option.short_name == wanted_name
    ]
    return matches[-1] if matches else None

In [45]:
# Length of the flat feature vector that dqn_get_play allocates.
state_space_dim = 135

# One action per unique card, followed by a trailing None entry
# (presumably "play nothing" -- confirm against game_v2).
unique_cards = make_standard_unique_deck()
action_names = [deck_card.short_name for deck_card in unique_cards]
action_names.append(None)
action_space_dim = len(action_names)
action_space = [action_id for action_id in range(action_space_dim)]

# Lookup tables in both directions between action ids and short names.
action_map = dict(enumerate(action_names))  # action id -> short name / None
action_invmap = {name: action_id for action_id, name in enumerate(action_names)}  # short name / None -> action id

In [46]:
def init_model(model, classmap):
    """Return a usable Keras model from either an instance or a saved-model path.

    Args:
        model: a ``ks.models.Model`` instance, which is returned unchanged,
            or a ``str`` path to a saved model, which is loaded with
            ``ks.models.load_model``.
        classmap: not used by this function; kept so existing call sites
            keep working.

    Returns:
        The Keras model.

    Raises:
        FileNotFoundError: ``model`` is a path that does not exist.
        TypeError: ``model`` is neither a Keras model nor a ``str``.
    """
    if isinstance(model, str):
        # Explicit check rather than ``assert`` so it still runs under ``python -O``.
        if not os.path.exists(model):
            raise FileNotFoundError("Model file not found: {!r}".format(model))
        return ks.models.load_model(model)
    if isinstance(model, ks.models.Model):
        return model
    raise TypeError(
        "Unknown model type {}; expected a keras Model or a path string".format(
            type(model).__name__
        )
    )

In [47]:
# Load the saved network from disk; action_map is passed as the classmap argument.
agent_model = init_model("env-v6-dqn-002-local.h5", action_map)



In [48]:
# greedy vs dqn - 1000 rounds
# Count the recorded decisions for which the DQN agent returns the same play
# as the stored greedy choice.
# Row layout as used below: 0 play_state, 1 clockwise, 2 current_player,
# 3 num_cards_left, 4 next_player, 5 playable cards, 6 greedy's choice.
same_counter = 0
for row_idx in range(len(df)):
    # Look the row up once instead of re-indexing the frame for every field.
    record = df.loc[row_idx]
    greedy_chosen = record[6]
    agent_chosen = dqn_get_play(
        agent_model, action_map, record[5],
        play_state=record[0], clockwise=record[1], current_player=record[2],
        num_cards_left=record[3], next_player=record[4])
    if greedy_chosen == agent_chosen:
        same_counter += 1
print("same_choice {} / total {} = {}".format(same_counter, len(df), same_counter / len(df)))

same_choice 19290 / total 30843 = 0.6254255422624259


In [None]:
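# NOTE(review): reference snippet only, never executed in this notebook -- presumably
# how the "random0" policy picks its card; confirm against the policy implementation.
# return random.Random(num_rounds_played).choice(playable_cards)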

In [49]:
import random

# Baseline: how often does a random pick among the playable cards coincide
# with the recorded greedy choice?
random_same_counter = 0
for row_idx in range(len(df)):
    record = df.loc[row_idx]
    greedy_chosen = record[6]
    random_chosen = random.choice(record[5])
    if greedy_chosen == random_chosen:
        random_same_counter += 1
print(
    " greedy vs random0 same_choice {} / total {} = {}".format(
        random_same_counter,
        len(df),
        random_same_counter / len(df),
    )
)


 greedy vs random0 same_choice 20057 / total 30843 = 0.6502934215219013


In [53]:
# Load the recorded decision log for the 10000-round run from a local pickle file.
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.
df_10000 = pd.read_pickle("local_result/greedy_playables_10000rounds_20190305024110.pkl")

In [54]:
# Number of recorded rows in the 10000-round log.
len(df_10000)

300863

In [55]:
# greedy vs dqn - 10000 rounds
# For every recorded row, ask the DQN agent for its play and count how often
# it equals the stored greedy choice.
same_counter_10000 = 0
for row_idx in range(len(df_10000)):
    record = df_10000.loc[row_idx]
    greedy_chosen = record[6]
    # Keyword inputs for the state encoder, taken from row entries 0-4.
    agent_inputs = {
        "play_state": record[0],
        "clockwise": record[1],
        "current_player": record[2],
        "num_cards_left": record[3],
        "next_player": record[4],
    }
    agent_chosen = dqn_get_play(agent_model, action_map, record[5], **agent_inputs)
    if greedy_chosen == agent_chosen:
        same_counter_10000 += 1
print(
    "greedy vs dqn 10000 rounds same_choice {} / total {} = {}".format(
        same_counter_10000,
        len(df_10000),
        same_counter_10000 / len(df_10000),
    )
)


greedy vs dqn 10000 rounds same_choice 188677 / total 300863 = 0.62711932008921


In [56]:
def calc_random0_greedy_same_rate(stored_df, rng=None):
    """Print how often a random pick among the playable options equals the greedy choice.

    Args:
        stored_df: DataFrame addressed by row labels ``0 .. len - 1``; in
            every row, entry ``5`` is the sequence of playable options and
            entry ``6`` is the recorded greedy choice.
        rng: optional object with a ``choice(seq)`` method, for example
            ``random.Random(seed)``, to make the run reproducible. Defaults
            to the module-level ``random`` generator.

    Returns:
        None. The match count, the row count and their ratio are printed;
        the ratio is ``nan`` for an empty frame.
    """
    chooser = random if rng is None else rng
    total_num = len(stored_df)
    counter = 0
    for row_idx in range(total_num):
        # Fetch the row once; both fields are read from the same lookup.
        record = stored_df.loc[row_idx]
        greedy_chosen = record[6]
        playable = record[5]
        # ``choice`` raises IndexError on an empty sequence, so "nothing
        # playable" is treated as choosing None.
        # NOTE(review): assumes a greedy "no play" is stored as None -- confirm.
        random_chosen = chooser.choice(playable) if len(playable) > 0 else None
        if greedy_chosen == random_chosen:
            counter += 1
    # Avoid ZeroDivisionError when the frame has no rows.
    rate = counter / total_num if total_num else float("nan")
    print("greedy vs random0 same choice {} / total {} = {}".format(counter, total_num, rate))


In [57]:
# Report the random-vs-greedy agreement rate on the 10000-round log.
calc_random0_greedy_same_rate(df_10000)

greedy vs random0 same choice 196017 / total 300863 = 0.6515158061975052
