In [1]:
import sys
sys.path.insert(0, 'env')
sys.path.insert(0, 'lib')

import ccg
from agents import ARagent, A2Cagent, SkipAgent
import pandas as pd
import numpy as np
import copy
import random
import torch
import utils
# One seed shared by every random number generator used in this notebook,
# so that a fresh "Restart & Run All" reproduces the same run.
seed = 1231432

# Seed NumPy, PyTorch and the stdlib RNG, in that order.
for _seed_rng in (np.random.seed, torch.manual_seed, random.seed):
    _seed_rng(seed)
del _seed_rng

# Card definitions: one ccg.Minion is built from each row of the card table.
cards = pd.read_csv('env/configue/cardsTable.csv')
cardsList = [ccg.Minion(i) for i in cards.values.tolist()]

# Core definitions: one ccg.Core is built from each row of the cores table.
cores = pd.read_csv('env/configue/cores.csv')
coreList = [ccg.Core(i) for i in cores.values.tolist()]

playersNum = 2
# Four piles are built once and then deep-copied per player, so every player
# owns independent pile objects.
# NOTE(review): the meaning of the second Pile argument (10) is not visible
# from this notebook — confirm against ccg.Pile.
piles_player = [ccg.Pile(cardsList, 10) for _ in range(4)]
piles = [copy.deepcopy(piles_player) for _ in range(playersNum)]
# Draw one core per player. The sample size follows playersNum instead of a
# literal 2, so it always matches the number of decks built on the next line.
# np.random.choice samples with replacement by default, so two players may
# receive the same core.
# Note: this rebinds `cores` from the DataFrame above to the array of chosen
# cores; the raw table is no longer reachable under that name afterwards.
cores = np.random.choice(coreList, playersNum)
decks = [ccg.Deck(cores[i], piles[i], i) for i in range(playersNum)]

In [2]:
# Display the card table loaded from env/configue/cardsTable.csv as a visual sanity check.
cards

Unnamed: 0,keyName,type,damage,armour,maxHealth,cost,priority,charge,baseActivations
0,HS_PUDDLESTOMPER,minion,3,0,2,2,0,0,1
1,HS_RIVER_CROCOLISK,minion,2,0,3,2,0,0,1
2,HS_THRALLMAR_FARSEER,minion,2,0,3,3,0,0,2
3,HS_DUSKBOAR,minion,4,0,1,2,0,0,1


In [3]:
# Create the game session from the full card and core lists.
# NOTE(review): the literal 2 presumably mirrors playersNum — confirm against
# ccg.Session's signature before replacing it with the variable.
session = ccg.Session(cardsList, coreList, 2)

In [4]:
# Make print() a function on Python 2 kernels as well (no effect on Python 3).
from __future__ import print_function
# Record the PyTorch version this notebook was run with.
print(torch.__version__)

0.4.1


In [5]:
from trainer import Trainer
from agents import ARagent, A2Cagent, Root
from nets import ActorNetwork, ValueNetwork
from replays import FlatReplay, PrioritizedReplay

# Root object built on the current session; it is handed to plan_mcts as the
# tree root in the next cell.
root = Root(session)

In [None]:
def plan_mcts(root, n_iters=10):
    """
    Grow the search tree below `root` using Monte-Carlo tree search.

    Each iteration selects the best leaf via `root.select_best_leaf()`.
    A finished leaf (`is_done` is truthy) back-propagates a reward of 0;
    any other leaf is expanded, rolled out once, and the rollout's return
    value is back-propagated.

    :param root: tree node to plan from
    :param n_iters: number of select-expand-simulate-propagate loops to run
    :return: None; the tree is modified through the nodes' own methods
    """
    for _iteration in range(n_iters):
        leaf = root.select_best_leaf()

        if not leaf.is_done:
            # Non-terminal leaf: grow the tree, then estimate its value.
            leaf.expand()
            reward = leaf.rollout()
        else:
            # Terminal leaf: nothing to simulate, back up a zero reward.
            reward = 0

        leaf.propagate(reward)
    
# Run 1000 search iterations starting from the root built in the previous cell.
plan_mcts(root,n_iters=1000)

In [None]:
from IPython.display import clear_output
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

# Tally of results keyed by winner id; the training loop uses -1 when the
# session reports no loser.
wins = {-1: 0, 0: 0, 1: 0}

# Evaluation metric histories: the training loop appends one value to each
# of these per evaluation round.
health_adv_log, actions_num_log, turns_log = [], [], []
wins_log, entropy_log = [], []

# Histories of the mean actor / critic loss, also appended at evaluation time.
actor_loss_log, value_loss_log = [], []

# Per-iteration losses returned by trainer.train(); their mean is what gets
# appended to the two loss histories above.
actor_loss_epoch, value_loss_epoch = [], []

# Warm-up: both trainers play 100 recorded, non-evaluation games each,
# alternating trainer then trainer2.
# NOTE(review): `trainer` and `trainer2` are not defined in any visible cell;
# this cell fails on a fresh kernel unless they are created earlier.
print("Filling buffer")
for i in range(100):
    for warmup_trainer in (trainer, trainer2):
        warmup_trainer.playGame(record=True, evaluate=False)
print("Buffer filled")


In [None]:
import pickle
# Best evaluation score seen so far (mean of the per-game `winner[...]` values).
# -60 is a low sentinel — presumably below any attainable score, so the first
# evaluation always writes a checkpoint.
best_score = -60

for i in range(1000):
    # Collect experience: 30 rounds of 10 steps for each trainer. The replay
    # id encodes the outer iteration, the round, and which trainer played.
    for j in range(30):
        trainer.playSteps(10, replay_id = str(i)+" "+str(j)+"1")
        trainer2.playSteps(10, replay_id = str(i)+" "+str(j)+"2")

    # Tally the session's current result; -1 is used when no loser is reported.
    observation, _, _ = session.processNewStateInfo()
    winner = -1
    if(observation["loser"] != -1):
        winner = 1 - observation["loser"]
    wins[winner] += 1

    # NOTE(review): only `trainer` is trained here; `trainer2.train()` is never
    # called — presumably both share the same agent. Confirm.
    actor_loss, value_loss = trainer.train()[0]
    actor_loss_epoch.append(actor_loss)
    value_loss_epoch.append(value_loss)

    if(i % 50 == 0):
        # Evaluate on deep copies so evaluation games cannot disturb the
        # state of the live trainers.
        eval_trainer = copy.deepcopy(trainer)
        eval_trainer2 = copy.deepcopy(trainer2)
        clear_output()
        eval_games = 100
        mean_health_adv = 0
        mean_actions_num = 0
        mean_turns = 0
        mean_wins = 0
        mean_entropy = []

        # Half of the evaluation games are played by each trainer copy; the
        # statistics are read at index 0 for the first copy and index 1 for the
        # second. The inner loop uses `_game` so that it no longer overwrites
        # the outer loop variable `i`.
        for player_idx, evaluator in enumerate((eval_trainer, eval_trainer2)):
            for _game in range(eval_games // 2):
                game_stat, entropy = evaluator.playGame(record = False, evaluate = True)
                adv, actions_num, turns, game_winner = game_stat
                mean_health_adv += adv[player_idx]
                mean_actions_num += actions_num[player_idx]
                mean_turns += turns
                mean_wins += game_winner[player_idx]
                mean_entropy.extend(entropy[player_idx])

        health_adv_log.append(mean_health_adv / eval_games)
        actions_num_log.append(mean_actions_num / eval_games)
        turns_log.append(mean_turns / eval_games)
        wins_log.append(mean_wins / eval_games)
        entropy_log.append(np.mean(mean_entropy))

        # Checkpoint whenever the evaluation score improves.
        # NOTE(review): `a2cAgent` is not defined in any visible cell — confirm
        # it is the agent owned by `trainer`.
        if mean_wins / eval_games > best_score:
            best_score = mean_wins / eval_games
            with open('best_model_no_en2.pickle', 'wb') as f:
                pickle.dump(a2cAgent, f)

        print(best_score)

        value_loss_log.append(np.mean(value_loss_epoch))
        actor_loss_log.append(np.mean(actor_loss_epoch))

        # Start a fresh loss window for the next evaluation period. The actor
        # list was previously reset under a misspelled name, so its mean kept
        # growing over the whole run instead of covering one period like the
        # critic loss.
        actor_loss_epoch = []
        value_loss_epoch = []

        # One panel per tracked metric, in a 3x3 grid (the last two cells stay empty).
        panels = [
            ("Health advantage", health_adv_log),
            ("Actions num", actions_num_log),
            ("Turns num", turns_log),
            ("Win rate", wins_log),
            ("Actor loss", actor_loss_log),
            ("Critic loss", value_loss_log),
            ("Entropy", entropy_log),
        ]

        fig = plt.figure(figsize=(13, 13))
        for panel_idx, (panel_title, series) in enumerate(panels, start=1):
            plt.subplot(3, 3, panel_idx)
            plt.plot(range(len(series)), series)
            plt.title(panel_title)

        plt.show()