In [11]:
# import library
import numpy as np
np.random.seed(678)

class Agent():
    
    def __init__(self,move_value):
        assert(move_value==1 or move_value==-1)
        self.move_value = move_value
        self.all_state = dict()
    
    def state(self,current_state):
        if str(current_state) in self.value:
            pass
        else: 
            self.value[str(current_state)] = 0

    def action(self,observation):
        
        if observation in self.all_state:
            action = self.all_state['next_move']
        return action

class Environment():
    
    def __init__(self):
        self.board = np.zeros((3,3))
    
    # Def: Check if there is any winner
    def check_winner(self):
        
        # ======== Check Winner By One Row ========         
        for row in self.board:
            if sum(row) == 3: return 1
            if sum(row) ==-3: return -1 
            
        for row in np.transpose(self.board):
            if sum(row) == 3: return 1
            if sum(row) ==-3: return -1 
        # ======== Check Winner By One Row ========         

                    
        # ======== Check Winner By Diag ========         
        if sum([self.board[i][i] for i in range(len(self.board))]) == 3:  return 1
        if sum([self.board[i][i] for i in range(len(self.board))]) == -3: return -1
        if sum([self.board[i][len(self.board)-i-1] for i in range(len(self.board))]) == 3: return  1       
        if sum([self.board[i][len(self.board)-i-1] for i in range(len(self.board))]) == -3:return -1  
        # ======== Check Winner By Diag ========         

        # ======== Check Draw ========         
        if abs(self.board).sum() == 9: return 0
        # ======== Check Draw ========         
            
    # Def: Check if there is a winner
    def check_if_there_is_winner(self): return self.check_winner()
    
    # change the borad position - location and value.      
    def make_a_move(self,position,value):
        assert (value == 1 or value == -1)
        assert (self.board.ravel()[position] == 0)
        self.board.ravel()[position] = value
    
    # Reset the board back the starting position.     
    def reset_game(self): self.board = np.zeros((3,3))
        
    # View the current state of the board
    def get_board(self): return self.board

In [12]:
# declare an agent and env
env = Environment()
agent1 = Agent(-1)
agent2 = Agent(1)

In [5]:
temp = Environment()
temp.make_a_move(0,-1)
temp.make_a_move(1,1)
temp.make_a_move(8,1)
print(temp.get_board())
print(temp.check_if_there_is_winner())
temp.make_a_move(2,-1)
temp.make_a_move(4,1)
temp.make_a_move(5,-1)
temp.make_a_move(3,1)
temp.make_a_move(7,-1)
temp.make_a_move(6,-1)
print(temp.get_board())
print(temp.check_if_there_is_winner())

[[-1.  1.  0.]
 [ 0.  0.  0.]
 [ 0.  0.  1.]]
None
[[-1.  1. -1.]
 [ 1.  1. -1.]
 [-1. -1.  1.]]
0


In [115]:
temp_dict = dict()
temp_dict[str(temp.get_board())] = 4
temp_dict

{'[[-1.  1. -1.]\n [ 1.  1. -1.]\n [-1. -1.  1.]]': 4}

In [117]:
from collections import defaultdict
def tree(): return defaultdict(tree)
users = tree()
users['harold']['username'] = 'hrldcpr'
users['handler']['username'] = 'matthandlersux'

In [118]:
users

defaultdict(<function __main__.tree()>,
            {'harold': defaultdict(<function __main__.tree()>,
                         {'username': 'hrldcpr'}),
             'handler': defaultdict(<function __main__.tree()>,
                         {'username': 'matthandlersux'})})

In [21]:
import numpy
from random import sample
from numpy import diag


class State(numpy.ndarray):
    symbols = {0: "_", 1: "X", 2: "O"}
    
    def __new__(subtype):
        arr = numpy.zeros((3,3), dtype=numpy.int8)
        arr = arr.view(subtype)
        return arr

    def __hash__(s):
        flat = s.ravel()
        code = 0
        for i in range(9): code += pow(3,i) * flat[i]
        return code

    def won(s, player):
        x = s == player
        return numpy.hstack( (x.all(0), x.all(1), diag(x).all(), diag(x[:,::-1]).all()) ).any()

    def full(s):
        return (s != 0).all()

    def __str__(s):
        out = [""]
        for i in range(3):
            for j in range(3):
                out.append( s.symbols[ s[i,j] ] )
            out.append("\n")
        return str(" ").join(out)
        
class Learner:
    def __init__(s, player):
        s.valuefunc = dict()
        s.laststate_hash = None
        s.alpha = 0.9
        s.player = player
        s.gamehist = []
        s.traced = False
       
    def enum_actions(s, state):
        res = list()
        for i in xrange(3):
            for j in xrange(3):
                if state[i,j] == 0:
                    res.append( (i,j) )
        return res

    def value(s, state, action):
        "Assumption: Game has not been won by other player"
        state[action] = s.player
        hashval = hash(state)
        val = s.valuefunc.get( hashval )
        if val == None:
            if state.won(s.player): val = 1.0
            elif state.full(): val = 0.0
            else: val = 0.1
            s.valuefunc[hashval] = val
        state[action] = 0
        return val
        
    def next_action(s, state):
        valuemap = list()
        for action in s.enum_actions(state):
            val = s.value(state, action)
            valuemap.append( (val, action) )
        valuemap.sort(key=lambda x:x[0], reverse=True)
        maxval = valuemap[0][0]
        valuemap = filter(lambda x: x[0] >= maxval, valuemap)
        
        return sample(valuemap,1)[0]

    def next(s, state):
        if state.won(3-s.player):
            val = -1
        elif state.full():
            val = -0.1
        else:
            (val, action) = s.next_action(state)
            state[action] = s.player

        if state.won(1) or state.won(2) or state.full():
            s.traced = True
            
        #learning step
        if s.laststate_hash != None:
            s.valuefunc[s.laststate_hash] = (1.0-s.alpha) * s.valuefunc[s.laststate_hash] + s.alpha * val
        s.laststate_hash = hash(state)
        s.gamehist.append(s.laststate_hash)
        
    def reset(s):
        s.laststate_hash = None
        s.gamehist = []
        s.traced = False
                        
class Game:
    def __init__(s):
        s.learner = Learner(player=2)
        s.reset()
        s.sp = Selfplay(s.learner)
        
    def reset(s):
        s.state = State()
        s.learner.reset()
        print("** New Game **")
        print(s.state)

    def __call__(s, pi,pj):
        j = pi -1
        i = pj - 1
        if s.state[j,i] == 0:
            s.state[j,i] = 1
            s.learner.next(s.state)
        else:
            print("Invalid move")

        if s.state.full() or s.state.won(1) or s.state.won(2):
            if s.state.won(1):
                print("You WIN")
            elif s.state.won(2):
                print("You LOOSE")
            else:
                print("DRAW Game")
            s.reset()

    def selfplay(s, n=1000):
        for i in range(n):
            s.sp.play()
        s.reset()

    def save(s):
        cPickle.dump(s.learner, open("learn.dat", "w"))

    def load(s):
        s.learner = cPickle.load( open("learn.dat") )
        s.sp = Selfplay(s.learner)
        s.reset()
             
class Selfplay:
    def __init__(s, learner = None):
        if learner == None:
            s.learner = Learner(player=2)
        else:
            s.learner = learner
        s.other = Learner(player=1)
        s.i = 0

    def reset(s):
        s.state = State()
        s.learner.reset()
        s.other.reset()

    def play(s):
        s.reset()
        while True:
            s.other.next(s.state)    
            s.learner.next(s.state)
            if s.state.full() or s.state.won(1) or s.state.won(2):
                s.i += 1
                if s.i % 100 == 0:
                    print(s.state) #hash(s.state)
                    
                if not s.other.traced:
                    s.other.next(s.state)
                break


if __name__ == "__main__":
    print("Tic tac toe - Place game piece using notation g(i,j), i being the row and j being the column")
    print("I.e. g(1,2) places a game piece in the first row, second column.")
    print("Write g.selfplay(1000) to have the learner play against itself a 1000 times.")
    g = Game()

Tic tac toe - Place game piece using notation g(i,j), i being the row and j being the column
I.e. g(1,2) places a game piece in the first row, second column.
Write g.selfplay(1000) to have the learner play against itself a 1000 times.
** New Game **


NameError: name 'xrange' is not defined

In [119]:
! git all-go

[master 4ab60dc] commit
 2 files changed, 58 insertions(+), 76 deletions(-)
Counting objects: 4, done.
Delta compression using up to 4 threads.
Compressing objects: 100% (4/4), done.
Writing objects: 100% (4/4), 858 bytes | 858.00 KiB/s, done.
Total 4 (delta 3), reused 0 (delta 0)
remote: Resolving deltas: 100% (3/3), completed with 3 local objects.[K
To https://github.com/JaeDukSeo/tictactoe.git
   d0ab822..4ab60dc  master -> master
