In [295]:
import numpy as np

N = 3  # default board side length, used when Env() is built without a size


class Env(object):
    """Board for an n-in-a-row game on an n x n grid ("connect3" when n = 3).

    Cell encoding in ``self.board`` (flat integer array of length n*n,
    row-major):
        0  = no play
        1  = black (moves first)
        -1 = white
    """

    def __init__(self, n = N):
        # params
        self.n = n
        self.n2 = n*n
        self.winn = self.n  # number of equal stones in one line needed to win
        # board related (board array and stone counter)
        self.reset()

    def __str__(self):
        """Render the board as an n x n grid: 'o' black, 'x' white, '.' empty."""
        # Convert to strings first so every np.where below works on one dtype,
        # and reshape with this board's own size rather than the global N.
        cells = self.board.reshape(self.n, self.n).astype(str)
        cells = np.where(cells == '1', 'o', cells)
        cells = np.where(cells == '-1', 'x', cells)
        cells = np.where(cells == '0', '.', cells)
        return str(cells)

    def reset(self):
        """Empty the board and set the stone counter back to zero."""
        self.board = np.zeros(self.n2, dtype=int)
        self.stones = 0

    def player_to_move(self):
        """Return 1 (black) after an even number of stones, otherwise -1 (white)."""
        return 1 if (self.stones % 2 == 0) else -1

    def is_full(self):
        """Return True once as many stones were played as there are cells."""
        return self.n2 == self.stones

    def status(self):
        """Return 1 / -1 for a win, 0 for a draw (full board), -2 if not ended."""
        res = self.check()
        if res != 0:
            return res
        return 0 if self.is_full() else -2  # -2 means not ended

    def valid_mask(self):
        """Return a flat 0/1 integer array with 1 at every empty cell."""
        return np.where(self.board == 0, 1, 0)

    def valid_actions(self):
        """Return the indices of the empty cells, or None when the board is full."""
        if self.stones == self.n2:
            return None
        return [i for i in range(self.n2) if self.board[i] == 0]

    def take_action(self, player, pos):
        """Place ``player`` (1 or -1) on flat index ``pos``.

        Returns None and leaves the board untouched if the cell is occupied;
        otherwise returns the result of ``check()`` after the move.
        """
        if self.board[pos] != 0:
            return None
        self.board[pos] = player
        status = self.check()
        self.stones += 1
        return status

    def check(self):
        """Return 1 if black owns a full line, -1 if white does, else 0.

        A line is a row, a column or one of the two main diagonals.  With
        cells in {-1, 0, 1} a line sums to +/- ``winn`` only when it is
        completely filled by one colour.
        """
        board2d = self.board.reshape(self.n, self.n)
        all_sum = np.concatenate([
            board2d.sum(axis=0),                         # columns
            board2d.sum(axis=1),                         # rows
            [board2d.diagonal().sum(),                   # main diagonal
             np.flip(board2d, 0).diagonal().sum()],      # anti-diagonal
        ])
        if self.winn in all_sum:
            return 1
        elif -self.winn in all_sum:
            return -1
        else:
            return 0


In [296]:
N = 3  # default board side length for the agents and Game defined in this cell


class Human_Agent(object):
    """Agent whose moves are typed in by a person on standard input."""

    def __init__(self):
        pass

    def gen_move(self, env):
        """Print the board and prompt until a legal move is entered.

        ``env`` must provide ``n``, ``n2`` and ``valid_actions()``.
        Returns the chosen flat cell index (row * n + column, 0-indexed).
        Raises ValueError if the board has no empty cell left.
        """
        print(env)
        # The board does not change while we are prompting, so ask once.
        legal = env.valid_actions()
        if not legal:  # None (full board) or empty
            raise ValueError("no legal moves available")
        while True:
            cmd = input("input move (x, y) as %d*x+y (0-indexed):" % env.n)
            try:
                pos = int(cmd)
            except ValueError:
                # A typo must not abort the whole game; ask again.
                print("not an integer!!")
                continue
            if pos not in range(env.n2):
                print("not in range(0, N*N)!!")
                continue
            if pos not in legal:
                print("occupied!!")
                continue
            break
        return pos
            

def softmax(x):
    """Return the softmax of ``x``, normalised along axis 0.

    The maximum is subtracted before exponentiating; this leaves the result
    mathematically unchanged but keeps ``np.exp`` from overflowing to inf
    (and the quotient from becoming NaN) for large inputs.  An empty input
    yields an empty float array.
    """
    x = np.asarray(x, dtype=float)
    if x.size == 0:
        return x
    p = np.exp(x - np.max(x, axis=0))
    return p / np.sum(p, axis=0)

class Random_Agent(object):
    """Agent that plays a uniformly random empty cell (for connect3-style boards)."""

    def __init__(self, n=None):
        """``n`` is the board side length; it defaults to the module-level N."""
        if n is None:
            n = N
        self.actionspace = range(n*n)

    def gen_move(self, env):
        """Return the flat index of a randomly chosen empty cell of ``env``.

        One standard-normal score is drawn per cell and the best-scoring
        empty cell wins.  The number of cells comes from ``env.valid_mask()``,
        so boards of any size work.  Raises ValueError if no cell is empty.
        """
        mask = np.asarray(env.valid_mask()).ravel() == 1
        if not mask.any():
            raise ValueError("no valid moves available")
        scores = np.random.randn(mask.size)
        # Occupied cells can never win the argmax.
        scores[~mask] = -np.inf
        return int(np.argmax(scores))
    

class Game(object):
    """Runs games between two agents on an Env board.

    ``p1`` plays black (1, moves first) and ``p2`` plays white (-1).  Both
    must provide ``gen_move(env)`` returning a flat cell index.
    """

    def __init__(self, p1, p2, N = None):
        """``N`` is the board side length; None means Env's own default size."""
        self.p1 = p1
        self.p2 = p2
        self.env = Env() if N is None else Env(N)

    def display_play(self, record):
        """Replay a ``(winner, moves)`` record from ``play`` and print each position."""
        (winner, rec) = record
        # Replay on a board of the same size the game was played on.
        display_env = Env(self.env.n)
        for c, pos in rec:
            display_env.take_action(c, pos)
            print(display_env)

    def reset(self):
        """Reset the underlying board."""
        self.env.reset()

    def play(self):
        """Play one game from an empty board.

        Returns ``(status, record)``: ``status`` is the final ``env.status()``
        and ``record`` is the list of ``[player, position]`` moves in order.
        """
        env = self.env
        env.reset()

        record = list()
        while env.status() == -2:  # not ended
            c = env.player_to_move()
            pcur = self.p1 if c == 1 else self.p2
            # The agent gets the live env; an agent that copies it pays that cost.
            pos = pcur.gen_move(env)
            assert pos in env.valid_actions(), "agent returned an illegal move: %r" % (pos,)
            record.append([c, pos])
            env.take_action(c, pos)

        return (env.status(), record)

    def playn(self, num_games):
        """Play ``num_games`` games and return the list of their ``play()`` results."""
        return [self.play() for _ in range(num_games)]

    def play2n_symmetric(self, num_games):
        """Play ``num_games`` with p1 moving first, then ``num_games`` with p2 first.

        Returns ``(p1_first_records, p2_first_records)``.  The status in each
        record refers to the colour (1 = first mover), not to the agent.  The
        players are swapped back afterwards, so ``self.p1`` / ``self.p2`` are
        unchanged and repeated calls keep the same meaning.
        """
        p1_first_records = self.playn(num_games)
        self.p1, self.p2 = self.p2, self.p1
        try:
            p2_first_records = self.playn(num_games)
        finally:
            self.p1, self.p2 = self.p2, self.p1
        return p1_first_records, p2_first_records

In [297]:
# Smoke test: two agents of each kind and a fresh 3x3 board.
ra1, ra2 = Random_Agent(), Random_Agent()
ha1, ha2 = Human_Agent(), Human_Agent()
env = Env(3)
# White (-1) takes cell 5; the random agent then has to answer with an empty cell.
env.take_action(-1, 5)
ra1.gen_move(env)

4

In [300]:
# Interactive game: the human agent is p1 (black, moves first), the random agent is p2.
g = Game(ha1, ra1)
playout1 = g.play()
# playout1 is (final status, [[player, position], ...]).
print(playout1)
# Replay the recorded moves on a fresh board, printing it after every move.
g.display_play(playout1)

[['.' '.' '.']
 ['.' '.' '.']
 ['.' '.' '.']]
input move (x, y) as 3*x=y (0-indexed):4
[['.' '.' '.']
 ['.' 'o' '.']
 ['.' '.' 'x']]
input move (x, y) as 3*x=y (0-indexed):0
[['o' '.' '.']
 ['.' 'o' '.']
 ['.' 'x' 'x']]
input move (x, y) as 3*x=y (0-indexed):6
[['o' '.' 'x']
 ['.' 'o' '.']
 ['o' 'x' 'x']]
input move (x, y) as 3*x=y (0-indexed):5
[['o' '.' 'x']
 ['x' 'o' 'o']
 ['o' 'x' 'x']]
input move (x, y) as 3*x=y (0-indexed):1
(0, [[1, 4], [-1, 8], [1, 0], [-1, 7], [1, 6], [-1, 2], [1, 5], [-1, 3], [1, 1]])
[['.' '.' '.']
 ['.' 'o' '.']
 ['.' '.' '.']]
[['.' '.' '.']
 ['.' 'o' '.']
 ['.' '.' 'x']]
[['o' '.' '.']
 ['.' 'o' '.']
 ['.' '.' 'x']]
[['o' '.' '.']
 ['.' 'o' '.']
 ['.' 'x' 'x']]
[['o' '.' '.']
 ['.' 'o' '.']
 ['o' 'x' 'x']]
[['o' '.' 'x']
 ['.' 'o' '.']
 ['o' 'x' 'x']]
[['o' '.' 'x']
 ['.' 'o' 'o']
 ['o' 'x' 'x']]
[['o' '.' 'x']
 ['x' 'o' 'o']
 ['o' 'x' 'x']]
[['o' 'o' 'x']
 ['x' 'o' 'o']
 ['o' 'x' 'x']]


In [301]:
class A:
    """Minimal mutable object holding a single attribute ``a`` (initially 1)."""

    def __init__(self):
        self.a = 1

    def set_value(self, a):
        """Replace the stored value with ``a``."""
        self.a = a

In [302]:
# Single A instance (its attribute `a` starts at 1).
a = A()

In [303]:
# `b = a` binds a second name to the same object; no copy is made,
b = a
b.set_value(2)
# so the change made through `b` is visible through `a` (the cell shows 2).
a.a

2