In [7]:
from collections import namedtuple, deque
import random
from sklearn.preprocessing import LabelEncoder
import torch
import torchrl
import torch.optim as optim
import torch.nn.functional as F
import numpy as np


In [8]:
class Cell(object):
    def __init__(self, pos, board_dims):
        self.self_pos = pos
        self.value = '-'
        self.token = ''
        self.board_dims = board_dims #[x,x]
        self.normal_neighbors = {} #key direction, val cell
        self.diag_neighbors = {} #key direction, val cell
        self.directions = ['top','bottom','right','left','top right','bottom left','bottom right','top left']
        
    def get_neighbors(self):
        return [self.normal_neighbors, self.diag_neighbors]
    
    def get_neighbor_poses(self):
        normal_poses = []
        diag_poses = []
        
        for normal in self.normal_neighbors:
            normal_poses.append(self.normal_neighbors[normal].self_pos)
        for diag in self.diag_neighbors:
            diag_poses.append(self.diag_neighbors[diag].self_pos)
        return normal_poses, diag_poses
    
    def add_diag_neighbor(self, neighbors):
        for neighbor in neighbors:
            self.diag_neighbors.update({neighbor:neighbors[neighbor]})
    
    def add_normal_neighbor(self, neighbors):
        for neighbor in neighbors:
            self.normal_neighbors.update({neighbor:neighbors[neighbor]})
    
    def get_pos(self):
        return self.self_pos
    
    def flip(self, value, token):
        self.value = value
        self.token = token
        
    def get_neighbor_cell_in_direction(self, direction):
        if direction in list(self.normal_neighbors.keys()):
            return self.normal_neighbors[direction]
        elif direction in list(self.diag_neighbors.keys()):
            return self.diag_neighbors[direction]
        else:
            return False
        
    def get_empty_neighbors(self):
        #list of cells
        empty_neighbors = []
        for direction in self.directions:
            neighbor_in_direction = self.get_neighbor_cell_in_direction(direction)
            if neighbor_in_direction == False:
                continue
            else:
                if neighbor_in_direction.value == '-':
                    empty_neighbors.append(neighbor_in_direction)
        return empty_neighbors

In [9]:
class Board(object):
    def __init__(self, board_dims=[8,8]):
        self.board_dims = board_dims
        self.board = [[Cell(pos=[row,col],board_dims=self.board_dims) for col in range(board_dims[0])] for row in range(board_dims[1])]
        self.fill_neighbors()
        self.color_code = {'X':'\033[94m', 'O':'\033[92m'}
        
    def fill_neighbors(self):
        #build board with all dependencies, like nieghbors and locations
        for row in self.board:
            for cell in row:
                diags, normals = self.get_neighbor_poses(cell.self_pos)
                normal_neighbors = self.get_neighbor_cells(normals)
                diag_neighbors = self.get_neighbor_cells(diags)
                cell.add_normal_neighbor(normal_neighbors)
                cell.add_diag_neighbor(diag_neighbors)
        
    #returns 2 dicts with them holding the diagonal and normal axis neighbors of the given index (pos) with their corresponding direction as the key value    
    def get_neighbor_poses(self, pos):
        adjacents = [[[0,0],[0,0],[0,0]],
                    [[0,0],[0,0],[0,0]],
                    [[0,0],[0,0],[0,0]]]
        vals = [-1,0,1]
        
        #this enters the adjacent coordinates
        for x, row in enumerate(adjacents):
            for y, col in enumerate(row):
                col[0] = pos[0] + vals[x]
                col[1] = pos[1] + vals[y]
        
        neighbors = {}
        for row in adjacents:
            for col in row:
                if not(col[0] < 0 or col[1] < 0 or col[0] >= self.board_dims[0] or col[1] >= self.board_dims[1] or col == pos):
                    if col[0] != pos[0] and col[1] != pos[1]:
                        if col[0] < pos[0] and col[1] < pos[1]:
                            neighbors.update({'top left':col})
                        elif col[0] > pos[0] and col[1] < pos[1]:
                            neighbors.update({'top right':col})
                        elif col[0] < pos[0] and col[1] > pos[1]:
                            neighbors.update({'bottom left':col})
                        elif col[0] > pos[0] and col[1] > pos[1]:
                            neighbors.update({'bottom right':col})
                    else:
                        if col[0] > pos[0]:
                            neighbors.update({'right':col})
                        elif col[1] > pos[1]:
                            neighbors.update({'top':col})
                        elif col[0] < pos[0]:
                            neighbors.update({'left':col})
                        elif col[1] < pos[1]:
                            neighbors.update({'bottom':col})
        
        #returns dict of diags and normals, using list comprehension
        normal_neighbors = ['right','left','top','bottom']
        diag_neighbors = ['top right','top left','bottom right', 'bottom left']
        #returns diag, normal
        return {location:neighbors[location] for location in neighbors if location in diag_neighbors}, {location:neighbors[location] for location in neighbors if location in normal_neighbors}

    #returns a list of cells objects from the given dict neighbors
    def get_neighbor_cells(self, neighbors):
        cells = {}
        for neighbor in neighbors:
            row = neighbors[neighbor][0]
            col = neighbors[neighbor][1]
            cells.update({neighbor:self.board[row][col]})
        return cells
    
    def flip_cell(self, idx, token, value):
        self.board[idx[0]][idx[1]].flip(value, token)
        
    def get_cell(self, idx):
        return self.board[idx[0]][idx[1]]
    
    def get_board_dims(self):
        return self.board_dims
    
    def get_corners(self):
        corners = [[0,0],[0,self.board_dims[1]-1],[self.board_dims[0]-1,0],[self.board_dims[0]-1,self.board_dims[1]-1]]
        
        corner_cells = []
        for corner in corners:
            corner_cells.append(self.board[corner[0]][corner[1]])
        return corner_cells
    
    def get_corner_adj(self):
        corner_adj_poses = [[0,1],[1,0],[1,1],  #top left
                            [0,6],[1,7],[1,6],  #top right
                            [6,0],[7,1],[6,1],  #bottom left
                            [7,6],[6,7],[6,6]]  #bottom right
        corner_adj = []
        for corner_adj_pos in corner_adj_poses:
            corner_adj.append(self.board[corner_adj_pos[0]][corner_adj_pos[1]])
            
        return corner_adj
    
    def print_board_neighbors(self):
        for row in self.board:
            for cell in row:
                normal_poses, diag_poses = cell.get_neighbor_poses()
                print(f'position : {cell.self_pos} ||| normal neighbors: {normal_poses} ||| diag neighbors: {diag_poses}')
                
    def print_board(self):
        from functools import reduce
        
        count = 0
        frmt = "{:>3}"*len(self.board[0])
        print(f"  {frmt.format(*[str(x) for x in range(self.board_dims[0])])}")
        for row in self.board:
            val_list = [col.token for col in row]
            # color_list = ['\033[1m' if x == '-' else self.color_code[x] for x in val_list]
            # colored = []
            # for x in val_list:
            #     if x == 'X':
            #         colored.append("\033[94m")
            #     elif x == 'O':
            #         colored.append("\033[92m")
            #     else:
            #         colored.append('')
            # cat = reduce(lambda i, j: i +[j[0]+' '+j[1]], zip(colored, val_list), [])
            # print(colored)
            print(f'{count} {frmt.format(*val_list)}')
            # print()
            count+=1

In [83]:
class Othello():
    def __init__(self, board_dims):
        self.board = Board(board_dims=board_dims)
        self.players = {'player 1':{'token': 'X', 'value':1}, 'player 2':{'token':'O', 'value':-1}}
        # self.possible_moves_x = {} #key is the index where x can play, the value are the indices of the cells which have to be flipped when key is chosen, non inclusive of the key
        # self.possible_moves_O = {} # ''
        self.possible_moves = {'player 1': '', 'player 2': ''} #key: player, val is dict of moves and pieces that flip when taken
        self.taken_moves = [] #list of cells which have been taken
        self.__setup__()
        # self.__test_setup__()
        self.game_has_ended = False
        self.update_possible_moves()
        
    
    def __setup__(self):
        #O is white, X is black
        #starting board:
        # 0|1|2|3|4|5|6|7
        #0-|-|-|-|-|-|-|-
        #1-|-|-|-|-|-|-|-
        #2-|-|-|-|-|-|-|-
        #3-|-|-|O|X|-|-|-
        #4-|-|-|X|O|-|-|-
        #5-|-|-|-|-|-|-|-
        #6-|-|-|-|-|-|-|-
        #7-|-|-|-|-|-|-|-
        self.board.flip_cell(idx=[3,3], token=self.players['player 1']['token'], value = self.players['player 1']['value'])
        self.board.flip_cell(idx=[4,4], token=self.players['player 1']['token'], value = self.players['player 1']['value'])
        self.board.flip_cell(idx=[3,4], token=self.players['player 2']['token'], value = self.players['player 2']['value'])
        self.board.flip_cell(idx=[4,3], token=self.players['player 2']['token'], value = self.players['player 2']['value'])
        self.taken_moves.append(self.board.get_cell([3,3]))    
        self.taken_moves.append(self.board.get_cell([4,4]))    
        self.taken_moves.append(self.board.get_cell([4,3]))    
        self.taken_moves.append(self.board.get_cell([3,4]))    

        self.update_possible_moves()
    
    def __test_setup__(self):
        #     0  1  2  3  4  5  6  7
        # 0   -  -  -  -  -  -  -  -`
        # 1   -  -  -  -  -  -  -  -
        # 2   -  - -1 -1 -1  -  -  -
        # 3   -  - -1 -1  1  1  -  -
        # 4   -  -  1  1 -1  1  -  -
        # 5   -  -  -  1 -1 -1  -  -
        # 6   -  -  -  -  -  -  -  -
        # 7   -  -  -  -  -  -  -  -`
        
        player_1_t = self.players['player 1']['token']
        player_1_v = self.players['player 1']['value']
        player_2_t = self.players['player 2']['token']
        player_2_v = self.players['player 2']['value']
        
        self.board.flip_cell(idx=[2,2], token=player_2_t, value = player_2_v)
        self.board.flip_cell(idx=[2,3], token=player_2_t, value = player_2_v)
        self.board.flip_cell(idx=[2,4], token=player_2_t, value = player_2_v)
        self.board.flip_cell(idx=[3,2], token=player_2_t, value = player_2_v)
        self.board.flip_cell(idx=[3,3], token=player_2_t, value = player_2_v)
        self.board.flip_cell(idx=[4,4], token=player_2_t, value = player_2_v)
        self.board.flip_cell(idx=[5,4], token=player_2_t, value = player_2_v)
        self.board.flip_cell(idx=[5,5], token=player_2_t, value = player_2_v)
        self.board.flip_cell(idx=[4,2], token=player_1_t, value = player_1_v)
        self.board.flip_cell(idx=[4,3], token=player_1_t, value = player_1_v)
        self.board.flip_cell(idx=[5,3], token=player_1_t, value = player_1_v)
        self.board.flip_cell(idx=[4,5], token=player_1_t, value = player_1_v)
        self.board.flip_cell(idx=[3,5], token=player_1_t, value = player_1_v)
        self.board.flip_cell(idx=[3,4], token=player_1_t, value = player_1_v)
        self.taken_moves.append(self.board.get_cell([2,2]))    
        self.taken_moves.append(self.board.get_cell([2,3]))    
        self.taken_moves.append(self.board.get_cell([2,4]))    
        self.taken_moves.append(self.board.get_cell([3,2]))   
        self.taken_moves.append(self.board.get_cell([3,3]))    
        self.taken_moves.append(self.board.get_cell([4,4]))    
        self.taken_moves.append(self.board.get_cell([5,4]))    
        self.taken_moves.append(self.board.get_cell([5,5]))   
        self.taken_moves.append(self.board.get_cell([4,2]))    
        self.taken_moves.append(self.board.get_cell([4,3]))
        self.taken_moves.append(self.board.get_cell([5,3]))
        self.taken_moves.append(self.board.get_cell([4,5]))
        self.taken_moves.append(self.board.get_cell([3,5]))
        self.taken_moves.append(self.board.get_cell([3,4]))
    
    def update_possible_moves(self):
        for player in self.players.keys():
            self.possible_moves[player] = self.find_possible_moves(player)
            
    #takes string player parameter
    def find_possible_moves(self, player):
        opp_player_value = self.players['player 1']['value'] if player != 'player 1' else self.players['player 2']['value']
        player_val = self.players[player]['value']
        empty_adj_cells = self.get_empty_adj_cells(opp_player_value)
        
        #this dict will be filled with the empty cell : dict of direction and cells which are flipped in that direction
        #all values in the second dict will be the values which will need to be flipped if that empty cell move is chosen
        moves = {}
        for empty_cell in empty_adj_cells:
            moves.update({empty_cell:self.find_cells_flipped(player_val, empty_cell, opp_player_value)})
        
        out = {}
        for key in moves.keys():
            if moves[key]:
                # print(moves[key])
                out.update({key:moves[key]})
        # print(out)
        return out
    
    def get_empty_adj_cells(self, opp_player_value):
        all_empty_adj_cells = []
        
        for move in self.taken_moves:
            if move.value == opp_player_value:
                all_empty_adj_cells.append(move.get_empty_neighbors())
                
        flat = [empty for empty_adj in all_empty_adj_cells for empty in empty_adj]
        out = []
        [out.append(x) for x in flat if x not in out]
        return out
        
    def find_cells_flipped(self, player_val, cur_cell, opp_player_value):
        directions = ['top','bottom','right','left','top right','bottom left','bottom right','top left']
        flipped_cells = []
        for direction in directions:
            ret_val = self.check_dir(player_val, cur_cell, opp_player_value, direction)
            if ret_val:
                flipped_cells = flipped_cells + ret_val
        return flipped_cells
    
    def check_dir(self, player_val, cur_cell, opp_player_value, direction):
        next_cell = cur_cell.get_neighbor_cell_in_direction(direction)
        path = []
        if isinstance(next_cell, bool) or next_cell == False:
            return []
        
        # print(f'check_dir start cell pos: {cur_cell.self_pos} | start cell val: {cur_cell.value} | next cell: {next_cell.value} | next call loc: {next_cell.self_pos} | opp_player_value: {opp_player_value} | player value: {player_val}')
        while next_cell.value == opp_player_value:
            # print(f'appended {next_cell.value} {next_cell.self_pos}')
            path.append(next_cell)
            next_cell = next_cell.get_neighbor_cell_in_direction(direction)
            if isinstance(next_cell, bool) or next_cell == False:
                return []
        # if next_cell.value != '-':
        #     print(f'out of while next_cell: {next_cell.value} | player val: {player_val} | opp: {opp_player_value}')
            
        if next_cell.value == player_val:
            # print(f'returning: {path}')
            return path
        else:
            return []
        
    def flip_cells(self, move_taken, player):
        
        player_val = self.players[player]['value']
        player_token = self.players[player]['token']
        
        # self.print_possible_moves(player)
        # print(self.possible_moves[player])
        # print(move_taken)
        flip_idx_list = self.possible_moves[player][self.board.get_cell(move_taken)]
        # print(flip_idx_list)
        self.board.get_cell(move_taken).flip(player_val, player_token)
        self.taken_moves.append(self.board.get_cell(move_taken))
        for cell in flip_idx_list:
            self.board.get_cell(cell.self_pos).flip(player_val, player_token)
        
    def take_turn(self, num, player):
        idx = list(self.possible_moves[player].keys())[int(num)-1]
        # print(num)
        # print(idx)
        self.flip_cells(idx.self_pos, player)
        self.update_possible_moves()
        opp_player = list(self.players.keys())
        opp_player.remove(player)
        opp_player = opp_player[0]
        self.check_end(opp_player)
        #at end of check true or false, and then append if true else dont
        
    def token_count(self, player):
        count = 0
        for cell in self.taken_moves:
            if cell.token == self.players[player]['token']:
                count += 1
                
        return count
    
    def print_possible_moves(self, player):
        moves = self.possible_moves[player]
        count = 1
        for move in moves:
            print(f'move {count} | cell idx: {move.self_pos} | flipped cells: {moves[move]}')
            count += 1
        
    def display(self, player):
        self.board.print_board()
        self.print_possible_moves(player)
        
    #returns the action state and the board for the observation state for the DQN network
    def get_state(self):
        return self.possible_moves, self.board
    
    def get_winner(self):
        counts = {'player 1':0, 'player 2':0}
        for player in self.players.keys():
            counts[player] = self.token_count(player)
        if counts['player 1'] > counts['player 2']:
            return 'player 1'
        elif counts['player 2'] > counts ['player 1']:
            return 'player 2'
        else:
            return 'tie'
                
    def check_end(self, player):
        players = list(self.players.keys())
        players.remove(player)
        max_count = self.board.get_board_dims()[0] * self.board.get_board_dims()[1]
        # print(f'max count: {max_count} | len pos moves: {len(self.possible_moves[player])} | len taken moves: {len(self.taken_moves)} | player: {player}')
        if len(self.possible_moves[player]) == 0 or len(self.taken_moves) == max_count:
            self.display(players[0])
            winner = self.get_winner()
            if winner != 'tie':
                self.game_end(winner)
            else:
                self.game_end_tie(winner)
            
    def game_end(self, player):
        self.game_has_ended = True
        print(f'game had ended, player: {player} has won')
        
    def game_end_tie(self, tie):
        self.game_has_ended = True
        print(f'game had ended, it is a {tie}!')

In [11]:
import torch.nn as nn 

class DQN(nn.Module):
    def __init__(self, n_observations, n_actions):
        super(DQN, self).__init__()
        
        #input dimension must be n_observations, and output dimensions must be n_actions
        #n_observations are the embedded board with their correspoding token, value
        #n_observations may be changed and augmented with the data values of the n_actions, with the coordinates of the values which are flipped upon taking each action embedded in to it
        #the above augmentation may be applicable synchronously or asynchronously with the n_actions
        #ex: n_observations, each position will contain information about their token value 
        #ex: n_actions will contain information about their coordinate and the cells which will be flipped when they chosen, their "re-action"
        
        size = 128
        self.layer1 = nn.Linear(n_observations, size)
        self.layer2 = nn.Linear(size, size)
        self.layer3 = nn.Linear(size, n_actions)
        self.act1 = nn.ReLU()
        
    def forward(self, x):
        x = self.act1(self.layer1(x))
        x = self.act1(self.layer2(x))
        x = self.layer3(x)
        return x

In [192]:
#R_Net
#this will return a singular value given the entire board
#this will hopefully approximate and black box the reward calculation for each state of the board
# Qnew(st,at) = Qold(st, at) + a*{rt + gamma*maxaQ(st+1,a) - Qold(st,at)}
#hopefully this nn will replace the Qold and Q values so that they are not produced via equation/algorithm, but within a nn blackbox which learns
#the Yhat values will be either the exp_target_network_vals or another value based on naive algorithmic calculations, maybe via number of pieces or what?
class R_Net(nn.Module):
    def __init__(self, n_observations):
        super(R_Net,self).__init__()
        #use lstm or transformer encoder model, small scale
        #output will be multiplied by the number of current pieces for that player to increase the overall value when optimizing, taking a step, and backwards
        #with a very large scalar value, like 1000 being multiplied to the output scalar value to indicate that it is a winning or losing state at the end
        # size = 128
        # self.layer1 = nn.Linear(n_observations, size)
        # self.layer2 = nn.Linear(size, 1)
        self.encoder_layer = nn.TransformerEncoderLayer(d_model =n_observations, nhead=2)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=2)
        self.act1 = nn.Softmax()
        
    def forward(self, x):
        x = self.transformer_encoder(x)
        x = self.act1(x)
        return x

In [13]:
import torch.nn as nn

#64 is total number of different hot encodes, aka number of different cells in this case
#5 is arbitrary but in this case is the number of values which make up the total information of one cell
#in this case one cell is made up of: idx + coord + value + token + re-action
#token and re-action and maybe coord will need their own embedding as well, so it will be a embedding of embedding?

embed = nn.Embedding(10, 1)
ins = torch.LongTensor([[1,2,3],[4,5,6],[7,8,9]])
embedded = embed(ins)
embedded.size()
print(embedded)

tensor([[[ 0.7546],
         [-0.9115],
         [ 0.0993]],

        [[-0.6786],
         [ 0.3554],
         [-1.0350]],

        [[ 0.9363],
         [ 0.7972],
         [ 1.4983]]], grad_fn=<EmbeddingBackward0>)


In [14]:
#augment_state takes the possible_actions[player] which is a dict of moves for that player
#observation_state_in takes the entire board with their cells
def augment_state(action_state_in, observation_state_in, device):
    import torch.nn as nn
    action_state_out = []
    observation_state_out = []
    # print(len(observation_state_in.board)*len(observation_state_in.board[0])+1)
    embed_pos = nn.Embedding(len(observation_state_in.board)*len(observation_state_in.board[0])*8, 1, device=device)
    # embed_pos = nn.EmbeddingBag(len(observation_state_in.board)*len(observation_state_in.board[0])+1, 1, mode='sum')
    # embed_token = nn.Embedding(3, 1)
    for row in observation_state_in.board:
        for cell in row:
            cell_info = []
            pos = int(str(cell.self_pos[0]) + str(cell.self_pos[1]))
            cell_info.append(embed_pos(torch.tensor(pos, device=device)))
            # cell_info.append(embed_token(torch.tensor(cell.token)))
            if cell.value == '-':
                cell_info.append(torch.tensor(0, dtype=torch.float32, device=device))
            else:
                cell_info.append(torch.tensor(cell.value, dtype= torch.float32, device=device))
            concat_flipped_cells = []
            if cell in action_state_in.keys():
                for action in action_state_in[cell.self_pos]:
                    pos_2 = int(str(action.action.self_pos[0])+str(action.self_pos[1]))
                    concat_flipped_cells.append(embed_pos(torch.tensor(pos_2, device=device)))
                    # concat_flipped_cells.append(embed_token(torch.tensor(action.token)))
                    if action.value == '-':
                        concat_flipped_cells.append(torch.tensor(0, dtype=torch.float32, device=device))
                    else:
                        concat_flipped_cells.append(torch.tensor(int(action.value), dtype=torch.float32, device=device))
            observation_state_out.append(cell_info)
            if concat_flipped_cells:
                print(concat_flipped_cells)
                string_list = [str(x) for x in concat_flipped_cells]
                print(string_list)
                cell_info.append(torch.tensor(int(','.join(string_list), dtype = torch.float32, device=device)))
            else:
                cell_info.append(torch.tensor(0, dtype=torch.float32, device=device))
            action_state_out.append(cell_info)
    
    return action_state_out, observation_state_out

In [15]:
#this is a naive reward heuristic that the nueral network will use to help make decisions/calculate reward
#This iteration of the calculate reward will only reward based on the corners, corner adjacents, and win/loss
#this is called after the turn is taken therefore if there is a winner, it will be the player that the reward is being calculated for's turn
def calculate_reward(player_val, opp_player_val, game=Othello):
    corner_reward = 10
    corner_adj_reward = -5
    win_reward = 100
    loss_reward = -100
    tie_reward = 50
    
    reward = 1
    board = game.board
    corner_cells = board.get_corners()

    for corner in corner_cells:
        if corner.value == player_val:
            reward += corner_reward
        elif corner.value == opp_player_val:
            reward -= corner_reward
    
    corner_adj_cells = board.get_corner_adj()
    
    for corner_adj in corner_adj_cells:
        if corner_adj.value == player_val:
            reward += corner_adj_reward
        elif corner_adj.value == player_val:
            reward -= corner_adj_reward
            
    if game.game_has_ended == True:
        winner = game.get_winner()
        if winner == 'tie':
            reward += tie_reward
        elif game.players[winner]['value'] == player_val:
            reward += win_reward
        elif game.players[winner]['value'] == opp_player_val:
            reward -= loss_reward
    else:
        players_moves = 0
        for move in game.taken_moves:
            if move.value == player_val:
                players_moves += 1
        ratio = players_moves / len(game.taken_moves)
        reward = reward * ratio
        
    return reward

In [86]:
def random_player(game, player):
    rand_idx = random.randint(0, len(game.possible_moves[player]))
    # print(f'the range was from 0 to {len(game.possible_moves[player])}, the value that was chosen was: {rand_idx}')
    return rand_idx

In [16]:
p = torch.Tensor([[1,2,3],[4,5,6]])
torch.sum(p,dim=1)

tensor([ 6., 15.])

In [190]:
import random
from sklearn.preprocessing import LabelEncoder
import torch
import torchrl
import torch.optim as optim
import torch.nn.functional as F
def train(network_dict = {}, rnet_dict = {}):
    
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    print(device)
    torch.set_grad_enabled(True)
    #black always moves first
    #in this case X goes first
    #player 2 first
    
    episodes = 1000
    
    er = 1e-5
    board_dims = [8,8]
    # batch_size = 64
    Gamma = 0.99 #this may not be necessary
    TAU = 0.005
    lr = 1e-4
    n_observations = board_dims[0]*board_dims[1]
    n_actions = board_dims[0]*board_dims[1]#unknown, calculated each time
    
    network_pnet = DQN(n_observations=n_observations, n_actions=n_actions).to(device)
    network_tnet = DQN(n_observations=n_observations, n_actions=n_actions).to(device)
    r_net = R_Net(n_observations=n_observations).to(device)
    
    if len(network_dict) != 0 and len(rnet_dict)!= 0:
        network_pnet.load_state_dict(network_dict)
        network_tnet.load_state_dict(network_dict)
        r_net.load_state_dict(rnet_dict)
    
    r_optimizer = optim.SGD(r_net.parameters(), lr=lr)
    optimizer = optim.SGD(network_pnet.parameters(), lr=lr)
    
    soft = nn.Softmax()
    othello = Othello(board_dims=board_dims)
    player = 'player '
    count = 0
    
    score_board = {'AI':0, 'Random':0, 'tie':0}
    
    for i in range(episodes):
        othello.__init__(board_dims=board_dims)
        
        rand_value = random.randint(0,10)
        ai_side = 'player 1' if rand_value%2 == 0 else 'player 2'
        rand_side = 'player 2' if rand_value%2 == 0 else 'player 1'
        
        #update p and t net weight by consolidating each of them
        if i % 10 == 0:
            network_tnet.load_state_dict(network_pnet.state_dict())
        print(f"this is episode {i}")
        while othello.game_has_ended == False:
            cur_player = player + str((count%2)+1)
            # print(f"player {cur_player}'s turn")
            if cur_player == ai_side:
                # print('ai is moving')
                #action is a dict of moves and flipped pieces if that move is taken
                #observation_state is the entire board comprised of cell objects
                action_state, observation_state = othello.get_state()
                action_state, observation_state = augment_state(action_state_in=action_state, observation_state_in=observation_state, device=device)
                action_state = torch.tensor(action_state, dtype=torch.float32, device=device)
                observation_state = torch.tensor(observation_state, dtype=torch.float32, device=device, requires_grad=True)
                
                #action_state should be tensor of size (4, 64)
                #pnet_vals should be (4,64) and should be matmul to be (1,64) and masked, then argmax of that is the space chosen
                #maybe not masked if the network doesnt choose it, this will be tested later
                pnet_vals = network_pnet(action_state.T)
                pnet_vals = torch.sum(pnet_vals, dim=0)
                tnet_vals = network_tnet(action_state.T)
                # print(tnet_vals.size())
                tnet_vals = torch.sum(tnet_vals, dim=0)
                # print(tnet_vals.size())
                
                # idx_list = [(len(othello.board_dims[0])*x) + y for _, (x, y) in enumerate(zip(othello.possible_moves[cur_player].keys()[0],othello.possible_moves[cur_player].keys()[1]))]
                #DICT OF TAKEN MOVE IDX TO POS
                idx_to_pos = {}
                # print(pnet_vals)
                # othello.print_possible_moves(cur_player)
                for idx, move in enumerate(othello.possible_moves[cur_player].keys()):
                    # print(f'pos: {othello.board.board_dims[0]*move.self_pos[0] + move.self_pos[1] - 1} | idx {idx}')
                    idx_to_pos.update({othello.board.board_dims[0]*move.self_pos[0] + move.self_pos[1]:idx})
                
                pnet_vals = soft(pnet_vals)
                #MASK OUT TAKEN MOVES
                mult = torch.tensor([2 if x in idx_to_pos.keys() else 0 for x in range(len(pnet_vals))], dtype= torch.float32, device=device)
                # print(mult)
                # print('before')
                # print(pnet_vals)
                pnet_vals = torch.mul(pnet_vals, mult)
                # print('after')
                # print(pnet_vals)
                # print(f"idx_to_pos: {idx_to_pos}")
                # pnet_vals = torch.abs(pnet_vals)
                decision_idx = torch.argmax(pnet_vals)
                # print(idx_to_pos)
                # if decision_idx == 1:
                #     print(f"pnet_vals: {pnet_vals}")
                #     print(f'decision_idx: {decision_idx}')
                # othello.board.print_board()
                    
                dec_pos = idx_to_pos[decision_idx.item()]
                # print(dec_pos)
                othello.take_turn(dec_pos, cur_player)
                action_state, observation_state = othello.get_state()
                action_state, observation_state = augment_state(action_state_in=action_state, observation_state_in=observation_state, device=device)
                observation_state = torch.tensor(observation_state, dtype=torch.float32, device=device, requires_grad=True)
                #r_net_val = r_net(observation_state.T)
                #reward = reward_calc + r_net + e
                r_net_val = r_net(observation_state.T)
                non_aug_reward = torch.tensor(calculate_reward(player_val=othello.players[cur_player], opp_player_val=othello.players[player + str((count%2)+1)], game=othello), dtype=torch.float32, device=device, requires_grad=True)
                reward = torch.tensor(non_aug_reward + r_net_val + er, dtype=torch.float32, device = device, requires_grad=True)
                #calculate the percentage of total taken moves are the players tokens
                # aug_reward = reward * (othello.token_count(cur_player) / len(othello.taken_moves))
                
                exp_vals = tnet_vals*Gamma
                exp_vals = torch.add(exp_vals, reward)

                r_criterion = nn.SmoothL1Loss()
                r_loss = r_criterion(reward, non_aug_reward)
                # r_loss.requires_grad = True
                r_optimizer.zero_grad()
                r_loss.backward()
                r_optimizer.step()
                            
                criterion = nn.SmoothL1Loss()
                loss = criterion(pnet_vals, exp_vals)
                # loss.requires_grad = True
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                
                network_pnet_dict = network_pnet.state_dict()
                network_tnet_dict = network_tnet.state_dict()
                for key in network_pnet_dict:
                    network_tnet_dict[key] = network_pnet_dict[key]*TAU + network_tnet_dict[key]*(1-TAU)
                
            else:
                # print('random is moving')
                rand_idx = random_player(othello, cur_player)
                othello.take_turn(rand_idx, cur_player)
                
            count+=1
        winner = othello.get_winner()
        print(winner)
        if winner == ai_side:
            score_board['AI']+=1
        elif winner == rand_side:
            score_board['Random']+=1
        else:
            score_board['tie']+=1
        temp_board = []
        for score in score_board.keys():
            temp_board.append(str(score+str(score_board[score])+' '))
        frmt = "{:>3}"*len(temp_board)
        print('------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------')
        print(frmt.format(*temp_board))
    final_dict = network_pnet.state_dict()
    rnet_dict = r_net.state_dict()
    return final_dict, rnet_dict

In [186]:
network_dict, rnet_dict = {}, {}

In [193]:
network_dict, rnet_dict = train(network_dict, rnet_dict)

cuda:0
this is episode 0


  return self._call_impl(*args, **kwargs)
  reward = torch.tensor(non_aug_reward + r_net_val + er, dtype=torch.float32, device = device, requires_grad=True)
  return F.smooth_l1_loss(input, target, reduction=self.reduction, beta=self.beta)
  return F.smooth_l1_loss(input, target, reduction=self.reduction, beta=self.beta)


    0  1  2  3  4  5  6  7
0   X  X  X  O  O  O  O  O
1   O  X  O  O  O  O  O  O
2   O  O  X  O  X  O  X  O
3   O  O  O  O  O  O  X  O
4   O  O  X  O  X  X  X  O
5   O  O  O  X  O  X  O  O
6   O  O  O  X  O  O  O  O
7   O  O  O  O  O  O  O  O
game had ended, player: player 2 has won
player 2
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
AI0 Random1 tie0 
this is episode 1
    0  1  2  3  4  5  6  7
0   X  X  X  X  X  X  X  X
1   O  X  O  X  X  O  X  X
2   O  O  X  X  O  O  X   
3   O  O  X  O  O  O  X   
4   O  X  O  O  X  X  X  X
5   O  O  X  X  X  X      
6   O  O  O  O  O  O  O  O
7   O  O  O  O  O  O  O  O
move 1 | cell idx: [5, 6] | flipped cells: [<__main__.Cell object at 0x000001CEEFEFD590>, <__main__.Cell object at 0x000001CEEFEFDA90>, <__main__.Cell object at 0x000001

KeyboardInterrupt: 

In [109]:
t = torch.tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.2984, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.4084, 0.0000, 0.0000, 0.0000, 0.0000, 0.6003, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.6396, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000])
print(torch.argmax(t))
print(t[torch.argmax(t)])

tensor(42)
tensor(0.6396)


In [153]:
ins = torch.tensor([ 0.0000,  0.0000,  0.0000, -0.0000, -0.0000, -0.0000, -0.0000,  0.0000,
         0.0000, -0.0000, -0.0000,  0.0000, -0.0000,  0.0000,  0.0000,  0.0000,
        -0.0000,  0.0000, -0.0000,  0.5282,  0.0000,  0.0000, -0.0000, -0.0000,
         0.0000, -0.0000, -0.0000,  0.0000,  0.7017, -0.0000,  0.0000, -0.0000,
        -0.0000, -0.0626,  0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
         0.0000, -0.0000, -0.0468,  0.0000,  0.0000, -0.0000, -0.0000, -0.0000,
         0.0000, -0.0000,  0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
         0.0000,  0.0000,  0.0000, -0.0000,  0.0000,  0.0000, -0.0000, -0.0000], dtype=torch.float32)
soft = nn.Softmax(dim=0)
out = soft(ins)
print(out)
print(torch.argmax(out))

tensor([0.0152, 0.0152, 0.0152, 0.0152, 0.0152, 0.0152, 0.0152, 0.0152, 0.0152,
        0.0152, 0.0152, 0.0152, 0.0152, 0.0152, 0.0152, 0.0152, 0.0152, 0.0152,
        0.0152, 0.0258, 0.0152, 0.0152, 0.0152, 0.0152, 0.0152, 0.0152, 0.0152,
        0.0152, 0.0307, 0.0152, 0.0152, 0.0152, 0.0152, 0.0143, 0.0152, 0.0152,
        0.0152, 0.0152, 0.0152, 0.0152, 0.0152, 0.0152, 0.0145, 0.0152, 0.0152,
        0.0152, 0.0152, 0.0152, 0.0152, 0.0152, 0.0152, 0.0152, 0.0152, 0.0152,
        0.0152, 0.0152, 0.0152, 0.0152, 0.0152, 0.0152, 0.0152, 0.0152, 0.0152,
        0.0152])
tensor(28)


In [166]:
mult = torch.tensor([0 for x in range(len(ins))], dtype=torch.float32)
print(torch.mul(ins,mult))

tensor([0., 0., 0., -0., -0., -0., -0., 0., 0., -0., -0., 0., -0., 0., 0., 0., -0., 0., -0., 0., 0., 0., -0., -0.,
        0., -0., -0., 0., 0., -0., 0., -0., -0., -0., 0., -0., -0., -0., -0., -0., 0., -0., -0., 0., 0., -0., -0., -0.,
        0., -0., 0., -0., -0., -0., -0., -0., 0., 0., 0., -0., 0., 0., -0., -0.])


In [157]:
ins.reshape(1,64)

tensor([[ 0.0000,  0.0000,  0.0000, -0.0000, -0.0000, -0.0000, -0.0000,  0.0000,
          0.0000, -0.0000, -0.0000,  0.0000, -0.0000,  0.0000,  0.0000,  0.0000,
         -0.0000,  0.0000, -0.0000,  0.5282,  0.0000,  0.0000, -0.0000, -0.0000,
          0.0000, -0.0000, -0.0000,  0.0000,  0.7017, -0.0000,  0.0000, -0.0000,
         -0.0000, -0.0626,  0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
          0.0000, -0.0000, -0.0468,  0.0000,  0.0000, -0.0000, -0.0000, -0.0000,
          0.0000, -0.0000,  0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
          0.0000,  0.0000,  0.0000, -0.0000,  0.0000,  0.0000, -0.0000, -0.0000]])

In [None]:
new_board = Othello([8,8])
players = ['player 1', 'player 2']
x = 0
while new_board.game_has_ended == False:
    print(f"player {players[x%2]}'s turn")
    new_board.display(players[x%2])
    print(new_board.possible_moves['player 1'])
    uin = input('Please choose a move from the possible moves, please enter position/idx')
    # uin = [int(uin[0]), int(uin[2])]
    new_board.take_turn(uin, players[x%2])
    print(calculate_reward(player_val=new_board.players[players[x%2]]['value'], opp_player_val=new_board.players[players[(x+1)%2]]['value'], game=new_board))
    x+=1
# new_board.display('player 1')
# new_board.take_turn([3,5], 'player 1')
# new_board.display('player 2')

player player 1's turn
    0  1  2  3  4  5  6  7
0                         
1                         
2                         
3            X  O         
4            O  X         
5                         
6                         
7                         
move 1 | cell idx: [4, 2] | flipped cells: [<__main__.Cell object at 0x000001465B9C9150>]
move 2 | cell idx: [5, 3] | flipped cells: [<__main__.Cell object at 0x000001465B9C9150>]
move 3 | cell idx: [3, 5] | flipped cells: [<__main__.Cell object at 0x000001460454C3D0>]
move 4 | cell idx: [2, 4] | flipped cells: [<__main__.Cell object at 0x000001460454C3D0>]
{<__main__.Cell object at 0x000001465B9CB150>: [<__main__.Cell object at 0x000001465B9C9150>], <__main__.Cell object at 0x000001460215CB50>: [<__main__.Cell object at 0x000001465B9C9150>], <__main__.Cell object at 0x000001460454F310>: [<__main__.Cell object at 0x000001460454C3D0>], <__main__.Cell object at 0x000001460454C990>: [<__main__.Cell object at 0x000001460454C3D0>

ValueError: invalid literal for int() with base 10: ''

In [None]:
new_board = Othello([8,8])
action, obs = new_board.get_state()
print(action)

{'player 1': {<__main__.Cell object at 0x0000014648CD8D90>: [<__main__.Cell object at 0x0000014648CF7310>], <__main__.Cell object at 0x0000014648CF4350>: [<__main__.Cell object at 0x0000014648CF7310>], <__main__.Cell object at 0x0000014648CDA7D0>: [<__main__.Cell object at 0x0000014648CDA910>], <__main__.Cell object at 0x0000014648CE5B10>: [<__main__.Cell object at 0x0000014648CDA910>]}, 'player 2': {<__main__.Cell object at 0x0000014648CDB2D0>: [<__main__.Cell object at 0x0000014648CD8D10>], <__main__.Cell object at 0x0000014648CE68D0>: [<__main__.Cell object at 0x0000014648CD8D10>], <__main__.Cell object at 0x0000014648CF4490>: [<__main__.Cell object at 0x0000014648CF6F90>], <__main__.Cell object at 0x0000014648CF5910>: [<__main__.Cell object at 0x0000014648CF6F90>]}}


In [None]:
colors = ['\033[95m'
         ,'\033[94m'
         ,'\033[96m'
         ,'\033[92m'
         ,'\033[93m'
         ,'\033[91m'
         ,'\033[0m'
         ,'\033[1m'
         ,'\033[4m']


words = ['w','t','w',',','w,','da','dadsa,','w,']
frmt = "{:>3}"*len(words)
for _, (color, word) in enumerate(zip(colors,words)):
    print(f'{color+word}')

[95mw
[94mt
[96mw
[92m,
[93mw,
[91mda
[0mdadsa,
[1mw,


In [None]:
h = {'k':{'x':1}, 'b':{}}
print(f'k: {len(h["k"])} | b:{len(h["b"])}')

k: 1 | b:0


In [None]:
h = {'d':[0,1],'h':[1,5],'x':[5,10]}
k = ['d','h']
for value, key in enumerate(h):
    print(f"key: {key} | value: {value}")

key: d | value: 0
key: h | value: 1
key: x | value: 2


In [None]:
board_dims = [8,8]
cell_board = [[Cell(pos=[x,y],board_dims=board_dims) for x in range(board_dims[0])] for y in range(board_dims[1])]

In [None]:
def get_neighbor_poses(pos, board_dims):
    adjacents = [[[0,0],[0,0],[0,0]],
                 [[0,0],[0,0],[0,0]],
                 [[0,0],[0,0],[0,0]]]
    vals = [-1,0,1]
    
    #this enters the adjacent coordinates
    for x, row in enumerate(adjacents):
        for y, col in enumerate(row):
            col[0] = pos[0] + vals[x]
            col[1] = pos[1] + vals[y]
    
    neighbors = {}
    for row in adjacents:
        for col in row:
            if not(col[0] < 0 or col[1] < 0 or col[0] >= board_dims[0] or col[1] >= board_dims[1] or col == pos):
                if col[0] != pos[0] and col[1] != pos[1]:
                    # print(f'col:{col} | pos:{pos}')
                    # print(col[0] != pos[0] and col[1] != pos[1])
                    # diag_neighbors.append(col)
                    if col[0] < pos[0] and col[1] < pos[1]:
                        neighbors.update({'top left':col})
                    elif col[0] > pos[0] and col[1] < pos[1]:
                        neighbors.update({'top right':col})
                    elif col[0] < pos[0] and col[1] > pos[1]:
                        neighbors.update({'bottom left':col})
                    elif col[0] > pos[0] and col[1] > pos[1]:
                        neighbors.update({'bottom right':col})
                else:
                    if col[0] > pos[0]:
                        neighbors.update({'right':col})
                    elif col[1] > pos[1]:
                        neighbors.update({'top':col})
                    elif col[0] < pos[0]:
                        neighbors.update({'left':col})
                    elif col[1] < pos[1]:
                        neighbors.update({'bottom':col})
    
    #returns dict of diags and normals, using list comprehension
    normal_neighbors = ['right','left','top','bottom']
    diag_neighbors = ['top right','top left','bottom right', 'bottom left']
    #returns diag, normal
    return {location:neighbors[location] for location in neighbors if location in diag_neighbors}, {location:neighbors[location] for location in neighbors if location in normal_neighbors}

def get_neighbor_cells(neighbors, cell_board):
    cells = []
    for neighbor in neighbors:
        # print(neighbor)
        row = neighbors[neighbor][0]
        col = neighbors[neighbor][1]
        cells.append(cell_board[row][col])
    return cells



In [None]:
#build board with all dependencies, like nieghbors and locations
# normal_n = ['right', 'top','left','bottom']
for row in cell_board:
    for cell in row:
        diags, normals = get_neighbor_poses(cell.self_pos, board_dims=board_dims)
        print(f'normal: {normals} | diag: {diags}')
        normal_neighbors = get_neighbor_cells(normals, cell_board=cell_board)
        diag_neighbors = get_neighbor_cells(diags, cell_board=cell_board)
        print(f'normal cells: {[c.self_pos for c in normal_neighbors]} | diag cells: {[c.self_pos for c in diag_neighbors]}')
        # break
        cell.add_normal_neighbor(normal_neighbors)
        cell.add_diag_neighbor(diag_neighbors)

normal: {'top': [0, 1], 'right': [1, 0]} | diag: {'bottom right': [1, 1]}
normal cells: [[1, 0], [0, 1]] | diag cells: [[1, 1]]
normal: {'left': [0, 0], 'top': [1, 1], 'right': [2, 0]} | diag: {'bottom left': [0, 1], 'bottom right': [2, 1]}
normal cells: [[0, 0], [1, 1], [0, 2]] | diag cells: [[1, 0], [1, 2]]
normal: {'left': [1, 0], 'top': [2, 1], 'right': [3, 0]} | diag: {'bottom left': [1, 1], 'bottom right': [3, 1]}
normal cells: [[0, 1], [1, 2], [0, 3]] | diag cells: [[1, 1], [1, 3]]
normal: {'left': [2, 0], 'top': [3, 1], 'right': [4, 0]} | diag: {'bottom left': [2, 1], 'bottom right': [4, 1]}
normal cells: [[0, 2], [1, 3], [0, 4]] | diag cells: [[1, 2], [1, 4]]
normal: {'left': [3, 0], 'top': [4, 1], 'right': [5, 0]} | diag: {'bottom left': [3, 1], 'bottom right': [5, 1]}
normal cells: [[0, 3], [1, 4], [0, 5]] | diag cells: [[1, 3], [1, 5]]
normal: {'left': [4, 0], 'top': [5, 1], 'right': [6, 0]} | diag: {'bottom left': [4, 1], 'bottom right': [6, 1]}
normal cells: [[0, 4], [1, 

In [None]:
for row in cell_board:
    for cell in row:
        normal_poses, diag_poses = cell.get_neighbor_poses()
        print(f'position : {cell.self_pos} ||| normal neighbors: {normal_poses} ||| diag neighbors: {diag_poses}')

position : [0, 0] ||| normal neighbors: [[1, 0], [0, 1]] ||| diag neighbors: [[1, 1]]
position : [1, 0] ||| normal neighbors: [[0, 0], [1, 1], [0, 2]] ||| diag neighbors: [[1, 0], [1, 2]]
position : [2, 0] ||| normal neighbors: [[0, 1], [1, 2], [0, 3]] ||| diag neighbors: [[1, 1], [1, 3]]
position : [3, 0] ||| normal neighbors: [[0, 2], [1, 3], [0, 4]] ||| diag neighbors: [[1, 2], [1, 4]]
position : [4, 0] ||| normal neighbors: [[0, 3], [1, 4], [0, 5]] ||| diag neighbors: [[1, 3], [1, 5]]
position : [5, 0] ||| normal neighbors: [[0, 4], [1, 5], [0, 6]] ||| diag neighbors: [[1, 4], [1, 6]]
position : [6, 0] ||| normal neighbors: [[0, 5], [1, 6], [0, 7]] ||| diag neighbors: [[1, 5], [1, 7]]
position : [7, 0] ||| normal neighbors: [[0, 6], [1, 7]] ||| diag neighbors: [[1, 6]]
position : [0, 1] ||| normal neighbors: [[0, 0], [2, 0], [1, 1]] ||| diag neighbors: [[0, 1], [2, 1]]
position : [1, 1] ||| normal neighbors: [[1, 0], [0, 1], [2, 1], [1, 2]] ||| diag neighbors: [[0, 0], [2, 0], [0, 

In [None]:
def find_flank(cell_board, pos, player):
    

In [None]:
class Othello_old():
    def __init__(self):
        self.board = [['-' for _ in range(8)] for _ in range(8)]
        self.players = {'p1':'O', 'p2':'X'}
        #o is white, x is black
        #starting board:
        # 0|1|2|3|4|5|6|7
        #0-|-|-|-|-|-|-|-
        #1-|-|-|-|-|-|-|-
        #2-|-|-|-|-|-|-|-
        #3-|-|-|O|X|-|-|-
        #4-|-|-|X|O|-|-|-
        #5-|-|-|-|-|-|-|-
        #6-|-|-|-|-|-|-|-
        #7-|-|-|-|-|-|-|-
        self.board[3][3] = self.players['p1']
        self.board[4][4] = self.players['p1']
        self.board[3][4] = self.players['p2']
        self.board[4][3] = self.players['p2']
        
        self.moves_made = []
        self.open_vals = [[str(row)+str(col) for col in range(len(self.board))] for row in range(len(self.board))]
        #list of start_start_start_start_start_poses that are on the edges of the pieces placed, a Set of all valid spaces
        #updates each time a piece is placed
        self.edge_spaces = []
        
        self.score = {list(self.players.keys())[0]:0, list(self.players.keys())[1]:0}
        
        self.terminated = False
        #struct adj_to, poses
        #{x:{adj_to:[adjacent open poses], ...}, o:{adj_to:[adjacent open poses]}}
        self.adjacent_positions = {self.players['p1']:{[3,3]:[[3,2],[2,3],[2,2]], [4,4]:[[4,5],[5,4],[5,5]]}, self.players['p2']:{[3,4]:[[4,2],[5,3],[5,2]],[4,3]:[[2,4],[3,5],[2,5]]}}
    
    def __print_board__(self):
        for row in self.board:
            print(row)
    
    #start_pos is a list [row, col], player is 'p1' or 'p2'
    def take_turn(self, pos, player):
        if self.check_valid(self.board, pos):
            self.place_piece(pos, player)
            self.turn_pieces(pos)
            if self.check_end(player):
                self.end()
    
    #have list of valid positions for each player
    #then have list of vals that will be flipped for each valid position
    #update both lists each time player makes move
    #use the list to see if valid input and if the player can make move
    
    def get_valid_positions(self, player):
        player_list = list(self.player.keys())
        player_list.remove(player)
        opposing_player = player_list[0]
        opposing_player_mark = self.players[opposing_player]
        
        valid_positions = []
        
        for key, value in self.adjacent_positions[opposing_player].items():
            for val in value:
                if self.is_flank(val, key)
        
        return #list of valid positions ie pos (x,y)
    
    #returns bool and a list of values changed if it is a flank
    def is_flank(self, pos, adj_to):
        case
            
    def find_diag_dir(pos, adj_to):
        if pos[0] > adj_to[0]:
            if pos[1] > adj_to[1]:
                return 'bottom right'
            elif pos[1] < adj_to[1]:
                return 'top right'
        elif pos[0] < adj_to[0]:
            if pos[1] > adj_to[1]:
                return 'bottom left'
            elif pos[1] < adj_to[1]:
                return 'top left'
        else:
            return 'not diag'
        
    
    def check_diag(self, start_pos, adj_pos, player, opposing_player):
        top_lmin_val = start_pos[0] if start_pos[0] <= start_pos[1] else start_pos[1]
        top_rmin_val = start_pos[0] if start_pos[0] <= len(self.board)-start_pos[1] else start_pos[1]
        bottom_lmin_val = start_pos[0] if len(self.board)-start_pos[0] <= start_pos[1] else start_pos[1]
        bottom_rmin_val = start_pos[0] if len(self.board)-start_pos[0] <= len(self.board) - start_pos[1] else start_pos[1]
        
        val_list = [[top_lmin_val,0,-1], [top_rmin_val,0,-1], [bottom_lmin_val,len(self.board),1], [bottom_rmin_val,len(self.board),1]]
        
        
        #top left diag
        if start_pos[0] > adj_pos[0] and start_pos[1] > adj_pos[1]:
            for xy in range(val_list[0][0],val_list[0][1],val_list[0][2]):
                if self.board[xy][xy] == self.players[player]:
                    break
                elif self.board[xy][xy] == self.players[opposing_player]:
                    change_able_poses.append([xy,xy])
                else:
                    for pop in range(pop_counter):
                        change_able_poses.pop()
                        break
    
    def get_valid_adjacents(self, pos, opposing_player_mark):
        adjacents = [[[0,0],[0,0],[0,0]],
                     [[0,0],[0,0],[0,0]],
                     [[0,0],[0,0],[0,0]]]
        vals = [-1,0,1]
        
        #this enters the adjacent coordinates
        for x, row in enumerate(adjacents):
            for y, col in enumerate(row):
                col[0] = pos[0] + vals[x]
                col[1] = pos[1] + vals[y]
        
        for row in adjacents:
            for val in row:
                if self.within_range(val):
                    if self.board[val[0]][val[1]] == opposing_player_mark:
                        val[0] = -10
                        val[1] = -10
                else:
                    val[0] = -10
                    val[1] = -10
                    
                        
        return adjacents
    
    def within_range(self, pos):
        if (pos[0] >= 0 and pos[0] <= len(self.board)) and (pos[1] >= 0 and pos[1] <= len(self.board)):
            return True
        else:
            return False
    
    def check_valid(self, pos, player):
        if self.board[pos[0]][pos[1]] == '-':
            if self.check_adjacent(pos, player):
                return True
        else:
            False
    
    def check_adjacent(self, pos, player):
        adjacents = [[[0,0],[0,0],[0,0]],
                     [[0,0],[0,0],[0,0]],
                     [[0,0],[0,0],[0,0]]]
        vals = [-1,0,1]
        adjacent_exits = False
        #this enters the adjacent coordinates
        for x, row in enumerate(adjacents):
            for y, col in enumerate(row):
                col[0] = pos[0] + vals[x]
                col[1] = pos[1] + vals[y]
        #this checks if the coordinates are valid, ie in the board, and if there exists any adjacent values, since if there are none, the piece cannot be played there
        for row in adjacents:
            for col in row:
                if col[0] < 0 or col[1] < 0:
                    continue
                else:
                    if self.board[col[0]][col[1]] != self.players[player] or self.board[col[0]][col[1]] == '-':
                        adjacent_exits = True
                    else:
                        continue
                    
        return adjacent_exits
    
    def place_piece(self, pos, player):
        self.board[pos[0]][pos[1]] == self.board[player]
        self.update_mtaken_and_open(pos)
        return
    
    def update_mtaken_and_open(self, pos):
        self.moves_made.append(pos)
        val = str(pos[0]) + str(pos[1])
        for row in self.open_vals:
            if val in row:
                row.remove(val)    
                
    def turn_pieces(self, pos, player):
        opposing_player = 'p1' if player != 'p1' else 'p2'
        #go left
        change_able_poses = []
        pop_counter = 0
        for x in range(pos[0], 0 , -1):
            if self.board[x][pos[1]] == self.players[player]:
                break
            elif self.board[x][pos[1]] == self.players[opposing_player]:
                pop_counter += 1
                change_able_poses.append([x,pos[1]])
            else:
                for pop in range(pop_counter):
                    change_able_poses.pop()
                break
        #go right
        for x in range(pos[0], len(self.board)):
            if self.board[x][pos[1]] == self.players[player]:
                break
            elif self.board[x][pos[1]] == self.players[opposing_player]:
                change_able_poses.append([x,pos[1]])
            else:
                for pop in range(pop_counter):
                    change_able_poses.pop()
                break
        #go up
        for y in range(pos[1], 0 , -1):
            if self.board[pos[0]][y] == self.players[player]:
                break
            elif self.board[pos[0]][y] == self.players[opposing_player]:
                change_able_poses.append([pos[0],y])
            else:
                for pop in range(pop_counter):
                    change_able_poses.pop()
                break
        #go down
        for y in range(pos[1], len(self.board)):
            if self.board[pos[0]][y] == self.players[player]:
                break
            elif self.board[pos[0]][y] == self.players[opposing_player]:
                change_able_poses.append([pos[0],y])
            else:
                for pop in range(pop_counter):
                    change_able_poses.pop()
                break
            
        #go diagonals
        top_lmin_val = pos[0] if pos[0] <= pos[1] else pos[1]
        top_rmin_val = pos[0] if pos[0] <= len(self.board)-pos[1] else pos[1]
        bottom_lmin_val = pos[0] if len(self.board)-pos[0] <= pos[1] else pos[1]
        bottom_rmin_val = pos[0] if len(self.board)-pos[0] <= len(self.board) - pos[1] else pos[1]
        
        val_list = [[top_lmin_val,0,-1], [top_rmin_val,0,-1], [bottom_lmin_val,len(self.board),1], [bottom_rmin_val,len(self.board),1]]
        
        for val in val_list:
            for xy in range(val[0],val[1],val[2]):
                if self.board[xy][xy] == self.players[player]:
                    break
                elif self.board[xy][xy] == self.players[opposing_player]:
                    change_able_poses.append([xy,xy])
                else:
                    for pop in range(pop_counter):
                        change_able_poses.pop()
                        break
                    
        for change in change_able_poses:
            self.board[change[0]][change[1]] = self.players[player]
            self.score[player] += 1
        
        # for xy in range(top_lmin_val ,0,-1):
        #     if self.board[xy][xy] == player[player]:
        #         break
        #     elif self.board[xy][xy] == player[opposing_player]:
        #         change_able_poses.append([xy,xy])
        #     else:
        #         for pop in range(pop_counter):
        #             change_able_poses.pop()
        #             break
        # #bottom left
        # for xy in range(bottom_lmin_val, 0, -1):
        #     if self.board[xy][xy] == player[player]:
        #         break
        #     elif self.board[xy][xy] == player[opposing_player]:
        #         change_able_poses.append([xy,xy])
        #     else:
        #         for pop in range(pop_counter):
        #             change_able_poses.pop()
        #             break
        # #top right
        # for xy in range(top_rmin_val, len(self.board)):
        #     if self.board[xy][xy] == player[player]:
        #         break
        #     elif self.board[xy][xy] == player[opposing_player]:
        #         change_able_poses.append([xy,xy])
        #     else:
        #         for pop in range(pop_counter):
        #             change_able_poses.pop()
        #             break
        # #bottom right
        # for xy in range(bottom_rmin_val, len(self.board)):
        #     if self.board[xy][xy] == player[player]:
        #         break
        #     elif self.board[xy][xy] == player[opposing_player]:
        #         change_able_poses.append([xy,xy])
        #     else:
        #         for pop in range(pop_counter):
        #             change_able_poses.pop()
        #             break
    
    def check_end(self, player):
        neg_player_list = list(self.players.keys())
        neg_player_list = neg_player_list.remove(player)
        next_player = neg_player_list[0]
        if len(self.open_vals) == 0:
            return True
        elif any([self.check_valid([int(pos[0]),int(pos[1])], next_player) for pos in self.open_vals]) == True:
            return False
        else:
            return True
            
    def end(self):
        if self.score['p1'] > self.score['p2']:
            winner = 'p1'
        elif self.score['p1'] < self.score['p2']:
            winner = 'p2'
        else:
            winner = 'tie'
        print(f'Game Over, Winner is {winner}')
        self.terminated = True

In [None]:
h = [['-' for _ in range(8)] for _ in range(8)]
h[0][1]= 'O'
print(h)

[['-', 'O', '-', '-', '-', '-', '-', '-'], ['-', '-', '-', '-', '-', '-', '-', '-'], ['-', '-', '-', '-', '-', '-', '-', '-'], ['-', '-', '-', '-', '-', '-', '-', '-'], ['-', '-', '-', '-', '-', '-', '-', '-'], ['-', '-', '-', '-', '-', '-', '-', '-'], ['-', '-', '-', '-', '-', '-', '-', '-'], ['-', '-', '-', '-', '-', '-', '-', '-']]


In [None]:
va = [[str(row)+str(col) for col in range(8)] for row in range(8)]
print(va)


[['00', '01', '02', '03', '04', '05', '06', '07'], ['10', '11', '12', '13', '14', '15', '16', '17'], ['20', '21', '22', '23', '24', '25', '26', '27'], ['30', '31', '32', '33', '34', '35', '36', '37'], ['40', '41', '42', '43', '44', '45', '46', '47'], ['50', '51', '52', '53', '54', '55', '56', '57'], ['60', '61', '62', '63', '64', '65', '66', '67'], ['70', '71', '72', '73', '74', '75', '76', '77']]


In [None]:
for row in va:
    if '00' in row:
        row.remove('00')

In [None]:
def get_input(open_places):
    print('open spaces are:')
    for row in open_places:
        print(row)
    uinput = input("where do you want to place your piece")
    conv_uinput = str(uinput[0]) + str(uinput[1])
    
    if conv_uinput not in open_places:
        while conv_uinput not in open_places:
            uinput = input('not valid, choose a valid space')
            conv_uinput = str(uinput[0]) + str(uinput[1])
    
    return uinput

In [None]:
game = Othello()
players = ['p1','p2']

turn_counter = 0
while game.terminated == False:
    uinput = get_input(game.open_vals)
    game.turn_pieces(uinput, players[turn_counter%2])
    game.__print_board__()
    turn_counter += 1

open spaces are:
['00', '01', '02', '03', '04', '05', '06', '07']
['10', '11', '12', '13', '14', '15', '16', '17']
['20', '21', '22', '23', '24', '25', '26', '27']
['30', '31', '32', '33', '34', '35', '36', '37']
['40', '41', '42', '43', '44', '45', '46', '47']
['50', '51', '52', '53', '54', '55', '56', '57']
['60', '61', '62', '63', '64', '65', '66', '67']
['70', '71', '72', '73', '74', '75', '76', '77']


IndexError: string index out of range