## Connect4 environment

---

> Internship neural networks
>
> Group 4: Reinforcement learning
>
> Deadline 28.02.23 23:59

---

In [1]:
import numpy as np
import os
os.environ['PYGAME_HIDE_SUPPORT_PROMPT'] = "hide"
import pygame
import pandas as pd
from scipy.signal import convolve2d
import sys

In [63]:
class Connect4:

    def __init__(self):
        
        self.board_height = 6
        self.board_width = 7
        self.board_state = np.zeros([self.board_height, self.board_width], dtype=np.int8)
        self.players = {'p1': 1, 'p2': -1}
        self.isDone = False
        self.winner = None
        self.reward = {'win': 10, 'draw': 0, 'lose': -10, 'living': 0, 'block': 2}
        self.last_action = -1
        self.last_action_row = -1
        
        self.BLUE = (0,0,0)
        self.BLACK = (181,181,181)
        self.RED = (178,58,238)
        self.YELLOW = (180,238,180)
        
        #define our screen size and radius of coins
        self.SQUARESIZE = 100
        self.RADIUS = int(self.SQUARESIZE/2 - 5)
        
        #define width and height of pygame board
        self.pygame_width = self.board_width * self.SQUARESIZE
        self.pygame_height = (self.board_height+1) * self.SQUARESIZE
        
        # define size of board
        self.size = (self.pygame_width, self.pygame_height)
    
    def render(self):
        '''
        Prints the Connect4 game state in output
        '''
        rendered_board_state = self.board_state.copy().astype(np.str)
        rendered_board_state[self.board_state == 0] = ' '
        rendered_board_state[self.board_state == 1] = 'O'
        rendered_board_state[self.board_state == -1] = 'X'
        display(pd.DataFrame(rendered_board_state))
    
    def reset(self):
        '''
        Resets environment
        
        returns: initial state of game
        '''
        self.__init__()
        return self.board_state.copy()
        
    def get_available_actions(self):
        '''
        Get the numbers of columns that are not full

        returns: numbers of columns that are not full
        '''
        available_cols = []
        for j in range(self.board_width):
            if not np.all(self.board_state[:, j]):
                available_cols.append(j)
        return available_cols
    
    def check_game_done(self, player):
        '''
        Checking if game is done by using kernels.

        player: player id

        returns: reward of state that ends the game
        '''
        if player == "p1":
            i= 1
        if player == "p2":
            i=-1

        horizontal_kernel = np.array([[i, i, i, i]])
        vertical_kernel = np.transpose(horizontal_kernel)
        diag1_kernel = i*np.eye(4, dtype=np.int8)
        diag2_kernel = np.fliplr(diag1_kernel)
        detection_kernels = [horizontal_kernel, vertical_kernel, diag1_kernel, diag2_kernel]

        for kernel in detection_kernels:
            if (convolve2d(self.board_state, kernel, mode="valid") == 4).any():
                self.isDone = True
        
        if self.isDone:
            return self.reward['win']
        # check for draw
        elif np.sum([self.board_state == 0]) == 0:
            self.isDone = True
            return self.reward['draw']
        else:
            return self.reward["lose"]
    
    def check_block(self, prev_board_state, player):
        '''
        Check if move was a blocking move with kernels

        prev_board_state: state of game before move
        player: player id

        returns: boolean if move was a blocking move
        '''
        if player == "p1":
            i= 1
        if player == "p2":
            i=-1
        
        # define kernels for blocking move
        horizontal_kernel_left = np.array([[i, -i, -i, -i]])
        horizontal_kernel_mid_left = np.array([[-i, i, -i, -i]])
        horizontal_kernel_right = np.array([[-i, -i, -i, i]])
        horizontal_kernel_mid_right = np.array([[-i, -i, i, -i]])
        vertical_kernel = np.transpose(horizontal_kernel_right)

        diag1_kernel_left = -i*np.eye(4, dtype=np.int8)
        diag1_kernel_left[0,0] = i
        diag1_kernel_mid_left = -i*np.eye(4, dtype=np.int8)
        diag1_kernel_mid_left[1,1] = i
        diag1_kernel_right = -i*np.eye(4, dtype=np.int8)
        diag1_kernel_right[3,3] = i
        diag1_kernel_mid_right = -i*np.eye(4, dtype=np.int8)
        diag1_kernel_mid_right[2,2] = i
    
        diag2_kernel_left = np.fliplr(diag1_kernel_left)
        diag2_kernel_mid_left = np.fliplr(diag1_kernel_mid_left)
        diag2_kernel_right = np.fliplr(diag1_kernel_right)
        diag2_kernel_mid_right = np.fliplr(diag1_kernel_mid_right)
        
        detection_kernels = [horizontal_kernel_left, horizontal_kernel_mid_left, 
                             horizontal_kernel_right, horizontal_kernel_mid_right, 
                             vertical_kernel, 
                             diag1_kernel_left, diag1_kernel_mid_left,
                             diag1_kernel_right, diag1_kernel_mid_right,
                             diag2_kernel_left, diag2_kernel_mid_left,
                             diag2_kernel_right, diag2_kernel_mid_right]
        
        isBlock = False
        for kernel in detection_kernels:
            # use kernel on state
            conv = convolve2d(self.board_state, kernel, mode="valid") == 4
            if conv.any():
                conv_prev = convolve2d(prev_board_state, kernel, mode="valid") == 4
                # if convolution array of actual state is different to convolution array of previous state -> blocking move
                if not np.array_equal(conv, conv_prev):
                    isBlock = True
        
        return isBlock
        
    def make_move(self, a, player, isDqn = False):
        '''
        Make a move in the environment

        a: action
        player: player id
        isDqn: tells if the player is a dqn

        returns: new state and reward
        '''
        
        # check if move is valid
        prev_board_state = self.board_state.copy()
        if a in self.get_available_actions():
            i = np.sum([self.board_state[:, a] == 0]) - 1
            self.board_state[i, a] = self.players[player]
        else:
            print('Move is invalid')
            self.render()

        reward = self.check_game_done(player)

        # change to living reward if not done
        if not self.isDone:
            # reward for blocking (only for dqn)
            if isDqn and self.check_block(prev_board_state, player) == True:
                reward = self.reward['block']
            else:
                reward = self.reward['living']
        
        self.last_action = a
        self.last_action_row = len(np.nonzero(self.board_state[:,a])) - 1
        
        return self.board_state.copy(), reward
    
    def draw_board(self, board, screen):
        '''
        Paint the given board onto a pygame surface and refresh the display

        board: board state to draw
        screen: pygame surface to draw on
        '''
        cell = self.SQUARESIZE
        # flip vertically; the coin pass below measures y from the bottom edge
        flipped = np.flip(board, 0)

        # first pass: board background with one empty hole per cell, shifted
        # down by one cell so that the top strip of the window stays free
        for col in range(self.board_width):
            for row in range(self.board_height):
                pygame.draw.rect(screen, self.BLUE, (col*cell, row*cell+cell, cell, cell))
                hole_center = (int(col*cell+cell/2), int(row*cell+cell+cell/2))
                pygame.draw.circle(screen, self.BLACK, hole_center, self.RADIUS)

        # second pass: coins of both players on top of the holes
        coin_colors = {1: self.RED, -1: self.YELLOW}
        for col in range(self.board_width):
            for row in range(self.board_height):
                color = coin_colors.get(flipped[row][col])
                if color is None:
                    continue
                coin_center = (int(col*cell+cell/2), self.pygame_height-int(row*cell+cell/2))
                pygame.draw.circle(screen, color, coin_center, self.RADIUS)
        pygame.display.update()
    
    def playGame(self, player1, player2, draw = True, render = True):
        '''
        Play one game between two agents, optionally drawn in a pygame window
        and/or rendered in the output

        player1: agent of player1
        player2: agent of player2
        draw: if you want to draw the game with pygame
        render: if you want to render game in output

        Agents are distinguished by their class name:
        - "DTAgent": model is put into eval mode, the agent is reset and asked
          with select_action(t, reward, state, available_actions)
        - "HumanAgent": picks a column with a mouse click (draw mode only)
        - every other agent: select_action(state, available_actions, training=False)

        In draw mode the process is terminated with sys.exit() as soon as the
        game is done or the window is closed.

        NOTE(review): the names `torch`, `math` and `env` are used below but are
        not imported or defined in the visible part of the file - confirm they
        exist in the notebook's global scope. `env` is presumably this same
        Connect4 instance (the draw branch mixes `env` and `self`) - TODO confirm.
        '''
        # every loop iteration plays one move per player, so 21 iterations
        # cover all 6*7 = 42 cells of the board
        max_test_ep_len = 21
        total_timesteps = 0
        total_reward = 0
        running_reward = 0
        turn = 0
        
        name_p1 = player1.__class__.__name__
        name_p2 = player2.__class__.__name__
        p1_is_dt = False
        p2_is_dt = False
        if name_p1 == "DTAgent":
            player1.model.eval()
            p1_is_dt = True
        if name_p2 == "DTAgent":
            player2.model.eval()
            p2_is_dt = True

        with torch.no_grad():

            running_state = env.reset()
            if render:
                env.render()
            if p1_is_dt:
                player1.reset_agent()
            if p2_is_dt:
                player2.reset_agent()
            
            if draw:
                #initalize pygame
                pygame.init()

                screen = pygame.display.set_mode(self.size)

                myfont = pygame.font.SysFont("monospace", 75)
                
                self.draw_board(running_state, screen)
                pygame.display.update()
                
            for t in range(max_test_ep_len):
                if draw:
                    # ---- pygame mode: the render flag is not used in this branch ----
                    available_actions = env.get_available_actions()
                    if name_p1 == "HumanAgent":
                        # poll events until a click lands on an available column
                        action_p1 = -1
                        while action_p1 not in available_actions:
                            for event in pygame.event.get():
                                if event.type == pygame.QUIT:
                                    pygame.quit()
                                    sys.exit()

                                if event.type == pygame.MOUSEMOTION:
                                    # clear the top strip and let a coin follow the mouse
                                    pygame.draw.rect(screen, self.BLACK, (0,0, self.pygame_width, self.SQUARESIZE))
                                    posx = event.pos[0]
                                    pygame.draw.circle(screen, self.RED, (posx, int(self.SQUARESIZE/2)), self.RADIUS)
                                pygame.display.update()
                                if event.type == pygame.MOUSEBUTTONDOWN:
                                    pygame.draw.rect(screen, self.BLACK, (0,0, self.pygame_width, self.SQUARESIZE))
                                    # Ask for Player 1 Input
                                    # x pixel position -> column index
                                    posx = event.pos[0]
                                    action_p1 = int(math.floor(posx/self.SQUARESIZE))
                    elif p1_is_dt:
                        total_timesteps += 1
                        action_p1 = player1.select_action(t, running_reward, running_state, available_actions)
                    else:
                        action_p1 = player1.select_action(running_state, available_actions, training=False)

                    running_state, running_reward = env.make_move(action_p1, "p1")
                    self.draw_board(running_state, screen)

                    # NOTE(review): the move was applied to `env` but the flag is read
                    # from `self`; check_game_done also sets isDone for a draw, in
                    # which case this label would still announce a win - confirm
                    if self.isDone:
                        label = myfont.render("Player 1 wins!!", 1, self.RED)
                        screen.blit(label, (40,10))
                        pygame.display.update()
                        pygame.time.wait(5000)
                        pygame.quit()
                        sys.exit()
                    
                    pygame.time.wait(1000)
                    available_actions = env.get_available_actions()
                    if name_p2 == "HumanAgent":
                        # same event loop as for player 1, with the other coin color
                        action_p2 = -1
                        while action_p2 not in available_actions:
                            for event in pygame.event.get():
                                if event.type == pygame.QUIT:
                                    pygame.quit()
                                    # NOTE(review): builtin exit() here, sys.exit() in the player 1 loop
                                    exit()
                                if event.type == pygame.MOUSEMOTION:
                                    pygame.draw.rect(screen, self.BLACK, (0,0, self.pygame_width, self.SQUARESIZE))
                                    posx = event.pos[0]
                                    pygame.draw.circle(screen, self.YELLOW, (posx, int(self.SQUARESIZE/2)), self.RADIUS)
                                pygame.display.update()
                                if event.type == pygame.MOUSEBUTTONDOWN:
                                    pygame.draw.rect(screen, self.BLACK, (0,0, self.pygame_width, self.SQUARESIZE))
                                    # Ask for Player 2 Input
                                    posx = event.pos[0]
                                    action_p2 = int(math.floor(posx/self.SQUARESIZE))
                    elif p2_is_dt:
                        total_timesteps += 1
                        # running_reward is the reward returned for player 1's move above
                        action_p2 = player2.select_action(t, running_reward, running_state, available_actions)
                    else:
                        action_p2 = player2.select_action(running_state, available_actions, training=False)

                    running_state, running_reward = env.make_move(action_p2, "p2")
                    self.draw_board(running_state, screen)

                    # NOTE(review): same remarks as for the player 1 check above
                    if self.isDone:
                        label = myfont.render("Player 2 wins!!", 1, self.YELLOW)
                        screen.blit(label, (40,10))
                        pygame.display.update()
                        pygame.time.wait(5000)
                        pygame.quit()
                        sys.exit()
                        
                    pygame.time.wait(1000) 

                else:
                    # ---- headless mode: no pygame window, no human players ----
                    available_actions = env.get_available_actions()
                    
                    if p1_is_dt:
                        action_p1 = player1.select_action(t, running_reward, running_state, available_actions)
                    else:
                        action_p1 = player1.select_action(running_state, available_actions, training=False)
                    running_state, running_reward = env.make_move(action_p1, "p1")

                    if render:
                        env.render()
                    if env.isDone:
                        break
                    available_actions = env.get_available_actions()

                    if p2_is_dt:
                        # running_reward is the reward returned for player 1's move above
                        action_p2 = player2.select_action(t, running_reward, running_state, available_actions)
                    else:
                        action_p2 = player2.select_action(running_state, available_actions, training=False)

                    # the reward of player 2's move is discarded
                    running_state, _ = env.make_move(action_p2, 'p2')

                    # accumulates player 1's reward only; not reached when player 1's
                    # move ended the game (break above)
                    total_reward += running_reward

                    if render:
                        env.render()
                    if env.isDone:
                        break