In [1]:
import numpy as np
import copy
import math

In [2]:
class connect4:
    """A Connect Four position on a ``rows`` x ``columns`` grid.

    Row 0 is the top of the board; a dropped piece settles at the highest
    row index that is still free in its column. ``move`` never mutates the
    receiver: it returns a new position.

    Attributes:
        rows, columns: Board dimensions.
        EMPTY, RED, YELLOW: Single-character cell markers.
        DRAW: Value returned by ``winner`` for a full board with no line.
        next_player: Marker of the player to move (RED moves first).
        flip_player: Maps each player marker to the opponent's marker.
        board: NumPy array of cell markers, indexed ``board[row][col]``.
    """

    # (row step, column step) of the four line orientations, in scan order:
    # horizontal, vertical, down-right diagonal, down-left diagonal.
    _DIRECTIONS = ((0, 1), (1, 0), (1, 1), (1, -1))
    # Number of aligned pieces that wins the game.
    _RUN_LENGTH = 4

    def __init__(self, rows, columns):
        """Create an empty board with RED to move."""
        self.rows = rows
        self.columns = columns
        self.EMPTY = ' '
        self.RED = 'R'
        self.YELLOW = 'Y'
        self.DRAW = 'draw'

        self.next_player = self.RED
        self.flip_player = {self.RED: self.YELLOW, self.YELLOW: self.RED}

        self.board = np.full((self.rows, self.columns), self.EMPTY)

    def valid_move(self, col):
        """Return True when ``col`` is on the board and its top cell is free.

        Columns outside ``0 .. columns - 1`` are reported as invalid instead
        of being handed to NumPy, where a negative index would silently wrap
        around to the other side of the board and a too-large one would raise
        ``IndexError``.
        """
        if not 0 <= col < self.columns:
            return False
        return bool(self.board[0][col] == self.EMPTY)

    def move(self, col):
        """Return the position reached after ``next_player`` drops into ``col``.

        Raises:
            ValueError: If ``col`` is off the board or the column is full.
        """
        if not self.valid_move(col):
            raise ValueError("Invalid move")

        state = copy.deepcopy(self)

        # valid_move guarantees row 0 is free; slide down until the next
        # cell is occupied or the bottom of the board is reached.
        landing_row = 0
        for row in range(1, self.rows):
            if state.board[row][col] != self.EMPTY:
                break
            landing_row = row
        state.board[landing_row][col] = state.next_player

        state.next_player = state.flip_player[state.next_player]
        return state

    def winner(self):
        """Report the outcome of the position.

        Returns:
            The marker of a player owning four aligned cells, ``self.DRAW``
            when the board is full without such a line, or ``False`` when the
            game is still open.
        """
        reach = self._RUN_LENGTH - 1
        for d_row, d_col in self._DIRECTIONS:
            for row in range(self.rows):
                # Skip starting cells whose line would leave the board.
                if not 0 <= row + reach * d_row < self.rows:
                    continue
                for col in range(self.columns):
                    if not 0 <= col + reach * d_col < self.columns:
                        continue
                    token = self.board[row][col]
                    if token == self.EMPTY:
                        continue
                    if all(
                        self.board[row + step * d_row][col + step * d_col] == token
                        for step in range(1, self._RUN_LENGTH)
                    ):
                        return token

        if np.all(self.board != self.EMPTY):
            return self.DRAW

        return False

    def actions(self):
        """Return the list of columns that can currently be played."""
        return [col for col in range(self.columns) if self.valid_move(col)]

In [27]:
class minimaxAgent:
    """Agent that picks its move by depth-weighted minimax with alpha-beta pruning.

    The state passed in must provide ``next_player``, ``flip_player``,
    ``DRAW``, ``actions()``, ``move(action)`` and ``winner()``.
    """

    def next_move(self, state):
        """Return the action with the highest minimax value for the player to move.

        The best value and action are also printed. Returns ``None`` when
        ``state.actions()`` is empty.
        """
        player = state.next_player

        best_action = None
        best_value = -math.inf

        for action in state.actions():
            # The root is a maximising node, so its running best is a valid
            # lower bound (alpha) for every remaining sibling. A sibling that
            # cannot beat it is cut off early and returns a value that is not
            # greater than best_value, which leaves the choice unchanged.
            action_value = self.get_value(
                state.move(action), player, get_min=True, alpha=best_value
            )

            if action_value > best_value:
                best_action = action
                best_value = action_value

        print("Best action: ", best_action, " Best value:", best_value)
        return best_action

    def get_value(self, state, player, get_min, alpha=-math.inf, beta=math.inf, turns=1):
        """Evaluate ``state`` from ``player``'s point of view.

        Args:
            state: Position to evaluate.
            player: Marker of the player the value is computed for.
            get_min: True when this node minimises, False when it maximises.
            alpha: Best value the maximiser is already assured of.
            beta: Best value the minimiser is already assured of.
            turns: Number of moves made since the root; used to scale results
                so that nearer wins and losses have larger magnitude.

        Returns:
            ``1/turns`` for a win by ``player``, ``-1/turns`` for a win by the
            opponent, ``0`` for a draw, otherwise the minimax value of the
            children (or a bound on it when the search window cuts off).
        """
        other_player = state.flip_player[player]

        outcome = state.winner()
        if outcome == player:
            return 1 / turns
        if outcome == other_player:
            return -1 / turns
        if outcome == state.DRAW:
            return 0

        best_value = math.inf if get_min else -math.inf

        for action in state.actions():
            action_value = self.get_value(
                state.move(action),
                player,
                get_min=not get_min,
                alpha=alpha,
                beta=beta,
                turns=turns + 1,
            )

            if get_min:
                beta = min(beta, action_value)
                # The maximiser above already has something at least this good.
                if action_value <= alpha:
                    return action_value
                best_value = min(best_value, action_value)
            else:
                alpha = max(alpha, action_value)
                # The minimiser above will never allow this branch.
                if action_value >= beta:
                    return action_value
                best_value = max(best_value, action_value)

        return best_value
    
class badAgent:
    """Agent that deliberately heads for the worst reachable outcome.

    Every continuation is explored with no pruning, and each move is scored
    by the lowest result found anywhere beneath it.
    """

    def next_move(self, state):
        """Return (and print) the action whose subtree holds the lowest value."""
        me = state.next_player

        worst_action, worst_value = None, math.inf
        for candidate in state.actions():
            score = self.get_value(state.move(candidate), me)
            if score < worst_value:
                worst_action, worst_value = candidate, score

        print("Worst action: ", worst_action, " Worst value:", worst_value)
        return worst_action

    def get_value(self, state, player, turns=1):
        """Return the lowest outcome for ``player`` reachable from ``state``.

        Terminal positions score ``1/turns`` for a win by ``player``,
        ``-1/turns`` for a win by the opponent and ``0`` for a draw; any other
        position scores the minimum over all of its successors.
        """
        rival = state.flip_player[player]

        result = state.winner()
        if result == player:
            return 1 / turns
        if result == rival:
            return -1 / turns
        if result == state.DRAW:
            return 0

        return min(
            (
                self.get_value(state.move(option), player, turns=turns + 1)
                for option in state.actions()
            ),
            default=math.inf,
        )
    
class randomAgent:
    """Agent that plays a randomly drawn legal action."""

    def next_move(self, state):
        """Return one entry of ``state.actions()`` drawn with NumPy's global RNG."""
        legal_actions = state.actions()
        return np.random.choice(legal_actions)
    
class humanAgent:
    """Agent that asks a person on standard input which column to play."""

    def next_move(self, state):
        """Prompt until the user enters a playable column, then return it.

        Input that is not an integer, is negative, or makes
        ``state.valid_move`` raise ``IndexError`` is rejected with a message
        and the prompt is repeated. A column that ``state.valid_move`` reports
        as unplayable is rejected the same way.
        """
        while True:
            print("What's your next move? In format 0-x")
            try:
                move = int(input('>'))
                # A negative index would otherwise wrap around to a column on
                # the opposite side of the board.
                if move < 0:
                    raise ValueError("column index must not be negative")
                playable = state.valid_move(move)
            except (ValueError, IndexError):
                print("Invalid move. Try again.")
                continue

            if playable:
                return move
            print("Space must be empty.")

In [28]:
def run_game(player1=None, player2=None, rows=3, columns=4):
    """Play one game between two agents, printing the board after every move.

    Args:
        player1: Agent playing RED (moves first). Any object with a
            ``next_move(state)`` method; defaults to a new ``badAgent``.
        player2: Agent playing YELLOW. Defaults to a new ``minimaxAgent``.
        rows: Number of board rows (default 3).
        columns: Number of board columns (default 4).

    Returns:
        None. The result is reported with ``print``.
    """
    # Create the default agents per call rather than in the signature, where
    # they would be built once at definition time and go stale if the agent
    # classes are redefined later in the session.
    if player1 is None:
        player1 = badAgent()
    if player2 is None:
        player2 = minimaxAgent()

    state = connect4(rows, columns)
    print(state.board)

    turn_order = (
        (player1, state.RED, "Red wins!"),
        (player2, state.YELLOW, "Yellow wins!"),
    )

    while not state.winner():
        for agent, colour, victory_message in turn_order:
            state = state.move(agent.next_move(state))
            print(state.board)

            # Evaluate the position once per move and reuse the result.
            outcome = state.winner()
            if outcome == colour:
                print(victory_message)
                return
            if outcome == state.DRAW:
                print("Draw!")
                return

In [29]:
# Play a single game using run_game's default arguments.
run_game()

[[' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ']]
Worst action:  0  Worst value: -0.1
[[' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ']
 ['R' ' ' ' ' ' ']]
Best action:  0  Best value: 0
[[' ' ' ' ' ' ' ']
 ['Y' ' ' ' ' ' ']
 ['R' ' ' ' ' ' ']]
Worst action:  0  Worst value: -0.125
[['R' ' ' ' ' ' ']
 ['Y' ' ' ' ' ' ']
 ['R' ' ' ' ' ' ']]
Best action:  1  Best value: 0
[['R' ' ' ' ' ' ']
 ['Y' ' ' ' ' ' ']
 ['R' 'Y' ' ' ' ']]
Worst action:  2  Worst value: -0.16666666666666666
[['R' ' ' ' ' ' ']
 ['Y' ' ' ' ' ' ']
 ['R' 'Y' 'R' ' ']]
Best action:  1  Best value: 0
[['R' ' ' ' ' ' ']
 ['Y' 'Y' ' ' ' ']
 ['R' 'Y' 'R' ' ']]
Worst action:  1  Worst value: -0.25
[['R' 'R' ' ' ' ']
 ['Y' 'Y' ' ' ' ']
 ['R' 'Y' 'R' ' ']]
Best action:  2  Best value: 0
[['R' 'R' ' ' ' ']
 ['Y' 'Y' 'Y' ' ']
 ['R' 'Y' 'R' ' ']]
Worst action:  3  Worst value: -0.5
[['R' 'R' ' ' ' ']
 ['Y' 'Y' 'Y' ' ']
 ['R' 'Y' 'R' 'R']]
Best action:  3  Best value: 1.0
[['R' 'R' ' ' ' ']
 ['Y' 'Y' 'Y' 'Y']
 ['R' 'Y' 'R' 'R']]
Yellow