In [1]:
# final

import numpy as np

class Connect4:
    """Connect 4 on a 6x7 grid: a tabular Q-learning agent (player 1) vs. a human (player 2).

    ``self.board`` holds 0 for an empty cell and the player number otherwise.
    Row 0 is the bottom row; ``print_board`` flips the array so that row is
    printed last.
    """

    def __init__(self, epsilon=0.1, alpha=0.2, gamma=0.9):
        """Create an empty board and an empty Q-table.

        Args:
            epsilon: Probability of choosing a random column instead of the greedy one.
            alpha: Learning rate used by ``update_q_value``.
            gamma: Discount factor applied to the best Q-value of the next state.
        """
        self.board = np.zeros((6, 7))  # 6x7 board
        self.player = 1  # Player 1 starts
        self.game_over = False
        self.epsilon = epsilon  # Exploration rate
        self.alpha = alpha  # Learning rate
        self.gamma = gamma  # Discount factor
        self.q_table = dict()  # (state string, column) -> Q-value
        self.rewards = []  # rewards recorded on agent wins and on ties
        self.penalties = []  # negative rewards recorded on agent losses
        self.cumulative_rewards = []

    def print_board(self):
        """Print the board with row 0 at the bottom."""
        print(np.flip(self.board, 0))  # Flipping the board for better visualization
        print('\n')

    def drop_piece(self, col):
        """Put the current player's piece in the lowest empty cell of ``col``.

        Nothing happens when the column is already full.
        """
        for row in range(6):
            if self.board[row][col] == 0:
                self.board[row][col] = self.player
                break

    def is_valid_location(self, col):
        """Return whether ``col`` still has room (its top cell is empty)."""
        return self.board[5][col] == 0

    def winning_move(self, piece):
        """Return True if ``piece`` has four in a row anywhere, else False."""
        board = self.board
        # (row step, column step, start rows, start columns) for each direction.
        lines = (
            (0, 1, range(6), range(4)),      # horizontal
            (1, 0, range(3), range(7)),      # vertical
            (1, 1, range(3), range(4)),      # positively sloped diagonal
            (-1, 1, range(3, 6), range(4)),  # negatively sloped diagonal
        )
        for d_row, d_col, rows, cols in lines:
            for r in rows:
                for c in cols:
                    if all(board[r + k * d_row][c + k * d_col] == piece for k in range(4)):
                        return True
        # Explicit False so callers never receive None.
        return False

    def available_actions(self):
        """Return the list of columns that are not full."""
        return [col for col in range(7) if self.is_valid_location(col)]

    def get_q_value(self, state, action):
        """Return the stored Q-value for ``(state, action)``, defaulting to 0."""
        return self.q_table.get((state, action), 0)

    def update_q_value(self, state, action, reward, next_state):
        """Apply one Q-learning update to ``(state, action)``.

        Args:
            state: State string in which ``action`` was taken.
            action: Column that was played.
            reward: Reward observed for the move.
            next_state: Resulting state string, or None for a terminal transition.
        """
        old_q_value = self.get_q_value(state, action)
        next_actions = self.available_actions()
        if next_state is None or not next_actions:
            # Terminal transition (or full board): nothing to bootstrap from.
            # Without this guard max() raised ValueError on every tie.
            next_q_value = 0
        else:
            next_q_value = max(self.get_q_value(next_state, a) for a in next_actions)
        new_q_value = old_q_value + self.alpha * (reward + self.gamma * next_q_value - old_q_value)
        self.q_table[(state, action)] = new_q_value

    def select_action(self, state):
        """Pick a column epsilon-greedily among the available ones.

        Greedy ties resolve to the highest column index because the
        ``(q_value, action)`` tuples are compared lexicographically.
        """
        if np.random.uniform(0, 1) < self.epsilon:
            return np.random.choice(self.available_actions())
        else:
            return max([(self.get_q_value(state, action), action) for action in self.available_actions()])[1]

    def state_to_string(self):
        """Serialize the board into the string used as the Q-table state key."""
        return ''.join(map(str, self.board.flatten()))

    def _board_full(self):
        """Return True when no empty cell is left on the board."""
        return not np.any(self.board == 0)

    def _read_human_column(self):
        """Prompt the human for a column; return it, or None if the entry is unusable."""
        raw = input("Its your turn,  select a column (0-6): ")
        try:
            col = int(raw)
        except ValueError:
            return None
        # Range check first: negative indices would silently wrap around.
        if 0 <= col < 7 and self.is_valid_location(col):
            return col
        return None

    def _agent_turn(self):
        """Play one move for player 1 and return ``(state, action, reward)``."""
        state = self.state_to_string()
        action = self.select_action(state)
        print("Player {} selects column {}".format(self.player, action))
        self.drop_piece(action)

        reward = 0
        if self.winning_move(self.player):
            print("Player {} wins!".format(self.player))
            reward = 1
            self.rewards.append(reward)
            self.update_q_value(state, action, reward, None)
            self.game_over = True
        elif self._board_full():
            print("It's a tie!")
            self.rewards.append(reward)
            self.update_q_value(state, action, reward, None)
            self.game_over = True
        else:
            self.player = 2  # Switch to player 2
            self.update_q_value(state, action, reward, self.state_to_string())
        return state, action, reward

    def _human_turn(self, agent_state, agent_action):
        """Play one move for player 2 and return the reward seen by the agent.

        A loss or tie is credited to the agent's most recent
        ``(agent_state, agent_action)`` pair.
        """
        col = self._read_human_column()
        if col is None:
            print("Invalid move! Please select another column.")
            return 0
        self.drop_piece(col)

        reward = 0
        if self.winning_move(self.player):
            print("Player 2 wins!")
            reward = -1
            self.penalties.append(reward)
            self.update_q_value(agent_state, agent_action, reward, None)
            self.game_over = True
        elif self._board_full():
            print("It's a tie!")
            self.rewards.append(reward)
            self.update_q_value(agent_state, agent_action, reward, None)
            self.game_over = True
        else:
            self.player = 1  # Switch back to player 1
        return reward

    def play(self):
        """Run one interactive game until a win or a tie, then print the reward summary."""
        self.print_board()  # Print the initial board
        agent_state = agent_action = None
        while not self.game_over:
            if self.player == 1:
                agent_state, agent_action, reward = self._agent_turn()
            else:
                reward = self._human_turn(agent_state, agent_action)

            self.print_board()  # Print the board after each move
            print("Reward:", reward)  # Print the reward for the current move

        # Print the final board after the game is over
        self.print_board()

        # Print rewards, penalties, and cumulative rewards
        print("Rewards:", self.rewards)
        print("Penalties:", self.penalties)
        cumulative_reward = sum(self.rewards) + sum(self.penalties)
        print("Cumulative Reward:", cumulative_reward)


if __name__ == "__main__":
    # Play one interactive game; the agent moves first.
    greeting = "Hey there!Lets play Connect4🥳\n\nHere Player 1 is an agent and you are the Player 2"
    game = Connect4()
    print(greeting)

    game.play()

    # Derive the greedy policy: for every state seen, keep the action with
    # the strictly highest Q-value (the first one encountered wins ties).
    best = {}
    for (state, action), q_value in game.q_table.items():
        if state not in best or q_value > best[state][1]:
            best[state] = (action, q_value)
    policy = {state: choice[0] for state, choice in best.items()}

    # Show the learned policy, one state per entry.
    print("Learned policy:")
    for state in policy:
        print("State:", state)
        print("Action (Column):", policy[state])


Hey there!Lets play Connect4🥳

Here Player 1 is an agent and you are the Player 2
[[0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]]


Player 1 selects column 6
[[0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1.]]


Reward: 0
Its your turn,  select a column (0-6): 5
[[0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 2. 1.]]


Reward: 0
Player 1 selects column 6
[[0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 2. 1.]]


Reward: 0
Its your turn,  select a column (0-6): 5
[[0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 2. 1.]
 [0. 0. 0. 0. 0. 2. 1.]]


Reward: 0
Player 1 

In [2]:
# Dump every learned (state, action) entry together with its Q-value
print("Q-table:")
for key in game.q_table:
    state, action = key
    print("State:", state)
    print("Action (Column):", action)
    print("Q-value:", game.q_table[key])
    print()


Q-table:
State: 0.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.0
Action (Column): 6
Q-value: 0.0

State: 0.00.00.00.00.02.01.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.0
Action (Column): 6
Q-value: 0.0

State: 0.00.00.00.00.02.01.00.00.00.00.00.02.01.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.0
Action (Column): 6
Q-value: 0.0

State: 0.00.00.00.00.02.01.00.00.00.00.00.02.01.00.00.00.00.00.02.01.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.0
Action (Column): 6
Q-value: 0.2



In [4]:
import numpy as np

class Connect4:
    """Connect 4 on a 6x7 grid: a Q-learning agent (player 1) vs. a human (player 2).

    ``self.board`` holds 0 for an empty cell and the player number otherwise.
    Row 0 is the bottom row; ``print_board`` flips the array so that row is
    printed last. Before consulting its Q-table the agent blocks any column
    in which player 2 has three stacked pieces with an empty cell on top.
    """

    def __init__(self, epsilon=0.1, alpha=0.2, gamma=0.9):
        """Create an empty board and an empty Q-table.

        Args:
            epsilon: Probability of choosing a random column instead of the greedy one.
            alpha: Learning rate used by ``update_q_value``.
            gamma: Discount factor applied to the best Q-value of the next state.
        """
        self.board = np.zeros((6, 7))  # 6x7 board
        self.player = 1  # Player 1 starts
        self.game_over = False
        self.epsilon = epsilon  # Exploration rate
        self.alpha = alpha  # Learning rate
        self.gamma = gamma  # Discount factor
        self.q_table = dict()  # (state string, column) -> Q-value
        self.rewards = []  # rewards recorded on agent wins and on ties
        self.penalties = []  # negative rewards recorded on agent losses
        self.cumulative_rewards = []

    def print_board(self):
        """Print the board with row 0 at the bottom."""
        print(np.flip(self.board, 0))  # Flipping the board for better visualization
        print('\n')

    def drop_piece(self, col):
        """Put the current player's piece in the lowest empty cell of ``col``.

        Nothing happens when the column is already full.
        """
        for row in range(6):
            if self.board[row][col] == 0:
                self.board[row][col] = self.player
                break

    def is_valid_location(self, col):
        """Return whether ``col`` still has room (its top cell is empty)."""
        return self.board[5][col] == 0

    def winning_move(self, piece):
        """Return True if ``piece`` has four in a row anywhere, else False."""
        board = self.board
        # (row step, column step, start rows, start columns) for each direction.
        lines = (
            (0, 1, range(6), range(4)),      # horizontal
            (1, 0, range(3), range(7)),      # vertical
            (1, 1, range(3), range(4)),      # positively sloped diagonal
            (-1, 1, range(3, 6), range(4)),  # negatively sloped diagonal
        )
        for d_row, d_col, rows, cols in lines:
            for r in rows:
                for c in cols:
                    if all(board[r + k * d_row][c + k * d_col] == piece for k in range(4)):
                        return True
        # Explicit False so callers never receive None.
        return False

    def available_actions(self):
        """Return the list of columns that are not full."""
        return [col for col in range(7) if self.is_valid_location(col)]

    def get_q_value(self, state, action):
        """Return the stored Q-value for ``(state, action)``, defaulting to 0."""
        return self.q_table.get((state, action), 0)

    def update_q_value(self, state, action, reward, next_state):
        """Apply one Q-learning update to ``(state, action)``.

        Args:
            state: State string in which ``action`` was taken.
            action: Column that was played.
            reward: Reward observed for the move.
            next_state: Resulting state string, or None for a terminal transition.
        """
        old_q_value = self.get_q_value(state, action)
        next_actions = self.available_actions()
        if next_state is None or not next_actions:
            # Terminal transition (or full board): nothing to bootstrap from.
            # Without this guard max() raised ValueError on every tie.
            next_q_value = 0
        else:
            next_q_value = max(self.get_q_value(next_state, a) for a in next_actions)
        new_q_value = old_q_value + self.alpha * (reward + self.gamma * next_q_value - old_q_value)
        self.q_table[(state, action)] = new_q_value

    def select_action(self, state):
        """Pick a column epsilon-greedily among the available ones.

        Greedy ties resolve to the highest column index because the
        ``(q_value, action)`` tuples are compared lexicographically.
        """
        if np.random.uniform(0, 1) < self.epsilon:
            return np.random.choice(self.available_actions())
        else:
            return max([(self.get_q_value(state, action), action) for action in self.available_actions()])[1]

    def state_to_string(self):
        """Serialize the board into the string used as the Q-table state key."""
        return ''.join(map(str, self.board.flatten()))

    def _board_full(self):
        """Return True when no empty cell is left on the board."""
        return not np.any(self.board == 0)

    def _find_vertical_threat(self, piece):
        """Return the first column where ``piece`` has three stacked cells with an empty cell above, else None."""
        for col in range(7):
            # Start rows stop at 2 so that row + 3 stays on the 6-row board.
            for row in range(3):
                if (self.board[row][col] == piece
                        and self.board[row + 1][col] == piece
                        and self.board[row + 2][col] == piece
                        and self.board[row + 3][col] == 0):
                    return col
        return None

    def _read_human_column(self):
        """Prompt the human for a column; return it, or None if the entry is unusable."""
        raw = input("It's your turn, select a column (0-6): ")
        try:
            col = int(raw)
        except ValueError:
            return None
        # Range check first: negative indices would silently wrap around.
        if 0 <= col < 7 and self.is_valid_location(col):
            return col
        return None

    def _agent_turn(self):
        """Play one move for player 1 and return ``(state, action, reward)``.

        A vertical three-stack of player 2 is blocked first; blocking itself
        earns no reward. The block is made here, with the agent's own piece,
        so that it actually stops player 2 instead of completing their line.
        """
        state = self.state_to_string()
        block_col = self._find_vertical_threat(2)
        if block_col is not None:
            print("Blocking player 2's win by marking in column", block_col)
            action = block_col
        else:
            action = self.select_action(state)
        print("Player {} selects column {}".format(self.player, action))
        self.drop_piece(action)

        reward = 0  # No reward for blocking or for an ordinary move
        if self.winning_move(self.player):
            print("Player {} wins!".format(self.player))
            reward = 1
            self.rewards.append(reward)
            self.update_q_value(state, action, reward, None)
            self.game_over = True
        elif self._board_full():
            print("It's a tie!")
            self.rewards.append(reward)
            self.update_q_value(state, action, reward, None)
            self.game_over = True
        else:
            self.player = 2  # Switch to player 2
            self.update_q_value(state, action, reward, self.state_to_string())
        return state, action, reward

    def _human_turn(self, agent_state, agent_action):
        """Play one move for player 2 and return the reward seen by the agent.

        A loss or tie is credited to the agent's most recent
        ``(agent_state, agent_action)`` pair, not to the human's column.
        """
        col = self._read_human_column()
        if col is None:
            print("Invalid move! Please select another column.")
            return 0
        self.drop_piece(col)

        reward = 0
        if self.winning_move(self.player):
            print("Player 2 wins!")
            reward = -1
            self.penalties.append(reward)
            self.update_q_value(agent_state, agent_action, reward, None)
            self.game_over = True
        elif self._board_full():
            print("It's a tie!")
            self.rewards.append(reward)
            self.update_q_value(agent_state, agent_action, reward, None)
            self.game_over = True
        else:
            self.player = 1
        return reward

    def play(self):
        """Run one interactive game until a win or a tie, then print the reward summary."""
        self.print_board()  # Print the initial board
        agent_state = agent_action = None
        while not self.game_over:
            if self.player == 1:
                agent_state, agent_action, reward = self._agent_turn()
            else:
                reward = self._human_turn(agent_state, agent_action)

            self.print_board()
            print("Reward:", reward)

        self.print_board()
        print("Rewards:", self.rewards)
        print("Penalties:", self.penalties)
        cumulative_reward = sum(self.rewards) + sum(self.penalties)
        print("Cumulative Reward:", cumulative_reward)


if __name__ == "__main__":
    # Play one interactive game; the agent moves first.
    greeting = "Hey there! Let's play Connect4 🥳\n\nHere Player 1 is an agent and you are Player 2"
    game = Connect4()
    print(greeting)
    game.play()


Hey there! Let's play Connect4 🥳

Here Player 1 is an agent and you are Player 2
[[0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]]


Player 1 selects column 6
[[0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1.]]


Reward: 0
It's your turn, select a column (0-6): 0
[[0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [2. 0. 0. 0. 0. 0. 1.]]


Reward: 0
Player 1 selects column 6
[[0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1.]
 [2. 0. 0. 0. 0. 0. 1.]]


Reward: 0
It's your turn, select a column (0-6): 0
[[0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [2. 0. 0. 0. 0. 0. 1.]
 [2. 0. 0. 0. 0. 0. 1.]]


Reward: 0
Player 1 s

In [15]:
class Connect4:
    """Connect 4 on a 6x7 grid: a Q-learning agent (player 1) vs. a human (player 2).

    ``self.board`` holds 0 for an empty cell and the player number otherwise.
    Row 0 is the bottom row; ``print_board`` flips the array so that row is
    printed last. Rewards: +10 for an agent win, -10 for a loss, +5 for
    blocking a vertical three-stack of player 2, 0 otherwise.
    """

    def __init__(self, epsilon=0.1, alpha=0.2, gamma=0.9):
        """Create an empty board and an empty Q-table.

        Args:
            epsilon: Probability of choosing a random column instead of the greedy one.
            alpha: Learning rate used by ``update_q_value``.
            gamma: Discount factor applied to the best Q-value of the next state.
        """
        self.board = np.zeros((6, 7))  # 6x7 board
        self.player = 1  # Player 1 starts
        self.game_over = False
        self.epsilon = epsilon  # Exploration rate
        self.alpha = alpha  # Learning rate
        self.gamma = gamma  # Discount factor
        self.q_table = dict()  # (state string, column) -> Q-value
        self.rewards = []  # rewards recorded on agent wins and on ties
        self.penalties = []  # negative rewards recorded on agent losses
        self.cumulative_rewards = []

    def print_board(self):
        """Print the board with row 0 at the bottom."""
        print(np.flip(self.board, 0))  # Flipping the board for better visualization
        print('\n')

    def drop_piece(self, col):
        """Put the current player's piece in the lowest empty cell of ``col``.

        Nothing happens when the column is already full.
        """
        for row in range(6):
            if self.board[row][col] == 0:
                self.board[row][col] = self.player
                break

    def is_valid_location(self, col):
        """Return whether ``col`` still has room (its top cell is empty)."""
        return self.board[5][col] == 0

    def winning_move(self, piece):
        """Return True if ``piece`` has four in a row anywhere, else False."""
        board = self.board
        # (row step, column step, start rows, start columns) for each direction.
        lines = (
            (0, 1, range(6), range(4)),      # horizontal
            (1, 0, range(3), range(7)),      # vertical
            (1, 1, range(3), range(4)),      # positively sloped diagonal
            (-1, 1, range(3, 6), range(4)),  # negatively sloped diagonal
        )
        for d_row, d_col, rows, cols in lines:
            for r in rows:
                for c in cols:
                    if all(board[r + k * d_row][c + k * d_col] == piece for k in range(4)):
                        return True
        # Explicit False so callers never receive None.
        return False

    def available_actions(self):
        """Return the list of columns that are not full."""
        return [col for col in range(7) if self.is_valid_location(col)]

    def get_q_value(self, state, action):
        """Return the stored Q-value for ``(state, action)``, defaulting to 0."""
        return self.q_table.get((state, action), 0)

    def update_q_value(self, state, action, reward, next_state):
        """Apply one Q-learning update to ``(state, action)``.

        Args:
            state: State string in which ``action`` was taken.
            action: Column that was played.
            reward: Reward observed for the move.
            next_state: Resulting state string, or None for a terminal transition.
        """
        old_q_value = self.get_q_value(state, action)
        next_actions = self.available_actions()
        if next_state is None or not next_actions:
            # Terminal transition (or full board): nothing to bootstrap from.
            # Without this guard max() raised ValueError on every tie.
            next_q_max = 0
        else:
            next_q_max = max(self.get_q_value(next_state, a) for a in next_actions)
        new_q_value = old_q_value + self.alpha * (reward + self.gamma * next_q_max - old_q_value)
        self.q_table[(state, action)] = new_q_value

    def select_action(self, state):
        """Pick a column epsilon-greedily among the available ones.

        Greedy ties resolve to the highest column index because the
        ``(q_value, action)`` tuples are compared lexicographically.
        """
        if np.random.uniform(0, 1) < self.epsilon:
            return np.random.choice(self.available_actions())
        else:
            return max([(self.get_q_value(state, action), action) for action in self.available_actions()])[1]

    def state_to_string(self):
        """Serialize the board into the string used as the Q-table state key."""
        return ''.join(map(str, self.board.flatten()))

    def _board_full(self):
        """Return True when no empty cell is left on the board."""
        return not np.any(self.board == 0)

    def _find_vertical_threat(self, piece):
        """Return the first column where ``piece`` has three stacked cells with an empty cell above, else None."""
        for col in range(7):
            # Start rows stop at 2 so that row + 3 stays on the 6-row board.
            for row in range(3):
                if (self.board[row][col] == piece
                        and self.board[row + 1][col] == piece
                        and self.board[row + 2][col] == piece
                        and self.board[row + 3][col] == 0):
                    return col
        return None

    def _read_human_column(self):
        """Prompt the human for a column; return it, or None if the entry is unusable."""
        raw = input("It's your turn, select a column (0-6): ")
        try:
            col = int(raw)
        except ValueError:
            return None
        # Range check first: negative indices would silently wrap around.
        if 0 <= col < 7 and self.is_valid_location(col):
            return col
        return None

    def _agent_turn(self):
        """Play one move for player 1 and return ``(state, action, reward)``.

        A vertical three-stack of player 2 is blocked first and earns +5.
        The block is made here, with the agent's own piece, so that it
        actually stops player 2 instead of completing their line.
        """
        state = self.state_to_string()
        block_col = self._find_vertical_threat(2)
        if block_col is not None:
            print("Blocking player 2's win by marking in column", block_col)
            action = block_col
            reward = 5  # Reward for blocking
        else:
            action = self.select_action(state)
            reward = 0
        print("Player {} selects column {}".format(self.player, action))
        self.drop_piece(action)

        if self.winning_move(self.player):
            print("Player {} wins!".format(self.player))
            reward = 10
            self.rewards.append(reward)
            self.update_q_value(state, action, reward, None)
            self.game_over = True
        elif self._board_full():
            print("It's a tie!")
            reward = 0
            self.rewards.append(reward)
            self.update_q_value(state, action, reward, None)
            self.game_over = True
        else:
            self.player = 2  # Switch to player 2
            self.update_q_value(state, action, reward, self.state_to_string())
        return state, action, reward

    def _human_turn(self, agent_state, agent_action):
        """Play one move for player 2 and return the reward seen by the agent.

        A loss or tie is credited to the agent's most recent
        ``(agent_state, agent_action)`` pair, not to the human's column.
        """
        col = self._read_human_column()
        if col is None:
            print("Invalid move! Please select another column.")
            return 0
        self.drop_piece(col)

        reward = 0
        if self.winning_move(self.player):
            print("Player 2 wins!")
            reward = -10
            self.penalties.append(reward)
            self.update_q_value(agent_state, agent_action, reward, None)
            self.game_over = True
        elif self._board_full():
            print("It's a tie!")
            self.rewards.append(reward)
            self.update_q_value(agent_state, agent_action, reward, None)
            self.game_over = True
        else:
            self.player = 1
        return reward

    def play(self):
        """Run one interactive game until a win or a tie, then print the reward summary."""
        self.print_board()  # Print the initial board
        agent_state = agent_action = None
        while not self.game_over:
            if self.player == 1:
                agent_state, agent_action, reward = self._agent_turn()
            else:
                reward = self._human_turn(agent_state, agent_action)

            self.print_board()
            print("Reward:", reward)

        self.print_board()
        print("Rewards:", self.rewards)
        print("Penalties:", self.penalties)
        cumulative_reward = sum(self.rewards) + sum(self.penalties)
        print("Cumulative Reward:", cumulative_reward)
        
if __name__ == "__main__":
    # Play one interactive game; the agent moves first.
    greeting = "Hey there! Let's play Connect4 🥳\n\nHere Player 1 is an agent and you are Player 2"
    game = Connect4()
    print(greeting)
    game.play()



Hey there! Let's play Connect4 🥳

Here Player 1 is an agent and you are Player 2
[[0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]]


Player 1 selects column 6
[[0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1.]]


Reward: 0


KeyboardInterrupt: Interrupted by user

In [20]:
import numpy as np

class Connect4:
    """Connect 4 on a 6x7 grid: a Q-learning agent (player 1) vs. a human (player 2).

    ``self.board`` holds 0 for an empty cell and the player number otherwise.
    Row 0 is the bottom row; ``print_board`` flips the array so that row is
    printed last. On player 2's turn a vertical three-stack of player 1 is
    blocked automatically, without prompting the human.
    """

    def __init__(self, epsilon=0.1, alpha=0.2, gamma=0.9):
        """Create an empty board and an empty Q-table.

        Args:
            epsilon: Probability of choosing a random column instead of the greedy one.
            alpha: Learning rate used by ``update_q_value``.
            gamma: Discount factor applied to the best Q-value of the next state.
        """
        self.board = np.zeros((6, 7))  # 6x7 board
        self.player = 1  # Player 1 starts
        self.game_over = False
        self.epsilon = epsilon  # Exploration rate
        self.alpha = alpha  # Learning rate
        self.gamma = gamma  # Discount factor
        self.q_table = dict()  # (state string, column) -> Q-value
        self.rewards = []  # rewards recorded on agent wins and on ties
        self.penalties = []  # negative rewards recorded on agent losses
        self.cumulative_rewards = []

    def print_board(self):
        """Print the board with row 0 at the bottom."""
        print(np.flip(self.board, 0))  # Flipping the board for better visualization
        print('\n')

    def drop_piece(self, col):
        """Put the current player's piece in the lowest empty cell of ``col``.

        Nothing happens when the column is already full.
        """
        for row in range(6):
            if self.board[row][col] == 0:
                self.board[row][col] = self.player
                break

    def is_valid_location(self, col):
        """Return whether ``col`` still has room (its top cell is empty)."""
        return self.board[5][col] == 0

    def winning_move(self, piece):
        """Return True if ``piece`` has four in a row anywhere, else False."""
        board = self.board
        # (row step, column step, start rows, start columns) for each direction.
        lines = (
            (0, 1, range(6), range(4)),      # horizontal
            (1, 0, range(3), range(7)),      # vertical
            (1, 1, range(3), range(4)),      # positively sloped diagonal
            (-1, 1, range(3, 6), range(4)),  # negatively sloped diagonal
        )
        for d_row, d_col, rows, cols in lines:
            for r in rows:
                for c in cols:
                    if all(board[r + k * d_row][c + k * d_col] == piece for k in range(4)):
                        return True
        # Explicit False so callers never receive None.
        return False

    def available_actions(self):
        """Return the list of columns that are not full."""
        return [col for col in range(7) if self.is_valid_location(col)]

    def get_q_value(self, state, action):
        """Return the stored Q-value for ``(state, action)``, defaulting to 0."""
        return self.q_table.get((state, action), 0)

    def update_q_value(self, state, action, reward, next_state):
        """Apply one Q-learning update to ``(state, action)``.

        Args:
            state: State string in which ``action`` was taken.
            action: Column that was played.
            reward: Reward observed for the move.
            next_state: Resulting state string, or None for a terminal transition.
        """
        old_q_value = self.get_q_value(state, action)
        next_actions = self.available_actions()
        if next_state is None or not next_actions:
            # Terminal transition (or full board): nothing to bootstrap from.
            # Without this guard max() raised ValueError on every tie.
            next_q_max = 0
        else:
            next_q_max = max(self.get_q_value(next_state, a) for a in next_actions)
        new_q_value = old_q_value + self.alpha * (reward + self.gamma * next_q_max - old_q_value)
        self.q_table[(state, action)] = new_q_value

    def select_action(self, state):
        """Pick a column epsilon-greedily among the available ones.

        Greedy ties resolve to the highest column index because the
        ``(q_value, action)`` tuples are compared lexicographically.
        """
        if np.random.uniform(0, 1) < self.epsilon:
            return np.random.choice(self.available_actions())
        else:
            return max([(self.get_q_value(state, action), action) for action in self.available_actions()])[1]

    def state_to_string(self):
        """Serialize the board into the string used as the Q-table state key."""
        return ''.join(map(str, self.board.flatten()))

    def _board_full(self):
        """Return True when no empty cell is left on the board."""
        return not np.any(self.board == 0)

    def _find_vertical_threat(self, piece):
        """Return the first column where ``piece`` has three stacked cells with an empty cell above, else None."""
        for col in range(7):
            for row in range(3):
                if (self.board[row][col] == piece
                        and self.board[row + 1][col] == piece
                        and self.board[row + 2][col] == piece
                        and self.board[row + 3][col] == 0):
                    return col
        return None

    def _read_human_column(self):
        """Prompt the human for a column; return it, or None if the entry is unusable."""
        raw = input("It's your turn, select a column (0-6): ")
        try:
            col = int(raw)
        except ValueError:
            return None
        # Range check first: negative indices would silently wrap around.
        if 0 <= col < 7 and self.is_valid_location(col):
            return col
        return None

    def _agent_turn(self):
        """Play one move for player 1 and return ``(state, action, reward)``.

        In this version only terminal outcomes update the Q-table here;
        an ordinary move just hands the turn to player 2.
        """
        state = self.state_to_string()
        action = self.select_action(state)
        print("Player {} selects column {}".format(self.player, action))
        self.drop_piece(action)

        reward = 0
        if self.winning_move(self.player):
            print("Player {} wins!".format(self.player))
            reward = 10
            self.rewards.append(reward)
            self.update_q_value(state, action, reward, None)
            self.game_over = True
        elif self._board_full():
            print("It's a tie!")
            self.rewards.append(reward)
            self.update_q_value(state, action, reward, None)
            self.game_over = True
        else:
            self.player = 2  # Switch to player 2
        return state, action, reward

    def _human_turn(self, agent_state, agent_action):
        """Play one move for player 2 and return the reward seen by the agent.

        If player 1 has three stacked pieces with an empty cell on top, that
        column is played automatically for player 2 and the game continues.
        Q-updates are credited to the agent's most recent
        ``(agent_state, agent_action)`` pair, not to the human's column.
        """
        block_col = self._find_vertical_threat(1)
        if block_col is not None:
            print("Blocking player 1's potential win by marking in column", block_col)
            col = block_col
            # NOTE(review): the agent is rewarded when its own threat gets
            # blocked; this looks inverted but is kept as written -- confirm.
            reward = 5  # Reward for blocking
        else:
            col = self._read_human_column()
            if col is None:
                print("Invalid move! Please select another column.")
                return 0
            reward = 0
        self.drop_piece(col)

        if self.winning_move(self.player):
            print("Player 2 wins!")
            reward = -10
            self.penalties.append(reward)
            self.update_q_value(agent_state, agent_action, reward, None)
            self.game_over = True
        elif self._board_full():
            print("It's a tie!")
            reward = 0
            self.rewards.append(reward)
            self.update_q_value(agent_state, agent_action, reward, None)
            self.game_over = True
        else:
            # A block no longer ends the game: play simply returns to player 1.
            self.player = 1
            if block_col is not None:
                self.update_q_value(agent_state, agent_action, reward, self.state_to_string())
        return reward

    def play(self):
        """Run one interactive game until a win or a tie, then print the reward summary."""
        self.print_board()  # Print the initial board
        agent_state = agent_action = None
        while not self.game_over:
            if self.player == 1:
                agent_state, agent_action, reward = self._agent_turn()
            else:
                reward = self._human_turn(agent_state, agent_action)

            self.print_board()
            print("Reward:", reward)

        self.print_board()
        print("Rewards:", self.rewards)
        print("Penalties:", self.penalties)
        cumulative_reward = sum(self.rewards) + sum(self.penalties)
        print("Cumulative Reward:", cumulative_reward)


if __name__ == "__main__":
    # Play one interactive game; the agent moves first.
    greeting = "Hey there! Let's play Connect4 🥳\n\nHere Player 1 is an agent and you are Player 2"
    game = Connect4()
    print(greeting)
    game.play()


Hey there! Let's play Connect4 🥳

Here Player 1 is an agent and you are Player 2
[[0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]]


Player 1 selects column 6
[[0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1.]]


Reward: 0
It's your turn, select a column (0-6): 1
[[0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 2. 0. 0. 0. 0. 1.]]


Reward: 0
Player 1 selects column 6
[[0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 2. 0. 0. 0. 0. 1.]]


Reward: 0
It's your turn, select a column (0-6): 1
[[0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 2. 0. 0. 0. 0. 1.]
 [0. 2. 0. 0. 0. 0. 1.]]


Reward: 0
Player 1 s

In [22]:
import numpy as np

class Connect4:
    """Connect 4 on a 6x7 board: a tabular Q-learning agent against a human.

    Player 1 is the agent (epsilon-greedy over ``q_table``); player 2 is the
    human, who types a column index on stdin.  Board cells hold 0 (empty),
    1 (agent) or 2 (human); row 0 is the bottom row.
    """

    ROWS = 6
    COLS = 7

    def __init__(self, epsilon=0.1, alpha=0.2, gamma=0.9):
        """Create an empty board and an empty Q-table.

        Args:
            epsilon: Probability of picking a random column (exploration).
            alpha: Learning rate used in the Q-value update.
            gamma: Discount factor applied to the best next-state Q-value.
        """
        self.board = np.zeros((self.ROWS, self.COLS))  # 6x7 board
        self.player = 1  # Player 1 starts
        self.game_over = False
        self.epsilon = epsilon  # Exploration rate
        self.alpha = alpha  # Learning rate
        self.gamma = gamma  # Discount factor
        self.q_table = dict()  # maps (state string, column) -> Q-value
        self.rewards = []
        self.penalties = []
        self.cumulative_rewards = []

    def print_board(self):
        """Print the board with row 0 at the bottom, followed by a blank gap."""
        print(np.flip(self.board, 0))  # Flipping the board for better visualization
        print('\n')

    def drop_piece(self, col):
        """Place the current player's piece in the lowest empty cell of ``col``.

        Does nothing when the column is already full.
        """
        for row in range(self.ROWS):
            if self.board[row][col] == 0:
                self.board[row][col] = self.player
                break

    def is_valid_location(self, col):
        """Return True when ``col`` is on the board and still has room.

        The range check matters for human input: without it a column of 7
        raises IndexError and -1 silently wraps around to the last column.
        """
        return bool(0 <= col < self.COLS and self.board[self.ROWS - 1][col] == 0)

    def winning_move(self, piece):
        """Return True if ``piece`` has four in a row anywhere, else False."""
        # Right, up, up-right and down-right cover every line exactly once.
        directions = ((0, 1), (1, 0), (1, 1), (-1, 1))
        for r in range(self.ROWS):
            for c in range(self.COLS):
                if self.board[r][c] != piece:
                    continue
                for dr, dc in directions:
                    end_r, end_c = r + 3 * dr, c + 3 * dc
                    if not (0 <= end_r < self.ROWS and 0 <= end_c < self.COLS):
                        continue
                    if all(self.board[r + i * dr][c + i * dc] == piece for i in range(1, 4)):
                        return True
        return False

    def available_actions(self):
        """Return the list of columns that can still accept a piece."""
        return [col for col in range(self.COLS) if self.is_valid_location(col)]

    def get_q_value(self, state, action):
        """Return the stored Q-value for ``(state, action)``, defaulting to 0."""
        return self.q_table.get((state, action), 0)

    def update_q_value(self, state, action, reward, next_state):
        """Apply one Q-learning update to ``(state, action)``.

        Args:
            state: State string the action was taken in.
            action: Column that was played.
            reward: Immediate reward for the move.
            next_state: State string after the move, or None when the game
                ended (terminal transition, so nothing is bootstrapped).
        """
        old_q_value = self.get_q_value(state, action)
        if next_state is None:
            # Terminal transition: there is no future value.  This also avoids
            # max() over an empty list when the board is full after a tie.
            next_q_max = 0
        else:
            next_q_max = max(
                (self.get_q_value(next_state, next_action) for next_action in self.available_actions()),
                default=0,
            )
        new_q_value = old_q_value + self.alpha * (reward + self.gamma * next_q_max - old_q_value)
        self.q_table[(state, action)] = new_q_value

    def select_action(self, state):
        """Pick a column epsilon-greedily among the available ones.

        Greedy ties are resolved by ``max`` over ``(q_value, column)`` tuples,
        i.e. in favour of the highest column index.
        """
        if np.random.uniform(0, 1) < self.epsilon:
            return np.random.choice(self.available_actions())
        else:
            return max([(self.get_q_value(state, action), action) for action in self.available_actions()])[1]

    def state_to_string(self):
        """Serialize the board into a string usable as a Q-table key."""
        return ''.join(map(str, self.board.flatten()))

    def _would_win(self, col, piece):
        """Return True if dropping ``piece`` into ``col`` right now wins."""
        for row in range(self.ROWS):
            if self.board[row][col] == 0:
                self.board[row][col] = piece
                try:
                    return self.winning_move(piece)
                finally:
                    # Always undo the trial placement.
                    self.board[row][col] = 0
        return False

    def _opponent_winning_column(self):
        """Return a column where player 2 could win next move, or None."""
        for col in self.available_actions():
            if self._would_win(col, 2):
                return col
        return None

    def _board_full(self):
        """Return True when no empty cell is left on the board."""
        return np.count_nonzero(self.board == 0) == 0

    def play(self):
        """Run one interactive game until a win or a tie, then print totals.

        On its turn the agent picks a column with ``select_action``; if
        player 2 threatens an immediate win and the agent's own choice would
        not win on the spot, the agent plays the threatened column instead
        and earns a blocking reward of 5.  A win is worth 10, a loss -10
        (recorded in ``penalties``) and a tie 0.  Terminal results reached on
        the human's move are credited to the agent's last (state, action).
        """
        self.print_board()  # Print the initial board
        # The agent's most recent move, kept separately so the human's input
        # can never overwrite the key used for the final Q-update.
        agent_state = None
        agent_action = None

        while not self.game_over:
            reward = 0  # Initialize reward
            if self.player == 1:
                state = self.state_to_string()
                action = self.select_action(state)

                # Block player 2's immediate win unless our move wins first.
                threat_col = self._opponent_winning_column()
                blocking = threat_col is not None and not self._would_win(action, self.player)
                if blocking:
                    action = threat_col
                    print("Blocking player 2's potential win by marking in column", action)

                print("Player {} selects column {}".format(self.player, action))
                if self.is_valid_location(action):
                    self.drop_piece(action)
                    agent_state, agent_action = state, action

                    if self.winning_move(self.player):
                        print("Player {} wins!".format(self.player))
                        reward = 10
                        self.rewards.append(reward)
                        self.update_q_value(state, action, reward, None)
                        self.game_over = True
                    elif self._board_full():
                        print("It's a tie!")
                        reward = 0
                        self.rewards.append(reward)
                        self.update_q_value(state, action, reward, None)
                        self.game_over = True
                    else:
                        if blocking:
                            reward = 5  # Reward for blocking
                            self.rewards.append(reward)
                        self.player = 2  # Switch to player 2
                        next_state = self.state_to_string()
                        self.update_q_value(state, action, reward, next_state)
                else:
                    print("Invalid move! Please select another column.")
            else:
                raw = input("It's your turn, select a column (0-6): ")
                try:
                    human_col = int(raw)
                except ValueError:
                    human_col = None  # non-numeric input counts as an invalid move

                if human_col is not None and self.is_valid_location(human_col):
                    self.drop_piece(human_col)

                    if self.winning_move(self.player):
                        print("Player 2 wins!")
                        reward = -10
                        self.penalties.append(reward)
                        if agent_state is not None:
                            self.update_q_value(agent_state, agent_action, reward, None)
                        self.game_over = True
                    elif self._board_full():
                        print("It's a tie!")
                        reward = 0
                        self.rewards.append(reward)
                        if agent_state is not None:
                            self.update_q_value(agent_state, agent_action, reward, None)
                        self.game_over = True
                    else:
                        self.player = 1
                else:
                    print("Invalid move! Please select another column.")

            self.print_board()
            print("Reward:", reward)

        self.print_board()
        print("Rewards:", self.rewards)
        print("Penalties:", self.penalties)
        cumulative_reward = sum(self.rewards) + sum(self.penalties)
        print("Cumulative Reward:", cumulative_reward)


if __name__ == "__main__":
    # Script entry point: greet the human opponent first, then build the game
    # and hand control to its interactive loop.
    print("Hey there! Let's play Connect4 🥳\n\nHere Player 1 is an agent and you are Player 2")
    game = Connect4()
    game.play()


Hey there! Let's play Connect4 🥳

Here Player 1 is an agent and you are Player 2
[[0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]]


Player 1 selects column 6
[[0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1.]]


Reward: 0
It's your turn, select a column (0-6): 1
[[0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 2. 0. 0. 0. 0. 1.]]


Reward: 0
Player 1 selects column 6
[[0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 2. 0. 0. 0. 0. 1.]]


Reward: 0
It's your turn, select a column (0-6): 1
[[0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 2. 0. 0. 0. 0. 1.]
 [0. 2. 0. 0. 0. 0. 1.]]


Reward: 0
Player 1 s