In [None]:
import tensorflow as tf
import numpy as np
import random
import matplotlib.pyplot as plt

class Connect4Env:
    """Minimal two-player Connect Four environment backed by a NumPy board.

    The board has 6 rows and 7 columns. A cell holds 0 while empty and
    otherwise the ``player`` value that was passed to :meth:`step`. Row 0 is
    the top of the board: a dropped piece lands on the highest row index that
    is still empty in the chosen column.
    """

    def __init__(self):
        """Create the environment and start from an empty board."""
        # Board dimensions; the win checks in evaluate() derive their ranges
        # from these two values.
        self.rows, self.cols = 6, 7
        self.reset()

    def reset(self):
        """Clear the board and the episode state.

        Returns:
            A copy of the (empty) board.
        """
        self.board = np.zeros((self.rows, self.cols), dtype=np.int8)
        # True once step() has reported the end of the episode.
        self.done = False
        # Marker of the player who connected four; stays None for draws and
        # for episodes ended by an illegal move.
        self.winner = None
        return self.board.copy()

    def step(self, player, col):
        """Drop a piece for ``player`` into column ``col``.

        Args:
            player: Non-zero marker written into the board cell.
            col: Column index, expected in ``[0, self.cols)``.

        Returns:
            A ``(board_copy, reward, done)`` tuple. ``reward`` is 1 when the
            move connects four, 0 otherwise. An illegal move (column full or
            outside the board) leaves the board untouched and returns
            ``(board_copy, -1, True)``.
        """
        # The explicit range check stops a negative index from silently
        # wrapping around to a column on the other side of the board.
        if 0 <= col < self.cols:
            # Scan from the bottom row upwards for the first free cell.
            for row in range(self.rows - 1, -1, -1):
                if self.board[row, col] == 0:
                    self.board[row, col] = player
                    reward, done = self.evaluate(player)
                    # Keep the episode attributes in sync with the result
                    # handed back to the caller.
                    self.done = done
                    if reward == 1:
                        self.winner = player
                    return self.board.copy(), reward, done

        # Illegal move: the episode ends with a penalty and no winner is set.
        self.done = True
        return self.board.copy(), -1, True

    def get_valid_actions(self):
        """Return the column indices whose top cell is still empty."""
        return [c for c in range(self.cols) if self.board[0, c] == 0]

    def evaluate(self, player):
        """Check whether ``player`` has four in a row or the board is full.

        Args:
            player: Marker to look for on the board.

        Returns:
            ``(1, True)`` if ``player`` has four aligned pieces,
            ``(0, True)`` if there is no such line and no column is playable,
            ``(0, False)`` otherwise.
        """
        # Boolean mask of the cells owned by ``player``, computed once and
        # reused by every direction check below.
        owned = self.board == player

        # Horizontal windows.
        for r in range(self.rows):
            for c in range(self.cols - 3):
                if owned[r, c:c + 4].all():
                    return 1, True

        # Vertical windows.
        for r in range(self.rows - 3):
            for c in range(self.cols):
                if owned[r:r + 4, c].all():
                    return 1, True

        # Both diagonals of every 4x4 window whose top-left corner is (r, c).
        for r in range(self.rows - 3):
            for c in range(self.cols - 3):
                if all(owned[r + i, c + i] for i in range(4)):
                    return 1, True
                if all(owned[r + 3 - i, c + i] for i in range(4)):
                    return 1, True

        # No line found: the game is a draw once every column is full.
        if not self.get_valid_actions():
            return 0, True
        return 0, False

    def render(self):
        """Print the raw board array."""
        print(self.board)
