In [1]:
# ConnectX environment was defined in v0.1.6
!pip install 'kaggle-environments>=0.1.6'


from kaggle_environments import evaluate, make, utils

env = make("connectx", debug=True)


[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip
ERROR: Invalid requirement: "'kaggle-environments": Expected package name at the start of dependency specifier
    'kaggle-environments
    ^
  from .autonotebook import tqdm as notebook_tqdm


termcolor not installed, skipping dependency


In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [3]:
import torch
print(torch.cuda.device_count())
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

0


In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque
from tqdm import tqdm
from kaggle_environments import make, evaluate
import matplotlib.pyplot as plt



##########################
### ADDED NEW
##########################

def init_weights(m):
    if isinstance(m, nn.Linear):
        # Initialize weights with a small random value
        nn.init.xavier_uniform_(m.weight)  # Xavier initialization for better stability
        if m.bias is not None:
            nn.init.constant_(m.bias, 0.01)  # Small constant value for bias


##########################
### ADDED NEW
##########################


class Actor(nn.Module):
    def __init__(self, input_shape, output_shape):
        super(Actor, self).__init__()
        self.fc1 = nn.Linear(input_shape, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, output_shape)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))


        # Logits from fc3
        logits = self.fc3(x)

        # Clip logits for numerical stability
        logits = torch.clamp(logits, min=-10, max=10)  # Prevent overflow

        # Compute log probabilities
        log_probs = torch.log_softmax(logits, dim=-1)
        return torch.exp(log_probs)  # Convert back to probabilities
        #return torch.softmax(self.fc3(x), dim=-1)

class Critic(nn.Module):
    def __init__(self, input_shape):
        super(Critic, self).__init__()
        self.fc1 = nn.Linear(input_shape, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

class ActorCriticAgent:
    def __init__(self, input_shape, output_shape, actor_lr=0.0005, critic_lr=0.001, gamma=0.99):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        self.actor = Actor(input_shape, output_shape).to(self.device)
        self.critic = Critic(input_shape).to(self.device)

        self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=actor_lr)
        self.critic_optimizer = optim.Adam(self.critic.parameters(), lr=critic_lr)

        self.gamma = gamma

    def get_action(self, state, valid_moves, board, mark, opponent_mark, rows=6, cols=7):
        # Check for own winning move
        for move in valid_moves:
            temp_board = board.copy()
            for r in range(rows - 1, -1, -1):
                if temp_board[r * cols + move] == 0:
                    temp_board[r * cols + move] = mark
                    if check_win(temp_board, mark, rows, cols):
                        return move
                    break

        # Check for blocking opponent's winning move
        for move in valid_moves:
            temp_board = board.copy()
            for r in range(rows - 1, -1, -1):
                if temp_board[r * cols + move] == 0:
                    temp_board[r * cols + move] = opponent_mark
                    if check_win(temp_board, opponent_mark, rows, cols):
                        return move
                    break

        # Use heuristic to prioritize moves
        best_move = None
        best_score = -float('inf')
        for move in valid_moves:
            temp_board = board.copy()
            for r in range(rows - 1, -1, -1):
                if temp_board[r * cols + move] == 0:
                    temp_board[r * cols + move] = mark
                    score = evaluate_heuristic(temp_board, mark, opponent_mark, rows, cols)
                    if score > best_score:
                        best_score = score
                        best_move = move
                    break

        return best_move if best_move is not None else np.random.choice(valid_moves)

    def train(self, state, action, reward, next_state, done):
        state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)
        next_state_tensor = torch.FloatTensor(next_state).unsqueeze(0).to(self.device)
        reward = 0.0 if reward is None else reward  # Handle None reward
        reward_tensor = torch.FloatTensor([reward]).to(self.device)
        action_tensor = torch.LongTensor([action]).to(self.device)

        # Critic update
        current_value = self.critic(state_tensor)
        next_value = self.critic(next_state_tensor) if not done else torch.tensor(0.0).to(self.device)
        target_value = reward_tensor + self.gamma * next_value

        critic_loss = nn.MSELoss()(current_value.squeeze(), target_value.detach())
        self.critic_optimizer.zero_grad()
        critic_loss.backward()

        torch.nn.utils.clip_grad_norm_(self.critic.parameters(), max_norm=1.0)


        self.critic_optimizer.step()

        # Actor update
        advantage = (target_value - current_value).detach()
        action_probabilities = self.actor(state_tensor)

        if torch.isnan(action_probabilities).any():
            print("NaN detected in action probabilities:", action_probabilities)


        log_prob = torch.log(action_probabilities.squeeze(0)[action_tensor])
        actor_loss = -(log_prob * advantage)

        self.actor_optimizer.zero_grad()
        actor_loss.backward()


        torch.nn.utils.clip_grad_norm_(self.actor.parameters(), max_norm=1.0)


        self.actor_optimizer.step()

def preprocess_state(board, mark):
    board_array = np.array(board).flatten()
    state = np.concatenate([board_array, [mark]])
    return state

def check_win(board, mark, rows=6, cols=7, in_a_row=4):
    board = np.array(board).reshape(rows, cols)
    for r in range(rows):
        for c in range(cols):
            if c + in_a_row <= cols and all(board[r, c:c + in_a_row] == mark):
                return True
            if r + in_a_row <= rows and all(board[r:r + in_a_row, c] == mark):
                return True
            if r + in_a_row <= rows and c + in_a_row <= cols and all(
                [board[r + i, c + i] == mark for i in range(in_a_row)]
            ):
                return True
            if r + in_a_row <= rows and c - in_a_row >= -1 and all(
                [board[r + i, c - i] == mark for i in range(in_a_row)]
            ):
                return True
    return False

def evaluate_heuristic(board, mark, opponent_mark, rows=6, cols=7, in_a_row=4):
    """
    Evaluate a heuristic score for the current board state.
    Prioritizes creating multiple threats and securing critical positions.
    """
    board = np.array(board).reshape(rows, cols)  # Ensure board is 2D
    score = 0

    # Center column control
    center_col = cols // 2
    center_count = sum([board[r, center_col] == mark for r in range(rows)])
    score += center_count * 3  # Higher weight for center control

    # Evaluate streaks for the agent
    for r in range(rows):
        for c in range(cols):
            if c + in_a_row <= cols:
                score += evaluate_streak(board[r, c:c + in_a_row], mark, opponent_mark)
            if r + in_a_row <= rows:
                score += evaluate_streak([board[r + i, c] for i in range(in_a_row)], mark, opponent_mark)
            if r + in_a_row <= rows and c + in_a_row <= cols:
                score += evaluate_streak([board[r + i, c + i] for i in range(in_a_row)], mark, opponent_mark)
            if r + in_a_row <= rows and c - in_a_row >= -1:
                score += evaluate_streak([board[r + i, c - i] for i in range(in_a_row)], mark, opponent_mark)

    return score

def evaluate_streak(streak, mark, opponent_mark):
    """
    Assign scores based on the composition of the streak.
    """
    score = 0
    streak = list(streak)  # Ensure compatibility with list operations
    if streak.count(mark) == 3 and streak.count(0) == 1:
        score += 50  # Strong threat
    elif streak.count(mark) == 2 and streak.count(0) == 2:
        score += 10  # Potential threat
    if streak.count(opponent_mark) == 3 and streak.count(0) == 1:
        score -= 100  # Strong block needed
    return score

def calculate_penalized_reward(board, mark, opponent_mark, rows=6, cols=7, max_in_a_row=4):
    reward = 0
    ai_max_streak = 0
    opponent_max_streak = 0

    for r in range(rows):
        for c in range(cols):
            # Check AI streaks
            if c + max_in_a_row <= cols:
                ai_streak = sum([board[r, c + i] == mark for i in range(max_in_a_row)])
                ai_max_streak = max(ai_max_streak, ai_streak)
                reward += 2 ** ai_streak  # Exponential reward for AI streaks
            if r + max_in_a_row <= rows:
                ai_streak = sum([board[r + i, c] == mark for i in range(max_in_a_row)])
                ai_max_streak = max(ai_max_streak, ai_streak)
                reward += 2 ** ai_streak
            if r + max_in_a_row <= rows and c + max_in_a_row <= cols:
                ai_streak = sum([board[r + i, c + i] == mark for i in range(max_in_a_row)])
                ai_max_streak = max(ai_max_streak, ai_streak)
                reward += 2 ** ai_streak
            if r + max_in_a_row <= rows and c - max_in_a_row >= -1:
                ai_streak = sum([board[r + i, c - i] == mark for i in range(max_in_a_row)])
                ai_max_streak = max(ai_max_streak, ai_streak)
                reward += 2 ** ai_streak

            # Check opponent streaks
            if c + max_in_a_row <= cols:
                opponent_streak = sum([board[r, c + i] == opponent_mark for i in range(max_in_a_row)])
                opponent_max_streak = max(opponent_max_streak, opponent_streak)
            if r + max_in_a_row <= rows:
                opponent_streak = sum([board[r + i, c] == opponent_mark for i in range(max_in_a_row)])
                opponent_max_streak = max(opponent_max_streak, opponent_streak)
            if r + max_in_a_row <= rows and c + max_in_a_row <= cols:
                opponent_streak = sum([board[r + i, c + i] == opponent_mark for i in range(max_in_a_row)])
                opponent_max_streak = max(opponent_max_streak, opponent_streak)
            if r + max_in_a_row <= rows and c - max_in_a_row >= -1:
                opponent_streak = sum([board[r + i, c - i] == opponent_mark for i in range(max_in_a_row)])
                opponent_max_streak = max(opponent_max_streak, opponent_streak)

    # Penalize the reward if the opponent's streak is longer
    if opponent_max_streak > ai_max_streak:
        reward -= 10 * (2 ** opponent_max_streak)  # Exponential penalty for opponent streaks

    return reward


In [None]:
def train_actor_critic(num_episodes=5000, board_size=(6, 7)):
    env = make("connectx", debug=True)
    env.reset(num_agents=2)

    input_shape = board_size[0] * board_size[1] + 1
    output_shape = board_size[1]

    agent1 = ActorCriticAgent(input_shape, output_shape)
    agent2 = ActorCriticAgent(input_shape, output_shape)


    ############################
    ############ ADDED NEW #####
    ############################

    agent1.actor.apply(init_weights)  # Initialize actor network for agent1
    agent1.critic.apply(init_weights)  # Initialize critic network for agent1

    agent2.actor.apply(init_weights)  # Initialize actor network for agent2
    agent2.critic.apply(init_weights)  # Initialize critic network for agent2

    # 5. Verify weights are initialized
    for name, param in agent1.actor.named_parameters():
        print(f"Layer: {name}, Mean: {param.mean().item()}, Std: {param.std().item()}")

    for name, param in agent2.actor.named_parameters():
        print(f"Layer: {name}, Mean: {param.mean().item()}, Std: {param.std().item()}")

    for name, param in agent1.critic.named_parameters():
        print(f"Layer: {name}, Mean: {param.mean().item()}, Std: {param.std().item()}")

    for name, param in agent2.critic.named_parameters():
        print(f"Layer: {name}, Mean: {param.mean().item()}, Std: {param.std().item()}")



    return_list_agent1 = []
    return_list_agent2 = []

    wins_agent1 = 0
    wins_agent2 = 0

    for episode in tqdm(range(num_episodes)):
        env.reset()
        state1 = preprocess_state(env.state[0]['observation']['board'], env.state[0]['observation']['mark'])
        state2 = preprocess_state(env.state[0]['observation']['board'], env.state[0]['observation']['mark'])
        done = False
        total_reward_agent1 = 0
        total_reward_agent2 = 0

        while not done:
            board1 = np.array(env.state[0]['observation']['board']).reshape(board_size[0], board_size[1])
            valid_moves1 = [col for col in range(board_size[1]) if board1[0, col] == 0]
            board2 = np.array(env.state[0]['observation']['board']).reshape(board_size[0], board_size[1])
            valid_moves2 = [col for col in range(board_size[1]) if board2[0, col] == 0]

            if not valid_moves1 or not valid_moves2:
                break

            action1 = agent1.get_action(state1, valid_moves1, board1.flatten(), 1, 2)
            if action1 not in valid_moves1:
                action1 = random.choice(valid_moves1)

            action2 = agent2.get_action(state2, valid_moves2, board2.flatten(), 2, 1)
            if action2 not in valid_moves2:
                action2 = random.choice(valid_moves2)

            if env.done:
                break
            next_state_info = env.step([action1, action2])
            next_state1 = preprocess_state(env.state[0]['observation']['board'], env.state[0]['observation']['mark'])
            next_state2 = preprocess_state(env.state[0]['observation']['board'], env.state[0]['observation']['mark'])

            incremental_reward1 = calculate_penalized_reward(board1, 1, 2, board_size[0], board_size[1])
            incremental_reward2 = calculate_penalized_reward(board2, 2, 1, board_size[0], board_size[1])

            reward1 = env.state[0]['reward']
            reward2 = env.state[1]['reward']
            reward1 = (reward1 if reward1 is not None else 0) + incremental_reward1
            reward2 = (reward2 if reward2 is not None else 0) + incremental_reward2

            done = env.state[0]['status'] == 'DONE'

            agent1.train(state1, action1, reward1, next_state1, done)
            agent2.train(state2, action2, reward2, next_state2, done)

            state1 = next_state1
            state2 = next_state2
            total_reward_agent1 += reward1
            total_reward_agent2 += reward2

        if total_reward_agent1 > total_reward_agent2:
            wins_agent1 += 1
        elif total_reward_agent2 > total_reward_agent1:
            wins_agent2 += 1

        return_list_agent1.append(total_reward_agent1)
        return_list_agent2.append(total_reward_agent2)

    print(f"Agent 1 Wins: {wins_agent1}")
    print(f"Agent 2 Wins: {wins_agent2}")

    return agent1, agent2, return_list_agent1, return_list_agent2

In [6]:
agent1, agent2, return_list_agent1, return_list_agent2 = train_actor_critic(num_episodes=50000)

  print(f"Layer: {name}, Mean: {param.mean().item()}, Std: {param.std().item()}")
  print(f"Layer: {name}, Mean: {param.mean().item()}, Std: {param.std().item()}")


Layer: fc1.weight, Mean: -0.0002896815713029355, Std: 0.0819973424077034
Layer: fc1.bias, Mean: 0.009999998845160007, Std: 9.331468930895426e-10
Layer: fc2.weight, Mean: -0.0003912851680070162, Std: 0.07191851735115051
Layer: fc2.bias, Mean: 0.009999998845160007, Std: 9.349819807269455e-10
Layer: fc3.weight, Mean: -0.0015866601606830955, Std: 0.12265045195817947
Layer: fc3.bias, Mean: 0.009999998845160007, Std: 1.0059433286357944e-09
Layer: fc1.weight, Mean: -0.0003934725245926529, Std: 0.08171959966421127
Layer: fc1.bias, Mean: 0.009999998845160007, Std: 9.331468930895426e-10
Layer: fc2.weight, Mean: -0.00028920979821123183, Std: 0.07223979383707047
Layer: fc2.bias, Mean: 0.009999998845160007, Std: 9.349819807269455e-10
Layer: fc3.weight, Mean: -0.005457300692796707, Std: 0.12463629245758057
Layer: fc3.bias, Mean: 0.009999998845160007, Std: 1.0059433286357944e-09
Layer: fc1.weight, Mean: -0.0003388151526451111, Std: 0.08176856487989426
Layer: fc1.bias, Mean: 0.009999998845160007, Std:

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
100%|██████████| 50000/50000 [1:49:41<00:00,  7.60it/s]  


Agent 1 Wins: 50000
Agent 2 Wins: 0


In [8]:
def my_agent(obs, config):
    board = np.array(obs.board)
    valid_moves = [col for col in range(config.columns) if board[col] == 0]
    state = preprocess_state(obs.board, obs.mark)
    action = agent1.get_action(state, valid_moves, board, obs.mark, 3 - obs.mark, config.rows, config.columns)
    return action

In [9]:
import torch

# Function to extract weights and biases from a model
def extract_weights(model):
    weights = {}
    for name, param in model.state_dict().items():
        weights[name] = param.cpu().numpy().tolist()  # Convert to plain Python lists
    return weights

# Extract weights and biases from trained models
actor_weights = extract_weights(agent1.actor)
critic_weights = extract_weights(agent1.critic)

# Save weights to variables as strings for embedding in submission.py
import json
actor_weights_str = json.dumps(actor_weights)
critic_weights_str = json.dumps(critic_weights)



In [10]:
import math
for key, value in actor_weights.items():
    if isinstance(value, float) and math.isnan(value):
        print("NaN detected in action probabilities:", key)

    # if math.isnan(critic_weights).any():
    #     print("NaN detected in action probabilities:", critic_weights)

In [12]:
def write_agent_to_file(filename="submission.py", actor_weights_str="", critic_weights_str=""):
    with open(filename, "w") as f:
        f.write("""
import torch
import torch.nn as nn
import numpy as np

class Actor(nn.Module):
    def __init__(self, input_shape, output_shape):
        super(Actor, self).__init__()
        self.fc1 = nn.Linear(input_shape, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, output_shape)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return torch.softmax(self.fc3(x), dim=-1)
        
class Critic(nn.Module):
    def __init__(self, input_shape):
        super(Critic, self).__init__()
        self.fc1 = nn.Linear(input_shape, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

def load_weights_into_model(model, weights):
    state_dict = {k: torch.tensor(v) for k, v in weights.items()}
    model.load_state_dict(state_dict)

def preprocess_state(board, mark):
    board_array = np.array(board).flatten()
    return np.concatenate([board_array, [mark]])
    
def check_win(board, mark, rows=6, cols=7, in_a_row=4):
    board = np.array(board).reshape(rows, cols)
    for r in range(rows):
        for c in range(cols):
            if c + in_a_row <= cols and all(board[r, c:c + in_a_row] == mark):
                return True
            if r + in_a_row <= rows and all(board[r:r + in_a_row, c] == mark):
                return True
            if r + in_a_row <= rows and c + in_a_row <= cols and all(
                [board[r + i, c + i] == mark for i in range(in_a_row)]
            ):
                return True
            if r + in_a_row <= rows and c - in_a_row >= -1 and all(
                [board[r + i, c - i] == mark for i in range(in_a_row)]
            ):
                return True
    return False

def evaluate_heuristic(board, mark, opponent_mark, rows=6, cols=7, in_a_row=4):
    
    board = np.array(board).reshape(rows, cols)  # Ensure board is 2D
    score = 0

    # Center column control
    center_col = cols // 2
    center_count = sum([board[r, center_col] == mark for r in range(rows)])
    score += center_count * 3  # Higher weight for center control

    # Evaluate streaks for the agent
    for r in range(rows):
        for c in range(cols):
            if c + in_a_row <= cols:
                score += evaluate_streak(board[r, c:c + in_a_row], mark, opponent_mark)
            if r + in_a_row <= rows:
                score += evaluate_streak([board[r + i, c] for i in range(in_a_row)], mark, opponent_mark)
            if r + in_a_row <= rows and c + in_a_row <= cols:
                score += evaluate_streak([board[r + i, c + i] for i in range(in_a_row)], mark, opponent_mark)
            if r + in_a_row <= rows and c - in_a_row >= -1:
                score += evaluate_streak([board[r + i, c - i] for i in range(in_a_row)], mark, opponent_mark)

    return score

def evaluate_streak(streak, mark, opponent_mark):
   
    score = 0
    streak = list(streak)  # Ensure compatibility with list operations
    if streak.count(mark) == 3 and streak.count(0) == 1:
        score += 50  # Strong threat
    elif streak.count(mark) == 2 and streak.count(0) == 2:
        score += 10  # Potential threat
    if streak.count(opponent_mark) == 3 and streak.count(0) == 1:
        score -= 100  # Strong block needed
    return score

def get_action( state, valid_moves, board, mark, opponent_mark, rows=6, cols=7):
    # Check for own winning move
    for move in valid_moves:
        temp_board = board.copy()
        for r in range(rows - 1, -1, -1):
            if temp_board[r * cols + move] == 0:
                temp_board[r * cols + move] = mark
                if check_win(temp_board, mark, rows, cols):
                    return move
                break

    # Check for blocking opponent's winning move
    for move in valid_moves:
        temp_board = board.copy()
        for r in range(rows - 1, -1, -1):
            if temp_board[r * cols + move] == 0:
                temp_board[r * cols + move] = opponent_mark
                if check_win(temp_board, opponent_mark, rows, cols):
                    return move
                break

    # Use heuristic to prioritize moves
    best_move = None
    best_score = -float('inf')
    for move in valid_moves:
        temp_board = board.copy()
        for r in range(rows - 1, -1, -1):
            if temp_board[r * cols + move] == 0:
                temp_board[r * cols + move] = mark
                score = evaluate_heuristic(temp_board, mark, opponent_mark, rows, cols)
                if score > best_score:
                    best_score = score
                    best_move = move
                break

    return best_move if best_move is not None else np.random.choice(valid_moves)

# Actor weights and biases
actor_weights1 = """ + actor_weights_str + """
critic_weights1 = """ + critic_weights_str + """

def my_agent(obs, config, actor_weights = actor_weights1, critic_weights = critic_weights1):
    # Initialize and load the Actor model
    actor = Actor(input_shape=43, output_shape=config.columns)
    load_weights_into_model(actor, actor_weights)
    actor.eval()

    critic = Critic(input_shape=43)
    load_weights_into_model(critic, critic_weights)
    critic.eval()
    
    # Prepare state and valid moves
    board = np.array(obs.board)
    valid_moves = [col for col in range(config.columns) if board[col] == 0]
    state = preprocess_state(obs.board, obs.mark)

    
    
    action = get_action(state, valid_moves, board.flatten(), obs.mark, 3-obs.mark, config.rows, config.columns)
    # Mask invalid moves
    if action not in valid_moves:
        action = random.choice(valid_moves)
    return int(action)
        """)
    print(f"Submission file {filename} created successfully!")
write_agent_to_file(filename="submission.py", actor_weights_str=actor_weights_str, critic_weights_str=critic_weights_str)


Submission file submission.py created successfully!


In [22]:
from kaggle_environments import make
import sys

# Import the agent directly
from submission import my_agent

# Run the environment with the agent
env = make("connectx", debug=True)
env.run([my_agent, "negamax"])

print("Success!" if env.state[0].status == env.state[1].status == "DONE" else "Failed...")

# Display the game
env.render(mode="ipython")

Success!


In [19]:
import importlib
import submission
importlib.reload(submission)
from submission import my_agent

In [20]:
def mean_reward(rewards):
    return sum(r[0] for r in rewards) / float(len(rewards))

# Run multiple episodes to estimate its performance.
print("My Agent vs Random Agent:", mean_reward(evaluate("connectx", [my_agent, "random"], num_episodes=100, debug=True)))
print("My Agent vs Negamax Agent:", mean_reward(evaluate("connectx", [my_agent, "negamax"], num_episodes=100)))

My Agent vs Random Agent: 1.0
My Agent vs Negamax Agent: 0.61
